Add endnote XML reader.
New input format: `endnotexml`. New reader module: Text.Pandoc.Readers.EndNote, exporting `readEndNoteXML` and `readEndNoteXMLReferences`. [API change] This reader is still a bit rudimentary, but it should be good enough to be helpful.
This commit is contained in:
parent
e07c0e74ce
commit
34897031f4
5 changed files with 209 additions and 1 deletions
|
@ -235,6 +235,7 @@ header when requesting a document from a URL:
|
|||
- `docbook` ([DocBook])
|
||||
- `docx` ([Word docx])
|
||||
- `dokuwiki` ([DokuWiki markup])
|
||||
- `endnotexml` ([EndNote XML bibliography])
|
||||
- `epub` ([EPUB])
|
||||
- `fb2` ([FictionBook2] e-book)
|
||||
- `gfm` ([GitHub-Flavored Markdown]),
|
||||
|
@ -504,6 +505,7 @@ header when requesting a document from a URL:
|
|||
[BibTeX]: https://ctan.org/pkg/bibtex
|
||||
[BibLaTeX]: https://ctan.org/pkg/biblatex
|
||||
[Markua]: https://leanpub.com/markua/read
|
||||
[EndNote XML bibliography]: https://support.clarivate.com/Endnote/s/article/EndNote-XML-Document-Type-Definition
|
||||
|
||||
## Reader options {.options}
|
||||
|
||||
|
|
|
@ -545,6 +545,7 @@ library
|
|||
Text.Pandoc.Readers.CommonMark,
|
||||
Text.Pandoc.Readers.Creole,
|
||||
Text.Pandoc.Readers.BibTeX,
|
||||
Text.Pandoc.Readers.EndNote,
|
||||
Text.Pandoc.Readers.CslJson,
|
||||
Text.Pandoc.Readers.MediaWiki,
|
||||
Text.Pandoc.Readers.Vimwiki,
|
||||
|
|
|
@ -19,6 +19,7 @@ module Text.Pandoc.Citeproc.BibTeX
|
|||
( Variant(..)
|
||||
, readBibtexString
|
||||
, writeBibtexString
|
||||
, toName
|
||||
)
|
||||
where
|
||||
|
||||
|
@ -1173,7 +1174,7 @@ emptyName =
|
|||
, nameStaticOrdering = False
|
||||
}
|
||||
|
||||
toName :: Options -> [Inline] -> Bib Name
|
||||
toName :: MonadPlus m => Options -> [Inline] -> m Name
|
||||
toName _ [Str "others"] =
|
||||
return emptyName{ nameLiteral = Just "others" }
|
||||
toName _ [Span ("",[],[]) ils] = -- corporate author
|
||||
|
|
|
@ -55,6 +55,7 @@ module Text.Pandoc.Readers
|
|||
, readCslJson
|
||||
, readBibTeX
|
||||
, readBibLaTeX
|
||||
, readEndNoteXML
|
||||
, readRTF
|
||||
-- * Miscellaneous
|
||||
, getReader
|
||||
|
@ -103,6 +104,7 @@ import Text.Pandoc.Readers.Man
|
|||
import Text.Pandoc.Readers.CSV
|
||||
import Text.Pandoc.Readers.CslJson
|
||||
import Text.Pandoc.Readers.BibTeX
|
||||
import Text.Pandoc.Readers.EndNote
|
||||
import Text.Pandoc.Readers.RTF
|
||||
import qualified Text.Pandoc.UTF8 as UTF8
|
||||
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
|
||||
|
@ -151,6 +153,7 @@ readers = [("native" , TextReader readNative)
|
|||
,("csljson" , TextReader readCslJson)
|
||||
,("bibtex" , TextReader readBibTeX)
|
||||
,("biblatex" , TextReader readBibLaTeX)
|
||||
,("endnotexml" , TextReader readEndNoteXML)
|
||||
,("rtf" , TextReader readRTF)
|
||||
]
|
||||
|
||||
|
|
201
src/Text/Pandoc/Readers/EndNote.hs
Normal file
201
src/Text/Pandoc/Readers/EndNote.hs
Normal file
|
@ -0,0 +1,201 @@
|
|||
{-# LANGUAGE TupleSections #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{- |
|
||||
Module : Text.Pandoc.Readers.EndNote
|
||||
Copyright : Copyright (C) 2022 John MacFarlane
|
||||
License : GNU GPL, version 2 or above
|
||||
|
||||
Maintainer : John MacFarlane <jgm@berkeley.edu>
|
||||
Stability : alpha
|
||||
Portability : portable
|
||||
|
||||
Parses EndNote XML bibliographies into a Pandoc document
|
||||
with empty body and `references` and `nocite` fields
|
||||
in the metadata. A wildcard `nocite` is used so that
|
||||
if the document is rendered in another format, the
|
||||
entire bibliography will be printed.
|
||||
-}
|
||||
module Text.Pandoc.Readers.EndNote
|
||||
( readEndNoteXML
|
||||
, readEndNoteXMLReferences
|
||||
)
|
||||
where
|
||||
|
||||
import Text.Pandoc.Options
|
||||
import Text.Pandoc.Definition
|
||||
import Citeproc (Reference(..), ItemId(..), Val(..), Date(..), DateParts(..))
|
||||
import Text.Pandoc.Builder as B
|
||||
import Text.Pandoc.Error (PandocError(..))
|
||||
import Text.Pandoc.Class (PandocMonad)
|
||||
import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue)
|
||||
import Text.Pandoc.Sources (Sources(..), ToSources(..), sourcesToText)
|
||||
import Text.Pandoc.Citeproc.BibTeX (toName)
|
||||
import Control.Applicative ((<|>))
|
||||
import Control.Monad.Except (throwError)
|
||||
import Control.Monad (mzero)
|
||||
import Text.Pandoc.XML.Light
|
||||
( filterElementName,
|
||||
strContent,
|
||||
QName(qName),
|
||||
Element(..),
|
||||
Content(..),
|
||||
CData(..),
|
||||
filterElementsName,
|
||||
filterChildrenName,
|
||||
findAttrBy,
|
||||
parseXMLElement )
|
||||
import qualified Data.Text.Lazy as TL
|
||||
import qualified Data.Text as T
|
||||
import Data.Text (Text)
|
||||
import qualified Data.Map as M
|
||||
import Safe (readMay)
|
||||
|
||||
-- | Read EndNote XML from an input string and return a Pandoc document.
-- The document will have only metadata, with an empty body.
-- The metadata will contain a `references` field with the
-- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
--
-- The reader options are currently ignored.  XML parse failures are
-- raised by 'readEndNoteXMLReferences' as a 'PandocXMLError'.
readEndNoteXML :: (PandocMonad m, ToSources a)
               => ReaderOptions -> a -> m Pandoc
readEndNoteXML _opts inp = do
  let sources = toSources inp
  refs <- readEndNoteXMLReferences sources
  -- The body is empty, so nothing cites the entries; the wildcard
  -- @nocite@ is what lets the whole bibliography be printed when the
  -- document is rendered in another format.
  return $
    setMeta "nocite" (B.cite [wildcardCitation] (B.str "[@*]")) $
    setMeta "references" (map referenceToMetaValue refs) $
    B.doc mempty
  where
    -- A citation whose id is the wildcard @*@, i.e. "every reference".
    wildcardCitation =
      Citation { citationId = "*"
               , citationPrefix = []
               , citationSuffix = []
               , citationMode = NormalCitation
               , citationNoteNum = 0
               , citationHash = 0 }
|
||||
|
||||
-- | Parse EndNote XML and return one citeproc 'Reference' for every
-- @record@ element found anywhere in the document.  If the input is
-- not well-formed XML, a 'PandocXMLError' is thrown.
readEndNoteXMLReferences :: PandocMonad m
                         => Sources -> m [Reference Inlines]
readEndNoteXMLReferences sources =
  case parseXMLElement (TL.fromStrict (sourcesToText sources)) of
    Left err   -> throwError (PandocXMLError "" err)
    Right root -> return (recordToReference <$> filterElementsName isRecord root)
  where
    -- Matches on the local element name only.
    isRecord q = qName q == "record"
|
||||
|
||||
|
||||
-- | Convert a single EndNote @record@ element into a citeproc
-- 'Reference'.  The id is taken from the first @key@ element (falling
-- back to @rec-number@), the type from the @name@ attribute of
-- @ref-type@, and the variables from the record's child elements.
recordToReference :: Element -> Reference Inlines
recordToReference e =
  Reference
    { referenceId = ItemId refid
    , referenceType = reftype
    , referenceDisambiguation = Nothing
    , referenceVariables = refvars
    }
  where
    -- Character data of an element, descending into nested elements
    -- (such as style wrappers); entity references contribute nothing.
    getText el = contentText (Elem el)
    contentText (Elem el) = foldMap contentText (elContent el)
    contentText (Text cd) = cdData cd
    contentText (CRef _) = mempty

    -- Predicate matching on the local part of a qualified name.
    named tag q = qName q == tag

    refid =
      case filterElementName (named "key") e
             <|> filterElementName (named "rec-number") e of
        Just el -> T.strip (strContent el)
        Nothing -> mempty

    reftype =
      case filterElementName (named "ref-type") e
             >>= findAttrBy (named "name") of
        Just t  -> toCslReferenceType t
        Nothing -> "document"

    authors =
      [ author
      | contribs   <- filterChildrenName (named "contributors") e
      , authorList <- filterChildrenName (named "authors") contribs
      , el         <- filterChildrenName (named "author") authorList
      , author     <- toName [] (B.toList (B.text (T.strip (getText el))))
      ]

    -- One (variable, value) pair per child of @parent@ named @tag@,
    -- holding that child's stripped text.
    textField key tag parent =
      [ (key, FancyVal (B.text (T.strip (getText el))))
      | el <- filterChildrenName (named tag) parent
      ]

    titles =
      [ entry
      | titlesEl   <- filterChildrenName (named "titles") e
      , (key, tag) <- [ ("title", "title")
                      , ("container-title", "secondary-title") ]
      , entry      <- textField key tag titlesEl
      ]

    dates =
      [ ("issued", DateVal (issuedDate datesEl))
      | datesEl <- filterChildrenName (named "dates") e
      ]

    issuedDate datesEl =
      Date
        { dateParts = yearParts datesEl
        , dateCirca = False
        , dateSeason = Nothing
        , dateLiteral = Nothing
        }

    -- Only the year is read; @year@ children that do not parse as a
    -- number are dropped.
    yearParts datesEl =
      filterChildrenName (named "year") datesEl >>= \yearEl ->
        maybe mzero (\y -> return (DateParts [y]))
              (readMay (T.unpack (T.strip (getText yearEl))))

    -- Later entries win on duplicate keys, so the order here matters.
    refvars =
      M.fromList $ concat
        [ [ ("author", NamesVal authors) | not (null authors) ]
        , titles
        , textField "pages" "pages" e
        , textField "volume" "volume" e
        , textField "number" "number" e
        , textField "isbn" "isbn" e
        , dates
        , textField "publisher" "publisher" e
        , textField "original-publisher" "orig-pub" e
        , textField "publisher-place" "pub-location" e
        , textField "abstract" "abstract" e
        ]
|
||||
|
||||
-- | Map an EndNote reference type name (the @name@ attribute of
-- @ref-type@) to a CSL reference type.  Matching is exact and
-- case-sensitive; any name not listed below maps to @document@
-- (this includes "Catalog", "Equation", "Government Document",
-- "Grant" and "Podcast").
toCslReferenceType :: Text -> Text
toCslReferenceType t
  | isOneOf ["Aggregated Database", "Dataset", "Online Database"] = "dataset"
  | isOneOf ["Ancient Text", "Classical Work"] = "classic"
  | isOneOf ["Artwork", "Audiovisual Material", "Chart or Table", "Figure"]
      = "graphic"
  | isOneOf ["Bill", "Statute"] = "legislation"
  | isOneOf ["Blog"] = "post-weblog"
  | isOneOf [ "Book", "Dictionary", "Edited Book", "Electronic Book"
            , "Encyclopedia" ] = "book"
  | isOneOf ["Book Section", "Electronic Book Section"] = "chapter"
  | isOneOf ["Case"] = "legal_case"
  | isOneOf ["Computer program"] = "software"
  | isOneOf ["Conference Paper", "Electronic Article"] = "article"
  | isOneOf ["Conference Proceedings", "Serial"] = "periodical"
  | isOneOf ["Film or Broadcast"] = "motion_picture"
  | isOneOf ["Hearing"] = "hearing"
  | isOneOf ["Interview"] = "interview"
  | isOneOf ["Journal Article"] = "article-journal"
  | isOneOf ["Legal Rule or Regulation"] = "regulation"
  | isOneOf ["Magazine Article"] = "article-magazine"
  | isOneOf ["Manuscript"] = "manuscript"
  | isOneOf ["Map"] = "map"
  | isOneOf ["Music"] = "musical_score"
  | isOneOf ["Newspaper Article"] = "article-newspaper"
  | isOneOf ["Online Multimedia", "Web Page"] = "webpage"
  | isOneOf ["Pamphlet"] = "pamphlet"
  | isOneOf ["Patent"] = "patent"
  | isOneOf ["Personal Communication"] = "personal_communication"
  | isOneOf ["Press Release", "Report"] = "report"
  | isOneOf ["Standard"] = "standard"
  | isOneOf ["Thesis"] = "thesis"
  | isOneOf ["Unpublished Work"] = "unpublished"
  | otherwise = "document"
  where
    -- True when the input is exactly one of the given EndNote names.
    isOneOf names = t `elem` names
|
Loading…
Add table
Reference in a new issue