Add RIS bibliography format reader.
New module, Text.Pandoc.Readers.RIS, exporting readRIS. New input format `ris`. Closes #7894.
This commit is contained in:
parent
2fca57522b
commit
3da5440858
6 changed files with 365 additions and 0 deletions
|
@ -260,6 +260,7 @@ header when requesting a document from a URL:
|
||||||
- `odt` ([ODT])
|
- `odt` ([ODT])
|
||||||
- `opml` ([OPML])
|
- `opml` ([OPML])
|
||||||
- `org` ([Emacs Org mode])
|
- `org` ([Emacs Org mode])
|
||||||
|
- `ris` ([RIS] bibliography)
|
||||||
- `rtf` ([Rich Text Format])
|
- `rtf` ([Rich Text Format])
|
||||||
- `rst` ([reStructuredText])
|
- `rst` ([reStructuredText])
|
||||||
- `t2t` ([txt2tags])
|
- `t2t` ([txt2tags])
|
||||||
|
@ -487,6 +488,7 @@ header when requesting a document from a URL:
|
||||||
[roff ms]: https://man.cx/groff_ms(7)
|
[roff ms]: https://man.cx/groff_ms(7)
|
||||||
[Haskell]: https://www.haskell.org
|
[Haskell]: https://www.haskell.org
|
||||||
[GNU Texinfo]: https://www.gnu.org/software/texinfo/
|
[GNU Texinfo]: https://www.gnu.org/software/texinfo/
|
||||||
|
[RIS]: https://en.wikipedia.org/wiki/RIS_(file_format)
|
||||||
[Emacs Org mode]: https://orgmode.org
|
[Emacs Org mode]: https://orgmode.org
|
||||||
[AsciiDoc]: https://www.methods.co.nz/asciidoc/
|
[AsciiDoc]: https://www.methods.co.nz/asciidoc/
|
||||||
[AsciiDoctor]: https://asciidoctor.org/
|
[AsciiDoctor]: https://asciidoctor.org/
|
||||||
|
|
|
@ -547,6 +547,7 @@ library
|
||||||
Text.Pandoc.Readers.Creole,
|
Text.Pandoc.Readers.Creole,
|
||||||
Text.Pandoc.Readers.BibTeX,
|
Text.Pandoc.Readers.BibTeX,
|
||||||
Text.Pandoc.Readers.EndNote,
|
Text.Pandoc.Readers.EndNote,
|
||||||
|
Text.Pandoc.Readers.RIS,
|
||||||
Text.Pandoc.Readers.CslJson,
|
Text.Pandoc.Readers.CslJson,
|
||||||
Text.Pandoc.Readers.MediaWiki,
|
Text.Pandoc.Readers.MediaWiki,
|
||||||
Text.Pandoc.Readers.Vimwiki,
|
Text.Pandoc.Readers.Vimwiki,
|
||||||
|
|
|
@ -69,6 +69,7 @@ formatFromFilePath x =
|
||||||
".org" -> Just "org"
|
".org" -> Just "org"
|
||||||
".pdf" -> Just "pdf" -- so we get an "unknown reader" error
|
".pdf" -> Just "pdf" -- so we get an "unknown reader" error
|
||||||
".pptx" -> Just "pptx"
|
".pptx" -> Just "pptx"
|
||||||
|
".ris" -> Just "ris"
|
||||||
".roff" -> Just "ms"
|
".roff" -> Just "ms"
|
||||||
".rst" -> Just "rst"
|
".rst" -> Just "rst"
|
||||||
".rtf" -> Just "rtf"
|
".rtf" -> Just "rtf"
|
||||||
|
|
|
@ -56,6 +56,7 @@ module Text.Pandoc.Readers
|
||||||
, readBibTeX
|
, readBibTeX
|
||||||
, readBibLaTeX
|
, readBibLaTeX
|
||||||
, readEndNoteXML
|
, readEndNoteXML
|
||||||
|
, readRIS
|
||||||
, readRTF
|
, readRTF
|
||||||
-- * Miscellaneous
|
-- * Miscellaneous
|
||||||
, getReader
|
, getReader
|
||||||
|
@ -105,6 +106,7 @@ import Text.Pandoc.Readers.CSV
|
||||||
import Text.Pandoc.Readers.CslJson
|
import Text.Pandoc.Readers.CslJson
|
||||||
import Text.Pandoc.Readers.BibTeX
|
import Text.Pandoc.Readers.BibTeX
|
||||||
import Text.Pandoc.Readers.EndNote
|
import Text.Pandoc.Readers.EndNote
|
||||||
|
import Text.Pandoc.Readers.RIS
|
||||||
import Text.Pandoc.Readers.RTF
|
import Text.Pandoc.Readers.RTF
|
||||||
import qualified Text.Pandoc.UTF8 as UTF8
|
import qualified Text.Pandoc.UTF8 as UTF8
|
||||||
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
|
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
|
||||||
|
@ -154,6 +156,7 @@ readers = [("native" , TextReader readNative)
|
||||||
,("bibtex" , TextReader readBibTeX)
|
,("bibtex" , TextReader readBibTeX)
|
||||||
,("biblatex" , TextReader readBibLaTeX)
|
,("biblatex" , TextReader readBibLaTeX)
|
||||||
,("endnotexml" , TextReader readEndNoteXML)
|
,("endnotexml" , TextReader readEndNoteXML)
|
||||||
|
,("ris" , TextReader readRIS)
|
||||||
,("rtf" , TextReader readRTF)
|
,("rtf" , TextReader readRTF)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
250
src/Text/Pandoc/Readers/RIS.hs
Normal file
250
src/Text/Pandoc/Readers/RIS.hs
Normal file
|
@ -0,0 +1,250 @@
|
||||||
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
{- |
|
||||||
|
Module : Text.Pandoc.Readers.RIS
|
||||||
|
Copyright : Copyright (C) 2022 John MacFarlane
|
||||||
|
License : GNU GPL, version 2 or above
|
||||||
|
|
||||||
|
Maintainer : John MacFarlane <jgm@berkeley.edu>
|
||||||
|
Stability : alpha
|
||||||
|
Portability : portable
|
||||||
|
|
||||||
|
Parses RIS bibliographies into a Pandoc document
|
||||||
|
with empty body and `references` and `nocite` fields
|
||||||
|
in the metadata. A wildcard `nocite` is used so that
|
||||||
|
if the document is rendered in another format, the
|
||||||
|
entire bibliography will be printed.
|
||||||
|
-}
|
||||||
|
module Text.Pandoc.Readers.RIS
|
||||||
|
( readRIS
|
||||||
|
)
|
||||||
|
where
|
||||||
|
|
||||||
|
import Text.Pandoc.Options
|
||||||
|
import Text.Pandoc.Definition
|
||||||
|
import Text.Pandoc.Parsing
|
||||||
|
import Data.Char (isAsciiUpper, isDigit, isSpace, ord, chr)
|
||||||
|
import Data.List (foldl')
|
||||||
|
import Citeproc (Reference(..), ItemId(..), Val(..), Date(..), DateParts(..),
|
||||||
|
toVariable)
|
||||||
|
import Text.Pandoc.Builder as B
|
||||||
|
import Text.Pandoc.Class (PandocMonad)
|
||||||
|
import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue)
|
||||||
|
import Text.Pandoc.Citeproc.BibTeX (toName)
|
||||||
|
import Control.Monad.Except (throwError)
|
||||||
|
import qualified Data.Text as T
|
||||||
|
import Data.Text (Text)
|
||||||
|
import Data.Maybe (fromMaybe)
|
||||||
|
import qualified Data.Map as M
|
||||||
|
import Safe (readMay)
|
||||||
|
|
||||||
|
-- | Read RIS from an input string and return a Pandoc document.
|
||||||
|
-- The document will have only metadata, with an empty body.
|
||||||
|
-- The metadata will contain a `references` field with the
|
||||||
|
-- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
|
||||||
|
readRIS :: (PandocMonad m, ToSources a)
|
||||||
|
=> ReaderOptions -> a -> m Pandoc
|
||||||
|
readRIS _opts inp = do
|
||||||
|
parsed <- readWithM risReferences () inp
|
||||||
|
case parsed of
|
||||||
|
Right refs -> do
|
||||||
|
refs' <- mapM (traverse (return . text)) refs
|
||||||
|
return $
|
||||||
|
setMeta "nocite" (cite [Citation {citationId = "*"
|
||||||
|
, citationPrefix = []
|
||||||
|
, citationSuffix = []
|
||||||
|
, citationMode = NormalCitation
|
||||||
|
, citationNoteNum = 0
|
||||||
|
, citationHash = 0}] (str "[@*]")) $
|
||||||
|
setMeta "references" (map referenceToMetaValue refs') $
|
||||||
|
B.doc mempty
|
||||||
|
Left e -> throwError e
|
||||||
|
|
||||||
|
type RISParser m = ParserT Sources () m
|
||||||
|
|
||||||
|
risLine :: PandocMonad m => RISParser m (Text, Text)
|
||||||
|
risLine = do
|
||||||
|
key <- T.pack <$> count 2 (satisfy (\c -> isAsciiUpper c || isDigit c))
|
||||||
|
_ <- many1 spaceChar
|
||||||
|
char '-'
|
||||||
|
_ <- many1 spaceChar
|
||||||
|
val <- anyLine
|
||||||
|
return (key, T.strip val)
|
||||||
|
|
||||||
|
risSeparator :: PandocMonad m => RISParser m ()
|
||||||
|
risSeparator = do
|
||||||
|
try $ string "ER"
|
||||||
|
_ <- many1 spaceChar
|
||||||
|
char '-'
|
||||||
|
_ <- anyLine
|
||||||
|
return ()
|
||||||
|
|
||||||
|
risRecord :: PandocMonad m => RISParser m [(Text, Text)]
|
||||||
|
risRecord = manyTill risLine risSeparator
|
||||||
|
|
||||||
|
risRecordToReference :: [(Text, Text)] -> Reference Text
|
||||||
|
risRecordToReference keys = addId $ foldr go defref keys
|
||||||
|
where
|
||||||
|
go (key, val) =
|
||||||
|
case key of
|
||||||
|
"TY" -> \ref -> ref{ referenceType =
|
||||||
|
fromMaybe "misc" (M.lookup val risTypes) }
|
||||||
|
"VL" -> addVar "volume" val
|
||||||
|
"KW" -> \ref ->
|
||||||
|
ref{ referenceVariables =
|
||||||
|
M.alter (\x -> case x of
|
||||||
|
Nothing -> Just $ TextVal val
|
||||||
|
Just (TextVal kws)
|
||||||
|
-> Just (TextVal (kws <> ", " <> val))
|
||||||
|
_ -> x)
|
||||||
|
"keyword"
|
||||||
|
(referenceVariables ref) }
|
||||||
|
"PB" -> addVar "publisher" val
|
||||||
|
"PP" -> addVar "publisher-place" val
|
||||||
|
"SP" -> \ref ->
|
||||||
|
case M.lookup "page" (referenceVariables ref) of
|
||||||
|
Nothing -> addVar "page" val ref
|
||||||
|
Just (FancyVal eg) -> addVar "page" (val <> eg) ref
|
||||||
|
_ -> ref
|
||||||
|
"EP" -> \ref ->
|
||||||
|
case M.lookup "page" (referenceVariables ref) of
|
||||||
|
Nothing -> addVar "page" ("-" <> val) ref
|
||||||
|
Just (FancyVal eg) -> addVar "page" (val <> "-" <> eg) ref
|
||||||
|
_ -> ref
|
||||||
|
"AU" -> addName "author" val
|
||||||
|
"A1" -> addName "author" val
|
||||||
|
"ED" -> addName "editor" val
|
||||||
|
"A2" -> addName "editor" val
|
||||||
|
"TI" -> addVar "title" val
|
||||||
|
"T1" -> addVar "title" val
|
||||||
|
"CT" -> addVar "title" val
|
||||||
|
"BT" -> \ref ->
|
||||||
|
if referenceType ref == "book"
|
||||||
|
then addVar "title" val ref
|
||||||
|
else addVar "container-title" val ref
|
||||||
|
"JO" -> addVar "container-title" val
|
||||||
|
"JF" -> addVar "container-title" val
|
||||||
|
"T2" -> addVar "container-title" val
|
||||||
|
"ET" -> addVar "edition" val
|
||||||
|
"NV" -> addVar "number-of-volumes" val
|
||||||
|
"AB" -> addVar "abstract" val
|
||||||
|
"PY" -> addYear "issued" val
|
||||||
|
"Y1" -> addYear "issued" val
|
||||||
|
"IS" -> addVar "issue" val
|
||||||
|
"SN" -> addVar "ISSN" val
|
||||||
|
"LA" -> addVar "language" val
|
||||||
|
"UR" -> addVar "url" val
|
||||||
|
"LK" -> addVar "url" val
|
||||||
|
_ -> id -- TODO
|
||||||
|
addVar k v r = r{ referenceVariables =
|
||||||
|
M.insert (toVariable k) (FancyVal v)
|
||||||
|
(referenceVariables r) }
|
||||||
|
addName k v r =
|
||||||
|
let new = toName [] . B.toList . B.text $ v
|
||||||
|
f Nothing = Just (NamesVal new)
|
||||||
|
f (Just (NamesVal ns)) = Just (NamesVal (ns ++ new))
|
||||||
|
f (Just x) = Just x
|
||||||
|
in r{ referenceVariables =
|
||||||
|
M.alter f k (referenceVariables r) }
|
||||||
|
addYear k v r =
|
||||||
|
let d = DateVal $
|
||||||
|
case readMay (T.unpack v) of
|
||||||
|
Nothing ->
|
||||||
|
Date { dateParts = []
|
||||||
|
, dateCirca = False
|
||||||
|
, dateSeason = Nothing
|
||||||
|
, dateLiteral = Just v }
|
||||||
|
Just y ->
|
||||||
|
Date { dateParts = [DateParts [y]]
|
||||||
|
, dateCirca = False
|
||||||
|
, dateSeason = Nothing
|
||||||
|
, dateLiteral = Nothing }
|
||||||
|
in r{ referenceVariables = M.insert k d (referenceVariables r) }
|
||||||
|
|
||||||
|
defref = Reference{
|
||||||
|
referenceId = mempty
|
||||||
|
, referenceType = mempty
|
||||||
|
, referenceDisambiguation = Nothing
|
||||||
|
, referenceVariables = mempty }
|
||||||
|
addId rec = rec{ referenceId = ItemId (authors <> pubdate) }
|
||||||
|
authors = T.intercalate "_" $
|
||||||
|
[T.takeWhile (\c -> c /= ',' && not (isSpace c)) n
|
||||||
|
| (k, n) <- keys, k == "AU" || k == "A1"]
|
||||||
|
pubdate = mconcat ["_" <> d | (k, d) <- keys, k == "PY" || k == "Y1"]
|
||||||
|
|
||||||
|
risReferences :: PandocMonad m => RISParser m [Reference Text]
|
||||||
|
risReferences = do
|
||||||
|
recs <- many risRecord
|
||||||
|
spaces
|
||||||
|
eof
|
||||||
|
return $ fixDuplicateIds $ map risRecordToReference recs
|
||||||
|
|
||||||
|
fixDuplicateIds :: [Reference Text] -> [Reference Text]
|
||||||
|
fixDuplicateIds = reverse . snd . foldl' go (mempty, [])
|
||||||
|
where
|
||||||
|
go (ids_seen, refs) ref =
|
||||||
|
case M.lookup (referenceId ref) ids_seen of
|
||||||
|
Nothing -> (M.insert (referenceId ref) (ord 'a') ids_seen, ref:refs)
|
||||||
|
Just n -> (M.insert (referenceId ref) (n+1) ids_seen,
|
||||||
|
ref{ referenceId =
|
||||||
|
ItemId . (<> T.singleton (chr n)) . unItemId $
|
||||||
|
referenceId ref }
|
||||||
|
: refs)
|
||||||
|
|
||||||
|
risTypes :: M.Map Text Text
|
||||||
|
risTypes = M.fromList
|
||||||
|
[ ("ABST", "article")
|
||||||
|
, ("ADVS", "motion-picture")
|
||||||
|
, ("AGGR", "dataset")
|
||||||
|
, ("ANCIENT", "book")
|
||||||
|
, ("ART", "graphic")
|
||||||
|
, ("BILL", "bill")
|
||||||
|
, ("BLOG", "post-weblog")
|
||||||
|
, ("BOOK", "book")
|
||||||
|
, ("CASE", "legal_case")
|
||||||
|
, ("CHAP", "chapter")
|
||||||
|
, ("CHART", "graphic")
|
||||||
|
, ("CLSWK", "book")
|
||||||
|
, ("COMP", "program")
|
||||||
|
, ("CONF", "paper-conference")
|
||||||
|
, ("CPAPER", "paper-conference")
|
||||||
|
, ("CTLG", "catalog")
|
||||||
|
, ("DATA", "dataset")
|
||||||
|
, ("DBASE", "dataset")
|
||||||
|
, ("DICT", "book")
|
||||||
|
, ("EBOOK", "book")
|
||||||
|
, ("ECHAP", "chapter")
|
||||||
|
, ("EDBOOK", "book")
|
||||||
|
, ("EJOUR", "article")
|
||||||
|
, ("WEB", "webpage")
|
||||||
|
, ("ENCYC", "entry-encyclopedia")
|
||||||
|
, ("EQUA", "figure")
|
||||||
|
, ("FIGURE", "figure")
|
||||||
|
, ("GEN", "entry")
|
||||||
|
, ("GOVDOC", "report")
|
||||||
|
, ("GRANT", "report")
|
||||||
|
, ("HEAR", "report")
|
||||||
|
, ("ICOMM", "personal_communication")
|
||||||
|
, ("INPR", "article-journal")
|
||||||
|
, ("JFULL", "article-journal")
|
||||||
|
, ("JOUR", "article-journal")
|
||||||
|
, ("LEGAL", "legal_case")
|
||||||
|
, ("MANSCPT", "manuscript")
|
||||||
|
, ("MAP", "map")
|
||||||
|
, ("MGZN", "article-magazine")
|
||||||
|
, ("MPCT", "motion-picture")
|
||||||
|
, ("MULTI", "webpage")
|
||||||
|
, ("MUSIC", "musical_score")
|
||||||
|
, ("NEWS", "article-newspaper")
|
||||||
|
, ("PAMP", "pamphlet")
|
||||||
|
, ("PAT", "patent")
|
||||||
|
, ("PCOMM", "personal_communication")
|
||||||
|
, ("RPRT", "report")
|
||||||
|
, ("SER", "article")
|
||||||
|
, ("SLIDE", "graphic")
|
||||||
|
, ("SOUND", "musical_score")
|
||||||
|
, ("STAND", "report")
|
||||||
|
, ("STAT", "legislation")
|
||||||
|
, ("THES", "thesis")
|
||||||
|
, ("UNBILL", "bill")
|
||||||
|
, ("UNPB", "unpublished")
|
||||||
|
, ("VIDEO", "graphic") ]
|
108
test/command/7894.md
Normal file
108
test/command/7894.md
Normal file
|
@ -0,0 +1,108 @@
|
||||||
|
```
|
||||||
|
% pandoc -f ris -t csljson
|
||||||
|
TY - BOOK
|
||||||
|
AU - Chang, C. C.
|
||||||
|
AU - Keisler, H. Jerome
|
||||||
|
PY - 1990
|
||||||
|
ET - 3
|
||||||
|
TI - Model Theory
|
||||||
|
PU - North-Holland Press
|
||||||
|
PP - Amsterdam
|
||||||
|
KW - model theory
|
||||||
|
KW - logic
|
||||||
|
ER -
|
||||||
|
TY - JOUR
|
||||||
|
AU - Shannon, Claude E.
|
||||||
|
PY - 1948
|
||||||
|
DA - July
|
||||||
|
TI - A Mathematical Theory of Communication
|
||||||
|
T2 - Bell System Technical Journal
|
||||||
|
SP - 379
|
||||||
|
EP - 423
|
||||||
|
VL - 27
|
||||||
|
ER -
|
||||||
|
TY - JOUR
|
||||||
|
T1 - On computable numbers, with an application to the Entscheidungsproblem
|
||||||
|
A1 - Turing, Alan Mathison
|
||||||
|
JO - Proc. of London Mathematical Society
|
||||||
|
VL - 47
|
||||||
|
IS - 1
|
||||||
|
KW - decidability
|
||||||
|
KW - computability
|
||||||
|
SP - 230
|
||||||
|
EP - 265
|
||||||
|
Y1 - 1937
|
||||||
|
ER -
|
||||||
|
^D
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"author": [
|
||||||
|
{
|
||||||
|
"family": "Keisler",
|
||||||
|
"given": "H. Jerome"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"family": "Chang",
|
||||||
|
"given": "C. C."
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"edition": "3",
|
||||||
|
"id": "Chang_Keisler_1990",
|
||||||
|
"issued": {
|
||||||
|
"date-parts": [
|
||||||
|
[
|
||||||
|
1990
|
||||||
|
]
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"keyword": "logic, model theory",
|
||||||
|
"publisher-place": "Amsterdam",
|
||||||
|
"title": "Model Theory",
|
||||||
|
"type": "book"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"author": [
|
||||||
|
{
|
||||||
|
"family": "Shannon",
|
||||||
|
"given": "Claude E."
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"container-title": "Bell System Technical Journal",
|
||||||
|
"id": "Shannon_1948",
|
||||||
|
"issued": {
|
||||||
|
"date-parts": [
|
||||||
|
[
|
||||||
|
1948
|
||||||
|
]
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"page": "379-423",
|
||||||
|
"title": "A Mathematical Theory of Communication",
|
||||||
|
"type": "article-journal",
|
||||||
|
"volume": "27"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"author": [
|
||||||
|
{
|
||||||
|
"family": "Turing",
|
||||||
|
"given": "Alan Mathison"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"container-title": "Proc. of London Mathematical Society",
|
||||||
|
"id": "Turing_1937",
|
||||||
|
"issue": "1",
|
||||||
|
"issued": {
|
||||||
|
"date-parts": [
|
||||||
|
[
|
||||||
|
1937
|
||||||
|
]
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"keyword": "computability, decidability",
|
||||||
|
"page": "230-265",
|
||||||
|
"title": "On computable numbers, with an application to the Entscheidungsproblem",
|
||||||
|
"type": "article-journal",
|
||||||
|
"volume": "47"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
```
|
Loading…
Reference in a new issue