Add RIS bibliography format reader.
New module, Text.Pandoc.Readers.RIS, exporting readRIS. New input format `ris`. Closes #7894.
This commit is contained in:
parent
2fca57522b
commit
3da5440858
6 changed files with 365 additions and 0 deletions
|
@ -260,6 +260,7 @@ header when requesting a document from a URL:
|
|||
- `odt` ([ODT])
|
||||
- `opml` ([OPML])
|
||||
- `org` ([Emacs Org mode])
|
||||
- `ris` ([RIS] bibliography)
|
||||
- `rtf` ([Rich Text Format])
|
||||
- `rst` ([reStructuredText])
|
||||
- `t2t` ([txt2tags])
|
||||
|
@ -487,6 +488,7 @@ header when requesting a document from a URL:
|
|||
[roff ms]: https://man.cx/groff_ms(7)
|
||||
[Haskell]: https://www.haskell.org
|
||||
[GNU Texinfo]: https://www.gnu.org/software/texinfo/
|
||||
[RIS]: https://en.wikipedia.org/wiki/RIS_(file_format)
|
||||
[Emacs Org mode]: https://orgmode.org
|
||||
[AsciiDoc]: https://www.methods.co.nz/asciidoc/
|
||||
[AsciiDoctor]: https://asciidoctor.org/
|
||||
|
|
|
@ -547,6 +547,7 @@ library
|
|||
Text.Pandoc.Readers.Creole,
|
||||
Text.Pandoc.Readers.BibTeX,
|
||||
Text.Pandoc.Readers.EndNote,
|
||||
Text.Pandoc.Readers.RIS,
|
||||
Text.Pandoc.Readers.CslJson,
|
||||
Text.Pandoc.Readers.MediaWiki,
|
||||
Text.Pandoc.Readers.Vimwiki,
|
||||
|
|
|
@ -69,6 +69,7 @@ formatFromFilePath x =
|
|||
".org" -> Just "org"
|
||||
".pdf" -> Just "pdf" -- so we get an "unknown reader" error
|
||||
".pptx" -> Just "pptx"
|
||||
".ris" -> Just "ris"
|
||||
".roff" -> Just "ms"
|
||||
".rst" -> Just "rst"
|
||||
".rtf" -> Just "rtf"
|
||||
|
|
|
@ -56,6 +56,7 @@ module Text.Pandoc.Readers
|
|||
, readBibTeX
|
||||
, readBibLaTeX
|
||||
, readEndNoteXML
|
||||
, readRIS
|
||||
, readRTF
|
||||
-- * Miscellaneous
|
||||
, getReader
|
||||
|
@ -105,6 +106,7 @@ import Text.Pandoc.Readers.CSV
|
|||
import Text.Pandoc.Readers.CslJson
|
||||
import Text.Pandoc.Readers.BibTeX
|
||||
import Text.Pandoc.Readers.EndNote
|
||||
import Text.Pandoc.Readers.RIS
|
||||
import Text.Pandoc.Readers.RTF
|
||||
import qualified Text.Pandoc.UTF8 as UTF8
|
||||
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
|
||||
|
@ -154,6 +156,7 @@ readers = [("native" , TextReader readNative)
|
|||
,("bibtex" , TextReader readBibTeX)
|
||||
,("biblatex" , TextReader readBibLaTeX)
|
||||
,("endnotexml" , TextReader readEndNoteXML)
|
||||
,("ris" , TextReader readRIS)
|
||||
,("rtf" , TextReader readRTF)
|
||||
]
|
||||
|
||||
|
|
250
src/Text/Pandoc/Readers/RIS.hs
Normal file
250
src/Text/Pandoc/Readers/RIS.hs
Normal file
|
@ -0,0 +1,250 @@
|
|||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{- |
|
||||
Module : Text.Pandoc.Readers.RIS
|
||||
Copyright : Copyright (C) 2022 John MacFarlane
|
||||
License : GNU GPL, version 2 or above
|
||||
|
||||
Maintainer : John MacFarlane <jgm@berkeley.edu>
|
||||
Stability : alpha
|
||||
Portability : portable
|
||||
|
||||
Parses RIS bibliographies into a Pandoc document
|
||||
with empty body and `references` and `nocite` fields
|
||||
in the metadata. A wildcard `nocite` is used so that
|
||||
if the document is rendered in another format, the
|
||||
entire bibliography will be printed.
|
||||
-}
|
||||
module Text.Pandoc.Readers.RIS
|
||||
( readRIS
|
||||
)
|
||||
where
|
||||
|
||||
import Text.Pandoc.Options
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Parsing
|
||||
import Data.Char (isAsciiUpper, isDigit, isSpace, ord, chr)
|
||||
import Data.List (foldl')
|
||||
import Citeproc (Reference(..), ItemId(..), Val(..), Date(..), DateParts(..),
|
||||
toVariable)
|
||||
import Text.Pandoc.Builder as B
|
||||
import Text.Pandoc.Class (PandocMonad)
|
||||
import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue)
|
||||
import Text.Pandoc.Citeproc.BibTeX (toName)
|
||||
import Control.Monad.Except (throwError)
|
||||
import qualified Data.Text as T
|
||||
import Data.Text (Text)
|
||||
import Data.Maybe (fromMaybe)
|
||||
import qualified Data.Map as M
|
||||
import Safe (readMay)
|
||||
|
||||
-- | Read RIS from an input string and return a Pandoc document.
|
||||
-- The document will have only metadata, with an empty body.
|
||||
-- The metadata will contain a `references` field with the
|
||||
-- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
|
||||
readRIS :: (PandocMonad m, ToSources a)
|
||||
=> ReaderOptions -> a -> m Pandoc
|
||||
readRIS _opts inp = do
|
||||
parsed <- readWithM risReferences () inp
|
||||
case parsed of
|
||||
Right refs -> do
|
||||
refs' <- mapM (traverse (return . text)) refs
|
||||
return $
|
||||
setMeta "nocite" (cite [Citation {citationId = "*"
|
||||
, citationPrefix = []
|
||||
, citationSuffix = []
|
||||
, citationMode = NormalCitation
|
||||
, citationNoteNum = 0
|
||||
, citationHash = 0}] (str "[@*]")) $
|
||||
setMeta "references" (map referenceToMetaValue refs') $
|
||||
B.doc mempty
|
||||
Left e -> throwError e
|
||||
|
||||
type RISParser m = ParserT Sources () m
|
||||
|
||||
risLine :: PandocMonad m => RISParser m (Text, Text)
|
||||
risLine = do
|
||||
key <- T.pack <$> count 2 (satisfy (\c -> isAsciiUpper c || isDigit c))
|
||||
_ <- many1 spaceChar
|
||||
char '-'
|
||||
_ <- many1 spaceChar
|
||||
val <- anyLine
|
||||
return (key, T.strip val)
|
||||
|
||||
risSeparator :: PandocMonad m => RISParser m ()
|
||||
risSeparator = do
|
||||
try $ string "ER"
|
||||
_ <- many1 spaceChar
|
||||
char '-'
|
||||
_ <- anyLine
|
||||
return ()
|
||||
|
||||
risRecord :: PandocMonad m => RISParser m [(Text, Text)]
|
||||
risRecord = manyTill risLine risSeparator
|
||||
|
||||
risRecordToReference :: [(Text, Text)] -> Reference Text
|
||||
risRecordToReference keys = addId $ foldr go defref keys
|
||||
where
|
||||
go (key, val) =
|
||||
case key of
|
||||
"TY" -> \ref -> ref{ referenceType =
|
||||
fromMaybe "misc" (M.lookup val risTypes) }
|
||||
"VL" -> addVar "volume" val
|
||||
"KW" -> \ref ->
|
||||
ref{ referenceVariables =
|
||||
M.alter (\x -> case x of
|
||||
Nothing -> Just $ TextVal val
|
||||
Just (TextVal kws)
|
||||
-> Just (TextVal (kws <> ", " <> val))
|
||||
_ -> x)
|
||||
"keyword"
|
||||
(referenceVariables ref) }
|
||||
"PB" -> addVar "publisher" val
|
||||
"PP" -> addVar "publisher-place" val
|
||||
"SP" -> \ref ->
|
||||
case M.lookup "page" (referenceVariables ref) of
|
||||
Nothing -> addVar "page" val ref
|
||||
Just (FancyVal eg) -> addVar "page" (val <> eg) ref
|
||||
_ -> ref
|
||||
"EP" -> \ref ->
|
||||
case M.lookup "page" (referenceVariables ref) of
|
||||
Nothing -> addVar "page" ("-" <> val) ref
|
||||
Just (FancyVal eg) -> addVar "page" (val <> "-" <> eg) ref
|
||||
_ -> ref
|
||||
"AU" -> addName "author" val
|
||||
"A1" -> addName "author" val
|
||||
"ED" -> addName "editor" val
|
||||
"A2" -> addName "editor" val
|
||||
"TI" -> addVar "title" val
|
||||
"T1" -> addVar "title" val
|
||||
"CT" -> addVar "title" val
|
||||
"BT" -> \ref ->
|
||||
if referenceType ref == "book"
|
||||
then addVar "title" val ref
|
||||
else addVar "container-title" val ref
|
||||
"JO" -> addVar "container-title" val
|
||||
"JF" -> addVar "container-title" val
|
||||
"T2" -> addVar "container-title" val
|
||||
"ET" -> addVar "edition" val
|
||||
"NV" -> addVar "number-of-volumes" val
|
||||
"AB" -> addVar "abstract" val
|
||||
"PY" -> addYear "issued" val
|
||||
"Y1" -> addYear "issued" val
|
||||
"IS" -> addVar "issue" val
|
||||
"SN" -> addVar "ISSN" val
|
||||
"LA" -> addVar "language" val
|
||||
"UR" -> addVar "url" val
|
||||
"LK" -> addVar "url" val
|
||||
_ -> id -- TODO
|
||||
addVar k v r = r{ referenceVariables =
|
||||
M.insert (toVariable k) (FancyVal v)
|
||||
(referenceVariables r) }
|
||||
addName k v r =
|
||||
let new = toName [] . B.toList . B.text $ v
|
||||
f Nothing = Just (NamesVal new)
|
||||
f (Just (NamesVal ns)) = Just (NamesVal (ns ++ new))
|
||||
f (Just x) = Just x
|
||||
in r{ referenceVariables =
|
||||
M.alter f k (referenceVariables r) }
|
||||
addYear k v r =
|
||||
let d = DateVal $
|
||||
case readMay (T.unpack v) of
|
||||
Nothing ->
|
||||
Date { dateParts = []
|
||||
, dateCirca = False
|
||||
, dateSeason = Nothing
|
||||
, dateLiteral = Just v }
|
||||
Just y ->
|
||||
Date { dateParts = [DateParts [y]]
|
||||
, dateCirca = False
|
||||
, dateSeason = Nothing
|
||||
, dateLiteral = Nothing }
|
||||
in r{ referenceVariables = M.insert k d (referenceVariables r) }
|
||||
|
||||
defref = Reference{
|
||||
referenceId = mempty
|
||||
, referenceType = mempty
|
||||
, referenceDisambiguation = Nothing
|
||||
, referenceVariables = mempty }
|
||||
addId rec = rec{ referenceId = ItemId (authors <> pubdate) }
|
||||
authors = T.intercalate "_" $
|
||||
[T.takeWhile (\c -> c /= ',' && not (isSpace c)) n
|
||||
| (k, n) <- keys, k == "AU" || k == "A1"]
|
||||
pubdate = mconcat ["_" <> d | (k, d) <- keys, k == "PY" || k == "Y1"]
|
||||
|
||||
risReferences :: PandocMonad m => RISParser m [Reference Text]
|
||||
risReferences = do
|
||||
recs <- many risRecord
|
||||
spaces
|
||||
eof
|
||||
return $ fixDuplicateIds $ map risRecordToReference recs
|
||||
|
||||
fixDuplicateIds :: [Reference Text] -> [Reference Text]
|
||||
fixDuplicateIds = reverse . snd . foldl' go (mempty, [])
|
||||
where
|
||||
go (ids_seen, refs) ref =
|
||||
case M.lookup (referenceId ref) ids_seen of
|
||||
Nothing -> (M.insert (referenceId ref) (ord 'a') ids_seen, ref:refs)
|
||||
Just n -> (M.insert (referenceId ref) (n+1) ids_seen,
|
||||
ref{ referenceId =
|
||||
ItemId . (<> T.singleton (chr n)) . unItemId $
|
||||
referenceId ref }
|
||||
: refs)
|
||||
|
||||
risTypes :: M.Map Text Text
|
||||
risTypes = M.fromList
|
||||
[ ("ABST", "article")
|
||||
, ("ADVS", "motion-picture")
|
||||
, ("AGGR", "dataset")
|
||||
, ("ANCIENT", "book")
|
||||
, ("ART", "graphic")
|
||||
, ("BILL", "bill")
|
||||
, ("BLOG", "post-weblog")
|
||||
, ("BOOK", "book")
|
||||
, ("CASE", "legal_case")
|
||||
, ("CHAP", "chapter")
|
||||
, ("CHART", "graphic")
|
||||
, ("CLSWK", "book")
|
||||
, ("COMP", "program")
|
||||
, ("CONF", "paper-conference")
|
||||
, ("CPAPER", "paper-conference")
|
||||
, ("CTLG", "catalog")
|
||||
, ("DATA", "dataset")
|
||||
, ("DBASE", "dataset")
|
||||
, ("DICT", "book")
|
||||
, ("EBOOK", "book")
|
||||
, ("ECHAP", "chapter")
|
||||
, ("EDBOOK", "book")
|
||||
, ("EJOUR", "article")
|
||||
, ("WEB", "webpage")
|
||||
, ("ENCYC", "entry-encyclopedia")
|
||||
, ("EQUA", "figure")
|
||||
, ("FIGURE", "figure")
|
||||
, ("GEN", "entry")
|
||||
, ("GOVDOC", "report")
|
||||
, ("GRANT", "report")
|
||||
, ("HEAR", "report")
|
||||
, ("ICOMM", "personal_communication")
|
||||
, ("INPR", "article-journal")
|
||||
, ("JFULL", "article-journal")
|
||||
, ("JOUR", "article-journal")
|
||||
, ("LEGAL", "legal_case")
|
||||
, ("MANSCPT", "manuscript")
|
||||
, ("MAP", "map")
|
||||
, ("MGZN", "article-magazine")
|
||||
, ("MPCT", "motion-picture")
|
||||
, ("MULTI", "webpage")
|
||||
, ("MUSIC", "musical_score")
|
||||
, ("NEWS", "article-newspaper")
|
||||
, ("PAMP", "pamphlet")
|
||||
, ("PAT", "patent")
|
||||
, ("PCOMM", "personal_communication")
|
||||
, ("RPRT", "report")
|
||||
, ("SER", "article")
|
||||
, ("SLIDE", "graphic")
|
||||
, ("SOUND", "musical_score")
|
||||
, ("STAND", "report")
|
||||
, ("STAT", "legislation")
|
||||
, ("THES", "thesis")
|
||||
, ("UNBILL", "bill")
|
||||
, ("UNPB", "unpublished")
|
||||
, ("VIDEO", "graphic") ]
|
108
test/command/7894.md
Normal file
108
test/command/7894.md
Normal file
|
@ -0,0 +1,108 @@
|
|||
```
|
||||
% pandoc -f ris -t csljson
|
||||
TY - BOOK
|
||||
AU - Chang, C. C.
|
||||
AU - Keisler, H. Jerome
|
||||
PY - 1990
|
||||
ET - 3
|
||||
TI - Model Theory
|
||||
PU - North-Holland Press
|
||||
PP - Amsterdam
|
||||
KW - model theory
|
||||
KW - logic
|
||||
ER -
|
||||
TY - JOUR
|
||||
AU - Shannon, Claude E.
|
||||
PY - 1948
|
||||
DA - July
|
||||
TI - A Mathematical Theory of Communication
|
||||
T2 - Bell System Technical Journal
|
||||
SP - 379
|
||||
EP - 423
|
||||
VL - 27
|
||||
ER -
|
||||
TY - JOUR
|
||||
T1 - On computable numbers, with an application to the Entscheidungsproblem
|
||||
A1 - Turing, Alan Mathison
|
||||
JO - Proc. of London Mathematical Society
|
||||
VL - 47
|
||||
IS - 1
|
||||
KW - decidability
|
||||
KW - computability
|
||||
SP - 230
|
||||
EP - 265
|
||||
Y1 - 1937
|
||||
ER -
|
||||
^D
|
||||
[
|
||||
{
|
||||
"author": [
|
||||
{
|
||||
"family": "Keisler",
|
||||
"given": "H. Jerome"
|
||||
},
|
||||
{
|
||||
"family": "Chang",
|
||||
"given": "C. C."
|
||||
}
|
||||
],
|
||||
"edition": "3",
|
||||
"id": "Chang_Keisler_1990",
|
||||
"issued": {
|
||||
"date-parts": [
|
||||
[
|
||||
1990
|
||||
]
|
||||
]
|
||||
},
|
||||
"keyword": "logic, model theory",
|
||||
"publisher-place": "Amsterdam",
|
||||
"title": "Model Theory",
|
||||
"type": "book"
|
||||
},
|
||||
{
|
||||
"author": [
|
||||
{
|
||||
"family": "Shannon",
|
||||
"given": "Claude E."
|
||||
}
|
||||
],
|
||||
"container-title": "Bell System Technical Journal",
|
||||
"id": "Shannon_1948",
|
||||
"issued": {
|
||||
"date-parts": [
|
||||
[
|
||||
1948
|
||||
]
|
||||
]
|
||||
},
|
||||
"page": "379-423",
|
||||
"title": "A Mathematical Theory of Communication",
|
||||
"type": "article-journal",
|
||||
"volume": "27"
|
||||
},
|
||||
{
|
||||
"author": [
|
||||
{
|
||||
"family": "Turing",
|
||||
"given": "Alan Mathison"
|
||||
}
|
||||
],
|
||||
"container-title": "Proc. of London Mathematical Society",
|
||||
"id": "Turing_1937",
|
||||
"issue": "1",
|
||||
"issued": {
|
||||
"date-parts": [
|
||||
[
|
||||
1937
|
||||
]
|
||||
]
|
||||
},
|
||||
"keyword": "computability, decidability",
|
||||
"page": "230-265",
|
||||
"title": "On computable numbers, with an application to the Entscheidungsproblem",
|
||||
"type": "article-journal",
|
||||
"volume": "47"
|
||||
}
|
||||
]
|
||||
```
|
Loading…
Reference in a new issue