pandoc/src/Text/Pandoc/Citeproc/BibTeX.hs
John MacFarlane e0984a43a9 Add built-in citation support using new citeproc library.
This deprecates the use of the external pandoc-citeproc
filter; citation processing is now built in to pandoc.

* Add dependency on citeproc library.
* Add Text.Pandoc.Citeproc module (and some associated unexported
  modules under Text.Pandoc.Citeproc).  Exports `processCitations`.
  [API change]
* Add data files needed for Text.Pandoc.Citeproc:  default.csl
  in the data directory, and a citeproc directory that is just
  used at compile-time.  Note that we've added file-embed as a mandatory
  rather than a conditional depedency, because of the biblatex
  localization files. We might eventually want to use readDataFile
  for this, but it would take some code reorganization.
* Text.Pandoc.Loging: Add `CiteprocWarning` to `LogMessage` and use it
  in `processCitations`. [API change]
* Add tests from the pandoc-citeproc package as command tests (including
  some tests pandoc-citeproc did not pass).
* Remove instructions for building pandoc-citeproc from CI and
  release binary build instructions.  We will no longer distribute
  pandoc-citeproc.
* Markdown reader: tweak abbreviation support.  Don't insert a
  nonbreaking space after a potential abbreviation if it comes right before
  a note or citation.  This messes up several things, including citeproc's
  moving of note citations.
* Add `csljson` as and input and output format. This allows pandoc
  to convert between `csljson` and other bibliography formats,
  and to generate formatted versions of CSL JSON bibliographies.
* Add module Text.Pandoc.Writers.CslJson, exporting `writeCslJson`. [API
  change]
* Add module Text.Pandoc.Readers.CslJson, exporting `readCslJson`. [API
  change]
* Added `bibtex`, `biblatex` as input formats.  This allows pandoc
  to convert between BibLaTeX and BibTeX and other bibliography formats,
  and to generated formatted versions of BibTeX/BibLaTeX bibliographies.
* Add module Text.Pandoc.Readers.BibTeX, exporting `readBibTeX` and
  `readBibLaTeX`. [API change]
* Make "standalone" implicit if output format is a bibliography format.
  This is needed because pandoc readers for bibliography formats put
  the bibliographic information in the `references` field of metadata;
  and unless standalone is specified, metadata gets ignored.
  (TODO: This needs improvement. We should trigger standalone for the
  reader when the input format is bibliographic, and for the writer
  when the output format is markdown.)
* Carry over `citationNoteNum` to `citationNoteNumber`.  This was just
  ignored in pandoc-citeproc.
* Text.Pandoc.Filter: Add `CiteprocFilter` constructor to Filter.
  [API change] This runs the processCitations transformation.
  We need to treat it like a filter so it can be placed
  in the sequence of filter runs (after some, before others).
  In FromYAML, this is parsed from `citeproc` or `{type: citeproc}`,
  so this special filter may be specified either way in a defaults file
  (or by `citeproc: true`, though this gives no control of positioning
  relative to other filters).  TODO: we need to add something to the
  manual section on defaults files for this.
* Add deprecation warning if `upandoc-citeproc` filter is used.
* Add `--citeproc/-C` option to trigger citation processing.
  This behaves like a filter and will be positioned
  relative to filters as they appear on the command line.
* Rewrote the manual on citatations, adding a dedicated Citations
  section which also includes some information formerly found in
  the pandoc-citeproc man page.
* Look for CSL styles in the `csl` subdirectory of the pandoc user data
  directory.  This changes the old pandoc-citeproc behavior, which looked
  in `~/.csl`.  Users can simply symlink `~/.csl` to the `csl`
  subdirectory of their pandoc user data directory if they want
  the old behavior.
* Add support for CSL bibliography entry formatting to LaTeX, HTML,
  Ms writers.  Added CSL-related CSS to styles.html.
2020-09-21 10:15:50 -07:00

1237 lines
48 KiB
Haskell
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE FlexibleContexts #-}
{-# OPTIONS_GHC -fno-warn-unused-do-bind #-}
-----------------------------------------------------------------------------
-- |
-- Module : Text.CSL.Input.Bibtex
-- Copyright : (c) John MacFarlane
-- License : BSD-style (see LICENSE)
--
-- Maintainer : John MacFarlane <fiddlosopher@gmail.com>
-- Stability : unstable-- Portability : unportable
--
-----------------------------------------------------------------------------
module Text.Pandoc.Citeproc.BibTeX
( Variant(..)
, readBibtexString
)
where
import Text.Pandoc.Definition
import Text.Pandoc.Builder as B
import Text.Pandoc.Readers.LaTeX (readLaTeX)
import Text.Pandoc.Extensions (Extension(..), extensionsFromList)
import Text.Pandoc.Options (ReaderOptions(..))
import Text.Pandoc.Class (runPure)
import Text.Pandoc.Error (PandocError)
import Text.Pandoc.Shared (stringify)
import qualified Text.Pandoc.Walk as Walk
import Citeproc.Types
import Citeproc.CaseTransform (withSentenceCase)
import Citeproc.Pandoc (caseTransform)
import Text.Pandoc.Citeproc.Util (toIETF)
import Text.Pandoc.Citeproc.Data (biblatexStringMap)
import Data.Default
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Map as Map
import Data.Maybe
import Text.Parsec hiding (State, many, (<|>))
import Control.Applicative
import Data.List.Split (splitOn, splitWhen, wordsBy)
import Control.Monad.RWS hiding ((<>))
import qualified Data.Sequence as Seq
import Data.Char (isAlphaNum, isDigit, isLetter,
isUpper, toLower, toUpper,
isLower, isPunctuation)
import Data.List (foldl', intercalate)
import Safe (readMay)
data Variant = Bibtex | Biblatex
deriving (Show, Eq, Ord)
-- | Parse BibTeX or BibLaTeX into a list of 'Reference's.
readBibtexString :: Variant -- ^ bibtex or biblatex
-> Locale -- ^ Locale
-> (Text -> Bool) -- ^ Filter on citation ids
-> Text -- ^ bibtex/biblatex text
-> Either ParseError [Reference Inlines]
readBibtexString variant locale idpred contents = do
case runParser (((resolveCrossRefs variant <$> bibEntries) <* eof) >>=
mapM (itemToReference locale variant) .
filter (idpred . identifier))
(fromMaybe defaultLang $ localeLanguage locale, Map.empty)
"" contents of
Left err -> Left err
Right xs -> return xs
defaultLang :: Lang
defaultLang = Lang "en" (Just "US")
-- a map of bibtex "string" macros
type StringMap = Map.Map Text Text
type BibParser = Parsec Text (Lang, StringMap)
data Item = Item{ identifier :: Text
, sourcePos :: SourcePos
, entryType :: Text
, fields :: Map.Map Text Text
}
deriving (Show, Ord, Eq)
itemToReference :: Locale -> Variant -> Item -> BibParser (Reference Inlines)
itemToReference locale variant item = do
setPosition (sourcePos item)
bib item $ do
let lang = fromMaybe defaultLang $ localeLanguage locale
modify $ \st -> st{ localeLang = lang,
untitlecase = case lang of
(Lang "en" _) -> True
_ -> False }
id' <- asks identifier
otherIds <- (Just <$> getRawField "ids")
<|> return Nothing
(reftype, genre) <- getTypeAndGenre
-- hyphenation:
let getLangId = do
langid <- T.strip . T.toLower <$> getRawField "langid"
idopts <- T.strip . T.toLower . stringify <$>
getField "langidopts" <|> return ""
case (langid, idopts) of
("english","variant=british") -> return "british"
("english","variant=american") -> return "american"
("english","variant=us") -> return "american"
("english","variant=usmax") -> return "american"
("english","variant=uk") -> return "british"
("english","variant=australian") -> return "australian"
("english","variant=newzealand") -> return "newzealand"
(x,_) -> return x
hyphenation <- (Just . toIETF . T.toLower <$>
(getLangId <|> getRawField "hyphenation"))
<|> return Nothing
modify $ \s -> s{ untitlecase = untitlecase s &&
case hyphenation of
Just x -> "en-" `T.isPrefixOf` x
_ -> True }
opts <- (parseOptions <$> getRawField "options") <|> return []
et <- asks entryType
-- titles
let isArticle = et `elem`
["article", "periodical", "suppperiodical", "review"]
let isPeriodical = et == "periodical"
let isChapterlike = et `elem`
["inbook","incollection","inproceedings","inreference","bookinbook"]
let getFieldMaybe f = (Just <$> getField f) <|> return Nothing
-- names
let getNameList' f = Just <$>
getNameList (("bibtex", case variant of
Bibtex -> "true"
Biblatex -> "false") : opts) f
author' <- getNameList' "author" <|> return Nothing
containerAuthor' <- getNameList' "bookauthor" <|> return Nothing
translator' <- getNameList' "translator" <|> return Nothing
editortype <- getRawField "editortype" <|> return mempty
editor'' <- getNameList' "editor" <|> return Nothing
director'' <- getNameList' "director" <|> return Nothing
let (editor', director') = case editortype of
"director" -> (Nothing, editor'')
_ -> (editor'', director'')
-- FIXME: add same for editora, editorb, editorc
-- dates
issued' <- (Just <$> (getDate "date" <|> getOldDate mempty)) <|>
return Nothing
eventDate' <- (Just <$> (getDate "eventdate" <|> getOldDate "event")) <|>
return Nothing
origDate' <- (Just <$> (getDate "origdate" <|> getOldDate "orig")) <|>
return Nothing
accessed' <- (Just <$> (getDate "urldate" <|> getOldDate "url")) <|>
return Nothing
-- locators
pages' <- getFieldMaybe "pages"
volume' <- getFieldMaybe "volume"
part' <- getFieldMaybe "part"
volumes' <- getFieldMaybe "volumes"
pagetotal' <- getFieldMaybe "pagetotal"
chapter' <- getFieldMaybe "chapter"
edition' <- getFieldMaybe "edition"
version' <- getFieldMaybe "version"
(number', collectionNumber', issue') <-
(getField "number" >>= \x ->
if et `elem` ["book","collection","proceedings","reference",
"mvbook","mvcollection","mvproceedings", "mvreference",
"bookinbook","inbook", "incollection","inproceedings",
"inreference", "suppbook","suppcollection"]
then return (Nothing, Just x, Nothing)
else if isArticle
then (getField "issue" >>= \y ->
return (Nothing, Nothing, Just $ concatWith ',' [x,y]))
<|> return (Nothing, Nothing, Just x)
else return (Just x, Nothing, Nothing))
<|> return (Nothing, Nothing, Nothing)
-- titles
hasMaintitle <- (True <$ getRawField "maintitle") <|> return False
title' <- Just <$>
((guard isPeriodical >> getTitle "issuetitle")
<|> (guard hasMaintitle >>
guard (not isChapterlike) >>
getTitle "maintitle")
<|> getTitle "title")
<|> return Nothing
subtitle' <- (guard isPeriodical >> getTitle "issuesubtitle")
<|> (guard hasMaintitle >>
guard (not isChapterlike) >>
getTitle "mainsubtitle")
<|> getTitle "subtitle"
<|> return mempty
titleaddon' <- (guard hasMaintitle >>
guard (not isChapterlike) >>
getTitle "maintitleaddon")
<|> getTitle "titleaddon"
<|> return mempty
volumeTitle' <- Just <$>
((guard hasMaintitle >>
guard (not isChapterlike) >>
getTitle "title")
<|> (guard hasMaintitle >>
guard isChapterlike >>
getTitle "booktitle"))
<|> return Nothing
volumeSubtitle' <- (guard hasMaintitle >>
guard (not isChapterlike) >>
getTitle "subtitle")
<|> (guard hasMaintitle >>
guard isChapterlike >>
getTitle "booksubtitle")
<|> return mempty
volumeTitleAddon' <- (guard hasMaintitle >>
guard (not isChapterlike) >>
getTitle "titleaddon")
<|> (guard hasMaintitle >>
guard isChapterlike >>
getTitle "booktitleaddon")
<|> return mempty
containerTitle' <- Just <$>
((guard isPeriodical >> getPeriodicalTitle "title")
<|> (guard isChapterlike >> getTitle "maintitle")
<|> (guard isChapterlike >> getTitle "booktitle")
<|> getPeriodicalTitle "journaltitle"
<|> getPeriodicalTitle "journal")
<|> return Nothing
containerSubtitle' <- (guard isPeriodical >> getPeriodicalTitle "subtitle")
<|> (guard isChapterlike >> getTitle "mainsubtitle")
<|> (guard isChapterlike >> getTitle "booksubtitle")
<|> getPeriodicalTitle "journalsubtitle"
<|> return mempty
containerTitleAddon' <- (guard isPeriodical >>
getPeriodicalTitle "titleaddon")
<|> (guard isChapterlike >>
getTitle "maintitleaddon")
<|> (guard isChapterlike >>
getTitle "booktitleaddon")
<|> return mempty
containerTitleShort' <- Just <$>
((guard isPeriodical >>
guard (not hasMaintitle) >>
getField "shorttitle")
<|> getPeriodicalTitle "shortjournal")
<|> return Nothing
-- change numerical series title to e.g. 'series 3'
let fixSeriesTitle [Str xs] | isNumber xs =
[Str (ordinalize locale xs), Space, Str (resolveKey' lang "jourser")]
fixSeriesTitle xs = xs
seriesTitle' <- (Just . B.fromList . fixSeriesTitle .
B.toList . resolveKey lang <$>
getTitle "series") <|>
return Nothing
shortTitle' <- Just <$>
((guard (not hasMaintitle || isChapterlike) >>
getTitle "shorttitle")
<|> if (subtitle' /= mempty || titleaddon' /= mempty) &&
not hasMaintitle
then getShortTitle False "title"
else getShortTitle True "title")
<|> return Nothing
eventTitle' <- Just <$> getTitle "eventtitle" <|> return Nothing
origTitle' <- Just <$> getTitle "origtitle" <|> return Nothing
-- publisher
pubfields <- mapM (\f -> Just `fmap`
(if variant == Bibtex || f == "howpublished"
then getField f
else getLiteralList' f)
<|> return Nothing)
["school","institution","organization", "howpublished","publisher"]
let publisher' = case catMaybes pubfields of
[] -> Nothing
xs -> Just $ concatWith ';' xs
origpublisher' <- (Just <$> getField "origpublisher") <|> return Nothing
-- places
venue' <- (Just <$> getField "venue") <|> return Nothing
address' <- Just <$>
(if variant == Bibtex
then getField "address"
else getLiteralList' "address"
<|> (guard (et /= "patent") >>
getLiteralList' "location"))
<|> return Nothing
origLocation' <- Just <$>
(if variant == Bibtex
then getField "origlocation"
else getLiteralList' "origlocation")
<|> return Nothing
jurisdiction' <- if reftype == "patent"
then Just <$>
(concatWith ';' . map (resolveKey lang) <$>
getLiteralList "location") <|> return Nothing
else return Nothing
-- url, doi, isbn, etc.:
-- note that with eprinttype = arxiv, we take eprint to be a partial url
-- archivePrefix is an alias for eprinttype
url' <- (guard (et == "online" || lookup "url" opts /= Just "false")
>> Just <$> getRawField "url")
<|> (do etype <- getRawField "eprinttype"
eprint <- getRawField "eprint"
let baseUrl =
case T.toLower etype of
"arxiv" -> "http://arxiv.org/abs/"
"jstor" -> "http://www.jstor.org/stable/"
"pubmed" -> "http://www.ncbi.nlm.nih.gov/pubmed/"
"googlebooks" -> "http://books.google.com?id="
_ -> ""
if T.null baseUrl
then mzero
else return $ Just $ baseUrl <> eprint)
<|> return Nothing
doi' <- (guard (lookup "doi" opts /= Just "false") >>
Just <$> getRawField "doi")
<|> return Nothing
isbn' <- Just <$> getRawField "isbn" <|> return Nothing
issn' <- Just <$> getRawField "issn" <|> return Nothing
pmid' <- Just <$> getRawField "pmid" <|> return Nothing
pmcid' <- Just <$> getRawField "pmcid" <|> return Nothing
callNumber' <- Just <$> getRawField "library" <|> return Nothing
-- notes
annotation' <- Just <$>
(getField "annotation" <|> getField "annote")
<|> return Nothing
abstract' <- Just <$> getField "abstract" <|> return Nothing
keywords' <- Just <$> getField "keywords" <|> return Nothing
note' <- if et == "periodical"
then return Nothing
else Just <$> getField "note" <|> return Nothing
addendum' <- if variant == Bibtex
then return Nothing
else Just <$> getField "addendum"
<|> return Nothing
pubstate' <- ( (Just . resolveKey lang <$> getField "pubstate")
<|> case dateLiteral <$> issued' of
Just (Just "forthcoming") ->
return $ Just $ B.str "forthcoming"
_ -> return Nothing
)
let addField (_, Nothing) = id
addField (f, Just x) = Map.insert f x
let vars = foldr addField mempty
[ ("other-ids", TextVal <$> otherIds)
, ("genre", TextVal <$> genre)
, ("language", TextVal <$> hyphenation)
-- dates
, ("accessed", DateVal <$> accessed')
, ("event-date", DateVal <$> eventDate')
, ("issued", DateVal <$> issued')
, ("original-date", DateVal <$> origDate')
-- names
, ("author", NamesVal <$> author')
, ("editor", NamesVal <$> editor')
, ("translator", NamesVal <$> translator')
, ("director", NamesVal <$> director')
, ("container-author", NamesVal <$> containerAuthor')
-- locators
, ("page", FancyVal . Walk.walk convertEnDash <$> pages')
, ("number-of-pages", FancyVal <$> pagetotal')
, ("volume", case (volume', part') of
(Nothing, Nothing) -> Nothing
(Just v, Nothing) -> Just $ FancyVal v
(Nothing, Just p) -> Just $ FancyVal p
(Just v, Just p) ->
Just $ FancyVal $ v <> B.str "." <> p)
, ("number-of-volumes", FancyVal <$> volumes')
, ("chapter-number", FancyVal <$> chapter')
, ("edition", FancyVal <$> edition')
, ("version", FancyVal <$> version')
, ("number", FancyVal <$> number')
, ("collection-number", FancyVal <$> collectionNumber')
, ("issue", FancyVal <$> issue')
-- title
, ("original-title", FancyVal <$> origTitle')
, ("event", FancyVal <$> eventTitle')
, ("title", case title' of
Just t -> Just $ FancyVal $
concatWith '.' [
concatWith ':' [t, subtitle']
, titleaddon' ]
Nothing -> Nothing)
, ("volume-title",
case volumeTitle' of
Just t -> Just $ FancyVal $
concatWith '.' [
concatWith ':' [t, volumeSubtitle']
, volumeTitleAddon' ]
Nothing -> Nothing)
, ("container-title",
case containerTitle' of
Just t -> Just $ FancyVal $
concatWith '.' [
concatWith ':' [t,
containerSubtitle']
, containerTitleAddon' ]
Nothing -> Nothing)
, ("container-title-short", FancyVal <$> containerTitleShort')
, ("collection-title", FancyVal <$> seriesTitle')
, ("title-short", FancyVal <$> shortTitle')
-- publisher
, ("publisher", FancyVal <$> publisher')
, ("original-publisher", FancyVal <$> origpublisher')
-- places
, ("jurisdiction", FancyVal <$> jurisdiction')
, ("event-place", FancyVal <$> venue')
, ("publisher-place", FancyVal <$> address')
, ("original-publisher-place", FancyVal <$> origLocation')
-- urls
, ("url", TextVal <$> url')
, ("doi", TextVal <$> doi')
, ("isbn", TextVal <$> isbn')
, ("issn", TextVal <$> issn')
, ("pmcid", TextVal <$> pmcid')
, ("pmid", TextVal <$> pmid')
, ("call-number", TextVal <$> callNumber')
-- notes
, ("note", case catMaybes [note', addendum'] of
[] -> Nothing
xs -> return $ FancyVal $ concatWith '.' xs)
, ("annote", FancyVal <$> annotation')
, ("abstract", FancyVal <$> abstract')
, ("keyword", FancyVal <$> keywords')
, ("status", FancyVal <$> pubstate')
]
return $ Reference
{ referenceId = ItemId id'
, referenceType = reftype
, referenceDisambiguation = Nothing
, referenceVariables = vars }
bib :: Item -> Bib a -> BibParser a
bib entry m = fst <$> evalRWST m entry (BibState True (Lang "en" (Just "US")))
resolveCrossRefs :: Variant -> [Item] -> [Item]
resolveCrossRefs variant entries =
map (resolveCrossRef variant entries) entries
resolveCrossRef :: Variant -> [Item] -> Item -> Item
resolveCrossRef variant entries entry =
Map.foldrWithKey go entry (fields entry)
where go key val entry' =
if key == "crossref" || key == "xdata"
then entry'{ fields = fields entry' <>
Map.fromList (getXrefFields variant
entry entries val) }
else entry'
getXrefFields :: Variant -> Item -> [Item] -> Text -> [(Text, Text)]
getXrefFields variant baseEntry entries keys = do
let keys' = splitKeys keys
xrefEntry <- [e | e <- entries, identifier e `elem` keys']
(k, v) <- Map.toList $ fields xrefEntry
if k == "crossref" || k == "xdata"
then do
xs <- mapM (getXrefFields variant baseEntry entries)
(splitKeys v)
(x, y) <- xs
guard $ isNothing $ Map.lookup x $ fields xrefEntry
return (x, y)
else do
k' <- case variant of
Bibtex -> return k
Biblatex -> transformKey
(entryType xrefEntry) (entryType baseEntry) k
guard $ isNothing $ Map.lookup k' $ fields baseEntry
return (k',v)
data BibState = BibState{
untitlecase :: Bool
, localeLang :: Lang
}
type Bib = RWST Item () BibState BibParser
blocksToInlines :: [Block] -> Inlines
blocksToInlines bs =
case bs of
[Plain xs] -> B.fromList xs
[Para xs] -> B.fromList xs
_ -> B.fromList $ Walk.query (:[]) bs
adjustSpans :: Lang -> Inline -> Inline
adjustSpans lang (RawInline (Format "latex") s)
| s == "\\hyphen" || s == "\\hyphen " = Str "-"
| otherwise = parseRawLaTeX lang s
adjustSpans _ SoftBreak = Space
adjustSpans _ x = x
parseRawLaTeX :: Lang -> Text -> Inline
parseRawLaTeX lang t@(T.stripPrefix "\\" -> Just xs) =
case parseLaTeX lang contents of
Right [Para ys] -> f command ys
Right [Plain ys] -> f command ys
Right [] -> f command []
_ -> RawInline (Format "latex") t
where (command', contents') = T.break (\c -> c =='{' || c =='\\') xs
command = T.strip command'
contents = T.drop 1 $ T.dropEnd 1 contents'
f "mkbibquote" ils = Span nullAttr [Quoted DoubleQuote ils]
f "mkbibemph" ils = Span nullAttr [Emph ils]
f "mkbibitalic" ils = Span nullAttr [Emph ils]
f "mkbibbold" ils = Span nullAttr [Strong ils]
f "mkbibparens" ils = Span nullAttr $
[Str "("] ++ ils ++ [Str ")"]
f "mkbibbrackets" ils = Span nullAttr $
[Str "["] ++ ils ++ [Str "]"]
-- ... both should be nestable & should work in year fields
f "autocap" ils = Span nullAttr ils
-- TODO: should work in year fields
f "textnormal" ils = Span ("",["nodecor"],[]) ils
f "bibstring" [Str s] = Str $ resolveKey' lang s
f "adddot" [] = Str "."
f "adddotspace" [] = Span nullAttr [Str ".", Space]
f "addabbrvspace" [] = Space
f _ ils = Span nullAttr ils
parseRawLaTeX _ t = RawInline (Format "latex") t
latex' :: Text -> Bib [Block]
latex' t = do
lang <- gets localeLang
case parseLaTeX lang t of
Left _ -> mzero
Right bs -> return bs
parseLaTeX :: Lang -> Text -> Either PandocError [Block]
parseLaTeX lang t =
case runPure (readLaTeX
def{ readerExtensions =
extensionsFromList [Ext_raw_tex, Ext_smart] } t) of
Left e -> Left e
Right (Pandoc _ bs) -> Right $ Walk.walk (adjustSpans lang) bs
latex :: Text -> Bib Inlines
latex = fmap blocksToInlines . latex' . T.strip
type Options = [(Text, Text)]
parseOptions :: Text -> Options
parseOptions = map breakOpt . T.splitOn ","
where breakOpt x = case T.break (=='=') x of
(w,v) -> (T.toLower $ T.strip w,
T.toLower $ T.strip $ T.drop 1 v)
bibEntries :: BibParser [Item]
bibEntries = do
skipMany nonEntry
many (bibItem <* skipMany nonEntry)
where nonEntry = bibSkip <|>
try (char '@' >>
(bibComment <|> bibPreamble <|> bibString))
bibSkip :: BibParser ()
bibSkip = skipMany1 (satisfy (/='@'))
bibComment :: BibParser ()
bibComment = do
cistring "comment"
spaces
void inBraces <|> bibSkip <|> return ()
bibPreamble :: BibParser ()
bibPreamble = do
cistring "preamble"
spaces
void inBraces
bibString :: BibParser ()
bibString = do
cistring "string"
spaces
char '{'
spaces
(k,v) <- entField
char '}'
updateState (\(l,m) -> (l, Map.insert k v m))
return ()
inBraces :: BibParser Text
inBraces = try $ do
char '{'
res <- manyTill
( (T.pack <$> many1 (noneOf "{}\\"))
<|> (char '\\' >> ( (char '{' >> return "\\{")
<|> (char '}' >> return "\\}")
<|> return "\\"))
<|> (braced <$> inBraces)
) (char '}')
return $ T.concat res
braced :: Text -> Text
braced = T.cons '{' . flip T.snoc '}'
inQuotes :: BibParser Text
inQuotes = do
char '"'
T.concat <$> manyTill
( (T.pack <$> many1 (noneOf "\"\\{"))
<|> (char '\\' >> T.cons '\\' . T.singleton <$> anyChar)
<|> braced <$> inBraces
) (char '"')
fieldName :: BibParser Text
fieldName = resolveAlias . T.toLower . T.pack
<$> many1 (letter <|> digit <|> oneOf "-_:+")
isBibtexKeyChar :: Char -> Bool
isBibtexKeyChar c =
isAlphaNum c || c `elem` (".:;?!`'()/*@_+=-[]*&" :: [Char])
bibItem :: BibParser Item
bibItem = do
char '@'
pos <- getPosition
enttype <- map toLower <$> many1 letter
spaces
char '{'
spaces
entid <- many1 (satisfy isBibtexKeyChar)
spaces
char ','
spaces
entfields <- entField `sepEndBy` (char ',' >> spaces)
spaces
char '}'
return $ Item (T.pack entid) pos (T.pack enttype) (Map.fromList entfields)
entField :: BibParser (Text, Text)
entField = do
k <- fieldName
spaces
char '='
spaces
vs <- (expandString <|> inQuotes <|> inBraces <|> rawWord) `sepBy`
try (spaces >> char '#' >> spaces)
spaces
return (k, T.concat vs)
resolveAlias :: Text -> Text
resolveAlias "archiveprefix" = "eprinttype"
resolveAlias "primaryclass" = "eprintclass"
resolveAlias s = s
rawWord :: BibParser Text
rawWord = T.pack <$> many1 alphaNum
expandString :: BibParser Text
expandString = do
k <- fieldName
(lang, strs) <- getState
case Map.lookup k strs of
Just v -> return v
Nothing -> return $ resolveKey' lang k
cistring :: Text -> BibParser Text
cistring s = try (go s)
where go t = case T.uncons t of
Nothing -> return ""
Just (c,cs) -> do
x <- char (toLower c) <|> char (toUpper c)
xs <- go cs
return (T.cons x xs)
splitKeys :: Text -> [Text]
splitKeys = filter (not . T.null) . T.split (\c -> c == ' ' || c == ',')
-- Biblatex Localization Keys (see Biblatex manual)
-- Currently we only map a subset likely to be used in Biblatex *databases*
-- (in fields such as `type`, and via `\bibstring{}` commands).
parseMonth :: Text -> Maybe Int
parseMonth s =
case T.toLower s of
"jan" -> Just 1
"feb" -> Just 2
"mar" -> Just 3
"apr" -> Just 4
"may" -> Just 5
"jun" -> Just 6
"jul" -> Just 7
"aug" -> Just 8
"sep" -> Just 9
"oct" -> Just 10
"nov" -> Just 11
"dec" -> Just 12
_ -> readMay (T.unpack s)
notFound :: Text -> Bib a
notFound f = Prelude.fail $ T.unpack f ++ " not found"
getField :: Text -> Bib Inlines
getField f = do
fs <- asks fields
case Map.lookup f fs of
Just x -> latex x
Nothing -> notFound f
getPeriodicalTitle :: Text -> Bib Inlines
getPeriodicalTitle f = do
ils <- getField f
return ils
protectCase :: (Inlines -> Inlines) -> (Inlines -> Inlines)
protectCase f = Walk.walk unprotect . f . Walk.walk protect
where
protect (Span ("",[],[]) xs) = Span ("",["nocase"],[]) xs
protect x = x
unprotect (Span ("",["nocase"],[]) xs)
| hasLowercaseWord xs = Span ("",["nocase"],[]) xs
| otherwise = Span ("",[],[]) xs
unprotect x = x
hasLowercaseWord = any startsWithLowercase . splitStrWhen isPunctuation
startsWithLowercase (Str (T.uncons -> Just (x,_))) = isLower x
startsWithLowercase _ = False
unTitlecase :: Maybe Lang -> Inlines -> Inlines
unTitlecase mblang = protectCase (caseTransform (withSentenceCase mblang))
getTitle :: Text -> Bib Inlines
getTitle f = do
ils <- getField f
utc <- gets untitlecase
lang <- gets localeLang
let processTitle = if utc then unTitlecase (Just lang) else id
return $ processTitle ils
getShortTitle :: Bool -> Text -> Bib Inlines
getShortTitle requireColon f = do
ils <- splitStrWhen (==':') . B.toList <$> getTitle f
if not requireColon || containsColon ils
then return $ B.fromList $ upToColon ils
else return mempty
containsColon :: [Inline] -> Bool
containsColon xs = Str ":" `elem` xs
upToColon :: [Inline] -> [Inline]
upToColon xs = takeWhile (/= Str ":") xs
isNumber :: Text -> Bool
isNumber t = case T.uncons t of
Just ('-', ds) -> T.all isDigit ds
Just _ -> T.all isDigit t
Nothing -> False
getDate :: Text -> Bib Date
getDate f = do
-- the ~ can used for approx dates, but the latex reader
-- parses this as a nonbreaking space, so we need to convert it back!
let nbspToTilde '\160' = '~'
nbspToTilde c = c
mbd <- rawDateEDTF . T.map nbspToTilde <$> getRawField f
case mbd of
Nothing -> Prelude.fail "expected date"
Just d -> return d
-- A negative (BC) year might be written with -- or --- in bibtex:
fixLeadingDash :: Text -> Text
fixLeadingDash t = case T.uncons t of
Just (c, ds) | (c == '' || c == '—') && firstIsDigit ds -> T.cons '' ds
_ -> t
where firstIsDigit = maybe False (isDigit . fst) . T.uncons
getOldDate :: Text -> Bib Date
getOldDate prefix = do
year' <- (readMay . T.unpack . fixLeadingDash . stringify
<$> getField (prefix <> "year")) <|> return Nothing
month' <- (parseMonth <$> getRawField (prefix <> "month"))
<|> return Nothing
day' <- (readMay . T.unpack <$> getRawField (prefix <> "day"))
<|> return Nothing
endyear' <- (readMay . T.unpack . fixLeadingDash . stringify
<$> getField (prefix <> "endyear")) <|> return Nothing
endmonth' <- (parseMonth . stringify
<$> getField (prefix <> "endmonth")) <|> return Nothing
endday' <- (readMay . T.unpack . stringify <$>
getField (prefix <> "endday")) <|> return Nothing
let toDateParts (y', m', d') =
DateParts $
case y' of
Nothing -> []
Just y ->
case m' of
Nothing -> [y]
Just m ->
case d' of
Nothing -> [y,m]
Just d -> [y,m,d]
let dateparts = filter (\x -> x /= DateParts [])
$ map toDateParts [(year',month',day'),
(endyear',endmonth',endday')]
literal <- if null dateparts
then Just <$> getRawField (prefix <> "year")
else return Nothing
return $
Date { dateParts = dateparts
, dateCirca = False
, dateSeason = Nothing
, dateLiteral = literal }
getRawField :: Text -> Bib Text
getRawField f =
(stringify <$> getField f)
<|> do fs <- asks fields
case Map.lookup f fs of
Just x -> return x
Nothing -> notFound f
getLiteralList :: Text -> Bib [Inlines]
getLiteralList f = do
fs <- asks fields
case Map.lookup f fs of
Just x -> latex' x >>= toLiteralList
Nothing -> notFound f
-- separates items with semicolons
getLiteralList' :: Text -> Bib Inlines
getLiteralList' f = do
fs <- asks fields
case Map.lookup f fs of
Just x -> do
x' <- latex' x
case x' of
[Para xs] ->
return $ B.fromList
$ intercalate [Str ";", Space]
$ splitByAnd xs
[Plain xs] ->
return $ B.fromList
$ intercalate [Str ";", Space]
$ splitByAnd xs
_ -> mzero
Nothing -> notFound f
splitByAnd :: [Inline] -> [[Inline]]
splitByAnd = splitOn [Space, Str "and", Space]
toLiteralList :: [Block] -> Bib [Inlines]
toLiteralList [Para xs] =
return $ map B.fromList $ splitByAnd xs
toLiteralList [Plain xs] = toLiteralList [Para xs]
toLiteralList _ = mzero
concatWith :: Char -> [Inlines] -> Inlines
concatWith sep = foldl' go mempty
where go :: Inlines -> Inlines -> Inlines
go accum s
| s == mempty = accum
| otherwise =
case Seq.viewr (B.unMany accum) of
Seq.EmptyR -> s
_ Seq.:> Str x
| not (T.null x) &&
T.last x `elem` ("!?.,:;" :: String)
-> accum <> B.space <> s
_ -> accum <> B.str (T.singleton sep) <>
B.space <> s
getNameList :: Options -> Text -> Bib [Name]
getNameList opts f = do
fs <- asks fields
case Map.lookup f fs of
Just x -> latexNames opts x
Nothing -> notFound f
toNameList :: Options -> [Block] -> Bib [Name]
toNameList opts [Para xs] =
filter (/= emptyName) <$> mapM (toName opts . addSpaceAfterPeriod)
(splitByAnd xs)
toNameList opts [Plain xs] = toNameList opts [Para xs]
toNameList _ _ = mzero
latexNames :: Options -> Text -> Bib [Name]
latexNames opts t = latex' (T.strip t) >>= toNameList opts
-- see issue 392 for motivation. We want to treat
-- "J.G. Smith" and "J. G. Smith" the same.
addSpaceAfterPeriod :: [Inline] -> [Inline]
addSpaceAfterPeriod = go . splitStrWhen (=='.')
where
go [] = []
go (Str (T.unpack -> [c]):Str ".":Str (T.unpack -> [d]):xs)
| isLetter d
, isLetter c
, isUpper c
, isUpper d
= Str (T.singleton c):Str ".":Space:go (Str (T.singleton d):xs)
go (x:xs) = x:go xs
emptyName :: Name
emptyName =
Name { nameFamily = Nothing
, nameGiven = Nothing
, nameDroppingParticle = Nothing
, nameNonDroppingParticle = Nothing
, nameSuffix = Nothing
, nameLiteral = Nothing
, nameCommaSuffix = False
, nameStaticOrdering = False
}
toName :: Options -> [Inline] -> Bib Name
toName _ [Str "others"] =
return emptyName{ nameLiteral = Just "others" }
toName _ [Span ("",[],[]) ils] = -- corporate author
return emptyName{ nameLiteral = Just $ stringify ils }
-- extended BibLaTeX name format - see #266
toName _ ils@(Str ys:_) | T.any (== '=') ys = do
let commaParts = splitWhen (== Str ",")
. splitStrWhen (\c -> c == ',' || c == '=' || c == '\160')
$ ils
let addPart ag (Str "given" : Str "=" : xs) =
ag{ nameGiven = case nameGiven ag of
Nothing -> Just $ stringify xs
Just t -> Just $ t <> " " <> stringify xs }
addPart ag (Str "family" : Str "=" : xs) =
ag{ nameFamily = Just $ stringify xs }
addPart ag (Str "prefix" : Str "=" : xs) =
ag{ nameDroppingParticle = Just $ stringify xs }
addPart ag (Str "useprefix" : Str "=" : Str "true" : _) =
ag{ nameNonDroppingParticle = nameDroppingParticle ag
, nameDroppingParticle = Nothing }
addPart ag (Str "suffix" : Str "=" : xs) =
ag{ nameSuffix = Just $ stringify xs }
addPart ag (Space : xs) = addPart ag xs
addPart ag _ = ag
return $ foldl' addPart emptyName commaParts
-- First von Last
-- von Last, First
-- von Last, Jr ,First
-- NOTE: biblatex and bibtex differ on:
-- Drummond de Andrade, Carlos
-- bibtex takes "Drummond de" as the von;
-- biblatex takes the whole as a last name.
-- See https://github.com/plk/biblatex/issues/236
-- Here we implement the more sensible biblatex behavior.
toName opts ils = do
let useprefix = optionSet "useprefix" opts
let usecomma = optionSet "juniorcomma" opts
let bibtex = optionSet "bibtex" opts
let words' = wordsBy (\x -> x == Space || x == Str "\160")
let commaParts = map words' $ splitWhen (== Str ",")
$ splitStrWhen
(\c -> c == ',' || c == '\160') ils
let (first, vonlast, jr) =
case commaParts of
--- First is the longest sequence of white-space separated
-- words starting with an uppercase and that is not the
-- whole string. von is the longest sequence of whitespace
-- separated words whose last word starts with lower case
-- and that is not the whole string.
[fvl] -> let (caps', rest') = span isCapitalized fvl
in if null rest' && not (null caps')
then (init caps', [last caps'], [])
else (caps', rest', [])
[vl,f] -> (f, vl, [])
(vl:j:f:_) -> (f, vl, j )
[] -> ([], [], [])
let (von, lastname) =
if bibtex
then case span isCapitalized $ reverse vonlast of
([],w:ws) -> (reverse ws, [w])
(vs, ws) -> (reverse ws, reverse vs)
else case break isCapitalized vonlast of
(vs@(_:_), []) -> (init vs, [last vs])
(vs, ws) -> (vs, ws)
let prefix = T.unwords $ map stringify von
let family = T.unwords $ map stringify lastname
let suffix = T.unwords $ map stringify jr
let given = T.unwords $ map stringify first
return
Name { nameFamily = if T.null family
then Nothing
else Just family
, nameGiven = if T.null given
then Nothing
else Just given
, nameDroppingParticle = if useprefix || T.null prefix
then Nothing
else Just prefix
, nameNonDroppingParticle = if useprefix && not (T.null prefix)
then Just prefix
else Nothing
, nameSuffix = if T.null suffix
then Nothing
else Just suffix
, nameLiteral = Nothing
, nameCommaSuffix = usecomma
, nameStaticOrdering = False
}
splitStrWhen :: (Char -> Bool) -> [Inline] -> [Inline]
splitStrWhen _ [] = []
splitStrWhen p (Str xs : ys) = map Str (go xs) ++ splitStrWhen p ys
where go s =
let (w,z) = T.break p s
in if T.null z
then if T.null w
then []
else [w]
else if T.null w
then (T.take 1 z : go (T.drop 1 z))
else (w : T.take 1 z : go (T.drop 1 z))
splitStrWhen p (x : ys) = x : splitStrWhen p ys
ordinalize :: Locale -> Text -> Text
ordinalize locale n =
let terms = localeTerms locale
pad0 t = case T.length t of
0 -> "00"
1 -> "0" <> t
_ -> t
in case Map.lookup ("ordinal-" <> pad0 n) terms <|>
Map.lookup "ordinal" terms of
Nothing -> n
Just [] -> n
Just (t:_) -> n <> snd t
isCapitalized :: [Inline] -> Bool
isCapitalized (Str (T.uncons -> Just (c,cs)) : rest)
| isUpper c = True
| isDigit c = isCapitalized (Str cs : rest)
| otherwise = False
isCapitalized (_:rest) = isCapitalized rest
isCapitalized [] = True
optionSet :: Text -> Options -> Bool
optionSet key opts = case lookup key opts of
Just "true" -> True
Just s -> s == mempty
_ -> False
getTypeAndGenre :: Bib (Text, Maybe Text)
getTypeAndGenre = do
lang <- gets localeLang
et <- asks entryType
guard $ et /= "xdata"
reftype' <- resolveKey' lang <$> getRawField "type"
<|> return mempty
st <- getRawField "entrysubtype" <|> return mempty
isEvent <- (True <$ (getRawField "eventdate"
<|> getRawField "eventtitle"
<|> getRawField "venue")) <|> return False
let reftype =
case et of
"article"
| st == "magazine" -> "article-magazine"
| st == "newspaper" -> "article-newspaper"
| otherwise -> "article-journal"
"book" -> "book"
"booklet" -> "pamphlet"
"bookinbook" -> "chapter"
"collection" -> "book"
"dataset" -> "dataset"
"electronic" -> "webpage"
"inbook" -> "chapter"
"incollection" -> "chapter"
"inreference" -> "entry-encyclopedia"
"inproceedings" -> "paper-conference"
"manual" -> "book"
"mastersthesis" -> "thesis"
"misc" -> ""
"mvbook" -> "book"
"mvcollection" -> "book"
"mvproceedings" -> "book"
"mvreference" -> "book"
"online" -> "webpage"
"patent" -> "patent"
"periodical"
| st == "magazine" -> "article-magazine"
| st == "newspaper" -> "article-newspaper"
| otherwise -> "article-journal"
"phdthesis" -> "thesis"
"proceedings" -> "book"
"reference" -> "book"
"report" -> "report"
"software" -> "book" -- no "software" type in CSL
"suppbook" -> "chapter"
"suppcollection" -> "chapter"
"suppperiodical"
| st == "magazine" -> "article-magazine"
| st == "newspaper" -> "article-newspaper"
| otherwise -> "article-journal"
"techreport" -> "report"
"thesis" -> "thesis"
"unpublished" -> if isEvent then "speech" else "manuscript"
"www" -> "webpage"
-- biblatex, "unsupported"
"artwork" -> "graphic"
"audio" -> "song" -- for audio *recordings*
"commentary" -> "book"
"image" -> "graphic" -- or "figure" ?
"jurisdiction" -> "legal_case"
"legislation" -> "legislation" -- or "bill" ?
"legal" -> "treaty"
"letter" -> "personal_communication"
"movie" -> "motion_picture"
"music" -> "song" -- for musical *recordings*
"performance" -> "speech"
"review" -> "review" -- or "review-book" ?
"standard" -> "legislation"
"video" -> "motion_picture"
-- biblatex-apa:
"data" -> "dataset"
"letters" -> "personal_communication"
"newsarticle" -> "article-newspaper"
_ -> ""
let refgenre =
case et of
"mastersthesis" -> if T.null reftype'
then Just $ resolveKey' lang "mathesis"
else Just reftype'
"phdthesis" -> if T.null reftype'
then Just $ resolveKey' lang "phdthesis"
else Just reftype'
_ -> if T.null reftype'
then Nothing
else Just reftype'
return (reftype, refgenre)
-- transformKey source target key
-- derived from Appendix C of bibtex manual
transformKey :: Text -> Text -> Text -> [Text]
transformKey _ _ "ids" = []
transformKey _ _ "crossref" = []
transformKey _ _ "xref" = []
transformKey _ _ "entryset" = []
transformKey _ _ "entrysubtype" = []
transformKey _ _ "execute" = []
transformKey _ _ "label" = []
transformKey _ _ "options" = []
transformKey _ _ "presort" = []
transformKey _ _ "related" = []
transformKey _ _ "relatedoptions" = []
transformKey _ _ "relatedstring" = []
transformKey _ _ "relatedtype" = []
transformKey _ _ "shorthand" = []
transformKey _ _ "shorthandintro" = []
transformKey _ _ "sortkey" = []
transformKey x y "author"
| x `elem` ["mvbook", "book"] &&
y `elem` ["inbook", "bookinbook", "suppbook"] = ["bookauthor", "author"]
-- note: this next clause is not in the biblatex manual, but it makes
-- sense in the context of CSL conversion:
transformKey x y "author"
| x == "mvbook" && y == "book" = ["bookauthor", "author"]
transformKey "mvbook" y z
| y `elem` ["book", "inbook", "bookinbook", "suppbook"] = standardTrans z
transformKey x y z
| x `elem` ["mvcollection", "mvreference"] &&
y `elem` ["collection", "reference", "incollection", "inreference",
"suppcollection"] = standardTrans z
transformKey "mvproceedings" y z
| y `elem` ["proceedings", "inproceedings"] = standardTrans z
transformKey "book" y z
| y `elem` ["inbook", "bookinbook", "suppbook"] = bookTrans z
transformKey x y z
| x `elem` ["collection", "reference"] &&
y `elem` ["incollection", "inreference", "suppcollection"] = bookTrans z
transformKey "proceedings" "inproceedings" z = bookTrans z
transformKey "periodical" y z
| y `elem` ["article", "suppperiodical"] =
case z of
"title" -> ["journaltitle"]
"subtitle" -> ["journalsubtitle"]
"shorttitle" -> []
"sorttitle" -> []
"indextitle" -> []
"indexsorttitle" -> []
_ -> [z]
transformKey _ _ x = [x]
standardTrans :: Text -> [Text]
standardTrans z =
case z of
"title" -> ["maintitle"]
"subtitle" -> ["mainsubtitle"]
"titleaddon" -> ["maintitleaddon"]
"shorttitle" -> []
"sorttitle" -> []
"indextitle" -> []
"indexsorttitle" -> []
_ -> [z]
bookTrans :: Text -> [Text]
bookTrans z =
case z of
"title" -> ["booktitle"]
"subtitle" -> ["booksubtitle"]
"titleaddon" -> ["booktitleaddon"]
"shorttitle" -> []
"sorttitle" -> []
"indextitle" -> []
"indexsorttitle" -> []
_ -> [z]
resolveKey :: Lang -> Inlines -> Inlines
resolveKey lang ils = Walk.walk go ils
where go (Str s) = Str $ resolveKey' lang s
go x = x
resolveKey' :: Lang -> Text -> Text
resolveKey' lang@(Lang l _) k =
case Map.lookup l biblatexStringMap >>= Map.lookup (T.toLower k) of
Nothing -> k
Just (x, _) -> either (const k) stringify $ parseLaTeX lang x
convertEnDash :: Inline -> Inline
convertEnDash (Str s) = Str (T.map (\c -> if c == '' then '-' else c) s)
convertEnDash x = x