Clean up BibTeX parsing.

Previously there was a messy code path that gave strange
results in some cases, not passing through raw tex but
trying to extract a string content.  This was an artefact
of trying to handle some special bibtex-specific commands
in the BibTeX reader. Now we just handle these in the
LaTeX reader and simplify parsing in the BibTeX reader.
This does mean that more raw tex will be passed through
(and currently this is not sensitive to the `raw_tex`
extension; this should be fixed).

Closes #7049.
This commit is contained in:
John MacFarlane 2021-01-26 22:45:57 -08:00
parent 12bc662535
commit 98c2a52b4e
4 changed files with 25 additions and 37 deletions

View file

@ -750,41 +750,10 @@ blocksToInlines bs =
_ -> B.fromList $ Walk.query (:[]) bs
adjustSpans :: Lang -> Inline -> Inline
adjustSpans lang (RawInline (Format "latex") s)
| s == "\\hyphen" || s == "\\hyphen " = Str "-"
| otherwise = parseRawLaTeX lang s
adjustSpans lang (Span ("",[],[("bibstring",s)]) _) = Str $ resolveKey' lang s
adjustSpans _ SoftBreak = Space
adjustSpans _ x = x
parseRawLaTeX :: Lang -> Text -> Inline
parseRawLaTeX lang t@(T.stripPrefix "\\" -> Just xs) =
case parseLaTeX lang contents of
Right [Para ys] -> f command ys
Right [Plain ys] -> f command ys
Right [] -> f command []
_ -> RawInline (Format "latex") t
where (command', contents') = T.break (\c -> c =='{' || c =='\\') xs
command = T.strip command'
contents = T.drop 1 $ T.dropEnd 1 contents'
f "mkbibquote" ils = Span nullAttr [Quoted DoubleQuote ils]
f "mkbibemph" ils = Span nullAttr [Emph ils]
f "mkbibitalic" ils = Span nullAttr [Emph ils]
f "mkbibbold" ils = Span nullAttr [Strong ils]
f "mkbibparens" ils = Span nullAttr $
[Str "("] ++ ils ++ [Str ")"]
f "mkbibbrackets" ils = Span nullAttr $
[Str "["] ++ ils ++ [Str "]"]
-- ... both should be nestable & should work in year fields
f "autocap" ils = Span nullAttr ils
-- TODO: should work in year fields
f "textnormal" ils = Span ("",["nodecor"],[]) ils
f "bibstring" [Str s] = Str $ resolveKey' lang s
f "adddot" [] = Str "."
f "adddotspace" [] = Span nullAttr [Str ".", Space]
f "addabbrvspace" [] = Space
f _ ils = Span nullAttr ils
parseRawLaTeX _ t = RawInline (Format "latex") t
latex' :: Text -> Bib [Block]
latex' t = do
lang <- gets localeLang

View file

@ -940,6 +940,24 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
, ("uline", underline <$> tok)
-- plain tex stuff that should just be passed through as raw tex
, ("ifdim", ifdim)
-- bibtex
, ("mkbibquote", spanWith nullAttr . doubleQuoted <$> tok)
, ("mkbibemph", spanWith nullAttr . emph <$> tok)
, ("mkbibitalic", spanWith nullAttr . emph <$> tok)
, ("mkbibbold", spanWith nullAttr . strong <$> tok)
, ("mkbibparens",
spanWith nullAttr . (\x -> str "(" <> x <> str ")") <$> tok)
, ("mkbibbrackets",
spanWith nullAttr . (\x -> str "[" <> x <> str "]") <$> tok)
, ("autocap", spanWith nullAttr <$> tok)
, ("textnormal", spanWith ("",["nodecor"],[]) <$> tok)
, ("bibstring",
(\x -> spanWith ("",[],[("bibstring",x)]) (str x)) . untokenize
<$> braced)
, ("adddot", pure (str "."))
, ("adddotspace", pure (spanWith nullAttr (str "." <> space)))
, ("addabbrvspace", pure space)
, ("hyphen", pure (str "-"))
]
accent :: PandocMonad m => Char -> Maybe Char -> LP m Inlines

View file

@ -43,9 +43,9 @@ Cotton, F. A., Wilkinson, G., Murillio, C. A., & Bochmann, M. (1999).
---
nocite: "[@*]"
references:
- annote: A book entry with author authors and an edition field. By
default, long author and editor lists are automatically truncated.
This is configurable
- annote: A book entry with `\arabic{author}`{=latex} authors and an
edition field. By default, long author and editor lists are
automatically truncated. This is configurable
author:
- family: Cotton
given: Frank Albert

View file

@ -55,8 +55,9 @@ properties as a function of core size. *Langmuir*, *14*(1), 1730.
---
nocite: "[@*]"
references:
- annote: An article entry with author authors. By default, long author
and editor lists are automatically truncated. This is configurable
- annote: An article entry with `\arabic{author}`{=latex} authors. By
default, long author and editor lists are automatically truncated.
This is configurable
author:
- family: Hostetler
given: Michael J.