From e1b7f3a63d521433d8a83e4e1f4f7aa1935d0bdd Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Sun, 13 Feb 2022 02:39:02 +0100 Subject: [PATCH] JATS reader: improve handling of fn-group elements (#7914) Footnotes in `<fn-group>` elements are collected and re-inserted into the document as proper footnotes in the place where they are referenced. Fixes: #6348 --- src/Text/Pandoc/Readers/JATS.hs | 25 ++++++++++++++++++-- test/command/6348.md | 42 +++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 2 deletions(-) create mode 100644 test/command/6348.md diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs index 37e0d13bc..03ed4a659 100644 --- a/src/Text/Pandoc/Readers/JATS.hs +++ b/src/Text/Pandoc/Readers/JATS.hs @@ -30,8 +30,9 @@ import Text.Pandoc.Builder import Text.Pandoc.Class.PandocMonad (PandocMonad) import Text.Pandoc.Options import Text.Pandoc.Shared (safeRead, extractSpaces) -import Text.TeXMath (readMathML, writeTeX) +import Text.Pandoc.Walk (walk) import Text.Pandoc.XML.Light +import Text.TeXMath (readMathML, writeTeX) import qualified Data.Set as S (fromList, member) import Data.Set ((\\)) import Text.Pandoc.Sources (ToSources(..), sourcesToText) @@ -43,6 +44,7 @@ data JATSState = JATSState{ jatsSectionLevel :: Int , jatsQuoteType :: QuoteType , jatsMeta :: Meta , jatsBook :: Bool + , jatsFootnotes :: Map.Map Text Blocks , jatsContent :: [Content] } deriving Show @@ -51,6 +53,7 @@ instance Default JATSState where , jatsQuoteType = DoubleQuote , jatsMeta = mempty , jatsBook = False + , jatsFootnotes = mempty , jatsContent = [] } @@ -63,7 +66,17 @@ readJATS _ inp = do tree <- either (throwError . PandocXMLError "") return $ parseXMLContents (TL.fromStrict . sourcesToText $ sources) (bs, st') <- flip runStateT (def{ jatsContent = tree }) $ mapM parseBlock tree - return $ Pandoc (jatsMeta st') (toList . mconcat $ bs) + let footnotes = jatsFootnotes st' + let blockList = toList $ mconcat bs + let linkToFootnotes :: Inline -> Inline + linkToFootnotes link'@(Link _attr _txt (href, _title)) = + case T.uncons href of + Just ('#', rid) -> case Map.lookup rid footnotes of + Just footnote -> Note (toList footnote) + Nothing -> link' + _ -> link' + linkToFootnotes inline = inline + return $ Pandoc (jatsMeta st') (walk linkToFootnotes blockList) -- convenience function to get an attribute value, defaulting to "" attrValue :: Text -> Element -> Text @@ -175,6 +188,7 @@ parseBlock (Elem e) = "table-wrap" -> divWith (attrValue "id" e, ["table-wrap"], []) <$> getBlocks e "caption" -> divWith (attrValue "id" e, ["caption"], []) <$> sect 6 + "fn-group" -> parseFootnoteGroup "ref-list" -> parseRefList e "?xml" -> return mempty _ -> getBlocks e @@ -239,6 +253,13 @@ parseBlock (Elem e) = (attrValue "title" g) _ -> divWith (attrValue "id" e, ["fig"], []) <$> getBlocks e + parseFootnoteGroup = do + forM_ (filterChildren (named "fn") e) $ \fn -> do + let id' = attrValue "id" fn + contents <- getBlocks fn + modify $ \st -> + st { jatsFootnotes = Map.insert id' contents (jatsFootnotes st) } + return mempty parseTable = do let isCaption x = named "title" x || named "caption" x diff --git a/test/command/6348.md b/test/command/6348.md new file mode 100644 index 000000000..7f599e52e --- /dev/null +++ b/test/command/6348.md @@ -0,0 +1,42 @@ +``` +% pandoc -f jats -t native +<?xml version="1.0" encoding="utf-8"?> +<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.0 20120330//EN" "http://jats.nlm.nih.gov/publishing/1.0/JATS-journalpublishing1.dtd"> +<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" dtd-version="1.0" article-type="research-article"> + <front> + <journal-meta> + <journal-title-group> + <journal-title>Opinion Research</journal-title> + </journal-title-group> + </journal-meta> + <article-meta> + <title-group> + <article-title>Example article</article-title> + </title-group> + </article-meta> + </front> + <body> + <sec sec-type="results"> + <title>Results</title> + <p>A University <xref ref-type="fn" rid="N0001">1</xref></p> + </sec> + </body> + <back> + <fn-group> + <fn id="N0001"> + <p>footnote</p> + </fn> + </fn-group> + </back> +</article> +^D +[ Header 1 ( "" , [] , [] ) [ Str "Results" ] +, Para + [ Str "A" + , Space + , Str "University" + , Space + , Note [ Para [ Str "footnote" ] ] + ] +] +```