JATS reader: improve handling of fn-group elements (#7914)

Footnotes in `<fn-group>` elements are collected and re-inserted into
the document as proper footnotes in the place where they are referenced.

Fixes: #6348
This commit is contained in:
Albert Krewinkel 2022-02-13 02:39:02 +01:00 committed by GitHub
parent 4b1cddd292
commit e1b7f3a63d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 65 additions and 2 deletions

View file

@ -30,8 +30,9 @@ import Text.Pandoc.Builder
import Text.Pandoc.Class.PandocMonad (PandocMonad)
import Text.Pandoc.Options
import Text.Pandoc.Shared (safeRead, extractSpaces)
import Text.TeXMath (readMathML, writeTeX)
import Text.Pandoc.Walk (walk)
import Text.Pandoc.XML.Light
import Text.TeXMath (readMathML, writeTeX)
import qualified Data.Set as S (fromList, member)
import Data.Set ((\\))
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
@ -43,6 +44,7 @@ data JATSState = JATSState{ jatsSectionLevel :: Int
, jatsQuoteType :: QuoteType
, jatsMeta :: Meta
, jatsBook :: Bool
, jatsFootnotes :: Map.Map Text Blocks
, jatsContent :: [Content]
} deriving Show
@ -51,6 +53,7 @@ instance Default JATSState where
, jatsQuoteType = DoubleQuote
, jatsMeta = mempty
, jatsBook = False
, jatsFootnotes = mempty
, jatsContent = [] }
@ -63,7 +66,17 @@ readJATS _ inp = do
tree <- either (throwError . PandocXMLError "") return $
parseXMLContents (TL.fromStrict . sourcesToText $ sources)
(bs, st') <- flip runStateT (def{ jatsContent = tree }) $ mapM parseBlock tree
return $ Pandoc (jatsMeta st') (toList . mconcat $ bs)
let footnotes = jatsFootnotes st'
let blockList = toList $ mconcat bs
let linkToFootnotes :: Inline -> Inline
linkToFootnotes link'@(Link _attr _txt (href, _title)) =
case T.uncons href of
Just ('#', rid) -> case Map.lookup rid footnotes of
Just footnote -> Note (toList footnote)
Nothing -> link'
_ -> link'
linkToFootnotes inline = inline
return $ Pandoc (jatsMeta st') (walk linkToFootnotes blockList)
-- convenience function to get an attribute value, defaulting to ""
attrValue :: Text -> Element -> Text
@ -175,6 +188,7 @@ parseBlock (Elem e) =
"table-wrap" -> divWith (attrValue "id" e, ["table-wrap"], [])
<$> getBlocks e
"caption" -> divWith (attrValue "id" e, ["caption"], []) <$> sect 6
"fn-group" -> parseFootnoteGroup
"ref-list" -> parseRefList e
"?xml" -> return mempty
_ -> getBlocks e
@ -239,6 +253,13 @@ parseBlock (Elem e) =
(attrValue "title" g)
_ -> divWith (attrValue "id" e, ["fig"], []) <$> getBlocks e
parseFootnoteGroup = do
forM_ (filterChildren (named "fn") e) $ \fn -> do
let id' = attrValue "id" fn
contents <- getBlocks fn
modify $ \st ->
st { jatsFootnotes = Map.insert id' contents (jatsFootnotes st) }
return mempty
parseTable = do
let isCaption x = named "title" x || named "caption" x

42
test/command/6348.md Normal file
View file

@ -0,0 +1,42 @@
```
% pandoc -f jats -t native
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.0 20120330//EN" "http://jats.nlm.nih.gov/publishing/1.0/JATS-journalpublishing1.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" dtd-version="1.0" article-type="research-article">
<front>
<journal-meta>
<journal-title-group>
<journal-title>Opinion Research</journal-title>
</journal-title-group>
</journal-meta>
<article-meta>
<title-group>
<article-title>Example article</article-title>
</title-group>
</article-meta>
</front>
<body>
<sec sec-type="results">
<title>Results</title>
<p>A University <xref ref-type="fn" rid="N0001">1</xref></p>
</sec>
</body>
<back>
<fn-group>
<fn id="N0001">
<p>footnote</p>
</fn>
</fn-group>
</back>
</article>
^D
[ Header 1 ( "" , [] , [] ) [ Str "Results" ]
, Para
[ Str "A"
, Space
, Str "University"
, Space
, Note [ Para [ Str "footnote" ] ]
]
]
```