From e87b54dcad5e37133bc0f4cfc8039e9fd0dd1b4e Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 16 May 2019 21:39:03 -0700
Subject: [PATCH] JATS writer: properly handle footnotes.

"Best Practice: When footnotes are grouped at the end of an article,
wrap them in a `<fn-group>` and use an `<xref>` element in the text, as
usual, to tie each footnote in the list to a particular location in the
text."

Closes #5511.
---
 src/Text/Pandoc/Writers/JATS.hs | 31 +++++++++++++-----
 test/Tests/Writers/JATS.hs      |  4 +--
 test/writer.jats                | 56 +++++++++++++++++++--------------
 3 files changed, 58 insertions(+), 33 deletions(-)

diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs
index e2c4e1e72..25ef3b223 100644
--- a/src/Text/Pandoc/Writers/JATS.hs
+++ b/src/Text/Pandoc/Writers/JATS.hs
@@ -16,6 +16,7 @@ https://jats.nlm.nih.gov/publishing/tag-library
 module Text.Pandoc.Writers.JATS ( writeJATS ) where
 import Prelude
 import Control.Monad.Reader
+import Control.Monad.State
 import Data.Char (toLower)
 import Data.Generics (everywhere, mkT)
 import Data.List (isSuffixOf, partition, isPrefixOf)
@@ -39,11 +40,16 @@ import qualified Text.XML.Light as Xml
 data JATSVersion = JATS1_1
      deriving (Eq, Show)
 
-type JATS = ReaderT JATSVersion
+data JATSState = JATSState
+  { jatsNotes :: [(Int, Doc)] }
+
+type JATS a = StateT JATSState (ReaderT JATSVersion a)
 
 writeJATS :: PandocMonad m => WriterOptions -> Pandoc -> m Text
 writeJATS opts d =
-  runReaderT (docToJATS opts d) JATS1_1
+  runReaderT (evalStateT (docToJATS opts d)
+     (JATSState{ jatsNotes = [] }))
+     JATS1_1
 
 -- | Convert Pandoc document to string in JATS format.
 docToJATS :: PandocMonad m => WriterOptions -> Pandoc -> JATS m Text
@@ -52,7 +58,7 @@ docToJATS opts (Pandoc meta blocks) = do
       isBackBlock _                    = False
   let (backblocks, bodyblocks) = partition isBackBlock blocks
   let elements = hierarchicalize bodyblocks
-  let backElements = hierarchicalize backblocks
+  let backElements = hierarchicalize $ backblocks
   let colwidth = if writerWrapText opts == WrapAuto
                     then Just $ writerColumns opts
                     else Nothing
@@ -77,8 +83,10 @@ docToJATS opts (Pandoc meta blocks) = do
                  meta
   main <- (render' . vcat) <$>
             mapM (elementToJATS opts' startLvl) elements
-  back <- (render' . vcat) <$>
-            mapM (elementToJATS opts' startLvl) backElements
+  notes <- reverse . map snd <$> gets jatsNotes
+  backs <- mapM (elementToJATS opts' startLvl) backElements
+  let fns = inTagsIndented "fn-group" $ vcat notes
+  let back = render' $ vcat backs $$ fns
   let context = defField "body" main
               $ defField "back" back
               $ defField "mathml" (case writerHTMLMathMethod opts of
@@ -368,9 +376,18 @@ inlineToJATS _ Space = return space
 inlineToJATS opts SoftBreak
   | writerWrapText opts == WrapPreserve = return cr
   | otherwise = return space
-inlineToJATS opts (Note contents) =
+inlineToJATS opts (Note contents) = do
   -- TODO technically only <p> tags are allowed inside
-  inTagsIndented "fn" <$> blocksToJATS opts contents
+  notes <- gets jatsNotes
+  let notenum = case notes of
+                  (n, _):_ -> n + 1
+                  []       -> 1
+  thenote <- inTags True "fn" [("id","fn" ++ show notenum)]
+                <$> blocksToJATS opts contents
+  modify $ \st -> st{ jatsNotes = (notenum, thenote) : notes }
+  return $ inTags False "xref" [("ref-type", "fn"),
+                                ("rid", "fn" ++ show notenum)]
+         $ text (show notenum)
 inlineToJATS opts (Cite _ lst) =
   -- TODO revisit this after examining the jats.csl pipeline
   inlinesToJATS opts lst
diff --git a/test/Tests/Writers/JATS.hs b/test/Tests/Writers/JATS.hs
index 669220eea..6de058701 100644
--- a/test/Tests/Writers/JATS.hs
+++ b/test/Tests/Writers/JATS.hs
@@ -86,9 +86,7 @@ tests = [ testGroup "inline code"
             headerWith ("foo",["unnumbered"],[]) 1
               (text "Header 1" <> note (plain $ text "note")) =?>
             "<sec id=\"foo\">\n\
-            \  <title>Header 1<fn>\n\
-            \    <p>note</p>\n\
-            \  </fn></title>\n\
+            \  <title>Header 1<xref ref-type=\"fn\" rid=\"fn1\">1</xref></title>\n\
             \</sec>"
           , "unnumbered sub header" =:
             headerWith ("foo",["unnumbered"],[]) 1
diff --git a/test/writer.jats b/test/writer.jats
index a4d604607..99cb3230b 100644
--- a/test/writer.jats
+++ b/test/writer.jats
@@ -844,38 +844,48 @@ These should not be escaped:  \$ \\ \&gt; \[ \{</preformat>
 </sec>
 <sec id="footnotes">
   <title>Footnotes</title>
-  <p>Here is a footnote reference,<fn>
-    <p>Here is the footnote. It can go anywhere after the footnote reference.
-    It need not be placed at the end of the document.</p>
-  </fn> and another.<fn>
-    <p>Here’s the long note. This one contains multiple blocks.</p>
-    <p>Subsequent blocks are indented to show that they belong to the footnote
-    (as with list items).</p>
-    <preformat>  { &lt;code&gt; }</preformat>
-    <p>If you want, you can indent every line, but you can also be lazy and
-    just indent the first line of each block.</p>
-  </fn> This should <italic>not</italic> be a footnote reference, because it
-  contains a space.[^my note] Here is an inline note.<fn>
-    <p>This is <italic>easier</italic> to type. Inline notes may contain
-    <ext-link ext-link-type="uri" xlink:href="http://google.com">links</ext-link>
-    and <monospace>]</monospace> verbatim characters, as well as [bracketed
-    text].</p>
-  </fn></p>
+  <p>Here is a footnote reference,<xref ref-type="fn" rid="fn1">1</xref> and
+  another.<xref ref-type="fn" rid="fn2">2</xref> This should
+  <italic>not</italic> be a footnote reference, because it contains a
+  space.[^my note] Here is an inline
+  note.<xref ref-type="fn" rid="fn3">3</xref></p>
   <disp-quote>
-    <p>Notes can go in quotes.<fn>
-      <p>In quote.</p>
-    </fn></p>
+    <p>Notes can go in quotes.<xref ref-type="fn" rid="fn4">4</xref></p>
   </disp-quote>
   <list list-type="order">
     <list-item>
-      <p>And in list items.<fn>
-        <p>In list.</p>
-      </fn></p>
+      <p>And in list items.<xref ref-type="fn" rid="fn5">5</xref></p>
     </list-item>
   </list>
   <p>This paragraph should not be part of the note, as it is not indented.</p>
 </sec>
 </body>
 <back>
+<fn-group>
+  <fn id="fn1">
+    <p>Here is the footnote. It can go anywhere after the footnote reference.
+    It need not be placed at the end of the document.</p>
+  </fn>
+  <fn id="fn2">
+    <p>Here’s the long note. This one contains multiple blocks.</p>
+    <p>Subsequent blocks are indented to show that they belong to the footnote
+    (as with list items).</p>
+    <preformat>  { &lt;code&gt; }</preformat>
+    <p>If you want, you can indent every line, but you can also be lazy and
+    just indent the first line of each block.</p>
+  </fn>
+  <fn id="fn3">
+    <p>This is <italic>easier</italic> to type. Inline notes may contain
+    <ext-link ext-link-type="uri" xlink:href="http://google.com">links</ext-link>
+    and <monospace>]</monospace> verbatim characters, as well as [bracketed
+    text].</p>
+  </fn>
+  <fn id="fn4">
+    <p>In quote.</p>
+  </fn>
+  <fn id="fn5">
+    <p>In list.</p>
+  </fn>
+</fn-group>
 </back>
 </article>