JATS writer: reduce unnecessary use of <p> elements for wrapping

The `<p>` element is used for wrapping in cases were the contents would
otherwise not be allowed in a certain context. Unnecessary wrapping is
avoided, especially around quotes (`<disp-quote>` elements).

Closes: 
This commit is contained in:
Albert Krewinkel 2021-04-16 22:13:29 +02:00
parent 2e7fee9c3c
commit 5f79a66ed6
No known key found for this signature in database
GPG key ID: 388DC0B21F631124
7 changed files with 168 additions and 146 deletions

View file

@ -1,6 +1,7 @@
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE ViewPatterns #-}
{- | {- |
Module : Text.Pandoc.Writers.JATS Module : Text.Pandoc.Writers.JATS
Copyright : 2017-2021 John MacFarlane Copyright : 2017-2021 John MacFarlane
@ -80,7 +81,7 @@ writeJats tagSet opts d = do
let environment = JATSEnv let environment = JATSEnv
{ jatsTagSet = tagSet { jatsTagSet = tagSet
, jatsInlinesWriter = inlinesToJATS , jatsInlinesWriter = inlinesToJATS
, jatsBlockWriter = blockToJATS , jatsBlockWriter = wrappedBlocksToJATS
, jatsReferences = refs , jatsReferences = refs
} }
let initialState = JATSState { jatsNotes = [] } let initialState = JATSState { jatsNotes = [] }
@ -162,11 +163,9 @@ wrappedBlocksToJATS needsWrap opts =
wrappedBlockToJATS b = do wrappedBlockToJATS b = do
inner <- blockToJATS opts b inner <- blockToJATS opts b
return $ return $
if needsWrap b || isBlockQuote b -- see #7041 if needsWrap b
then inTags True "p" [("specific-use","wrapper")] inner then inTags True "p" [("specific-use","wrapper")] inner
else inner else inner
isBlockQuote (BlockQuote _) = True
isBlockQuote _ = False
-- | Auxiliary function to convert Plain block to Para. -- | Auxiliary function to convert Plain block to Para.
plainToPara :: Block -> Block plainToPara :: Block -> Block
@ -324,10 +323,13 @@ blockToJATS opts (LineBlock lns) =
blockToJATS opts $ linesToPara lns blockToJATS opts $ linesToPara lns
blockToJATS opts (BlockQuote blocks) = do blockToJATS opts (BlockQuote blocks) = do
tagSet <- asks jatsTagSet tagSet <- asks jatsTagSet
let blocksToJats' = if tagSet == TagSetArticleAuthoring let needsWrap = if tagSet == TagSetArticleAuthoring
then wrappedBlocksToJATS (not . isPara) then not . isPara
else blocksToJATS else \case
inTagsIndented "disp-quote" <$> blocksToJats' opts blocks Header{} -> True
HorizontalRule -> True
_ -> False
inTagsIndented "disp-quote" <$> wrappedBlocksToJATS needsWrap opts blocks
blockToJATS _ (CodeBlock a str) = return $ blockToJATS _ (CodeBlock a str) = return $
inTags False tag attr (flush (text (T.unpack $ escapeStringForXML str))) inTags False tag attr (flush (text (T.unpack $ escapeStringForXML str)))
where (lang, attr) = codeAttr a where (lang, attr) = codeAttr a

View file

@ -34,13 +34,19 @@ tableToJATS :: PandocMonad m
-> JATS m (Doc Text) -> JATS m (Doc Text)
tableToJATS opts (Ann.Table attr caption colspecs thead tbodies tfoot) = do tableToJATS opts (Ann.Table attr caption colspecs thead tbodies tfoot) = do
let (Caption _maybeShortCaption captionBlocks) = caption let (Caption _maybeShortCaption captionBlocks) = caption
-- Only paragraphs are allowed in captions, all other blocks must be
-- wrapped in @<p>@ elements.
let needsWrapping = \case
Plain{} -> False
Para{} -> False
_ -> True
tbl <- captionlessTable opts attr colspecs thead tbodies tfoot tbl <- captionlessTable opts attr colspecs thead tbodies tfoot
captionDoc <- if null captionBlocks captionDoc <- if null captionBlocks
then return empty then return empty
else do else do
blockToJATS <- asks jatsBlockWriter blockToJATS <- asks jatsBlockWriter
inTagsIndented "caption" . vcat <$> inTagsIndented "caption" <$>
mapM (blockToJATS opts) captionBlocks blockToJATS needsWrapping opts captionBlocks
return $ inTags True "table-wrap" [] $ captionDoc $$ tbl return $ inTags True "table-wrap" [] $ captionDoc $$ tbl
captionlessTable :: PandocMonad m captionlessTable :: PandocMonad m
@ -230,7 +236,7 @@ tableCellToJats opts ctype colAlign (Cell attr align rowspan colspan item) = do
inlinesToJats <- asks jatsInlinesWriter inlinesToJats <- asks jatsInlinesWriter
let cellContents = \case let cellContents = \case
[Plain inlines] -> inlinesToJats opts inlines [Plain inlines] -> inlinesToJats opts inlines
blocks -> vcat <$> mapM (blockToJats opts) blocks blocks -> blockToJats needsWrapInCell opts blocks
let tag' = case ctype of let tag' = case ctype of
BodyCell -> "td" BodyCell -> "td"
HeaderCell -> "th" HeaderCell -> "th"
@ -246,3 +252,17 @@ tableCellToJats opts ctype colAlign (Cell attr align rowspan colspan item) = do
. maybeCons (colspanAttrib colspan) . maybeCons (colspanAttrib colspan)
$ toAttribs attr validAttribs $ toAttribs attr validAttribs
inTags False tag' attribs <$> cellContents item inTags False tag' attribs <$> cellContents item
-- | Whether the JATS produced from this block should be wrapped in a
-- @<p>@ element when put directly below a @<td>@ element.
needsWrapInCell :: Block -> Bool
needsWrapInCell = \case
Plain{} -> False -- should be unwrapped anyway
Para{} -> False
BulletList{} -> False
OrderedList{} -> False
DefinitionList{} -> False
HorizontalRule -> False
CodeBlock{} -> False
RawBlock{} -> False -- responsibility of the user
_ -> True

View file

@ -37,11 +37,20 @@ newtype JATSState = JATSState
{ jatsNotes :: [(Int, Doc Text)] { jatsNotes :: [(Int, Doc Text)]
} }
-- | Environment containing all information relevant for rendering.
data JATSEnv m = JATSEnv data JATSEnv m = JATSEnv
{ jatsTagSet :: JATSTagSet { jatsTagSet :: JATSTagSet -- ^ The tag set that's being ouput
, jatsBlockWriter :: (Block -> Bool)
-> WriterOptions -> [Block] -> JATS m (Doc Text)
-- ^ Converts a block list to JATS, wrapping top-level blocks into a
-- @<p>@ element if the property evaluates to @True@.
-- See #7227.
, jatsInlinesWriter :: WriterOptions -> [Inline] -> JATS m (Doc Text) , jatsInlinesWriter :: WriterOptions -> [Inline] -> JATS m (Doc Text)
, jatsBlockWriter :: WriterOptions -> Block -> JATS m (Doc Text) -- ^ Converts an inline list to JATS.
, jatsReferences :: [Reference Inlines]
, jatsReferences :: [Reference Inlines] -- ^ List of references
} }
-- | JATS writer type -- | JATS writer type

23
test/command/7041.md Normal file
View file

@ -0,0 +1,23 @@
```
% pandoc -f html -t jats
<table>
<tr><td><blockquote>Fly, you fools!</blockquote></td></tr>
</table>
^D
<table-wrap>
<table>
<colgroup>
<col width="100%" />
</colgroup>
<tbody>
<tr>
<td><p specific-use="wrapper">
<disp-quote>
<p>Fly, you fools!</p>
</disp-quote>
</p></td>
</tr>
</tbody>
</table>
</table-wrap>
```

View file

@ -78,39 +78,31 @@ Grubers markdown test suite.</p>
<sec id="block-quotes"> <sec id="block-quotes">
<title>Block Quotes</title> <title>Block Quotes</title>
<p>E-mail style:</p> <p>E-mail style:</p>
<p specific-use="wrapper"> <disp-quote>
<disp-quote> <p>This is a block quote. It is pretty short.</p>
<p>This is a block quote. It is pretty short.</p> </disp-quote>
</disp-quote> <disp-quote>
</p> <p>Code in a block quote:</p>
<p specific-use="wrapper"> <preformat>sub status {
<disp-quote>
<p>Code in a block quote:</p>
<preformat>sub status {
print &quot;working&quot;; print &quot;working&quot;;
}</preformat> }</preformat>
<p>A list:</p> <p>A list:</p>
<list list-type="order"> <list list-type="order">
<list-item> <list-item>
<p>item one</p> <p>item one</p>
</list-item> </list-item>
<list-item> <list-item>
<p>item two</p> <p>item two</p>
</list-item> </list-item>
</list> </list>
<p>Nested block quotes:</p> <p>Nested block quotes:</p>
<p specific-use="wrapper"> <disp-quote>
<disp-quote> <p>nested</p>
<p>nested</p>
</disp-quote>
</p>
<p specific-use="wrapper">
<disp-quote>
<p>nested</p>
</disp-quote>
</p>
</disp-quote> </disp-quote>
</p> <disp-quote>
<p>nested</p>
</disp-quote>
</disp-quote>
<p>This should not be a block quote: 2 &gt; 1.</p> <p>This should not be a block quote: 2 &gt; 1.</p>
<p>And a following paragraph.</p> <p>And a following paragraph.</p>
</sec> </sec>
@ -837,12 +829,10 @@ These should not be escaped: \$ \\ \&gt; \[ \{</preformat>
</list-item> </list-item>
</list> </list>
<p>An e-mail address: <email>nobody@nowhere.net</email></p> <p>An e-mail address: <email>nobody@nowhere.net</email></p>
<p specific-use="wrapper"> <disp-quote>
<disp-quote> <p>Blockquoted:
<p>Blockquoted: <ext-link ext-link-type="uri" xlink:href="http://example.com/">http://example.com/</ext-link></p>
<ext-link ext-link-type="uri" xlink:href="http://example.com/">http://example.com/</ext-link></p> </disp-quote>
</disp-quote>
</p>
<p>Auto-links should not occur here: <p>Auto-links should not occur here:
<monospace>&lt;http://example.com/&gt;</monospace></p> <monospace>&lt;http://example.com/&gt;</monospace></p>
<preformat>or here: &lt;http://example.com/&gt;</preformat> <preformat>or here: &lt;http://example.com/&gt;</preformat>
@ -866,11 +856,9 @@ These should not be escaped: \$ \\ \&gt; \[ \{</preformat>
<italic>not</italic> be a footnote reference, because it contains a <italic>not</italic> be a footnote reference, because it contains a
space.[^my note] Here is an inline space.[^my note] Here is an inline
note.<xref ref-type="fn" rid="fn3">3</xref></p> note.<xref ref-type="fn" rid="fn3">3</xref></p>
<p specific-use="wrapper"> <disp-quote>
<disp-quote> <p>Notes can go in quotes.<xref ref-type="fn" rid="fn4">4</xref></p>
<p>Notes can go in quotes.<xref ref-type="fn" rid="fn4">4</xref></p> </disp-quote>
</disp-quote>
</p>
<list list-type="order"> <list list-type="order">
<list-item> <list-item>
<p>And in list items.<xref ref-type="fn" rid="fn5">5</xref></p> <p>And in list items.<xref ref-type="fn" rid="fn5">5</xref></p>

View file

@ -67,43 +67,39 @@ Grubers markdown test suite.</p>
<sec id="block-quotes"> <sec id="block-quotes">
<title>Block Quotes</title> <title>Block Quotes</title>
<p>E-mail style:</p> <p>E-mail style:</p>
<p specific-use="wrapper"> <disp-quote>
<disp-quote> <p>This is a block quote. It is pretty short.</p>
<p>This is a block quote. It is pretty short.</p> </disp-quote>
</disp-quote> <disp-quote>
</p> <p>Code in a block quote:</p>
<p specific-use="wrapper"> <p specific-use="wrapper">
<disp-quote> <preformat>sub status {
<p>Code in a block quote:</p>
<p specific-use="wrapper">
<preformat>sub status {
print &quot;working&quot;; print &quot;working&quot;;
}</preformat> }</preformat>
</p> </p>
<p>A list:</p> <p>A list:</p>
<p specific-use="wrapper"> <p specific-use="wrapper">
<list list-type="order"> <list list-type="order">
<list-item> <list-item>
<p>item one</p> <p>item one</p>
</list-item> </list-item>
<list-item> <list-item>
<p>item two</p> <p>item two</p>
</list-item> </list-item>
</list> </list>
</p> </p>
<p>Nested block quotes:</p> <p>Nested block quotes:</p>
<p specific-use="wrapper"> <p specific-use="wrapper">
<disp-quote> <disp-quote>
<p>nested</p> <p>nested</p>
</disp-quote> </disp-quote>
</p> </p>
<p specific-use="wrapper"> <p specific-use="wrapper">
<disp-quote> <disp-quote>
<p>nested</p> <p>nested</p>
</disp-quote> </disp-quote>
</p> </p>
</disp-quote> </disp-quote>
</p>
<p>This should not be a block quote: 2 &gt; 1.</p> <p>This should not be a block quote: 2 &gt; 1.</p>
<p>And a following paragraph.</p> <p>And a following paragraph.</p>
</sec> </sec>
@ -817,12 +813,10 @@ These should not be escaped: \$ \\ \&gt; \[ \{</preformat>
</list-item> </list-item>
</list> </list>
<p>An e-mail address: <email>nobody@nowhere.net</email></p> <p>An e-mail address: <email>nobody@nowhere.net</email></p>
<p specific-use="wrapper"> <disp-quote>
<disp-quote> <p>Blockquoted:
<p>Blockquoted: <ext-link ext-link-type="uri" xlink:href="http://example.com/">http://example.com/</ext-link></p>
<ext-link ext-link-type="uri" xlink:href="http://example.com/">http://example.com/</ext-link></p> </disp-quote>
</disp-quote>
</p>
<p>Auto-links should not occur here: <p>Auto-links should not occur here:
<monospace>&lt;http://example.com/&gt;</monospace></p> <monospace>&lt;http://example.com/&gt;</monospace></p>
<preformat>or here: &lt;http://example.com/&gt;</preformat> <preformat>or here: &lt;http://example.com/&gt;</preformat>
@ -860,13 +854,11 @@ These should not be escaped: \$ \\ \&gt; \[ \{</preformat>
and <monospace>]</monospace> verbatim characters, as well as [bracketed and <monospace>]</monospace> verbatim characters, as well as [bracketed
text].</p> text].</p>
</fn></p> </fn></p>
<p specific-use="wrapper"> <disp-quote>
<disp-quote> <p>Notes can go in quotes.<fn>
<p>Notes can go in quotes.<fn> <p>In quote.</p>
<p>In quote.</p> </fn></p>
</fn></p> </disp-quote>
</disp-quote>
</p>
<list list-type="order"> <list list-type="order">
<list-item> <list-item>
<p>And in list items.<fn> <p>And in list items.<fn>

View file

@ -78,39 +78,31 @@ Grubers markdown test suite.</p>
<sec id="block-quotes"> <sec id="block-quotes">
<title>Block Quotes</title> <title>Block Quotes</title>
<p>E-mail style:</p> <p>E-mail style:</p>
<p specific-use="wrapper"> <disp-quote>
<disp-quote> <p>This is a block quote. It is pretty short.</p>
<p>This is a block quote. It is pretty short.</p> </disp-quote>
</disp-quote> <disp-quote>
</p> <p>Code in a block quote:</p>
<p specific-use="wrapper"> <preformat>sub status {
<disp-quote>
<p>Code in a block quote:</p>
<preformat>sub status {
print &quot;working&quot;; print &quot;working&quot;;
}</preformat> }</preformat>
<p>A list:</p> <p>A list:</p>
<list list-type="order"> <list list-type="order">
<list-item> <list-item>
<p>item one</p> <p>item one</p>
</list-item> </list-item>
<list-item> <list-item>
<p>item two</p> <p>item two</p>
</list-item> </list-item>
</list> </list>
<p>Nested block quotes:</p> <p>Nested block quotes:</p>
<p specific-use="wrapper"> <disp-quote>
<disp-quote> <p>nested</p>
<p>nested</p>
</disp-quote>
</p>
<p specific-use="wrapper">
<disp-quote>
<p>nested</p>
</disp-quote>
</p>
</disp-quote> </disp-quote>
</p> <disp-quote>
<p>nested</p>
</disp-quote>
</disp-quote>
<p>This should not be a block quote: 2 &gt; 1.</p> <p>This should not be a block quote: 2 &gt; 1.</p>
<p>And a following paragraph.</p> <p>And a following paragraph.</p>
</sec> </sec>
@ -837,12 +829,10 @@ These should not be escaped: \$ \\ \&gt; \[ \{</preformat>
</list-item> </list-item>
</list> </list>
<p>An e-mail address: <email>nobody@nowhere.net</email></p> <p>An e-mail address: <email>nobody@nowhere.net</email></p>
<p specific-use="wrapper"> <disp-quote>
<disp-quote> <p>Blockquoted:
<p>Blockquoted: <ext-link ext-link-type="uri" xlink:href="http://example.com/">http://example.com/</ext-link></p>
<ext-link ext-link-type="uri" xlink:href="http://example.com/">http://example.com/</ext-link></p> </disp-quote>
</disp-quote>
</p>
<p>Auto-links should not occur here: <p>Auto-links should not occur here:
<monospace>&lt;http://example.com/&gt;</monospace></p> <monospace>&lt;http://example.com/&gt;</monospace></p>
<preformat>or here: &lt;http://example.com/&gt;</preformat> <preformat>or here: &lt;http://example.com/&gt;</preformat>
@ -866,11 +856,9 @@ These should not be escaped: \$ \\ \&gt; \[ \{</preformat>
<italic>not</italic> be a footnote reference, because it contains a <italic>not</italic> be a footnote reference, because it contains a
space.[^my note] Here is an inline space.[^my note] Here is an inline
note.<xref ref-type="fn" rid="fn3">3</xref></p> note.<xref ref-type="fn" rid="fn3">3</xref></p>
<p specific-use="wrapper"> <disp-quote>
<disp-quote> <p>Notes can go in quotes.<xref ref-type="fn" rid="fn4">4</xref></p>
<p>Notes can go in quotes.<xref ref-type="fn" rid="fn4">4</xref></p> </disp-quote>
</disp-quote>
</p>
<list list-type="order"> <list list-type="order">
<list-item> <list-item>
<p>And in list items.<xref ref-type="fn" rid="fn5">5</xref></p> <p>And in list items.<xref ref-type="fn" rid="fn5">5</xref></p>