JATS writer: reduce unnecessary use of <p> elements for wrapping

The `<p>` element is used for wrapping in cases were the contents would
otherwise not be allowed in a certain context. Unnecessary wrapping is
avoided, especially around quotes (`<disp-quote>` elements).

Closes: #7227
This commit is contained in:
Albert Krewinkel 2021-04-16 22:13:29 +02:00
parent 2e7fee9c3c
commit 5f79a66ed6
No known key found for this signature in database
GPG key ID: 388DC0B21F631124
7 changed files with 168 additions and 146 deletions

View file

@ -1,6 +1,7 @@
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE ViewPatterns #-}
{- |
Module : Text.Pandoc.Writers.JATS
Copyright : 2017-2021 John MacFarlane
@ -80,7 +81,7 @@ writeJats tagSet opts d = do
let environment = JATSEnv
{ jatsTagSet = tagSet
, jatsInlinesWriter = inlinesToJATS
, jatsBlockWriter = blockToJATS
, jatsBlockWriter = wrappedBlocksToJATS
, jatsReferences = refs
}
let initialState = JATSState { jatsNotes = [] }
@ -162,11 +163,9 @@ wrappedBlocksToJATS needsWrap opts =
wrappedBlockToJATS b = do
inner <- blockToJATS opts b
return $
if needsWrap b || isBlockQuote b -- see #7041
if needsWrap b
then inTags True "p" [("specific-use","wrapper")] inner
else inner
isBlockQuote (BlockQuote _) = True
isBlockQuote _ = False
-- | Auxiliary function to convert Plain block to Para.
plainToPara :: Block -> Block
@ -324,10 +323,13 @@ blockToJATS opts (LineBlock lns) =
blockToJATS opts $ linesToPara lns
blockToJATS opts (BlockQuote blocks) = do
tagSet <- asks jatsTagSet
let blocksToJats' = if tagSet == TagSetArticleAuthoring
then wrappedBlocksToJATS (not . isPara)
else blocksToJATS
inTagsIndented "disp-quote" <$> blocksToJats' opts blocks
let needsWrap = if tagSet == TagSetArticleAuthoring
then not . isPara
else \case
Header{} -> True
HorizontalRule -> True
_ -> False
inTagsIndented "disp-quote" <$> wrappedBlocksToJATS needsWrap opts blocks
blockToJATS _ (CodeBlock a str) = return $
inTags False tag attr (flush (text (T.unpack $ escapeStringForXML str)))
where (lang, attr) = codeAttr a

View file

@ -34,13 +34,19 @@ tableToJATS :: PandocMonad m
-> JATS m (Doc Text)
tableToJATS opts (Ann.Table attr caption colspecs thead tbodies tfoot) = do
let (Caption _maybeShortCaption captionBlocks) = caption
-- Only paragraphs are allowed in captions, all other blocks must be
-- wrapped in @<p>@ elements.
let needsWrapping = \case
Plain{} -> False
Para{} -> False
_ -> True
tbl <- captionlessTable opts attr colspecs thead tbodies tfoot
captionDoc <- if null captionBlocks
then return empty
else do
blockToJATS <- asks jatsBlockWriter
inTagsIndented "caption" . vcat <$>
mapM (blockToJATS opts) captionBlocks
inTagsIndented "caption" <$>
blockToJATS needsWrapping opts captionBlocks
return $ inTags True "table-wrap" [] $ captionDoc $$ tbl
captionlessTable :: PandocMonad m
@ -230,7 +236,7 @@ tableCellToJats opts ctype colAlign (Cell attr align rowspan colspan item) = do
inlinesToJats <- asks jatsInlinesWriter
let cellContents = \case
[Plain inlines] -> inlinesToJats opts inlines
blocks -> vcat <$> mapM (blockToJats opts) blocks
blocks -> blockToJats needsWrapInCell opts blocks
let tag' = case ctype of
BodyCell -> "td"
HeaderCell -> "th"
@ -246,3 +252,17 @@ tableCellToJats opts ctype colAlign (Cell attr align rowspan colspan item) = do
. maybeCons (colspanAttrib colspan)
$ toAttribs attr validAttribs
inTags False tag' attribs <$> cellContents item
-- | Whether the JATS produced from this block should be wrapped in a
-- @<p>@ element when put directly below a @<td>@ element.
needsWrapInCell :: Block -> Bool
needsWrapInCell = \case
Plain{} -> False -- should be unwrapped anyway
Para{} -> False
BulletList{} -> False
OrderedList{} -> False
DefinitionList{} -> False
HorizontalRule -> False
CodeBlock{} -> False
RawBlock{} -> False -- responsibility of the user
_ -> True

View file

@ -37,11 +37,20 @@ newtype JATSState = JATSState
{ jatsNotes :: [(Int, Doc Text)]
}
-- | Environment containing all information relevant for rendering.
data JATSEnv m = JATSEnv
{ jatsTagSet :: JATSTagSet
{ jatsTagSet :: JATSTagSet -- ^ The tag set that's being ouput
, jatsBlockWriter :: (Block -> Bool)
-> WriterOptions -> [Block] -> JATS m (Doc Text)
-- ^ Converts a block list to JATS, wrapping top-level blocks into a
-- @<p>@ element if the property evaluates to @True@.
-- See #7227.
, jatsInlinesWriter :: WriterOptions -> [Inline] -> JATS m (Doc Text)
, jatsBlockWriter :: WriterOptions -> Block -> JATS m (Doc Text)
, jatsReferences :: [Reference Inlines]
-- ^ Converts an inline list to JATS.
, jatsReferences :: [Reference Inlines] -- ^ List of references
}
-- | JATS writer type

23
test/command/7041.md Normal file
View file

@ -0,0 +1,23 @@
```
% pandoc -f html -t jats
<table>
<tr><td><blockquote>Fly, you fools!</blockquote></td></tr>
</table>
^D
<table-wrap>
<table>
<colgroup>
<col width="100%" />
</colgroup>
<tbody>
<tr>
<td><p specific-use="wrapper">
<disp-quote>
<p>Fly, you fools!</p>
</disp-quote>
</p></td>
</tr>
</tbody>
</table>
</table-wrap>
```

View file

@ -78,39 +78,31 @@ Grubers markdown test suite.</p>
<sec id="block-quotes">
<title>Block Quotes</title>
<p>E-mail style:</p>
<p specific-use="wrapper">
<disp-quote>
<p>This is a block quote. It is pretty short.</p>
</disp-quote>
</p>
<p specific-use="wrapper">
<disp-quote>
<p>Code in a block quote:</p>
<preformat>sub status {
<disp-quote>
<p>This is a block quote. It is pretty short.</p>
</disp-quote>
<disp-quote>
<p>Code in a block quote:</p>
<preformat>sub status {
print &quot;working&quot;;
}</preformat>
<p>A list:</p>
<list list-type="order">
<list-item>
<p>item one</p>
</list-item>
<list-item>
<p>item two</p>
</list-item>
</list>
<p>Nested block quotes:</p>
<p specific-use="wrapper">
<disp-quote>
<p>nested</p>
</disp-quote>
</p>
<p specific-use="wrapper">
<disp-quote>
<p>nested</p>
</disp-quote>
</p>
<p>A list:</p>
<list list-type="order">
<list-item>
<p>item one</p>
</list-item>
<list-item>
<p>item two</p>
</list-item>
</list>
<p>Nested block quotes:</p>
<disp-quote>
<p>nested</p>
</disp-quote>
</p>
<disp-quote>
<p>nested</p>
</disp-quote>
</disp-quote>
<p>This should not be a block quote: 2 &gt; 1.</p>
<p>And a following paragraph.</p>
</sec>
@ -837,12 +829,10 @@ These should not be escaped: \$ \\ \&gt; \[ \{</preformat>
</list-item>
</list>
<p>An e-mail address: <email>nobody@nowhere.net</email></p>
<p specific-use="wrapper">
<disp-quote>
<p>Blockquoted:
<ext-link ext-link-type="uri" xlink:href="http://example.com/">http://example.com/</ext-link></p>
</disp-quote>
</p>
<disp-quote>
<p>Blockquoted:
<ext-link ext-link-type="uri" xlink:href="http://example.com/">http://example.com/</ext-link></p>
</disp-quote>
<p>Auto-links should not occur here:
<monospace>&lt;http://example.com/&gt;</monospace></p>
<preformat>or here: &lt;http://example.com/&gt;</preformat>
@ -866,11 +856,9 @@ These should not be escaped: \$ \\ \&gt; \[ \{</preformat>
<italic>not</italic> be a footnote reference, because it contains a
space.[^my note] Here is an inline
note.<xref ref-type="fn" rid="fn3">3</xref></p>
<p specific-use="wrapper">
<disp-quote>
<p>Notes can go in quotes.<xref ref-type="fn" rid="fn4">4</xref></p>
</disp-quote>
</p>
<disp-quote>
<p>Notes can go in quotes.<xref ref-type="fn" rid="fn4">4</xref></p>
</disp-quote>
<list list-type="order">
<list-item>
<p>And in list items.<xref ref-type="fn" rid="fn5">5</xref></p>

View file

@ -67,43 +67,39 @@ Grubers markdown test suite.</p>
<sec id="block-quotes">
<title>Block Quotes</title>
<p>E-mail style:</p>
<p specific-use="wrapper">
<disp-quote>
<p>This is a block quote. It is pretty short.</p>
</disp-quote>
</p>
<p specific-use="wrapper">
<disp-quote>
<p>Code in a block quote:</p>
<p specific-use="wrapper">
<preformat>sub status {
<disp-quote>
<p>This is a block quote. It is pretty short.</p>
</disp-quote>
<disp-quote>
<p>Code in a block quote:</p>
<p specific-use="wrapper">
<preformat>sub status {
print &quot;working&quot;;
}</preformat>
</p>
<p>A list:</p>
<p specific-use="wrapper">
<list list-type="order">
<list-item>
<p>item one</p>
</list-item>
<list-item>
<p>item two</p>
</list-item>
</list>
</p>
<p>Nested block quotes:</p>
<p specific-use="wrapper">
<disp-quote>
<p>nested</p>
</disp-quote>
</p>
<p specific-use="wrapper">
<disp-quote>
<p>nested</p>
</disp-quote>
</p>
</disp-quote>
</p>
</p>
<p>A list:</p>
<p specific-use="wrapper">
<list list-type="order">
<list-item>
<p>item one</p>
</list-item>
<list-item>
<p>item two</p>
</list-item>
</list>
</p>
<p>Nested block quotes:</p>
<p specific-use="wrapper">
<disp-quote>
<p>nested</p>
</disp-quote>
</p>
<p specific-use="wrapper">
<disp-quote>
<p>nested</p>
</disp-quote>
</p>
</disp-quote>
<p>This should not be a block quote: 2 &gt; 1.</p>
<p>And a following paragraph.</p>
</sec>
@ -817,12 +813,10 @@ These should not be escaped: \$ \\ \&gt; \[ \{</preformat>
</list-item>
</list>
<p>An e-mail address: <email>nobody@nowhere.net</email></p>
<p specific-use="wrapper">
<disp-quote>
<p>Blockquoted:
<ext-link ext-link-type="uri" xlink:href="http://example.com/">http://example.com/</ext-link></p>
</disp-quote>
</p>
<disp-quote>
<p>Blockquoted:
<ext-link ext-link-type="uri" xlink:href="http://example.com/">http://example.com/</ext-link></p>
</disp-quote>
<p>Auto-links should not occur here:
<monospace>&lt;http://example.com/&gt;</monospace></p>
<preformat>or here: &lt;http://example.com/&gt;</preformat>
@ -860,13 +854,11 @@ These should not be escaped: \$ \\ \&gt; \[ \{</preformat>
and <monospace>]</monospace> verbatim characters, as well as [bracketed
text].</p>
</fn></p>
<p specific-use="wrapper">
<disp-quote>
<p>Notes can go in quotes.<fn>
<p>In quote.</p>
</fn></p>
</disp-quote>
</p>
<disp-quote>
<p>Notes can go in quotes.<fn>
<p>In quote.</p>
</fn></p>
</disp-quote>
<list list-type="order">
<list-item>
<p>And in list items.<fn>

View file

@ -78,39 +78,31 @@ Grubers markdown test suite.</p>
<sec id="block-quotes">
<title>Block Quotes</title>
<p>E-mail style:</p>
<p specific-use="wrapper">
<disp-quote>
<p>This is a block quote. It is pretty short.</p>
</disp-quote>
</p>
<p specific-use="wrapper">
<disp-quote>
<p>Code in a block quote:</p>
<preformat>sub status {
<disp-quote>
<p>This is a block quote. It is pretty short.</p>
</disp-quote>
<disp-quote>
<p>Code in a block quote:</p>
<preformat>sub status {
print &quot;working&quot;;
}</preformat>
<p>A list:</p>
<list list-type="order">
<list-item>
<p>item one</p>
</list-item>
<list-item>
<p>item two</p>
</list-item>
</list>
<p>Nested block quotes:</p>
<p specific-use="wrapper">
<disp-quote>
<p>nested</p>
</disp-quote>
</p>
<p specific-use="wrapper">
<disp-quote>
<p>nested</p>
</disp-quote>
</p>
<p>A list:</p>
<list list-type="order">
<list-item>
<p>item one</p>
</list-item>
<list-item>
<p>item two</p>
</list-item>
</list>
<p>Nested block quotes:</p>
<disp-quote>
<p>nested</p>
</disp-quote>
</p>
<disp-quote>
<p>nested</p>
</disp-quote>
</disp-quote>
<p>This should not be a block quote: 2 &gt; 1.</p>
<p>And a following paragraph.</p>
</sec>
@ -837,12 +829,10 @@ These should not be escaped: \$ \\ \&gt; \[ \{</preformat>
</list-item>
</list>
<p>An e-mail address: <email>nobody@nowhere.net</email></p>
<p specific-use="wrapper">
<disp-quote>
<p>Blockquoted:
<ext-link ext-link-type="uri" xlink:href="http://example.com/">http://example.com/</ext-link></p>
</disp-quote>
</p>
<disp-quote>
<p>Blockquoted:
<ext-link ext-link-type="uri" xlink:href="http://example.com/">http://example.com/</ext-link></p>
</disp-quote>
<p>Auto-links should not occur here:
<monospace>&lt;http://example.com/&gt;</monospace></p>
<preformat>or here: &lt;http://example.com/&gt;</preformat>
@ -866,11 +856,9 @@ These should not be escaped: \$ \\ \&gt; \[ \{</preformat>
<italic>not</italic> be a footnote reference, because it contains a
space.[^my note] Here is an inline
note.<xref ref-type="fn" rid="fn3">3</xref></p>
<p specific-use="wrapper">
<disp-quote>
<p>Notes can go in quotes.<xref ref-type="fn" rid="fn4">4</xref></p>
</disp-quote>
</p>
<disp-quote>
<p>Notes can go in quotes.<xref ref-type="fn" rid="fn4">4</xref></p>
</disp-quote>
<list list-type="order">
<list-item>
<p>And in list items.<xref ref-type="fn" rid="fn5">5</xref></p>