Remove extraneous, significant whitespace in JATS writer output (#4335)

This patch fixes some cases where the JATS writer was introducing
semantically significant whitespace by indenting and wrapping tags.
Note that the JATS spec has a content model for `<p>` tags of `(#PCDATA | ...`.
Any tag where `#PCDATA` children are possible should not have any
indentation. The same is true for `<th>`, `<td>`, `<term>`, `<label>`.
This commit is contained in:
Nokome Bentley 2018-03-06 06:44:34 +13:00 committed by John MacFarlane
parent 475f46fa7c
commit 7d193b2aad
6 changed files with 831 additions and 2240 deletions

View file

@ -28,7 +28,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of 'Pandoc' documents to JATS XML.
Reference:
https://jats.nlm.nih.gov/publishing/tag-library/1.1d3/element/mml-math.html
https://jats.nlm.nih.gov/publishing/tag-library
-}
module Text.Pandoc.Writers.JATS ( writeJATS ) where
import Control.Monad.Reader
@ -139,7 +139,7 @@ deflistItemToJATS opts term defs = do
term' <- inlinesToJATS opts term
def' <- blocksToJATS opts $ concatMap (map plainToPara) defs
return $ inTagsIndented "def-item" $
inTagsIndented "term" term' $$
inTagsSimple "term" term' $$
inTagsIndented "def" def'
-- | Convert a list of lists of blocks to a list of JATS list items.
@ -156,7 +156,7 @@ listItemToJATS :: PandocMonad m
listItemToJATS opts mbmarker item = do
contents <- blocksToJATS opts item
return $ inTagsIndented "list-item" $
maybe empty (\lbl -> inTagsIndented "label" (text lbl)) mbmarker
maybe empty (\lbl -> inTagsSimple "label" (text lbl)) mbmarker
$$ contents
imageMimeType :: String -> [(String, String)] -> (String, String)
@ -250,7 +250,7 @@ blockToJATS _ (Para [Image (ident,_,kvs) _ (src, tit)]) = do
"xlink:type"]]
return $ selfClosingTag "graphic" attr
blockToJATS opts (Para lst) =
inTagsIndented "p" <$> inlinesToJATS opts lst
inTagsSimple "p" <$> inlinesToJATS opts lst
blockToJATS opts (LineBlock lns) =
blockToJATS opts $ linesToPara lns
blockToJATS opts (BlockQuote blocks) =
@ -326,10 +326,10 @@ tableItemToJATS :: PandocMonad m
-> [Block]
-> JATS m Doc
tableItemToJATS opts isHeader [Plain item] =
inTags True (if isHeader then "th" else "td") [] <$>
inTags False (if isHeader then "th" else "td") [] <$>
inlinesToJATS opts item
tableItemToJATS opts isHeader item =
(inTags True (if isHeader then "th" else "td") [] . vcat) <$>
(inTags False (if isHeader then "th" else "td") [] . vcat) <$>
mapM (blockToJATS opts) item
-- | Convert a list of inline elements to JATS.

View file

@ -30,8 +30,8 @@ infix 4 =:
tests :: [TestTree]
tests = [ testGroup "inline code"
[ "basic" =: code "@&" =?> "<p>\n <monospace>@&amp;</monospace>\n</p>"
, "lang" =: codeWith ("", ["c"], []) "@&" =?> "<p>\n <code language=\"c\">@&amp;</code>\n</p>"
[ "basic" =: code "@&" =?> "<p><monospace>@&amp;</monospace></p>"
, "lang" =: codeWith ("", ["c"], []) "@&" =?> "<p><code language=\"c\">@&amp;</code></p>"
]
, testGroup "block code"
[ "basic" =: codeBlock "@&" =?> "<preformat>@&amp;</preformat>"
@ -44,7 +44,7 @@ tests = [ testGroup "inline code"
]
, testGroup "inlines"
[ "Emphasis" =: emph "emphasized"
=?> "<p>\n <italic>emphasized</italic>\n</p>"
=?> "<p><italic>emphasized</italic></p>"
]
, "bullet list" =: bulletList [ plain $ text "first"
, plain $ text "second"
@ -52,19 +52,13 @@ tests = [ testGroup "inline code"
]
=?> "<list list-type=\"bullet\">\n\
\ <list-item>\n\
\ <p>\n\
\ first\n\
\ </p>\n\
\ <p>first</p>\n\
\ </list-item>\n\
\ <list-item>\n\
\ <p>\n\
\ second\n\
\ </p>\n\
\ <p>second</p>\n\
\ </list-item>\n\
\ <list-item>\n\
\ <p>\n\
\ third\n\
\ </p>\n\
\ <p>third</p>\n\
\ </list-item>\n\
\</list>"
, testGroup "definition lists"
@ -72,24 +66,18 @@ tests = [ testGroup "inline code"
[plain (text "hi there")])] =?>
"<def-list>\n\
\ <def-item>\n\
\ <term>\n\
\ <xref alt=\"testing\" rid=\"go\">testing</xref>\n\
\ </term>\n\
\ <term><xref alt=\"testing\" rid=\"go\">testing</xref></term>\n\
\ <def>\n\
\ <p>\n\
\ hi there\n\
\ </p>\n\
\ <p>hi there</p>\n\
\ </def>\n\
\ </def-item>\n\
\</def-list>"
]
, testGroup "math"
[ "escape |" =: para (math "\\sigma|_{\\{x\\}}") =?>
"<p>\n\
\ <inline-formula><alternatives>\n\
\ <tex-math><![CDATA[\\sigma|_{\\{x\\}}]]></tex-math>\n\
\ <mml:math display=\"inline\" xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"><mml:mrow><mml:mi>σ</mml:mi><mml:msub><mml:mo stretchy=\"false\" form=\"prefix\">|</mml:mo><mml:mrow><mml:mo stretchy=\"false\" form=\"prefix\">{</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy=\"false\" form=\"postfix\">}</mml:mo></mml:mrow></mml:msub></mml:mrow></mml:math></alternatives></inline-formula>\n\
\</p>"
"<p><inline-formula><alternatives>\n\
\<tex-math><![CDATA[\\sigma|_{\\{x\\}}]]></tex-math>\n\
\<mml:math display=\"inline\" xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"><mml:mrow><mml:mi>σ</mml:mi><mml:msub><mml:mo stretchy=\"false\" form=\"prefix\">|</mml:mo><mml:mrow><mml:mo stretchy=\"false\" form=\"prefix\">{</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy=\"false\" form=\"postfix\">}</mml:mo></mml:mrow></mml:msub></mml:mrow></mml:math></alternatives></inline-formula></p>"
]
, testGroup "headers"
[ "unnumbered header" =:
@ -97,9 +85,7 @@ tests = [ testGroup "inline code"
(text "Header 1" <> note (plain $ text "note")) =?>
"<sec id=\"foo\">\n\
\ <title>Header 1<fn>\n\
\ <p>\n\
\ note\n\
\ </p>\n\
\ <p>note</p>\n\
\ </fn></title>\n\
\</sec>"
, "unnumbered sub header" =:

View file

@ -1,5 +1,5 @@
Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"]]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]})
[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",SoftBreak,Str "Gruber's",Space,Str "markdown",Space,Str "test",Space,Str "suite."]
[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber's",Space,Str "markdown",Space,Str "test",Space,Str "suite."]
,Header 1 ("headers",[],[]) [Str "Headers"]
,Header 2 ("level-2-with-an-embedded-link",[],[]) [Str "Level",Space,Str "2",Space,Str "with",Space,Str "an",SoftBreak,Link ("",[],[]) [Str "embedded",SoftBreak,Str "link"] ("/url","")]
,Header 3 ("level-3-with-emphasis",[],[]) [Str "Level",Space,Str "3",Space,Str "with",Space,Emph [Str "emphasis"]]
@ -13,7 +13,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa
,Para [Str "with",Space,Str "no",Space,Str "blank",Space,Str "line"]
,Header 1 ("paragraphs",[],[]) [Str "Paragraphs"]
,Para [Str "Here's",Space,Str "a",Space,Str "regular",Space,Str "paragraph."]
,Para [Str "In",Space,Str "Markdown",Space,Str "1.0.0",Space,Str "and",Space,Str "earlier.",Space,Str "Version",Space,Str "8.",Space,Str "This",Space,Str "line",Space,Str "turns",Space,Str "into",Space,Str "a",SoftBreak,Str "list",Space,Str "item.",Space,Str "Because",Space,Str "a",Space,Str "hard-wrapped",Space,Str "line",Space,Str "in",Space,Str "the",Space,Str "middle",Space,Str "of",Space,Str "a",Space,Str "paragraph",SoftBreak,Str "looked",Space,Str "like",Space,Str "a",Space,Str "list",Space,Str "item."]
,Para [Str "In",Space,Str "Markdown",Space,Str "1.0.0",Space,Str "and",Space,Str "earlier.",Space,Str "Version",Space,Str "8.",Space,Str "This",Space,Str "line",Space,Str "turns",Space,Str "into",Space,Str "a",Space,Str "list",Space,Str "item.",Space,Str "Because",Space,Str "a",Space,Str "hard-wrapped",Space,Str "line",Space,Str "in",Space,Str "the",Space,Str "middle",Space,Str "of",Space,Str "a",Space,Str "paragraph",Space,Str "looked",Space,Str "like",Space,Str "a",Space,Str "list",Space,Str "item."]
,Para [Str "Here's",Space,Str "one",Space,Str "with",Space,Str "a",Space,Str "bullet.",Space,Str "*",Space,Str "criminey."]
,Para [Str "There",Space,Str "should",Space,Str "be",Space,Str "a",Space,Str "hard",Space,Str "line",Space,Str "break",LineBreak,Str "here."]
,Header 1 ("block-quotes",[],[]) [Str "Block",Space,Str "Quotes"]

File diff suppressed because it is too large Load diff

View file

@ -1,11 +1,7 @@
<p>
Simple table with caption:
</p>
<p>Simple table with caption:</p>
<table-wrap>
<caption>
<p>
Demonstration of simple table syntax.
</p>
<p>Demonstration of simple table syntax.</p>
</caption>
<table>
<col align="right" />
@ -14,69 +10,35 @@
<col align="left" />
<thead>
<tr>
<th>
Right
</th>
<th>
Left
</th>
<th>
Center
</th>
<th>
Default
</th>
<th>Right</th>
<th>Left</th>
<th>Center</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr>
<td>
12
</td>
<td>
12
</td>
<td>
12
</td>
<td>
12
</td>
<td>12</td>
<td>12</td>
<td>12</td>
<td>12</td>
</tr>
<tr>
<td>
123
</td>
<td>
123
</td>
<td>
123
</td>
<td>
123
</td>
<td>123</td>
<td>123</td>
<td>123</td>
<td>123</td>
</tr>
<tr>
<td>
1
</td>
<td>
1
</td>
<td>
1
</td>
<td>
1
</td>
<td>1</td>
<td>1</td>
<td>1</td>
<td>1</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>
Simple table without caption:
</p>
<p>Simple table without caption:</p>
<table>
<col align="right" />
<col align="left" />
@ -84,73 +46,37 @@
<col align="left" />
<thead>
<tr>
<th>
Right
</th>
<th>
Left
</th>
<th>
Center
</th>
<th>
Default
</th>
<th>Right</th>
<th>Left</th>
<th>Center</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr>
<td>
12
</td>
<td>
12
</td>
<td>
12
</td>
<td>
12
</td>
<td>12</td>
<td>12</td>
<td>12</td>
<td>12</td>
</tr>
<tr>
<td>
123
</td>
<td>
123
</td>
<td>
123
</td>
<td>
123
</td>
<td>123</td>
<td>123</td>
<td>123</td>
<td>123</td>
</tr>
<tr>
<td>
1
</td>
<td>
1
</td>
<td>
1
</td>
<td>
1
</td>
<td>1</td>
<td>1</td>
<td>1</td>
<td>1</td>
</tr>
</tbody>
</table>
<p>
Simple table indented two spaces:
</p>
<p>Simple table indented two spaces:</p>
<table-wrap>
<caption>
<p>
Demonstration of simple table syntax.
</p>
<p>Demonstration of simple table syntax.</p>
</caption>
<table>
<col align="right" />
@ -159,74 +85,38 @@
<col align="left" />
<thead>
<tr>
<th>
Right
</th>
<th>
Left
</th>
<th>
Center
</th>
<th>
Default
</th>
<th>Right</th>
<th>Left</th>
<th>Center</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr>
<td>
12
</td>
<td>
12
</td>
<td>
12
</td>
<td>
12
</td>
<td>12</td>
<td>12</td>
<td>12</td>
<td>12</td>
</tr>
<tr>
<td>
123
</td>
<td>
123
</td>
<td>
123
</td>
<td>
123
</td>
<td>123</td>
<td>123</td>
<td>123</td>
<td>123</td>
</tr>
<tr>
<td>
1
</td>
<td>
1
</td>
<td>
1
</td>
<td>
1
</td>
<td>1</td>
<td>1</td>
<td>1</td>
<td>1</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>
Multiline table with caption:
</p>
<p>Multiline table with caption:</p>
<table-wrap>
<caption>
<p>
Here’s the caption. It may span multiple lines.
</p>
<p>Here’s the caption. It may span multiple lines.</p>
</caption>
<table>
<col width="15*" align="center" />
@ -235,55 +125,29 @@
<col width="33*" align="left" />
<thead>
<tr>
<th>
Centered Header
</th>
<th>
Left Aligned
</th>
<th>
Right Aligned
</th>
<th>
Default aligned
</th>
<th>Centered Header</th>
<th>Left Aligned</th>
<th>Right Aligned</th>
<th>Default aligned</th>
</tr>
</thead>
<tbody>
<tr>
<td>
First
</td>
<td>
row
</td>
<td>
12.0
</td>
<td>
Example of a row that spans multiple lines.
</td>
<td>First</td>
<td>row</td>
<td>12.0</td>
<td>Example of a row that spans multiple lines.</td>
</tr>
<tr>
<td>
Second
</td>
<td>
row
</td>
<td>
5.0
</td>
<td>
Here’s another one. Note the blank line between rows.
</td>
<td>Second</td>
<td>row</td>
<td>5.0</td>
<td>Here’s another one. Note the blank line between rows.</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>
Multiline table without caption:
</p>
<p>Multiline table without caption:</p>
<table>
<col width="15*" align="center" />
<col width="13*" align="left" />
@ -291,54 +155,28 @@
<col width="33*" align="left" />
<thead>
<tr>
<th>
Centered Header
</th>
<th>
Left Aligned
</th>
<th>
Right Aligned
</th>
<th>
Default aligned
</th>
<th>Centered Header</th>
<th>Left Aligned</th>
<th>Right Aligned</th>
<th>Default aligned</th>
</tr>
</thead>
<tbody>
<tr>
<td>
First
</td>
<td>
row
</td>
<td>
12.0
</td>
<td>
Example of a row that spans multiple lines.
</td>
<td>First</td>
<td>row</td>
<td>12.0</td>
<td>Example of a row that spans multiple lines.</td>
</tr>
<tr>
<td>
Second
</td>
<td>
row
</td>
<td>
5.0
</td>
<td>
Here’s another one. Note the blank line between rows.
</td>
<td>Second</td>
<td>row</td>
<td>5.0</td>
<td>Here’s another one. Note the blank line between rows.</td>
</tr>
</tbody>
</table>
<p>
Table without column headers:
</p>
<p>Table without column headers:</p>
<table>
<col align="right" />
<col align="left" />
@ -346,52 +184,26 @@
<col align="right" />
<tbody>
<tr>
<td>
12
</td>
<td>
12
</td>
<td>
12
</td>
<td>
12
</td>
<td>12</td>
<td>12</td>
<td>12</td>
<td>12</td>
</tr>
<tr>
<td>
123
</td>
<td>
123
</td>
<td>
123
</td>
<td>
123
</td>
<td>123</td>
<td>123</td>
<td>123</td>
<td>123</td>
</tr>
<tr>
<td>
1
</td>
<td>
1
</td>
<td>
1
</td>
<td>
1
</td>
<td>1</td>
<td>1</td>
<td>1</td>
<td>1</td>
</tr>
</tbody>
</table>
<p>
Multiline table without column headers:
</p>
<p>Multiline table without column headers:</p>
<table>
<col width="15*" align="center" />
<col width="13*" align="left" />
@ -399,32 +211,16 @@
<col width="33*" align="left" />
<tbody>
<tr>
<td>
First
</td>
<td>
row
</td>
<td>
12.0
</td>
<td>
Example of a row that spans multiple lines.
</td>
<td>First</td>
<td>row</td>
<td>12.0</td>
<td>Example of a row that spans multiple lines.</td>
</tr>
<tr>
<td>
Second
</td>
<td>
row
</td>
<td>
5.0
</td>
<td>
Here’s another one. Note the blank line between rows.
</td>
<td>Second</td>
<td>row</td>
<td>5.0</td>
<td>Here’s another one. Note the blank line between rows.</td>
</tr>
</tbody>
</table>

File diff suppressed because it is too large Load diff