From f5ea5f0aad1d5000b326ce4c45c92fdfb1a4b5d3 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Thu, 13 Feb 2020 05:36:02 +0100 Subject: [PATCH] Introduce new format variants for JATS (#6067) New formats: - `jats_archiving` for the "Archiving and Interchange Tag Set", - `jats_publishing` for the "Journal Publishing Tag Set", and - `jats_articleauthoring` for the "Article Authoring Tag Set." The "jats" output format is now an alias for "jats_archiving". Closes: #6014 --- MANUAL.txt | 5 +- .../{default.jats => article.jats_publishing} | 9 +- data/templates/default.jats_archiving | 7 + data/templates/default.jats_articleauthoring | 90 ++ data/templates/default.jats_publishing | 7 + pandoc.cabal | 13 +- src/Text/Pandoc/Templates.hs | 1 + src/Text/Pandoc/Writers.hs | 8 +- src/Text/Pandoc/Writers/JATS.hs | 140 ++- test/Tests/Old.hs | 9 +- test/Tests/Writers/JATS.hs | 16 +- test/{tables.jats => tables.jats_archiving} | 0 test/tables.jats_articleauthoring | 226 +++++ test/tables.jats_publishing | 226 +++++ test/{writer.jats => writer.jats_archiving} | 0 test/writer.jats_articleauthoring | 874 +++++++++++++++++ test/writer.jats_publishing | 898 ++++++++++++++++++ 17 files changed, 2474 insertions(+), 55 deletions(-) rename data/templates/{default.jats => article.jats_publishing} (94%) create mode 100644 data/templates/default.jats_archiving create mode 100644 data/templates/default.jats_articleauthoring create mode 100644 data/templates/default.jats_publishing rename test/{tables.jats => tables.jats_archiving} (100%) create mode 100644 test/tables.jats_articleauthoring create mode 100644 test/tables.jats_publishing rename test/{writer.jats => writer.jats_archiving} (100%) create mode 100644 test/writer.jats_articleauthoring create mode 100644 test/writer.jats_publishing diff --git a/MANUAL.txt b/MANUAL.txt index c90ac7b01..4fb744085 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -294,7 +294,10 @@ header when requesting a document from a URL: - `html4` ([XHTML] 1.0 Transitional) - `icml` ([InDesign ICML]) - `ipynb` ([Jupyter notebook]) - - `jats` ([JATS] XML) + - `jats_archiving` ([JATS] XML, Archiving and Interchange Tag Set) + - `jats_articleauthoring` ([JATS] XML, Article Authoring Tag Set) + - `jats_publishing` ([JATS] XML, Journal Publishing Tag Set) + - `jats` (alias for `jats_archiving`) - `jira` ([Jira] wiki markup) - `json` (JSON version of native AST) - `latex` ([LaTeX]) diff --git a/data/templates/default.jats b/data/templates/article.jats_publishing similarity index 94% rename from data/templates/default.jats rename to data/templates/article.jats_publishing index 20560cdc5..ce184c0ed 100644 --- a/data/templates/default.jats +++ b/data/templates/article.jats_publishing @@ -1,9 +1,3 @@ - -$if(xml-stylesheet)$ - -$endif$ - $if(article.type)$
$else$ @@ -164,8 +158,8 @@ $if(copyright.text)$ $copyright.text$ - $endif$ + $endif$ $if(abstract)$ @@ -198,3 +192,4 @@ $back$ $endif$
+ diff --git a/data/templates/default.jats_archiving b/data/templates/default.jats_archiving new file mode 100644 index 000000000..9b99aab12 --- /dev/null +++ b/data/templates/default.jats_archiving @@ -0,0 +1,7 @@ + +$if(xml-stylesheet)$ + +$endif$ + +${ article.jats_publishing() } diff --git a/data/templates/default.jats_articleauthoring b/data/templates/default.jats_articleauthoring new file mode 100644 index 000000000..f86bb2d3b --- /dev/null +++ b/data/templates/default.jats_articleauthoring @@ -0,0 +1,90 @@ + +$if(xml-stylesheet)$ + +$endif$ + +$if(article.type)$ +
+$else$ +
+$endif$ + + +$if(title)$ + +$title$ + +$endif$ +$if(author)$ + +$for(author)$ + +$if(author.orcid)$ +$author.orcid$ +$endif$ +$if(author.surname)$ + +$author.surname$ +$author.given-names$ + +$else$ +$author$ +$endif$ +$if(author.email)$ +$author.email$ +$endif$ +$if(author.aff-id)$ + +$endif$ +$if(author.cor-id)$ +* +$endif$ + +$endfor$ + +$endif$ +$if(copyright)$ + +$if(copyright.statement)$ +$copyright.statement$ +$endif$ +$if(copyright.year)$ +$copyright.year$ +$endif$ +$if(copyright.holder)$ +$copyright.holder$ +$endif$ +$if(copyright.text)$ + +$copyright.text$ + +$endif$ + +$endif$ + +$abstract$ + +$if(tags)$ + +$for(tags)$ +$tags$ +$endfor$ + +$endif$ +$if(article.funding-statement)$ + +$article.funding-statement$ + +$endif$ + + + +$body$ + + +$if(back)$ +$back$ +$endif$ + +
diff --git a/data/templates/default.jats_publishing b/data/templates/default.jats_publishing new file mode 100644 index 000000000..79e9b00d8 --- /dev/null +++ b/data/templates/default.jats_publishing @@ -0,0 +1,7 @@ + +$if(xml-stylesheet)$ + +$endif$ + +${ article.jats_publishing() } diff --git a/pandoc.cabal b/pandoc.cabal index 3b8e84634..95c85bf75 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -46,7 +46,9 @@ data-files: data/templates/default.html5 data/templates/default.docbook4 data/templates/default.docbook5 - data/templates/default.jats + data/templates/default.jats_archiving + data/templates/default.jats_articleauthoring + data/templates/default.jats_publishing data/templates/default.tei data/templates/default.opendocument data/templates/default.icml @@ -79,6 +81,7 @@ data-files: data/templates/default.org data/templates/default.epub2 data/templates/default.epub3 + data/templates/article.jats_publishing -- translations data/translations/*.yaml -- source files for reference.docx @@ -254,7 +257,9 @@ extra-source-files: test/tables.context test/tables.docbook4 test/tables.docbook5 - test/tables.jats + test/tables.jats_archiving + test/tables.jats_articleauthoring + test/tables.jats_publishing test/tables.jira test/tables.dokuwiki test/tables.zimwiki @@ -286,7 +291,9 @@ extra-source-files: test/writer.context test/writer.docbook4 test/writer.docbook5 - test/writer.jats + test/writer.jats_archiving + test/writer.jats_articleauthoring + test/writer.jats_publishing test/writer.jira test/writer.html4 test/writer.html5 diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index f04a73b58..6444728ae 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -90,6 +90,7 @@ getDefaultTemplate writer = do "docbook" -> getDefaultTemplate "docbook5" "epub" -> getDefaultTemplate "epub3" "beamer" -> getDefaultTemplate "latex" + "jats" -> getDefaultTemplate "jats_archiving" "markdown_strict" -> getDefaultTemplate "markdown" "multimarkdown" -> getDefaultTemplate "markdown" "markdown_github" -> getDefaultTemplate "markdown" diff --git a/src/Text/Pandoc/Writers.hs b/src/Text/Pandoc/Writers.hs index 753972855..cdf3ca1c8 100644 --- a/src/Text/Pandoc/Writers.hs +++ b/src/Text/Pandoc/Writers.hs @@ -41,6 +41,9 @@ module Text.Pandoc.Writers , writeHtml5String , writeICML , writeJATS + , writeJatsArchiving + , writeJatsArticleAuthoring + , writeJatsPublishing , writeJSON , writeJira , writeLaTeX @@ -146,7 +149,10 @@ writers = [ ,("docbook" , TextWriter writeDocbook5) ,("docbook4" , TextWriter writeDocbook4) ,("docbook5" , TextWriter writeDocbook5) - ,("jats" , TextWriter writeJATS) + ,("jats" , TextWriter writeJatsArchiving) + ,("jats_articleauthoring", TextWriter writeJatsArticleAuthoring) + ,("jats_publishing" , TextWriter writeJatsPublishing) + ,("jats_archiving" , TextWriter writeJatsArchiving) ,("jira" , TextWriter writeJira) ,("opml" , TextWriter writeOPML) ,("opendocument" , TextWriter writeOpenDocument) diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs index ab95110bf..49ace4636 100644 --- a/src/Text/Pandoc/Writers/JATS.hs +++ b/src/Text/Pandoc/Writers/JATS.hs @@ -4,7 +4,7 @@ {-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.JATS - Copyright : Copyright (C) 2017-2019 John MacFarlane + Copyright : Copyright (C) 2017-2020 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane @@ -15,7 +15,12 @@ Conversion of 'Pandoc' documents to JATS XML. Reference: https://jats.nlm.nih.gov/publishing/tag-library -} -module Text.Pandoc.Writers.JATS ( writeJATS ) where +module Text.Pandoc.Writers.JATS + ( writeJATS + , writeJatsArchiving + , writeJatsPublishing + , writeJatsArticleAuthoring + ) where import Prelude import Control.Monad.Reader import Control.Monad.State @@ -43,19 +48,46 @@ import Text.Pandoc.XML import Text.TeXMath import qualified Text.XML.Light as Xml -data JATSVersion = JATS1_1 - deriving (Eq, Show) +-- | JATS tag set variant +data JATSTagSet + = TagSetArchiving -- ^ Archiving and Interchange Tag Set + | TagSetPublishing -- ^ Journal Publishing Tag Set + | TagSetArticleAuthoring -- ^ Article Authoring Tag Set + deriving (Eq) -data JATSState = JATSState +-- | Internal state used by the writer. +newtype JATSState = JATSState { jatsNotes :: [(Int, Doc Text)] } -type JATS a = StateT JATSState (ReaderT JATSVersion a) +-- | JATS writer type +type JATS a = StateT JATSState (ReaderT JATSTagSet a) +-- | Convert a @'Pandoc'@ document to JATS (Archiving and Interchange +-- Tag Set.) +writeJatsArchiving :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeJatsArchiving = writeJats TagSetArchiving + +-- | Convert a @'Pandoc'@ document to JATS (Journal Publishing Tag Set.) +writeJatsPublishing :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeJatsPublishing = writeJats TagSetPublishing + +-- | Convert a @'Pandoc'@ document to JATS (Archiving and Interchange +-- Tag Set.) +writeJatsArticleAuthoring :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeJatsArticleAuthoring = writeJats TagSetArticleAuthoring + +-- | Alias for @'writeJatsArchiving'@. This function exists for backwards +-- compatibility, but will be deprecated in the future. Use +-- @'writeJatsArchiving'@ instead. writeJATS :: PandocMonad m => WriterOptions -> Pandoc -> m Text -writeJATS opts d = +writeJATS = writeJatsArchiving + +-- | Convert a @'Pandoc'@ document to JATS. +writeJats :: PandocMonad m => JATSTagSet -> WriterOptions -> Pandoc -> m Text +writeJats tagSet opts d = runReaderT (evalStateT (docToJATS opts d) - (JATSState{ jatsNotes = [] })) - JATS1_1 + (JATSState{ jatsNotes = [] })) + tagSet -- | Convert Pandoc document to string in JATS format. docToJATS :: PandocMonad m => WriterOptions -> Pandoc -> JATS m Text @@ -80,7 +112,10 @@ docToJATS opts (Pandoc meta blocks) = do main <- fromBlocks bodyblocks notes <- reverse . map snd <$> gets jatsNotes backs <- fromBlocks backblocks - let fns = if null notes + tagSet <- ask + -- In the "Article Authoring" tag set, occurrence of fn-group elements + -- is restricted to table footers. Footnotes have to be placed inline. + let fns = if null notes || tagSet == TagSetArticleAuthoring then mempty else inTagsIndented "fn-group" $ vcat notes let back = backs $$ fns @@ -116,6 +151,8 @@ docToJATS opts (Pandoc meta blocks) = do blocksToJATS :: PandocMonad m => WriterOptions -> [Block] -> JATS m (Doc Text) blocksToJATS = wrappedBlocksToJATS (const False) +-- | Like @'blocksToJATS'@, but wraps top-level blocks into a @

@ +-- element if the @needsWrap@ predicate evaluates to @True@. wrappedBlocksToJATS :: PandocMonad m => (Block -> Bool) -> WriterOptions @@ -275,8 +312,12 @@ blockToJATS opts (Para lst) = inTagsSimple "p" <$> inlinesToJATS opts lst blockToJATS opts (LineBlock lns) = blockToJATS opts $ linesToPara lns -blockToJATS opts (BlockQuote blocks) = - inTagsIndented "disp-quote" <$> blocksToJATS opts blocks +blockToJATS opts (BlockQuote blocks) = do + tagSet <- ask + let blocksToJats' = if tagSet == TagSetArticleAuthoring + then wrappedBlocksToJATS (not . isPara) + else blocksToJATS + inTagsIndented "disp-quote" <$> blocksToJats' opts blocks blockToJATS _ (CodeBlock a str) = return $ inTags False tag attr (flush (text (T.unpack $ escapeStringForXML str))) where (lang, attr) = codeAttr a @@ -287,14 +328,20 @@ blockToJATS opts (BulletList lst) = listItemsToJATS opts Nothing lst blockToJATS _ (OrderedList _ []) = return empty blockToJATS opts (OrderedList (start, numstyle, delimstyle) items) = do - let listType = case numstyle of - DefaultStyle -> "order" - Decimal -> "order" - Example -> "order" - UpperAlpha -> "alpha-upper" - LowerAlpha -> "alpha-lower" - UpperRoman -> "roman-upper" - LowerRoman -> "roman-lower" + tagSet <- ask + let listType = + -- The Article Authoring tag set doesn't allow a more specific + -- @list-type@ attribute than "order". + if tagSet == TagSetArticleAuthoring + then "order" + else case numstyle of + DefaultStyle -> "order" + Decimal -> "order" + Example -> "order" + UpperAlpha -> "alpha-upper" + LowerAlpha -> "alpha-lower" + UpperRoman -> "roman-upper" + LowerRoman -> "roman-lower" let simpleList = start == 1 && (delimstyle == DefaultDelim || delimstyle == Period) let markers = if simpleList @@ -407,17 +454,22 @@ inlineToJATS opts SoftBreak | writerWrapText opts == WrapPreserve = return cr | otherwise = return space inlineToJATS opts (Note contents) = do - notes <- gets jatsNotes - let notenum = case notes of - (n, _):_ -> n + 1 - [] -> 1 - thenote <- inTags True "fn" [("id","fn" <> tshow notenum)] - <$> wrappedBlocksToJATS (not . isPara) opts - (walk demoteHeaderAndRefs contents) - modify $ \st -> st{ jatsNotes = (notenum, thenote) : notes } - return $ inTags False "xref" [("ref-type", "fn"), - ("rid", "fn" <> tshow notenum)] - $ text (show notenum) + tagSet <- ask + -- Footnotes must occur inline when using the Article Authoring tag set. + if tagSet == TagSetArticleAuthoring + then inTagsIndented "fn" <$> wrappedBlocksToJATS (not . isPara) opts contents + else do + notes <- gets jatsNotes + let notenum = case notes of + (n, _):_ -> n + 1 + [] -> 1 + thenote <- inTags True "fn" [("id","fn" <> tshow notenum)] + <$> wrappedBlocksToJATS (not . isPara) opts + (walk demoteHeaderAndRefs contents) + modify $ \st -> st{ jatsNotes = (notenum, thenote) : notes } + return $ inTags False "xref" [("ref-type", "fn"), + ("rid", "fn" <> tshow notenum)] + $ text (show notenum) inlineToJATS opts (Cite _ lst) = -- TODO revisit this after examining the jats.csl pipeline inlinesToJATS opts lst @@ -444,16 +496,22 @@ inlineToJATS _ (Math t str) = do let tagtype = case t of DisplayMath -> "disp-formula" InlineMath -> "inline-formula" - let rawtex = inTagsSimple "tex-math" - $ text " - literal str <> - text "]]>" - return $ inTagsSimple tagtype $ - case res of - Right r -> inTagsSimple "alternatives" $ - cr <> rawtex $$ - text (Xml.ppcElement conf $ fixNS r) - Left _ -> rawtex + + let rawtex = text " literal str <> text "]]>" + let texMath = inTagsSimple "tex-math" rawtex + + tagSet <- ask + return . inTagsSimple tagtype $ + case res of + Right r -> let mathMl = text (Xml.ppcElement conf $ fixNS r) + -- tex-math is unsupported in Article Authoring tag set + in if tagSet == TagSetArticleAuthoring + then mathMl + else inTagsSimple "alternatives" $ + cr <> texMath $$ mathMl + Left _ -> if tagSet /= TagSetArticleAuthoring + then texMath + else rawtex inlineToJATS _ (Link _attr [Str t] (T.stripPrefix "mailto:" -> Just email, _)) | escapeURI t == email = return $ inTagsSimple "email" $ literal (escapeStringForXML email) diff --git a/test/Tests/Old.hs b/test/Tests/Old.hs index 3543cdbb3..2851db5d4 100644 --- a/test/Tests/Old.hs +++ b/test/Tests/Old.hs @@ -94,7 +94,14 @@ tests pandocPath = [ testGroup "writer" $ writerTests' "docbook5" ] , testGroup "jats" - [ testGroup "writer" $ writerTests' "jats" + [ testGroup "writer" + [ testGroup "jats_archiving" $ + writerTests' "jats_archiving" + , testGroup "jats_articleauthoring" $ + writerTests' "jats_articleauthoring" + , testGroup "jats_publishing" $ + writerTests' "jats_publishing" + ] , test' "reader" ["-r", "jats", "-w", "native", "-s"] "jats-reader.xml" "jats-reader.native" ] diff --git a/test/Tests/Writers/JATS.hs b/test/Tests/Writers/JATS.hs index 6de058701..7d98f979b 100644 --- a/test/Tests/Writers/JATS.hs +++ b/test/Tests/Writers/JATS.hs @@ -11,7 +11,14 @@ import Text.Pandoc.Arbitrary () import Text.Pandoc.Builder jats :: (ToPandoc a) => a -> String -jats = unpack . purely (writeJATS def{ writerWrapText = WrapNone }) . toPandoc +jats = unpack + . purely (writeJATS def{ writerWrapText = WrapNone }) + . toPandoc + +jatsArticleAuthoring :: (ToPandoc a) => a -> String +jatsArticleAuthoring = unpack + . purely (writeJatsArticleAuthoring def{ writerWrapText = WrapNone }) + . toPandoc {- "my test" =: X =?> Y @@ -47,6 +54,13 @@ tests = [ testGroup "inline code" , testGroup "inlines" [ "Emphasis" =: emph "emphasized" =?> "

emphasized

" + + , test jatsArticleAuthoring "footnote in articleauthoring tag set" + ("test" <> note (para "footnote") =?> + unlines [ "

test" + , "

footnote

" + , "

" + ]) ] , "bullet list" =: bulletList [ plain $ text "first" , plain $ text "second" diff --git a/test/tables.jats b/test/tables.jats_archiving similarity index 100% rename from test/tables.jats rename to test/tables.jats_archiving diff --git a/test/tables.jats_articleauthoring b/test/tables.jats_articleauthoring new file mode 100644 index 000000000..70f71e384 --- /dev/null +++ b/test/tables.jats_articleauthoring @@ -0,0 +1,226 @@ +

Simple table with caption:

+ + +

Demonstration of simple table syntax.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
RightLeftCenterDefault
12121212
123123123123
1111
+
+

Simple table without caption:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
RightLeftCenterDefault
12121212
123123123123
1111
+

Simple table indented two spaces:

+ + +

Demonstration of simple table syntax.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
RightLeftCenterDefault
12121212
123123123123
1111
+
+

Multiline table with caption:

+ + +

Here’s the caption. It may span multiple lines.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Centered HeaderLeft AlignedRight AlignedDefault aligned
Firstrow12.0Example of a row that spans multiple lines.
Secondrow5.0Here’s another one. Note the blank line between rows.
+
+

Multiline table without caption:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
Centered HeaderLeft AlignedRight AlignedDefault aligned
Firstrow12.0Example of a row that spans multiple lines.
Secondrow5.0Here’s another one. Note the blank line between rows.
+

Table without column headers:

+ + + + + + + + + + + + + + + + + + + + + + + + + +
12121212
123123123123
1111
+

Multiline table without column headers:

+ + + + + + + + + + + + + + + + + + + +
Firstrow12.0Example of a row that spans multiple lines.
Secondrow5.0Here’s another one. Note the blank line between rows.
diff --git a/test/tables.jats_publishing b/test/tables.jats_publishing new file mode 100644 index 000000000..70f71e384 --- /dev/null +++ b/test/tables.jats_publishing @@ -0,0 +1,226 @@ +

Simple table with caption:

+ + +

Demonstration of simple table syntax.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
RightLeftCenterDefault
12121212
123123123123
1111
+
+

Simple table without caption:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
RightLeftCenterDefault
12121212
123123123123
1111
+

Simple table indented two spaces:

+ + +

Demonstration of simple table syntax.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
RightLeftCenterDefault
12121212
123123123123
1111
+
+

Multiline table with caption:

+ + +

Here’s the caption. It may span multiple lines.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Centered HeaderLeft AlignedRight AlignedDefault aligned
Firstrow12.0Example of a row that spans multiple lines.
Secondrow5.0Here’s another one. Note the blank line between rows.
+
+

Multiline table without caption:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
Centered HeaderLeft AlignedRight AlignedDefault aligned
Firstrow12.0Example of a row that spans multiple lines.
Secondrow5.0Here’s another one. Note the blank line between rows.
+

Table without column headers:

+ + + + + + + + + + + + + + + + + + + + + + + + + +
12121212
123123123123
1111
+

Multiline table without column headers:

+ + + + + + + + + + + + + + + + + + + +
Firstrow12.0Example of a row that spans multiple lines.
Secondrow5.0Here’s another one. Note the blank line between rows.
diff --git a/test/writer.jats b/test/writer.jats_archiving similarity index 100% rename from test/writer.jats rename to test/writer.jats_archiving diff --git a/test/writer.jats_articleauthoring b/test/writer.jats_articleauthoring new file mode 100644 index 000000000..90437992e --- /dev/null +++ b/test/writer.jats_articleauthoring @@ -0,0 +1,874 @@ + + +
+ + + +Pandoc Test Suite + + + +John MacFarlane + + +Anonymous + + + + + + + + +

This is a set of tests for pandoc. Most of them are adapted from John +Gruber’s markdown test suite.

+ + Headers + + Level 2 with an + <ext-link ext-link-type="uri" xlink:href="/url">embedded + link</ext-link> + + Level 3 with <italic>emphasis</italic> + + Level 4 + + Level 5 + + + + + + + Level 1 + + Level 2 with <italic>emphasis</italic> + + Level 3 +

with no blank line

+
+
+ + Level 2 +

with no blank line

+
+
+ + Paragraphs +

Here’s a regular paragraph.

+

In Markdown 1.0.0 and earlier. Version 8. This line turns into a list + item. Because a hard-wrapped line in the middle of a paragraph looked like a + list item.

+

Here’s one with a bullet. * criminey.

+

There should be a hard line break + here.

+
+ + Block Quotes +

E-mail style:

+ +

This is a block quote. It is pretty short.

+
+ +

Code in a block quote:

+

+ sub status { + print "working"; +} +

+

A list:

+

+ + +

item one

+ + +

item two

+
+ +

+

Nested block quotes:

+

+ +

nested

+
+

+

+ +

nested

+ +

+ +

This should not be a block quote: 2 > 1.

+

And a following paragraph.

+
+ + Code Blocks +

Code:

+ ---- (should be four hyphens) + +sub status { + print "working"; +} + +this code block is indented by one tab +

And:

+ this code block is indented by two tabs + +These should not be escaped: \$ \\ \> \[ \{ +
+ + Lists + + Unordered +

Asterisks tight:

+ + +

asterisk 1

+
+ +

asterisk 2

+
+ +

asterisk 3

+
+
+

Asterisks loose:

+ + +

asterisk 1

+
+ +

asterisk 2

+
+ +

asterisk 3

+
+
+

Pluses tight:

+ + +

Plus 1

+
+ +

Plus 2

+
+ +

Plus 3

+
+
+

Pluses loose:

+ + +

Plus 1

+
+ +

Plus 2

+
+ +

Plus 3

+
+
+

Minuses tight:

+ + +

Minus 1

+
+ +

Minus 2

+
+ +

Minus 3

+
+
+

Minuses loose:

+ + +

Minus 1

+
+ +

Minus 2

+
+ +

Minus 3

+
+
+
+ + Ordered +

Tight:

+ + +

First

+
+ +

Second

+
+ +

Third

+
+
+

and:

+ + +

One

+
+ +

Two

+
+ +

Three

+
+
+

Loose using tabs:

+ + +

First

+
+ +

Second

+
+ +

Third

+
+
+

and using spaces:

+ + +

One

+
+ +

Two

+
+ +

Three

+
+
+

Multiple paragraphs:

+ + +

Item 1, graf one.

+

Item 1. graf two. The quick brown fox jumped over the lazy dog’s + back.

+
+ +

Item 2.

+
+ +

Item 3.

+
+
+
+ + Nested + + +

Tab

+ + +

Tab

+ + +

Tab

+
+
+
+
+
+
+

Here’s another:

+ + +

First

+
+ +

Second:

+ + +

Fee

+
+ +

Fie

+
+ +

Foe

+
+
+
+ +

Third

+
+
+

Same thing but with paragraphs:

+ + +

First

+
+ +

Second:

+ + +

Fee

+
+ +

Fie

+
+ +

Foe

+
+
+
+ +

Third

+
+
+
+ + Tabs and spaces + + +

this is a list item indented with tabs

+
+ +

this is a list item indented with spaces

+ + +

this is an example list item indented with tabs

+
+ +

this is an example list item indented with spaces

+
+
+
+
+
+ + Fancy list markers + + + +

begins with 2

+
+ + +

and now 3

+

with a continuation

+ + + +

sublist with roman numerals, starting with 4

+
+ + +

more items

+ + + +

a subsublist

+
+ + +

a subsublist

+
+
+
+
+
+
+

Nesting:

+ + +

Upper Alpha

+ + +

Upper Roman.

+ + + +

Decimal start with 6

+ + + +

Lower alpha with paren

+
+
+
+
+
+
+
+
+

Autonumbering:

+ + +

Autonumber.

+
+ +

More.

+ + +

Nested.

+
+
+
+
+

Should not be a list item:

+

M.A. 2007

+

B. Williams

+
+
+ + Definition Lists +

Tight using spaces:

+ + + apple + +

red fruit

+
+
+ + orange + +

orange fruit

+
+
+ + banana + +

yellow fruit

+
+
+
+

Tight using tabs:

+ + + apple + +

red fruit

+
+
+ + orange + +

orange fruit

+
+
+ + banana + +

yellow fruit

+
+
+
+

Loose:

+ + + apple + +

red fruit

+
+
+ + orange + +

orange fruit

+
+
+ + banana + +

yellow fruit

+
+
+
+

Multiple blocks with italics:

+ + + apple + +

red fruit

+

contains seeds, crisp, pleasant to taste

+
+
+ + orange + +

orange fruit

+

+ { orange code block } +

+

+ +

orange block quote

+ +

+
+
+
+

Multiple definitions, tight:

+ + + apple + +

red fruit

+

computer

+
+
+ + orange + +

orange fruit

+

bank

+
+
+
+

Multiple definitions, loose:

+ + + apple + +

red fruit

+

computer

+
+
+ + orange + +

orange fruit

+

bank

+
+
+
+

Blank line after term, indented marker, alternate markers:

+ + + apple + +

red fruit

+

computer

+
+
+ + orange + +

orange fruit

+

+ + +

sublist

+ + +

sublist

+
+ +

+
+
+
+
+ + HTML Blocks +

Simple block on one line:

+ +

foo

+
+

And nested without indentation:

+ + + +

foo

+
+
+ +

bar

+
+
+

Interpreted markdown in a table:

+

This is emphasized

+

And this is strong

+

Here’s a simple block:

+ +

foo

+
+

This should be a code block, though:

+ <div> + foo +</div> +

As should this:

+ <div>foo</div> +

Now, nested:

+ + + +

foo

+
+
+
+

This should just be an HTML comment:

+

Multiline:

+

Code block:

+ <!-- Comment --> +

Just plain comment, with trailing spaces on the line:

+

Code:

+ <hr /> +

Hr’s:

+
+ + Inline Markup +

This is emphasized, and so is this.

+

This is strong, and so is this.

+

An emphasized + link.

+

This is strong and em.

+

So is this word.

+

This is strong and em.

+

So is this word.

+

This is code: >, $, + \, \$, + <html>.

+

This is strikeout.

+

Superscripts: abcd ahello + ahello there.

+

Subscripts: H2O, H23O, + Hmany of themO.

+

These should not be superscripts or subscripts, because of the unescaped + spaces: a^b c^d, a~b c~d.

+
+ + Smart quotes, ellipses, dashes +

“Hello,” said the spider. “‘Shelob’ is my name.”

+

‘A’, ‘B’, and ‘C’ are letters.

+

‘Oak,’ ‘elm,’ and ‘beech’ are names of trees. So is ‘pine.’

+

‘He said, “I want to go.”’ Were you alive in the 70’s?

+

Here is some quoted ‘code’ and a + “quoted + link”.

+

Some dashes: one—two — three—four — five.

+

Dashes between numbers: 5–7, 255–66, 1987–1999.

+

Ellipses…and…and….

+
+ + LaTeX + + +

+
+ +

2+2=4

+
+ +

xy

+
+ +

αω

+
+ +

223

+
+ +

p-Tree

+
+ +

Here’s some display math: + ddxf(x)=limh0f(x+h)f(x)h

+
+ +

Here’s one that has a line break in it: + α+ω×x2.

+
+
+

These shouldn’t be math:

+ + +

To get the famous equation, write + $e = mc^2$.

+
+ +

$22,000 is a lot of money. So is $34,000. (It worked + if “lot” is emphasized.)

+
+ +

Shoes ($20) and socks ($5).

+
+ +

Escaped $: $73 this should be + emphasized 23$.

+
+
+

Here’s a LaTeX table:

+
+ + Special Characters +

Here is some unicode:

+ + +

I hat: Î

+
+ +

o umlaut: ö

+
+ +

section: §

+
+ +

set membership: ∈

+
+ +

copyright: ©

+
+
+

AT&T has an ampersand in their name.

+

AT&T is another way to write it.

+

This & that.

+

4 < 5.

+

6 > 5.

+

Backslash: \

+

Backtick: `

+

Asterisk: *

+

Underscore: _

+

Left brace: {

+

Right brace: }

+

Left bracket: [

+

Right bracket: ]

+

Left paren: (

+

Right paren: )

+

Greater-than: >

+

Hash: #

+

Period: .

+

Bang: !

+

Plus: +

+

Minus: -

+
+ + Links + + Explicit +

Just a + URL.

+

URL + and title.

+

URL + and title.

+

URL + and title.

+

URL + and title

+

URL + and title

+

with_underscore

+

Email + link

+

Empty.

+
+ + Reference +

Foo + bar.

+

With embedded + [brackets].

+

b by itself + should be a link.

+

Indented + once.

+

Indented + twice.

+

Indented + thrice.

+

This should [not][] be a link.

+ [not]: /url +

Foo + bar.

+

Foo + biz.

+
+ + With ampersands +

Here’s a + link + with an ampersand in the URL.

+

Here’s a link with an amersand in the link text: + AT&T.

+

Here’s an + inline + link.

+

Here’s an + inline + link in pointy braces.

+
+ + Autolinks +

With an ampersand: + http://example.com/?foo=1&bar=2

+ + +

In a list?

+
+ +

http://example.com/

+
+ +

It should.

+
+
+

An e-mail address: nobody@nowhere.net

+ +

Blockquoted: + http://example.com/

+
+

Auto-links should not occur here: + <http://example.com/>

+ or here: <http://example.com/> +
+
+ + Images +

From “Voyage dans la Lune” by Georges Melies (1902):

+ +

lalune

+ +
+

Here is a movie + + icon.

+
+ + Footnotes +

Here is a footnote reference, +

Here is the footnote. It can go anywhere after the footnote reference. + It need not be placed at the end of the document.

+ and another. +

Here’s the long note. This one contains multiple blocks.

+

Subsequent blocks are indented to show that they belong to the footnote + (as with list items).

+

+ { <code> } +

+

If you want, you can indent every line, but you can also be lazy and + just indent the first line of each block.

+
This should not be a footnote reference, because it + contains a space.[^my note] Here is an inline note. +

This is easier to type. Inline notes may contain + links + and ] verbatim characters, as well as [bracketed + text].

+

+ +

Notes can go in quotes. +

In quote.

+

+
+ + +

And in list items. +

In list.

+

+
+
+

This paragraph should not be part of the note, as it is not indented.

+
+ + + +
diff --git a/test/writer.jats_publishing b/test/writer.jats_publishing new file mode 100644 index 000000000..6384a5939 --- /dev/null +++ b/test/writer.jats_publishing @@ -0,0 +1,898 @@ + + +
+ + + + + + + + + + +Pandoc Test Suite + + + +John MacFarlane + + +Anonymous + + + +17 +7 +2006 + + + + +

This is a set of tests for pandoc. Most of them are adapted from John +Gruber’s markdown test suite.

+ + Headers + + Level 2 with an + <ext-link ext-link-type="uri" xlink:href="/url">embedded + link</ext-link> + + Level 3 with <italic>emphasis</italic> + + Level 4 + + Level 5 + + + + + + + Level 1 + + Level 2 with <italic>emphasis</italic> + + Level 3 +

with no blank line

+
+
+ + Level 2 +

with no blank line

+
+
+ + Paragraphs +

Here’s a regular paragraph.

+

In Markdown 1.0.0 and earlier. Version 8. This line turns into a list + item. Because a hard-wrapped line in the middle of a paragraph looked like a + list item.

+

Here’s one with a bullet. * criminey.

+

There should be a hard line break + here.

+
+ + Block Quotes +

E-mail style:

+ +

This is a block quote. It is pretty short.

+
+ +

Code in a block quote:

+ sub status { + print "working"; +} +

A list:

+ + +

item one

+
+ +

item two

+
+
+

Nested block quotes:

+ +

nested

+
+ +

nested

+
+
+

This should not be a block quote: 2 > 1.

+

And a following paragraph.

+
+ + Code Blocks +

Code:

+ ---- (should be four hyphens) + +sub status { + print "working"; +} + +this code block is indented by one tab +

And:

+ this code block is indented by two tabs + +These should not be escaped: \$ \\ \> \[ \{ +
+ + Lists + + Unordered +

Asterisks tight:

+ + +

asterisk 1

+
+ +

asterisk 2

+
+ +

asterisk 3

+
+
+

Asterisks loose:

+ + +

asterisk 1

+
+ +

asterisk 2

+
+ +

asterisk 3

+
+
+

Pluses tight:

+ + +

Plus 1

+
+ +

Plus 2

+
+ +

Plus 3

+
+
+

Pluses loose:

+ + +

Plus 1

+
+ +

Plus 2

+
+ +

Plus 3

+
+
+

Minuses tight:

+ + +

Minus 1

+
+ +

Minus 2

+
+ +

Minus 3

+
+
+

Minuses loose:

+ + +

Minus 1

+
+ +

Minus 2

+
+ +

Minus 3

+
+
+
+ + Ordered +

Tight:

+ + +

First

+
+ +

Second

+
+ +

Third

+
+
+

and:

+ + +

One

+
+ +

Two

+
+ +

Three

+
+
+

Loose using tabs:

+ + +

First

+
+ +

Second

+
+ +

Third

+
+
+

and using spaces:

+ + +

One

+
+ +

Two

+
+ +

Three

+
+
+

Multiple paragraphs:

+ + +

Item 1, graf one.

+

Item 1. graf two. The quick brown fox jumped over the lazy dog’s + back.

+
+ +

Item 2.

+
+ +

Item 3.

+
+
+
+ + Nested + + +

Tab

+ + +

Tab

+ + +

Tab

+
+
+
+
+
+
+

Here’s another:

+ + +

First

+
+ +

Second:

+ + +

Fee

+
+ +

Fie

+
+ +

Foe

+
+
+
+ +

Third

+
+
+

Same thing but with paragraphs:

+ + +

First

+
+ +

Second:

+ + +

Fee

+
+ +

Fie

+
+ +

Foe

+
+
+
+ +

Third

+
+
+
+ + Tabs and spaces + + +

this is a list item indented with tabs

+
+ +

this is a list item indented with spaces

+ + +

this is an example list item indented with tabs

+
+ +

this is an example list item indented with spaces

+
+
+
+
+
+ + Fancy list markers + + + +

begins with 2

+
+ + +

and now 3

+

with a continuation

+ + + +

sublist with roman numerals, starting with 4

+
+ + +

more items

+ + + +

a subsublist

+
+ + +

a subsublist

+
+
+
+
+
+
+

Nesting:

+ + +

Upper Alpha

+ + +

Upper Roman.

+ + + +

Decimal start with 6

+ + + +

Lower alpha with paren

+
+
+
+
+
+
+
+
+

Autonumbering:

+ + +

Autonumber.

+
+ +

More.

+ + +

Nested.

+
+
+
+
+

Should not be a list item:

+

M.A. 2007

+

B. Williams

+
+
+ + Definition Lists +

Tight using spaces:

+ + + apple + +

red fruit

+
+
+ + orange + +

orange fruit

+
+
+ + banana + +

yellow fruit

+
+
+
+

Tight using tabs:

+ + + apple + +

red fruit

+
+
+ + orange + +

orange fruit

+
+
+ + banana + +

yellow fruit

+
+
+
+

Loose:

+ + + apple + +

red fruit

+
+
+ + orange + +

orange fruit

+
+
+ + banana + +

yellow fruit

+
+
+
+

Multiple blocks with italics:

+ + + apple + +

red fruit

+

contains seeds, crisp, pleasant to taste

+
+
+ + orange + +

orange fruit

+

+ { orange code block } +

+

+ +

orange block quote

+ +

+
+
+
+

Multiple definitions, tight:

+ + + apple + +

red fruit

+

computer

+
+
+ + orange + +

orange fruit

+

bank

+
+
+
+

Multiple definitions, loose:

+ + + apple + +

red fruit

+

computer

+
+
+ + orange + +

orange fruit

+

bank

+
+
+
+

Blank line after term, indented marker, alternate markers:

+ + + apple + +

red fruit

+

computer

+
+
+ + orange + +

orange fruit

+

+ + +

sublist

+ + +

sublist

+
+ +

+
+
+
+
+ + HTML Blocks +

Simple block on one line:

+ +

foo

+
+

And nested without indentation:

+ + + +

foo

+
+
+ +

bar

+
+
+

Interpreted markdown in a table:

+

This is emphasized

+

And this is strong

+

Here’s a simple block:

+ +

foo

+
+

This should be a code block, though:

+ <div> + foo +</div> +

As should this:

+ <div>foo</div> +

Now, nested:

+ + + +

foo

+
+
+
+

This should just be an HTML comment:

+

Multiline:

+

Code block:

+ <!-- Comment --> +

Just plain comment, with trailing spaces on the line:

+

Code:

+ <hr /> +

Hr’s:

+
+ + Inline Markup +

This is emphasized, and so is this.

+

This is strong, and so is this.

+

An emphasized + link.

+

This is strong and em.

+

So is this word.

+

This is strong and em.

+

So is this word.

+

This is code: >, $, + \, \$, + <html>.

+

This is strikeout.

+

Superscripts: abcd ahello + ahello there.

+

Subscripts: H2O, H23O, + Hmany of themO.

+

These should not be superscripts or subscripts, because of the unescaped + spaces: a^b c^d, a~b c~d.

+
+ + Smart quotes, ellipses, dashes +

“Hello,” said the spider. “‘Shelob’ is my name.”

+

‘A’, ‘B’, and ‘C’ are letters.

+

‘Oak,’ ‘elm,’ and ‘beech’ are names of trees. So is ‘pine.’

+

‘He said, “I want to go.”’ Were you alive in the 70’s?

+

Here is some quoted ‘code’ and a + “quoted + link”.

+

Some dashes: one—two — three—four — five.

+

Dashes between numbers: 5–7, 255–66, 1987–1999.

+

Ellipses…and…and….

+
+ + LaTeX + + +

+
+ +

+ + 2+2=4

+
+ +

+ + xy

+
+ +

+ + αω

+
+ +

+ + 223

+
+ +

+ + p-Tree

+
+ +

Here’s some display math: + + ddxf(x)=limh0f(x+h)f(x)h

+
+ +

Here’s one that has a line break in it: + + + α+ω×x2.

+
+
+

These shouldn’t be math:

+ + +

To get the famous equation, write + $e = mc^2$.

+
+ +

$22,000 is a lot of money. So is $34,000. (It worked + if “lot” is emphasized.)

+
+ +

Shoes ($20) and socks ($5).

+
+ +

Escaped $: $73 this should be + emphasized 23$.

+
+
+

Here’s a LaTeX table:

+
+ + Special Characters +

Here is some unicode:

+ + +

I hat: Î

+
+ +

o umlaut: ö

+
+ +

section: §

+
+ +

set membership: ∈

+
+ +

copyright: ©

+
+
+

AT&T has an ampersand in their name.

+

AT&T is another way to write it.

+

This & that.

+

4 < 5.

+

6 > 5.

+

Backslash: \

+

Backtick: `

+

Asterisk: *

+

Underscore: _

+

Left brace: {

+

Right brace: }

+

Left bracket: [

+

Right bracket: ]

+

Left paren: (

+

Right paren: )

+

Greater-than: >

+

Hash: #

+

Period: .

+

Bang: !

+

Plus: +

+

Minus: -

+
+ + Links + + Explicit +

Just a + URL.

+

URL + and title.

+

URL + and title.

+

URL + and title.

+

URL + and title

+

URL + and title

+

with_underscore

+

Email + link

+

Empty.

+
+ + Reference +

Foo + bar.

+

With embedded + [brackets].

+

b by itself + should be a link.

+

Indented + once.

+

Indented + twice.

+

Indented + thrice.

+

This should [not][] be a link.

+ [not]: /url +

Foo + bar.

+

Foo + biz.

+
+ + With ampersands +

Here’s a + link + with an ampersand in the URL.

+

Here’s a link with an amersand in the link text: + AT&T.

+

Here’s an + inline + link.

+

Here’s an + inline + link in pointy braces.

+
+ + Autolinks +

With an ampersand: + http://example.com/?foo=1&bar=2

+ + +

In a list?

+
+ +

http://example.com/

+
+ +

It should.

+
+
+

An e-mail address: nobody@nowhere.net

+ +

Blockquoted: + http://example.com/

+
+

Auto-links should not occur here: + <http://example.com/>

+ or here: <http://example.com/> +
+
+ + Images +

From “Voyage dans la Lune” by Georges Melies (1902):

+ +

lalune

+ +
+

Here is a movie + + icon.

+
+ + Footnotes +

Here is a footnote reference,1 and + another.2 This should + not be a footnote reference, because it contains a + space.[^my note] Here is an inline + note.3

+ +

Notes can go in quotes.4

+
+ + +

And in list items.5

+
+
+

This paragraph should not be part of the note, as it is not indented.

+
+ + + + +

Here is the footnote. It can go anywhere after the footnote reference. + It need not be placed at the end of the document.

+
+ +

Here’s the long note. This one contains multiple blocks.

+

Subsequent blocks are indented to show that they belong to the footnote + (as with list items).

+

+ { <code> } +

+

If you want, you can indent every line, but you can also be lazy and + just indent the first line of each block.

+
+ +

This is easier to type. Inline notes may contain + links + and ] verbatim characters, as well as [bracketed + text].

+
+ +

In quote.

+
+ +

In list.

+
+
+
+