pandoc/test/Tests/Readers/JATS.hs
John MacFarlane 8ca191604d Add new unexported module T.P.XMLParser.
This exports functions that uses xml-conduit's parser to
produce an xml-light Element or [Content].  This allows
existing pandoc code to use a better parser without
much modification.

The new parser is used in all places where xml-light's
parser was previously used.  Benchmarks show a significant
performance improvement in parsing XML-based formats
(especially ODT and FB2).

Note that the xml-light types use String, so the
conversion from xml-conduit types involves a lot
of extra allocation.  It would be desirable to
avoid that in the future by gradually switching
to using xml-conduit directly. This can be done
module by module.

The new parser also reports errors, which we report
when possible.

A new constructor PandocXMLError has been added to
PandocError in T.P.Error [API change].

Closes #7091, which was the main stimulus.

These changes revealed the need for some changes
in the tests.  The docbook-reader.docbook test
lacked definitions for the entities it used; these
have been added. And the docx golden tests have been
updated, because the new parser does not preserve
the order of attributes.

Add entity defs to docbook-reader.docbook.

Update golden tests for docx.
2021-02-10 22:04:11 -08:00

145 lines
6 KiB
Haskell
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Tests.Readers.JATS
Copyright : © 2017 Hamish Mackenzie
License : GNU GPL, version 2 or above
Maintainer : Hamish Mackenzie <Hamish.K.Mackenzie@googlemail.com>
Stability : alpha
Portability : portable
Tests for the JATS reader.
-}
module Tests.Readers.JATS (tests) where
import Data.Text (Text)
import Test.Tasty
import Tests.Helpers
import Text.Pandoc
import Text.Pandoc.Arbitrary ()
import Text.Pandoc.Builder
import qualified Data.Text as T
jats :: Text -> Pandoc
jats = purely $ readJATS def
tests :: [TestTree]
tests = [ testGroup "inline code"
[ test jats "basic" $ "<p>\n <monospace>@&amp;</monospace>\n</p>" =?> para (code "@&")
, test jats "lang" $ "<p>\n <code language=\"c\">@&amp;</code>\n</p>" =?> para (codeWith ("", ["c"], []) "@&")
]
, testGroup "block code"
[ test jats "basic" $ "<preformat>@&amp;</preformat>" =?> codeBlock "@&"
, test jats "lang" $ "<code language=\"c\">@&amp;</code>" =?> codeBlockWith ("", ["c"], []) "@&"
]
, testGroup "images"
[ test jats "basic" $ "<graphic mimetype=\"image\" mime-subtype=\"\" xlink:href=\"/url\" xlink:title=\"title\" />"
=?> para (image "/url" "title" mempty)
]
, test jats "bullet list" $
"<list list-type=\"bullet\">\n\
\ <list-item>\n\
\ <p>\n\
\ first\n\
\ </p>\n\
\ </list-item>\n\
\ <list-item>\n\
\ <p>\n\
\ second\n\
\ </p>\n\
\ </list-item>\n\
\ <list-item>\n\
\ <p>\n\
\ third\n\
\ </p>\n\
\ </list-item>\n\
\</list>"
=?> bulletList [ para $ text "first"
, para $ text "second"
, para $ text "third"
]
, testGroup "definition lists"
[ test jats "with internal link" $
"<def-list>\n\
\ <def-item>\n\
\ <term>\n\
\ <xref alt=\"testing\" rid=\"go\">testing</xref>\n\
\ </term>\n\
\ <def>\n\
\ <p>\n\
\ hi there\n\
\ </p>\n\
\ </def>\n\
\ </def-item>\n\
\</def-list>"
=?> definitionList [(link "#go" "" (str "testing"),
[para (text "hi there")])]
]
, testGroup "math"
[ test jats "escape |" $
"<p>\n\
\ <inline-formula><alternatives>\n\
\ <tex-math><![CDATA[\\sigma|_{\\{x\\}}]]></tex-math>\n\
\ <mml:math display=\"inline\" xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"><mml:mrow><mml:mi>σ</mml:mi><mml:msub><mml:mo stretchy=\"false\" form=\"prefix\">|</mml:mo><mml:mrow><mml:mo stretchy=\"false\" form=\"prefix\">{</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy=\"false\" form=\"postfix\">}</mml:mo></mml:mrow></mml:msub></mml:mrow></mml:math></alternatives></inline-formula>\n\
\</p>"
=?> para (math "\\sigma|_{\\{x\\}}")
, test jats "tex-math only" $
"<p>\n\
\ <inline-formula><alternatives>\n\
\ <tex-math><![CDATA[\\sigma|_{\\{x\\}}]]></tex-math>\n\
\ </alternatives></inline-formula>\n\
\</p>"
=?> para (math "\\sigma|_{\\{x\\}}")
, test jats "math ml only" $
"<p>\n\
\ <inline-formula><alternatives>\n\
\ <mml:math display=\"inline\" xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"><mml:mrow><mml:mi>σ</mml:mi><mml:msub><mml:mo stretchy=\"false\" form=\"prefix\">|</mml:mo><mml:mrow><mml:mo stretchy=\"false\" form=\"prefix\">{</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy=\"false\" form=\"postfix\">}</mml:mo></mml:mrow></mml:msub></mml:mrow></mml:math></alternatives></inline-formula>\n\
\</p>"
=?> para (math "\\sigma|_{\\{ x\\}}")
]
, testGroup "headers"
-- TODO fix footnotes in headers
-- [ test jats "unnumbered header" $
-- "<sec>\n\
-- \ <title>Header 1<fn>\n\
-- \ <p>\n\
-- \ note\n\
-- \ </p>\n\
-- \ </fn></title>\n\
-- \</sec>"
-- =?> header 1
-- (text "Header 1" <> note (plain $ text "note"))
[ test jats "unnumbered sub header" $
"<sec id=\"foo\">\n\
\ <title>Header</title>\n\
\ <sec id=\"foo2\">\n\
\ <title>Sub-Header</title>\n\
\ </sec>\n\
\</sec>"
=?> headerWith ("foo", [], []) 1
(text "Header")
<> headerWith ("foo2", [], []) 2
(text "Sub-Header")
, test jats "containing image" $
"<sec>\n\
\ <title><inline-graphic mimetype=\"image\" mime-subtype=\"jpeg\" xlink:href=\"imgs/foo.jpg\" /></title>\n\
\</sec>"
=?> header 1 (image "imgs/foo.jpg" "" mempty)
]
, testGroup "metadata"
[ test jats "abstract" $
T.unlines [ "<front>"
, "<article-meta>"
, "<abstract>"
, "<p>Paragraph 1</p>"
, "<p>Paragraph 2</p>"
, "</abstract>"
, "</article-meta>"
, "</front>"
] =?>
let abstract = para "Paragraph 1" <> para "Paragraph 2"
in setMeta "abstract" abstract $ doc mempty
]
]