Merge pull request #4177 from stencila/jats-xml-reader

Add Basic JATS reader based on DocBook reader
This commit is contained in:
John MacFarlane 2017-12-21 23:16:03 -07:00 committed by GitHub
commit af04881655
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 2761 additions and 20 deletions

View file

@ -212,6 +212,7 @@ extra-source-files:
test/creole-reader.txt
test/creole-reader.native
test/rst-reader.rst
test/jats-reader.xml
test/s5-basic.html
test/s5-fancy.html
test/s5-fragment.html
@ -434,6 +435,7 @@ library
Text.Pandoc.Readers.RST,
Text.Pandoc.Readers.Org,
Text.Pandoc.Readers.DocBook,
Text.Pandoc.Readers.JATS,
Text.Pandoc.Readers.OPML,
Text.Pandoc.Readers.Textile,
Text.Pandoc.Readers.Native,
@ -623,6 +625,7 @@ test-suite test-pandoc
Tests.Shared
Tests.Readers.LaTeX
Tests.Readers.HTML
Tests.Readers.JATS
Tests.Readers.Markdown
Tests.Readers.Org
Tests.Readers.RST

View file

@ -52,6 +52,7 @@ module Text.Pandoc.Readers
, readOrg
, readLaTeX
, readHtml
, readJATS
, readTextile
, readDocBook
, readOPML
@ -84,7 +85,8 @@ import Text.Pandoc.Readers.DocBook
import Text.Pandoc.Readers.Docx
import Text.Pandoc.Readers.EPUB
import Text.Pandoc.Readers.Haddock
import Text.Pandoc.Readers.HTML
import Text.Pandoc.Readers.HTML (readHtml)
import Text.Pandoc.Readers.JATS (readJATS)
import Text.Pandoc.Readers.LaTeX
import Text.Pandoc.Readers.Markdown
import Text.Pandoc.Readers.MediaWiki
@ -129,6 +131,7 @@ readers = [ ("native" , TextReader readNative)
,("org" , TextReader readOrg)
,("textile" , TextReader readTextile) -- TODO : textile+lhs
,("html" , TextReader readHtml)
,("jats" , TextReader readJATS)
,("latex" , TextReader readLaTeX)
,("haddock" , TextReader readHaddock)
,("twiki" , TextReader readTWiki)

View file

@ -0,0 +1,404 @@
{-# LANGUAGE ExplicitForAll, TupleSections #-}
module Text.Pandoc.Readers.JATS ( readJATS ) where
import Control.Monad.State.Strict
import Data.Char (isDigit, isSpace, toUpper)
import Data.Default
import Data.Generics
import Data.List (intersperse)
import Data.Maybe (maybeToList, fromMaybe)
import Data.Text (Text)
import qualified Data.Text as T
import Text.HTML.TagSoup.Entity (lookupEntity)
import Text.Pandoc.Builder
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Options
import Text.Pandoc.Shared (underlineSpan, crFilter, safeRead)
import Text.TeXMath (readMathML, writeTeX)
import Text.XML.Light
import qualified Data.Set as S (fromList, member)
import Data.Set ((\\))
type JATS m = StateT JATSState m
data JATSState = JATSState{ jatsSectionLevel :: Int
, jatsQuoteType :: QuoteType
, jatsMeta :: Meta
, jatsAcceptsMeta :: Bool
, jatsBook :: Bool
, jatsFigureTitle :: Inlines
, jatsContent :: [Content]
} deriving Show
instance Default JATSState where
def = JATSState{ jatsSectionLevel = 0
, jatsQuoteType = DoubleQuote
, jatsMeta = mempty
, jatsAcceptsMeta = False
, jatsBook = False
, jatsFigureTitle = mempty
, jatsContent = [] }
readJATS :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
readJATS _ inp = do
let tree = normalizeTree . parseXML
$ T.unpack $ crFilter inp
(bs, st') <- flip runStateT (def{ jatsContent = tree }) $ mapM parseBlock tree
return $ Pandoc (jatsMeta st') (toList . mconcat $ bs)
-- normalize input, consolidating adjacent Text and CRef elements
normalizeTree :: [Content] -> [Content]
normalizeTree = everywhere (mkT go)
where go :: [Content] -> [Content]
go (Text (CData CDataRaw _ _):xs) = xs
go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
Text (CData CDataText (s1 ++ s2) z):xs
go (Text (CData CDataText s1 z):CRef r:xs) =
Text (CData CDataText (s1 ++ convertEntity r) z):xs
go (CRef r:Text (CData CDataText s1 z):xs) =
Text (CData CDataText (convertEntity r ++ s1) z):xs
go (CRef r1:CRef r2:xs) =
Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs
go xs = xs
convertEntity :: String -> String
convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e)
-- convenience function to get an attribute value, defaulting to ""
attrValue :: String -> Element -> String
attrValue attr =
fromMaybe "" . maybeAttrValue attr
maybeAttrValue :: String -> Element -> Maybe String
maybeAttrValue attr elt =
lookupAttrBy (\x -> qName x == attr) (elAttribs elt)
-- convenience function
named :: String -> Element -> Bool
named s e = qName (elName e) == s
--
acceptingMetadata :: PandocMonad m => JATS m a -> JATS m a
acceptingMetadata p = do
modify (\s -> s { jatsAcceptsMeta = True } )
res <- p
modify (\s -> s { jatsAcceptsMeta = False })
return res
checkInMeta :: (PandocMonad m, Monoid a) => JATS m () -> JATS m a
checkInMeta p = do
accepts <- jatsAcceptsMeta <$> get
when accepts p
return mempty
addMeta :: PandocMonad m => ToMetaValue a => String -> a -> JATS m ()
addMeta field val = modify (setMeta field val)
instance HasMeta JATSState where
setMeta field v s = s {jatsMeta = setMeta field v (jatsMeta s)}
deleteMeta field s = s {jatsMeta = deleteMeta field (jatsMeta s)}
isBlockElement :: Content -> Bool
isBlockElement (Elem e) = qName (elName e) `S.member` blocktags
where blocktags = S.fromList (paragraphLevel ++ lists ++ mathML ++ other) \\ S.fromList inlinetags
paragraphLevel = ["address", "array", "boxed-text", "chem-struct-wrap",
"code", "fig", "fig-group", "graphic", "media", "preformat",
"supplementary-material", "table-wrap", "table-wrap-group",
"alternatives", "disp-formula", "disp-formula-group"]
lists = ["def-list", "list"]
mathML = ["tex-math", "mml:math"]
other = ["p", "related-article", "related-object", "ack", "disp-quote",
"speech", "statement", "verse-group", "x"]
inlinetags = ["email", "ext-link", "uri", "inline-supplementary-material",
"related-article", "related-object", "hr", "bold", "fixed-case",
"italic", "monospace", "overline", "overline-start", "overline-end",
"roman", "sans-serif", "sc", "strike", "underline", "underline-start",
"underline-end", "ruby", "alternatives", "inline-graphic", "private-char",
"chem-struct", "inline-formula", "tex-math", "mml:math", "abbrev",
"milestone-end", "milestone-start", "named-content", "styled-content",
"fn", "target", "xref", "sub", "sup", "x", "address", "array",
"boxed-text", "chem-struct-wrap", "code", "fig", "fig-group", "graphic",
"media", "preformat", "supplementary-material", "table-wrap",
"table-wrap-group", "disp-formula", "disp-formula-group",
"citation-alternatives", "element-citation", "mixed-citation",
"nlm-citation", "award-id", "funding-source", "open-access",
"def-list", "list", "ack", "disp-quote", "speech", "statement",
"verse-group"]
isBlockElement _ = False
-- Trim leading and trailing newline characters
trimNl :: String -> String
trimNl = reverse . go . reverse . go
where go ('\n':xs) = xs
go xs = xs
-- function that is used by both graphic (in parseBlock)
-- and inline-graphic (in parseInline)
getGraphic :: PandocMonad m => Element -> JATS m Inlines
getGraphic e = do
let atVal a = attrValue a e
attr = (atVal "id", words $ atVal "role", [])
imageUrl = atVal "href"
captionOrLabel = case filterChild (\x -> named "caption" x
|| named "label" x) e of
Nothing -> return mempty
Just z -> mconcat <$>
mapM parseInline (elContent z)
figTitle <- gets jatsFigureTitle
let (caption, title) = if isNull figTitle
then (captionOrLabel, atVal "title")
else (return figTitle, "fig:")
fmap (imageWith attr imageUrl title) caption
getBlocks :: PandocMonad m => Element -> JATS m Blocks
getBlocks e = mconcat <$>
mapM parseBlock (elContent e)
parseBlock :: PandocMonad m => Content -> JATS m Blocks
parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE
parseBlock (Text (CData _ s _)) = if all isSpace s
then return mempty
else return $ plain $ trimInlines $ text s
parseBlock (CRef x) = return $ plain $ str $ map toUpper x
parseBlock (Elem e) =
case qName (elName e) of
"p" -> parseMixed para (elContent e)
"code" -> codeBlockWithLang
"preformat" -> codeBlockWithLang
"disp-quote" -> parseBlockquote
"list" -> case attrValue "list-type" e of
"bullet" -> bulletList <$> listitems
listType -> do
let start = fromMaybe 1 $
(strContent <$> (filterElement (named "list-item") e
>>= filterElement (named "lable")))
>>= safeRead
orderedListWith (start, parseListStyleType listType, DefaultDelim)
<$> listitems
"def-list" -> definitionList <$> deflistitems
"sec" -> gets jatsSectionLevel >>= sect . (+1)
"title" -> return mempty
"title-group" -> checkInMeta getTitle
"graphic" -> para <$> getGraphic e
"journal-meta" -> metaBlock
"article-meta" -> metaBlock
"custom-meta" -> metaBlock
"table" -> parseTable
"fig" -> divWith (attrValue "id" e, ["fig"], []) <$> getBlocks e
"table-wrap" -> divWith (attrValue "id" e, ["table-wrap"], []) <$> getBlocks e
"caption" -> divWith (attrValue "id" e, ["caption"], []) <$> sect 6
"ref-list" -> divWith ("refs", [], []) <$> getBlocks e
"ref" -> divWith ("ref-" <> attrValue "id" e, [], []) <$> getBlocks e
"?xml" -> return mempty
_ -> getBlocks e
where parseMixed container conts = do
let (ils,rest) = break isBlockElement conts
ils' <- (trimInlines . mconcat) <$> mapM parseInline ils
let p = if ils' == mempty then mempty else container ils'
case rest of
[] -> return p
(r:rs) -> do
b <- parseBlock r
x <- parseMixed container rs
return $ p <> b <> x
codeBlockWithLang = do
let classes' = case attrValue "language" e of
"" -> []
x -> [x]
return $ codeBlockWith (attrValue "id" e, classes', [])
$ trimNl $ strContentRecursive e
parseBlockquote = do
attrib <- case filterChild (named "attribution") e of
Nothing -> return mempty
Just z -> (para . (str "" <>) . mconcat)
<$>
mapM parseInline (elContent z)
contents <- getBlocks e
return $ blockQuote (contents <> attrib)
parseListStyleType "roman-lower" = LowerRoman
parseListStyleType "roman-upper" = UpperRoman
parseListStyleType "alpha-lower" = LowerAlpha
parseListStyleType "alpha-upper" = UpperAlpha
parseListStyleType _ = DefaultStyle
listitems = mapM getBlocks $ filterChildren (named "list-item") e
deflistitems = mapM parseVarListEntry $ filterChildren
(named "def-item") e
parseVarListEntry e' = do
let terms = filterChildren (named "term") e'
let items = filterChildren (named "def") e'
terms' <- mapM getInlines terms
items' <- mapM getBlocks items
return (mconcat $ intersperse (str "; ") terms', items')
getTitle = do
tit <- case filterChild (named "article-title") e of
Just s -> getInlines s
Nothing -> return mempty
subtit <- case filterChild (named "subtitle") e of
Just s -> (text ": " <>) <$>
getInlines s
Nothing -> return mempty
addMeta "title" (tit <> subtit)
parseTable = do
let isCaption x = named "title" x || named "caption" x
caption <- case filterChild isCaption e of
Just t -> getInlines t
Nothing -> return mempty
let e' = fromMaybe e $ filterChild (named "tgroup") e
let isColspec x = named "colspec" x || named "col" x
let colspecs = case filterChild (named "colgroup") e' of
Just c -> filterChildren isColspec c
_ -> filterChildren isColspec e'
let isRow x = named "row" x || named "tr" x
headrows <- case filterChild (named "thead") e' of
Just h -> case filterChild isRow h of
Just x -> parseRow x
Nothing -> return []
Nothing -> return []
bodyrows <- case filterChild (named "tbody") e' of
Just b -> mapM parseRow
$ filterChildren isRow b
Nothing -> mapM parseRow
$ filterChildren isRow e'
let toAlignment c = case findAttr (unqual "align") c of
Just "left" -> AlignLeft
Just "right" -> AlignRight
Just "center" -> AlignCenter
_ -> AlignDefault
let toWidth c = case findAttr (unqual "colwidth") c of
Just w -> fromMaybe 0
$ safeRead $ '0': filter (\x ->
isDigit x || x == '.') w
Nothing -> 0 :: Double
let numrows = case bodyrows of
[] -> 0
xs -> maximum $ map length xs
let aligns = case colspecs of
[] -> replicate numrows AlignDefault
cs -> map toAlignment cs
let widths = case colspecs of
[] -> replicate numrows 0
cs -> let ws = map toWidth cs
tot = sum ws
in if all (> 0) ws
then map (/ tot) ws
else replicate numrows 0
let headrows' = if null headrows
then replicate numrows mempty
else headrows
return $ table caption (zip aligns widths)
headrows' bodyrows
isEntry x = named "entry" x || named "td" x || named "th" x
parseRow = mapM (parseMixed plain . elContent) . filterChildren isEntry
sect n = do isbook <- gets jatsBook
let n' = if isbook || n == 0 then n + 1 else n
headerText <- case filterChild (named "title") e `mplus`
(filterChild (named "info") e >>=
filterChild (named "title")) of
Just t -> getInlines t
Nothing -> return mempty
oldN <- gets jatsSectionLevel
modify $ \st -> st{ jatsSectionLevel = n }
b <- getBlocks e
let ident = attrValue "id" e
modify $ \st -> st{ jatsSectionLevel = oldN }
return $ headerWith (ident,[],[]) n' headerText <> b
-- lineItems = mapM getInlines $ filterChildren (named "line") e
metaBlock = acceptingMetadata (getBlocks e) >> return mempty
getInlines :: PandocMonad m => Element -> JATS m Inlines
getInlines e' = (trimInlines . mconcat) <$>
mapM parseInline (elContent e')
strContentRecursive :: Element -> String
strContentRecursive = strContent .
(\e' -> e'{ elContent = map elementToStr $ elContent e' })
elementToStr :: Content -> Content
elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing
elementToStr x = x
parseInline :: PandocMonad m => Content -> JATS m Inlines
parseInline (Text (CData _ s _)) = return $ text s
parseInline (CRef ref) =
return $ maybe (text $ map toUpper ref) text $ lookupEntity ref
parseInline (Elem e) =
case qName (elName e) of
"italic" -> emph <$> innerInlines
"bold" -> strong <$> innerInlines
"strike" -> strikeout <$> innerInlines
"sub" -> subscript <$> innerInlines
"sup" -> superscript <$> innerInlines
"underline" -> underlineSpan <$> innerInlines
"break" -> return linebreak
"sc" -> smallcaps <$> innerInlines
"code" -> codeWithLang
"monospace" -> codeWithLang
"inline-graphic" -> getGraphic e
"disp-quote" -> do
qt <- gets jatsQuoteType
let qt' = if qt == SingleQuote then DoubleQuote else SingleQuote
modify $ \st -> st{ jatsQuoteType = qt' }
contents <- innerInlines
modify $ \st -> st{ jatsQuoteType = qt }
return $ if qt == SingleQuote
then singleQuoted contents
else doubleQuoted contents
"xref" -> do
ils <- innerInlines
let rid = attrValue "rid" e
let refType = ("ref-type",) <$> maybeAttrValue "ref-type" e
let attr = (attrValue "id" e, [], maybeToList refType)
return $ linkWith attr ('#' : rid) "" ils
"ext-link" -> do
ils <- innerInlines
let title = fromMaybe "" $ findAttr (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e
let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
Just h -> h
_ -> '#' : attrValue "rid" e
let ils' = if ils == mempty then str href else ils
let attr = (attrValue "id" e, [], [])
return $ linkWith attr href title ils'
"disp-formula" -> formula displayMath
"inline-formula" -> formula math
"math" | qPrefix (elName e) == Just "mml" -> return . math $ mathML e
"tex-math" -> return . math $ strContent e
"email" -> return $ link ("mailto:" ++ strContent e) ""
$ str $ strContent e
"uri" -> return $ link (strContent e) "" $ str $ strContent e
"fn" -> (note . mconcat) <$>
mapM parseBlock (elContent e)
-- Note: this isn't a real docbook tag; it's what we convert
-- <?asciidor-br?> to in handleInstructions, above. A kludge to
-- work around xml-light's inability to parse an instruction.
_ -> innerInlines
where innerInlines = (trimInlines . mconcat) <$>
mapM parseInline (elContent e)
mathML x =
case readMathML . showElement $ everywhere (mkT removePrefix) x of
Left _ -> mempty
Right m -> writeTeX m
formula constructor = do
let whereToLook = fromMaybe e $ filterElement (named "alternatives") e
texMaths = map strContent $
filterChildren (named "tex-math") whereToLook
mathMLs = map mathML $
filterChildren isMathML whereToLook
return . mconcat . take 1 . map constructor $ texMaths ++ mathMLs
isMathML x = qName (elName x) == "math" &&
qPrefix (elName x) == Just "mml"
removePrefix elname = elname { qPrefix = Nothing }
codeWithLang = do
let classes' = case attrValue "language" e of
"" -> []
l -> [l]
return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e

View file

@ -170,6 +170,28 @@ imageMimeType src kvs =
((drop 1 . dropWhile (/='/')) <$> mbMT)
in (maintype, subtype)
languageFor :: [String] -> String
languageFor classes =
case langs of
(l:_) -> escapeStringForXML l
[] -> ""
where isLang l = map toLower l `elem` map (map toLower) languages
langsFrom s = if isLang s
then [s]
else languagesByExtension . map toLower $ s
langs = concatMap langsFrom classes
codeAttr :: Attr -> (String, [(String, String)])
codeAttr (ident,classes,kvs) = (lang, attr)
where
attr = [("id",ident) | not (null ident)] ++
[("language",lang) | not (null lang)] ++
[(k,v) | (k,v) <- kvs, k `elem` ["code-type",
"code-version", "executable",
"language-version", "orientation",
"platforms", "position", "specific-use"]]
lang = languageFor classes
-- | Convert a Pandoc block element to JATS.
blockToJATS :: PandocMonad m => WriterOptions -> Block -> JATS m Doc
blockToJATS _ Null = return empty
@ -233,23 +255,10 @@ blockToJATS opts (LineBlock lns) =
blockToJATS opts $ linesToPara lns
blockToJATS opts (BlockQuote blocks) =
inTagsIndented "disp-quote" <$> blocksToJATS opts blocks
blockToJATS _ (CodeBlock (ident,classes,kvs) str) = return $
blockToJATS _ (CodeBlock a str) = return $
inTags False tag attr (flush (text (escapeStringForXML str)))
where attr = [("id",ident) | not (null ident)] ++
[("language",lang) | not (null lang)] ++
[(k,v) | (k,v) <- kvs, k `elem` ["code-type",
"code-version", "executable",
"language-version", "orientation",
"platforms", "position", "specific-use"]]
tag = if null lang then "preformat" else "code"
lang = case langs of
(l:_) -> escapeStringForXML l
[] -> ""
isLang l = map toLower l `elem` map (map toLower) languages
langsFrom s = if isLang s
then [s]
else languagesByExtension . map toLower $ s
langs = concatMap langsFrom classes
where (lang, attr) = codeAttr a
tag = if null lang then "preformat" else "code"
blockToJATS _ (BulletList []) = return empty
blockToJATS opts (BulletList lst) =
inTags True "list" [("list-type", "bullet")] <$>
@ -349,8 +358,10 @@ inlineToJATS opts (Quoted SingleQuote lst) = do
inlineToJATS opts (Quoted DoubleQuote lst) = do
contents <- inlinesToJATS opts lst
return $ char '“' <> contents <> char '”'
inlineToJATS _ (Code _ str) =
return $ inTagsSimple "monospace" $ text (escapeStringForXML str)
inlineToJATS _ (Code a str) =
return $ inTags False tag attr $ text (escapeStringForXML str)
where (lang, attr) = codeAttr a
tag = if null lang then "monospace" else "code"
inlineToJATS _ il@(RawInline f x)
| f == "jats" = return $ text x
| otherwise = do

View file

@ -79,6 +79,8 @@ tests = [ testGroup "markdown"
]
, testGroup "jats"
[ testGroup "writer" $ writerTests "jats"
, test "reader" ["-r", "jats", "-w", "native", "-s"]
"jats-reader.xml" "jats-reader.native"
]
, testGroup "native"
[ testGroup "writer" $ writerTests "native"

116
test/Tests/Readers/JATS.hs Normal file
View file

@ -0,0 +1,116 @@
{-# LANGUAGE OverloadedStrings #-}
module Tests.Readers.JATS (tests) where
import Data.Text (Text)
import Test.Tasty
import Tests.Helpers
import Text.Pandoc
import Text.Pandoc.Arbitrary ()
import Text.Pandoc.Builder
jats :: Text -> Pandoc
jats = purely $ readJATS def
tests :: [TestTree]
tests = [ testGroup "inline code"
[ test jats "basic" $ "<p>\n <monospace>@&amp;</monospace>\n</p>" =?> para (code "@&")
, test jats "lang" $ "<p>\n <code language=\"c\">@&amp;</code>\n</p>" =?> para (codeWith ("", ["c"], []) "@&")
]
, testGroup "block code"
[ test jats "basic" $ "<preformat>@&amp;</preformat>" =?> codeBlock "@&"
, test jats "lang" $ "<code language=\"c\">@&amp;</code>" =?> codeBlockWith ("", ["c"], []) "@&"
]
, testGroup "images"
[ test jats "basic" $ "<graphic mimetype=\"image\" mime-subtype=\"\" xlink:href=\"/url\" xlink:title=\"title\" />"
=?> para (image "/url" "title" mempty)
]
, test jats "bullet list" $
"<list list-type=\"bullet\">\n\
\ <list-item>\n\
\ <p>\n\
\ first\n\
\ </p>\n\
\ </list-item>\n\
\ <list-item>\n\
\ <p>\n\
\ second\n\
\ </p>\n\
\ </list-item>\n\
\ <list-item>\n\
\ <p>\n\
\ third\n\
\ </p>\n\
\ </list-item>\n\
\</list>"
=?> bulletList [ para $ text "first"
, para $ text "second"
, para $ text "third"
]
, testGroup "definition lists"
[ test jats "with internal link" $
"<def-list>\n\
\ <def-item>\n\
\ <term>\n\
\ <xref alt=\"testing\" rid=\"go\">testing</xref>\n\
\ </term>\n\
\ <def>\n\
\ <p>\n\
\ hi there\n\
\ </p>\n\
\ </def>\n\
\ </def-item>\n\
\</def-list>"
=?> definitionList [(link "#go" "" (str "testing"),
[para (text "hi there")])]
]
, testGroup "math"
[ test jats "escape |" $
"<p>\n\
\ <inline-formula><alternatives>\n\
\ <tex-math><![CDATA[\\sigma|_{\\{x\\}}]]></tex-math>\n\
\ <mml:math display=\"inline\" xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"><mml:mrow><mml:mi>σ</mml:mi><mml:msub><mml:mo stretchy=\"false\" form=\"prefix\">|</mml:mo><mml:mrow><mml:mo stretchy=\"false\" form=\"prefix\">{</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy=\"false\" form=\"postfix\">}</mml:mo></mml:mrow></mml:msub></mml:mrow></mml:math></alternatives></inline-formula>\n\
\</p>"
=?> para (math "\\sigma|_{\\{x\\}}")
, test jats "tex-math only" $
"<p>\n\
\ <inline-formula><alternatives>\n\
\ <tex-math><![CDATA[\\sigma|_{\\{x\\}}]]></tex-math>\n\
\</p>"
=?> para (math "\\sigma|_{\\{x\\}}")
, test jats "math ml only" $
"<p>\n\
\ <inline-formula><alternatives>\n\
\ <mml:math display=\"inline\" xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"><mml:mrow><mml:mi>σ</mml:mi><mml:msub><mml:mo stretchy=\"false\" form=\"prefix\">|</mml:mo><mml:mrow><mml:mo stretchy=\"false\" form=\"prefix\">{</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy=\"false\" form=\"postfix\">}</mml:mo></mml:mrow></mml:msub></mml:mrow></mml:math></alternatives></inline-formula>\n\
\</p>"
=?> para (math "\\sigma|_{\\{ x\\}}")
]
, testGroup "headers"
-- TODO fix footnotes in headers
-- [ test jats "unnumbered header" $
-- "<sec>\n\
-- \ <title>Header 1<fn>\n\
-- \ <p>\n\
-- \ note\n\
-- \ </p>\n\
-- \ </fn></title>\n\
-- \</sec>"
-- =?> header 1
-- (text "Header 1" <> note (plain $ text "note"))
[ test jats "unnumbered sub header" $
"<sec id=\"foo\">\n\
\ <title>Header</title>\n\
\ <sec id=\"foo2\">\n\
\ <title>Sub-Header</title>\n\
\ </sec>\n\
\</sec>"
=?> headerWith ("foo", [], []) 1
(text "Header")
<> headerWith ("foo2", [], []) 2
(text "Sub-Header")
, test jats "containing image" $
"<sec>\n\
\ <title><inline-graphic mimetype=\"image\" mime-subtype=\"jpeg\" xlink:href=\"imgs/foo.jpg\" /></title>\n\
\</sec>"
=?> header 1 (image "imgs/foo.jpg" "" mempty)
]
]

View file

@ -31,6 +31,11 @@ infix 4 =:
tests :: [TestTree]
tests = [ testGroup "inline code"
[ "basic" =: code "@&" =?> "<p>\n <monospace>@&amp;</monospace>\n</p>"
, "lang" =: codeWith ("", ["c"], []) "@&" =?> "<p>\n <code language=\"c\">@&amp;</code>\n</p>"
]
, testGroup "block code"
[ "basic" =: codeBlock "@&" =?> "<preformat>@&amp;</preformat>"
, "lang" =: codeBlockWith ("", ["c"], []) "@&" =?> "<code language=\"c\">@&amp;</code>"
]
, testGroup "images"
[ "basic" =:
@ -38,7 +43,7 @@ tests = [ testGroup "inline code"
=?> "<graphic mimetype=\"image\" mime-subtype=\"\" xlink:href=\"/url\" xlink:title=\"title\" />"
]
, testGroup "inlines"
[ "Emphasis" =: emph ("emphasized")
[ "Emphasis" =: emph "emphasized"
=?> "<p>\n <italic>emphasized</italic>\n</p>"
]
, "bullet list" =: bulletList [ plain $ text "first"

422
test/jats-reader.native Normal file
View file

@ -0,0 +1,422 @@
Pandoc (Meta {unMeta = fromList [("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]})
[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",SoftBreak,Str "Gruber's",Space,Str "markdown",Space,Str "test",Space,Str "suite."]
,Header 1 ("headers",[],[]) [Str "Headers"]
,Header 2 ("level-2-with-an-embedded-link",[],[]) [Str "Level",Space,Str "2",Space,Str "with",Space,Str "an",SoftBreak,Link ("",[],[]) [Str "embedded",SoftBreak,Str "link"] ("/url","")]
,Header 3 ("level-3-with-emphasis",[],[]) [Str "Level",Space,Str "3",Space,Str "with",Space,Emph [Str "emphasis"]]
,Header 4 ("level-4",[],[]) [Str "Level",Space,Str "4"]
,Header 5 ("level-5",[],[]) [Str "Level",Space,Str "5"]
,Header 1 ("level-1",[],[]) [Str "Level",Space,Str "1"]
,Header 2 ("level-2-with-emphasis",[],[]) [Str "Level",Space,Str "2",Space,Str "with",Space,Emph [Str "emphasis"]]
,Header 3 ("level-3",[],[]) [Str "Level",Space,Str "3"]
,Para [Str "with",Space,Str "no",Space,Str "blank",Space,Str "line"]
,Header 2 ("level-2",[],[]) [Str "Level",Space,Str "2"]
,Para [Str "with",Space,Str "no",Space,Str "blank",Space,Str "line"]
,Header 1 ("paragraphs",[],[]) [Str "Paragraphs"]
,Para [Str "Here's",Space,Str "a",Space,Str "regular",Space,Str "paragraph."]
,Para [Str "In",Space,Str "Markdown",Space,Str "1.0.0",Space,Str "and",Space,Str "earlier.",Space,Str "Version",Space,Str "8.",Space,Str "This",Space,Str "line",Space,Str "turns",Space,Str "into",Space,Str "a",SoftBreak,Str "list",Space,Str "item.",Space,Str "Because",Space,Str "a",Space,Str "hard-wrapped",Space,Str "line",Space,Str "in",Space,Str "the",Space,Str "middle",Space,Str "of",Space,Str "a",Space,Str "paragraph",SoftBreak,Str "looked",Space,Str "like",Space,Str "a",Space,Str "list",Space,Str "item."]
,Para [Str "Here's",Space,Str "one",Space,Str "with",Space,Str "a",Space,Str "bullet.",Space,Str "*",Space,Str "criminey."]
,Para [Str "There",Space,Str "should",Space,Str "be",Space,Str "a",Space,Str "hard",Space,Str "line",Space,Str "break",LineBreak,Str "here."]
,Header 1 ("block-quotes",[],[]) [Str "Block",Space,Str "Quotes"]
,Para [Str "E-mail",Space,Str "style:"]
,BlockQuote
[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "block",Space,Str "quote.",Space,Str "It",Space,Str "is",Space,Str "pretty",Space,Str "short."]]
,BlockQuote
[Para [Str "Code",Space,Str "in",Space,Str "a",Space,Str "block",Space,Str "quote:"]
,CodeBlock ("",[],[]) "sub status {\n print \"working\";\n}"
,Para [Str "A",Space,Str "list:"]
,OrderedList (1,DefaultStyle,DefaultDelim)
[[Para [Str "item",Space,Str "one"]]
,[Para [Str "item",Space,Str "two"]]]
,Para [Str "Nested",Space,Str "block",Space,Str "quotes:"]
,BlockQuote
[Para [Str "nested"]]
,BlockQuote
[Para [Str "nested"]]]
,Para [Str "This",Space,Str "should",Space,Str "not",Space,Str "be",Space,Str "a",Space,Str "block",Space,Str "quote:",Space,Str "2",Space,Str ">",Space,Str "1."]
,Para [Str "Box-style:"]
,BlockQuote
[Para [Str "Example:"]
,CodeBlock ("",[],[]) "sub status {\n print \"working\";\n}"]
,BlockQuote
[OrderedList (1,DefaultStyle,DefaultDelim)
[[Para [Str "do",Space,Str "laundry"]]
,[Para [Str "take",Space,Str "out",Space,Str "the",Space,Str "trash"]]]]
,Para [Str "Here's",Space,Str "a",Space,Str "nested",Space,Str "one:"]
,BlockQuote
[Para [Str "Joe",Space,Str "said:"]
,BlockQuote
[Para [Str "Don't",Space,Str "quote",Space,Str "me."]]]
,Para [Str "And",Space,Str "a",Space,Str "following",Space,Str "paragraph."]
,Header 1 ("code-blocks",[],[]) [Str "Code",Space,Str "Blocks"]
,Para [Str "Code:"]
,CodeBlock ("",[],[]) "---- (should be four hyphens)\n\nsub status {\n print \"working\";\n}\n\nthis code block is indented by one tab"
,Para [Str "And:"]
,CodeBlock ("",[],[]) " this code block is indented by two tabs\n\nThese should not be escaped: \\$ \\\\ \\> \\[ \\{"
,Header 1 ("lists",[],[]) [Str "Lists"]
,Header 2 ("unordered",[],[]) [Str "Unordered"]
,Para [Str "Asterisks",Space,Str "tight:"]
,BulletList
[[Para [Str "asterisk",Space,Str "1"]]
,[Para [Str "asterisk",Space,Str "2"]]
,[Para [Str "asterisk",Space,Str "3"]]]
,Para [Str "Asterisks",Space,Str "loose:"]
,BulletList
[[Para [Str "asterisk",Space,Str "1"]]
,[Para [Str "asterisk",Space,Str "2"]]
,[Para [Str "asterisk",Space,Str "3"]]]
,Para [Str "Pluses",Space,Str "tight:"]
,BulletList
[[Para [Str "Plus",Space,Str "1"]]
,[Para [Str "Plus",Space,Str "2"]]
,[Para [Str "Plus",Space,Str "3"]]]
,Para [Str "Pluses",Space,Str "loose:"]
,BulletList
[[Para [Str "Plus",Space,Str "1"]]
,[Para [Str "Plus",Space,Str "2"]]
,[Para [Str "Plus",Space,Str "3"]]]
,Para [Str "Minuses",Space,Str "tight:"]
,BulletList
[[Para [Str "Minus",Space,Str "1"]]
,[Para [Str "Minus",Space,Str "2"]]
,[Para [Str "Minus",Space,Str "3"]]]
,Para [Str "Minuses",Space,Str "loose:"]
,BulletList
[[Para [Str "Minus",Space,Str "1"]]
,[Para [Str "Minus",Space,Str "2"]]
,[Para [Str "Minus",Space,Str "3"]]]
,Header 2 ("ordered",[],[]) [Str "Ordered"]
,Para [Str "Tight:"]
,OrderedList (1,DefaultStyle,DefaultDelim)
[[Para [Str "First"]]
,[Para [Str "Second"]]
,[Para [Str "Third"]]]
,Para [Str "and:"]
,OrderedList (1,DefaultStyle,DefaultDelim)
[[Para [Str "One"]]
,[Para [Str "Two"]]
,[Para [Str "Three"]]]
,Para [Str "Loose",Space,Str "using",Space,Str "tabs:"]
,OrderedList (1,DefaultStyle,DefaultDelim)
[[Para [Str "First"]]
,[Para [Str "Second"]]
,[Para [Str "Third"]]]
,Para [Str "and",Space,Str "using",Space,Str "spaces:"]
,OrderedList (1,DefaultStyle,DefaultDelim)
[[Para [Str "One"]]
,[Para [Str "Two"]]
,[Para [Str "Three"]]]
,Para [Str "Multiple",Space,Str "paragraphs:"]
,OrderedList (1,DefaultStyle,DefaultDelim)
[[Para [Str "Item",Space,Str "1,",Space,Str "graf",Space,Str "one."]
,Para [Str "Item",Space,Str "1.",Space,Str "graf",Space,Str "two.",Space,Str "The",Space,Str "quick",Space,Str "brown",Space,Str "fox",Space,Str "jumped",Space,Str "over",Space,Str "the",Space,Str "lazy",SoftBreak,Str "dog's",Space,Str "back."]]
,[Para [Str "Item",Space,Str "2."]]
,[Para [Str "Item",Space,Str "3."]]]
,Para [Str "List",Space,Str "styles:"]
,OrderedList (1,DefaultStyle,DefaultDelim)
[]
,OrderedList (1,LowerRoman,DefaultDelim)
[]
,Header 2 ("nested",[],[]) [Str "Nested"]
,BulletList
[[Para [Str "Tab"]
,BulletList
[[Para [Str "Tab"]
,BulletList
[[Para [Str "Tab"]]]]]]]
,Para [Str "Here's",Space,Str "another:"]
,OrderedList (1,DefaultStyle,DefaultDelim)
[[Para [Str "First"]]
,[Para [Str "Second:"]
,BulletList
[[Para [Str "Fee"]]
,[Para [Str "Fie"]]
,[Para [Str "Foe"]]]]
,[Para [Str "Third"]]]
,Para [Str "Same",Space,Str "thing",Space,Str "but",Space,Str "with",Space,Str "paragraphs:"]
,OrderedList (1,DefaultStyle,DefaultDelim)
[[Para [Str "First"]]
,[Para [Str "Second:"]
,BulletList
[[Para [Str "Fee"]]
,[Para [Str "Fie"]]
,[Para [Str "Foe"]]]]
,[Para [Str "Third"]]]
,Header 2 ("tabs-and-spaces",[],[]) [Str "Tabs",Space,Str "and",Space,Str "spaces"]
,BulletList
[[Para [Str "this",Space,Str "is",Space,Str "a",Space,Str "list",Space,Str "item",Space,Str "indented",Space,Str "with",Space,Str "tabs"]]
,[Para [Str "this",Space,Str "is",Space,Str "a",Space,Str "list",Space,Str "item",Space,Str "indented",Space,Str "with",Space,Str "spaces"]
,BulletList
[[Para [Str "this",Space,Str "is",Space,Str "an",Space,Str "example",Space,Str "list",Space,Str "item",Space,Str "indented",Space,Str "with",Space,Str "tabs"]]
,[Para [Str "this",Space,Str "is",Space,Str "an",Space,Str "example",Space,Str "list",Space,Str "item",Space,Str "indented",Space,Str "with",Space,Str "spaces"]]]]]
,Header 2 ("fancy-list-markers",[],[]) [Str "Fancy",Space,Str "list",Space,Str "markers"]
,Para [Str "Autonumbering:"]
,OrderedList (1,DefaultStyle,DefaultDelim)
[[Para [Str "Autonumber."]]
,[Para [Str "More."]
,OrderedList (1,DefaultStyle,DefaultDelim)
[[Para [Str "Nested."]]]]]
,Header 2 ("definition",[],[]) [Str "Definition"]
,DefinitionList
[([Str "Violin"],
[[Para [Str "Stringed",Space,Str "musical",Space,Str "instrument."]
,Para [Str "Torture",Space,Str "device."]]])
,([Str "Cello",LineBreak,Str "Violoncello"],
[[Para [Str "Low-voiced",Space,Str "stringed",Space,Str "instrument."]]])]
,Header 1 ("inline-markup",[],[]) [Str "Inline",Space,Str "Markup"]
,Para [Str "This",Space,Str "is",Space,Emph [Str "emphasized"],Str ",",Space,Str "and",Space,Str "so",Space,Emph [Str "is",SoftBreak,Str "this"],Str "."]
,Para [Str "This",Space,Str "is",Space,Strong [Str "strong"],Str ",",Space,Str "and",Space,Str "so",SoftBreak,Strong [Str "is",Space,Str "this"],Str "."]
,Para [Str "Empty",Space,Strong [],Space,Str "and",Space,Emph [],Str "."]
,Para [Str "An",SoftBreak,Emph [Link ("",[],[]) [Str "emphasized",SoftBreak,Str "link"] ("/url","")],Str "."]
,Para [Strong [Emph [Str "This",Space,Str "is",Space,Str "strong",Space,Str "and",Space,Str "em."]]]
,Para [Str "So",Space,Str "is",Space,Strong [Emph [Str "this"]],Space,Str "word."]
,Para [Strong [Emph [Str "This",Space,Str "is",Space,Str "strong",Space,Str "and",Space,Str "em."]]]
,Para [Str "So",Space,Str "is",Space,Strong [Emph [Str "this"]],Space,Str "word."]
,Para [Str "This",Space,Str "is",Space,Str "code:",Space,Code ("",[],[]) ">",Str ",",Space,Code ("",[],[]) "$",Str ",",SoftBreak,Code ("",[],[]) "\\",Str ",",Space,Code ("",[],[]) "\\$",Str ",",SoftBreak,Code ("",[],[]) "<html>",Str "."]
,Para [Str "This",Space,Str "is",Space,SmallCaps [Str "small",Space,Str "caps"],Str "."]
,Para [Str "These",Space,Str "are",Space,Str "all",Space,Str "underlined:",Space,Str "foo",Space,Str "and",Space,Str "bar."]
,Para [Str "These",Space,Str "are",Space,Str "all",Space,Str "strikethrough:",Space,Strikeout [Str "foo"],Str ",",SoftBreak,Strikeout [Str "bar"],Str ",",Space,Str "and",Space,Strikeout [Str "baz"],Str "."]
,Header 1 ("smart-quotes-ellipses-dashes",[],[]) [Str "Smart",Space,Str "quotes,",Space,Str "ellipses,",Space,Str "dashes"]
,Para [Str "\"Hello,\"",Space,Str "said",Space,Str "the",Space,Str "spider.",Space,Str "\"'Shelob'",Space,Str "is",Space,Str "my",Space,Str "name.\""]
,Para [Str "'A',",Space,Str "'B',",Space,Str "and",Space,Str "'C'",Space,Str "are",Space,Str "letters."]
,Para [Str "'Oak,'",Space,Str "'elm,'",Space,Str "and",Space,Str "'beech'",Space,Str "are",Space,Str "names",Space,Str "of",Space,Str "trees.",Space,Str "So",Space,Str "is",Space,Str "'pine.'"]
,Para [Str "'He",Space,Str "said,",Space,Str "\"I",Space,Str "want",Space,Str "to",Space,Str "go.\"'",Space,Str "Were",Space,Str "you",Space,Str "alive",Space,Str "in",Space,Str "the",Space,Str "70's?"]
,Para [Str "Here",Space,Str "is",Space,Str "some",Space,Str "quoted",Space,Str "'",Code ("",[],[]) "code",Str "'",Space,Str "and",Space,Str "a",SoftBreak,Str "\"",Link ("",[],[]) [Str "quoted",SoftBreak,Str "link"] ("http://example.com/?foo=1&bar=2",""),Str "\"."]
,Para [Str "Some",Space,Str "dashes:",Space,Str "one---two",Space,Str "---",Space,Str "three--four",Space,Str "--",Space,Str "five."]
,Para [Str "Dashes",Space,Str "between",Space,Str "numbers:",Space,Str "5-7,",Space,Str "255-66,",Space,Str "1987-1999."]
,Para [Str "Ellipses...and.",Space,Str ".",Space,Str ".and",Space,Str ".",Space,Str ".",Space,Str ".",Space,Str "."]
,Header 1 ("latex",[],[]) [Str "LaTeX"]
,BulletList
[[Para [Str "\\cite[22-23]{smith.1899}"]]
,[Para [Str "\\doublespacing"]]
,[Para [Str "$2+2=4$"]]
,[Para [Str "$x",Space,Str "\\in",Space,Str "y$"]]
,[Para [Str "$\\alpha",Space,Str "\\wedge",Space,Str "\\omega$"]]
,[Para [Str "$223$"]]
,[Para [Str "$p$-Tree"]]
,[Para [Str "$\\frac{d}{dx}f(x)=\\lim_{h\\to",Space,Str "0}\\frac{f(x+h)-f(x)}{h}$"]]
,[Para [Str "Here's",Space,Str "one",Space,Str "that",Space,Str "has",Space,Str "a",Space,Str "line",Space,Str "break",Space,Str "in",Space,Str "it:",Space,Str "$\\alpha",Space,Str "+",Space,Str "\\omega",Space,Str "\\times",SoftBreak,Str "x^2$."]]]
,Para [Str "These",Space,Str "shouldn't",Space,Str "be",Space,Str "math:"]
,BulletList
[[Para [Str "To",Space,Str "get",Space,Str "the",Space,Str "famous",Space,Str "equation,",Space,Str "write",SoftBreak,Code ("",[],[]) "$e = mc^2$",Str "."]]
,[Para [Str "$22,000",Space,Str "is",Space,Str "a",Space,Emph [Str "lot"],Space,Str "of",Space,Str "money.",Space,Str "So",Space,Str "is",Space,Str "$34,000.",Space,Str "(It",SoftBreak,Str "worked",Space,Str "if",Space,Str "\"lot\"",Space,Str "is",Space,Str "emphasized.)"]]
,[Para [Str "Escaped",Space,Code ("",[],[]) "$",Str ":",Space,Str "$73",Space,Emph [Str "this",Space,Str "should",Space,Str "be",SoftBreak,Str "emphasized"],Space,Str "23$."]]]
,Para [Str "Here's",Space,Str "a",Space,Str "LaTeX",Space,Str "table:"]
,Para [Str "\\begin{tabular}{|l|l|}\\hline",Space,Str "Animal",Space,Str "&",Space,Str "Number",Space,Str "\\\\",Space,Str "\\hline",Space,Str "Dog",Space,Str "&",SoftBreak,Str "2",Space,Str "\\\\",Space,Str "Cat",Space,Str "&",Space,Str "1",Space,Str "\\\\",Space,Str "\\hline",Space,Str "\\end{tabular}"]
,Header 1 ("special-characters",[],[]) [Str "Special",Space,Str "Characters"]
,Para [Str "Here",Space,Str "is",Space,Str "some",Space,Str "unicode:"]
,BulletList
[[Para [Str "I",Space,Str "hat:",Space,Str "\206"]]
,[Para [Str "o",Space,Str "umlaut:",Space,Str "\246"]]
,[Para [Str "section:",Space,Str "\167"]]
,[Para [Str "set",Space,Str "membership:",Space,Str "elem"]]
,[Para [Str "copyright:",Space,Str "\169"]]]
,Para [Str "AT&T",Space,Str "has",Space,Str "an",Space,Str "ampersand",Space,Str "in",Space,Str "their",Space,Str "name."]
,Para [Str "AT&T",Space,Str "is",Space,Str "another",Space,Str "way",Space,Str "to",Space,Str "write",Space,Str "it."]
,Para [Str "This",Space,Str "&",Space,Str "that."]
,Para [Str "4",Space,Str "<",Space,Str "5."]
,Para [Str "6",Space,Str ">",Space,Str "5."]
,Para [Str "Backslash:",Space,Str "\\"]
,Para [Str "Backtick:",Space,Str "`"]
,Para [Str "Asterisk:",Space,Str "*"]
,Para [Str "Underscore:",Space,Str "_"]
,Para [Str "Left",Space,Str "brace:",Space,Str "{"]
,Para [Str "Right",Space,Str "brace:",Space,Str "}"]
,Para [Str "Left",Space,Str "bracket:",Space,Str "["]
,Para [Str "Right",Space,Str "bracket:",Space,Str "]"]
,Para [Str "Left",Space,Str "paren:",Space,Str "("]
,Para [Str "Right",Space,Str "paren:",Space,Str ")"]
,Para [Str "Greater-than:",Space,Str ">"]
,Para [Str "Hash:",Space,Str "#"]
,Para [Str "Period:",Space,Str "."]
,Para [Str "Bang:",Space,Str "!"]
,Para [Str "Plus:",Space,Str "+"]
,Para [Str "Minus:",Space,Str "-"]
,Header 1 ("links",[],[]) [Str "Links"]
,Header 2 ("explicit",[],[]) [Str "Explicit"]
,Para [Str "Just",Space,Str "a",SoftBreak,Link ("",[],[]) [Str "URL"] ("/url/",""),Str "."]
,Para [Link ("",[],[]) [Str "URL",SoftBreak,Str "and",Space,Str "title"] ("/url/","title"),Str "."]
,Para [Link ("",[],[]) [Str "URL",SoftBreak,Str "and",Space,Str "title"] ("/url/","title preceded by two spaces"),Str "."]
,Para [Link ("",[],[]) [Str "URL",SoftBreak,Str "and",Space,Str "title"] ("/url/","title preceded by a tab"),Str "."]
,Para [Link ("",[],[]) [Str "URL",SoftBreak,Str "and",Space,Str "title"] ("/url/","title with \"quotes\" in it")]
,Para [Link ("",[],[]) [Str "URL",SoftBreak,Str "and",Space,Str "title"] ("/url/","title with single quotes")]
,Para [Str "Email",Space,Str "link",Space,Str "(nobody",Space,Str "[at]",Space,Str "nowhere.net)"]
,Para [Link ("",[],[]) [Str "Empty"] ("",""),Str "."]
,Header 2 ("reference",[],[]) [Str "Reference"]
,Para [Str "Foo",SoftBreak,Link ("",[],[]) [Str "bar"] ("/url/",""),Str "."]
,Para [Str "Foo",SoftBreak,Link ("",[],[]) [Str "bar"] ("/url/",""),Str "."]
,Para [Str "Foo",SoftBreak,Link ("",[],[]) [Str "bar"] ("/url/",""),Str "."]
,Para [Str "With",Space,Link ("",[],[]) [Str "embedded",SoftBreak,Str "[brackets]"] ("/url/",""),Str "."]
,Para [Link ("",[],[]) [Str "b"] ("/url/",""),Space,Str "by",SoftBreak,Str "itself",Space,Str "should",Space,Str "be",Space,Str "a",Space,Str "link."]
,Para [Str "Indented",SoftBreak,Link ("",[],[]) [Str "once"] ("/url",""),Str "."]
,Para [Str "Indented",SoftBreak,Link ("",[],[]) [Str "twice"] ("/url",""),Str "."]
,Para [Str "Indented",SoftBreak,Link ("",[],[]) [Str "thrice"] ("/url",""),Str "."]
,Para [Str "This",Space,Str "should",Space,Str "[not]",Space,Str "be",Space,Str "a",Space,Str "link."]
,CodeBlock ("",[],[]) "[not]: /url"
,Para [Str "Foo",SoftBreak,Link ("",[],[]) [Str "bar"] ("/url/","Title with \"quotes\" inside"),Str "."]
,Para [Str "Foo",SoftBreak,Link ("",[],[]) [Str "biz"] ("/url/","Title with \"quote\" inside"),Str "."]
,Header 2 ("with-ampersands",[],[]) [Str "With",Space,Str "ampersands"]
,Para [Str "Here's",Space,Str "a",SoftBreak,Link ("",[],[]) [Str "link",SoftBreak,Str "with",Space,Str "an",Space,Str "ampersand",Space,Str "in",Space,Str "the",Space,Str "URL"] ("http://example.com/?foo=1&bar=2",""),Str "."]
,Para [Str "Here's",Space,Str "a",Space,Str "link",Space,Str "with",Space,Str "an",Space,Str "amersand",Space,Str "in",Space,Str "the",Space,Str "link",Space,Str "text:",SoftBreak,Link ("",[],[]) [Str "AT&T"] ("http://att.com/","AT&T"),Str "."]
,Para [Str "Here's",Space,Str "an",SoftBreak,Link ("",[],[]) [Str "inline",SoftBreak,Str "link"] ("/script?foo=1&bar=2",""),Str "."]
,Para [Str "Here's",Space,Str "an",SoftBreak,Link ("",[],[]) [Str "inline",SoftBreak,Str "link",Space,Str "in",Space,Str "pointy",Space,Str "braces"] ("/script?foo=1&bar=2",""),Str "."]
,Header 2 ("autolinks",[],[]) [Str "Autolinks"]
,Para [Str "With",Space,Str "an",Space,Str "ampersand:",SoftBreak,Link ("",[],[]) [Str "http://example.com/?foo=1&bar=2"] ("http://example.com/?foo=1&bar=2","")]
,BulletList
[[Para [Str "In",Space,Str "a",Space,Str "list?"]]
,[Para [Link ("",[],[]) [Str "http://example.com/"] ("http://example.com/","")]]
,[Para [Str "It",Space,Str "should."]]]
,Para [Str "An",Space,Str "e-mail",Space,Str "address:",Space,Str "nobody",Space,Str "[at]",Space,Str "nowhere.net"]
,BlockQuote
[Para [Str "Blockquoted:",SoftBreak,Link ("",[],[]) [Str "http://example.com/"] ("http://example.com/","")]]
,Para [Str "Auto-links",Space,Str "should",Space,Str "not",Space,Str "occur",Space,Str "here:",SoftBreak,Code ("",[],[]) "<http://example.com/>"]
,CodeBlock ("",[],[]) "or here: <http://example.com/>"
,Header 1 ("images",[],[]) [Str "Images"]
,Para [Str "From",Space,Str "\"Voyage",Space,Str "dans",Space,Str "la",Space,Str "Lune\"",Space,Str "by",Space,Str "Georges",Space,Str "Melies",Space,Str "(1902):"]
,Para [Image ("",[],[]) [] ("lalune.jpg","Voyage dans la Lune")]
,Para [Str "Here",Space,Str "is",Space,Str "a",Space,Str "movie",SoftBreak,Image ("",[],[]) [] ("movie.jpg",""),SoftBreak,Str "icon."]
,Header 1 ("footnotes",[],[]) [Str "Footnotes"]
,Para [Str "Here",Space,Str "is",Space,Str "a",Space,Str "footnote",Space,Str "reference",Link ("",[],[]) [Str "(1)"] ("#note_1",""),Str ",",SoftBreak,Str "and",SoftBreak,Str "another",Link ("",[],[]) [Str "(longnote)"] ("#note_longnote",""),Str ".",SoftBreak,Str "This",Space,Str "should",Space,Emph [Str "not"],Space,Str "be",Space,Str "a",Space,Str "footnote",Space,Str "reference,",Space,Str "because",Space,Str "it",SoftBreak,Str "contains",Space,Str "a",Space,Str "space^(my",Space,Str "note)."]
,Para [Link ("",[],[]) [Str "(1)"] ("#ref_1",""),Space,Str "Here",Space,Str "is",Space,Str "the",Space,Str "footnote.",Space,Str "It",Space,Str "can",SoftBreak,Str "go",Space,Str "anywhere",Space,Str "in",Space,Str "the",Space,Str "document,",Space,Str "not",Space,Str "just",Space,Str "at",Space,Str "the",Space,Str "end."]
,Para [Link ("",[],[]) [Str "(longnote)"] ("#ref_longnote",""),Space,Str "Here's",SoftBreak,Str "the",Space,Str "other",Space,Str "note.",Space,Str "This",Space,Str "one",Space,Str "contains",Space,Str "multiple",Space,Str "blocks."]
,Para [Str "Caret",Space,Str "characters",Space,Str "are",Space,Str "used",Space,Str "to",Space,Str "indicate",Space,Str "that",Space,Str "the",Space,Str "blocks",Space,Str "all",Space,Str "belong",Space,Str "to",SoftBreak,Str "a",Space,Str "single",Space,Str "footnote",Space,Str "(as",Space,Str "with",Space,Str "block",Space,Str "quotes)."]
,CodeBlock ("",[],[]) " { <code> }"
,Para [Str "If",Space,Str "you",Space,Str "want,",Space,Str "you",Space,Str "can",Space,Str "use",Space,Str "a",Space,Str "caret",Space,Str "at",Space,Str "the",Space,Str "beginning",Space,Str "of",Space,Str "every",Space,Str "line,",Space,Str "as",SoftBreak,Str "with",Space,Str "blockquotes,",Space,Str "but",Space,Str "all",Space,Str "that",Space,Str "you",Space,Str "need",Space,Str "is",Space,Str "a",Space,Str "caret",Space,Str "at",Space,Str "the",Space,Str "beginning",SoftBreak,Str "of",Space,Str "the",Space,Str "first",Space,Str "line",Space,Str "of",Space,Str "the",Space,Str "block",Space,Str "and",Space,Str "any",Space,Str "preceding",Space,Str "blank",Space,Str "lines."]
,Para [Str "text",Space,Emph [Str "Leading",Space,Str "space"]]
,Para [Emph [Str "Trailing",Space,Str "space"],Space,Str "text"]
,Para [Str "text",Space,Emph [Str "Leading",Space,Str "spaces"]]
,Para [Emph [Str "Trailing",Space,Str "spaces"],Space,Str "text"]
,Header 1 ("tables",[],[]) [Str "Tables"]
,Header 2 ("tables-with-headers",[],[]) [Str "Tables",Space,Str "with",Space,Str "Headers"]
,Table [] [AlignLeft,AlignLeft,AlignLeft] [0.0,0.0,0.0]
[[Para [Str "X"]]
,[Para [Str "Y"]]
,[Para [Str "Z"]]]
[[[Para [Str "1"]]
,[Para [Str "2"]]
,[Para [Str "3"]]]
,[[Para [Str "4"]]
,[Para [Str "5"]]
,[Para [Str "6"]]]]
,Table [] [AlignLeft,AlignLeft,AlignLeft] [0.0,0.0,0.0]
[[Para [Str "X"]]
,[Para [Str "Y"]]
,[Para [Str "Z"]]]
[[[Para [Str "1"]]
,[Para [Str "2"]]
,[Para [Str "3"]]]
,[[Para [Str "4"]]
,[Para [Str "5"]]
,[Para [Str "6"]]]]
,Table [] [AlignLeft,AlignLeft,AlignLeft] [0.0,0.0,0.0]
[[Para [Str "X"]]
,[Para [Str "Y"]]
,[Para [Str "Z"]]]
[[[Para [Str "1"]]
,[Para [Str "2"]]
,[Para [Str "3"]]]
,[[Para [Str "4"]]
,[Para [Str "5"]]
,[Para [Str "6"]]]]
,Table [] [AlignLeft,AlignLeft,AlignLeft] [0.0,0.0,0.0]
[[Para [Str "X"]]
,[Para [Str "Y"]]
,[Para [Str "Z"]]]
[[[Para [Str "1"]]
,[Para [Str "2"]]
,[Para [Str "3"]]]
,[[Para [Str "4"]]
,[Para [Str "5"]]
,[Para [Str "6"]]]]
,Table [] [AlignLeft,AlignLeft,AlignLeft] [0.0,0.0,0.0]
[[Para [Str "X"]]
,[Para [Str "Y"]]
,[Para [Str "Z"]]]
[[[Para [Str "1"]]
,[Para [Str "2"]]
,[Para [Str "3"]]]
,[[Para [Str "4"]]
,[Para [Str "5"]]
,[Para [Str "6"]]]]
,Table [] [AlignLeft,AlignLeft,AlignLeft] [0.0,0.0,0.0]
[[Para [Str "X"]]
,[Para [Str "Y"]]
,[Para [Str "Z"]]]
[[[Para [Str "1"]]
,[Para [Str "2"]]
,[Para [Str "3"]]]
,[[Para [Str "4"]]
,[Para [Str "5"]]
,[Para [Str "6"]]]]
,Table [] [AlignLeft,AlignLeft,AlignLeft] [0.0,0.0,0.0]
[[Para [Str "X"]]
,[Para [Str "Y"]]
,[Para [Str "Z"]]]
[[[Para [Str "1"]]
,[Para [Str "2"]]
,[Para [Str "3"]]]
,[[Para [Str "4"]]
,[Para [Str "5"]]
,[Para [Str "6"]]]]
,Table [] [AlignLeft,AlignLeft,AlignLeft] [0.0,0.0,0.0]
[[Para [Str "X"]]
,[Para [Str "Y"]]
,[Para [Str "Z"]]]
[[[Para [Str "1"]]
,[Para [Str "2"]]
,[Para [Str "3"]]]
,[[Para [Str "4"]]
,[Para [Str "5"]]
,[Para [Str "6"]]]]
,Table [] [AlignLeft,AlignLeft,AlignLeft] [0.0,0.0,0.0]
[[Para [Str "X"]]
,[Para [Str "Y"]]
,[Para [Str "Z"]]]
[[[Para [Str "1"]]
,[Para [Str "2"]]
,[Para [Str "3"]]]
,[[Para [Str "4"]]
,[Para [Str "5"]]
,[Para [Str "6"]]]]
,Header 2 ("tables-without-headers",[],[]) [Str "Tables",Space,Str "without",Space,Str "Headers"]
,Table [] [AlignLeft,AlignLeft,AlignLeft] [0.0,0.0,0.0]
[[]
,[]
,[]]
[[[Para [Str "1"]]
,[Para [Str "2"]]
,[Para [Str "3"]]]
,[[Para [Str "4"]]
,[Para [Str "5"]]
,[Para [Str "6"]]]]
,Table [] [AlignLeft,AlignLeft,AlignLeft] [0.0,0.0,0.0]
[[]
,[]
,[]]
[[[Para [Str "1"]]
,[Para [Str "2"]]
,[Para [Str "3"]]]
,[[Para [Str "4"]]
,[Para [Str "5"]]
,[Para [Str "6"]]]]
,Table [] [AlignLeft,AlignLeft,AlignLeft] [0.0,0.0,0.0]
[[]
,[]
,[]]
[[[Para [Str "1"]]
,[Para [Str "2"]]
,[Para [Str "3"]]]
,[[Para [Str "4"]]
,[Para [Str "5"]]
,[Para [Str "6"]]]]
,Table [] [AlignLeft,AlignLeft,AlignLeft] [0.0,0.0,0.0]
[[]
,[]
,[]]
[[[Para [Str "1"]]
,[Para [Str "2"]]
,[Para [Str "3"]]]
,[[Para [Str "4"]]
,[Para [Str "5"]]
,[Para [Str "6"]]]]
,Header 2 ("empty-tables",[],[]) [Str "Empty",Space,Str "Tables"]
,Para [Str "This",Space,Str "section",Space,Str "should",Space,Str "be",Space,Str "empty."]]

1773
test/jats-reader.xml Normal file

File diff suppressed because it is too large Load diff

View file

@ -11,6 +11,7 @@ import qualified Tests.Readers.Creole
import qualified Tests.Readers.Docx
import qualified Tests.Readers.EPUB
import qualified Tests.Readers.HTML
import qualified Tests.Readers.JATS
import qualified Tests.Readers.LaTeX
import qualified Tests.Readers.Markdown
import qualified Tests.Readers.Muse
@ -61,6 +62,7 @@ tests = testGroup "pandoc tests" [ Tests.Command.tests
[ testGroup "LaTeX" Tests.Readers.LaTeX.tests
, testGroup "Markdown" Tests.Readers.Markdown.tests
, testGroup "HTML" Tests.Readers.HTML.tests
, testGroup "JATS" Tests.Readers.JATS.tests
, testGroup "Org" Tests.Readers.Org.tests
, testGroup "RST" Tests.Readers.RST.tests
, testGroup "Docx" Tests.Readers.Docx.tests