Improve support for code language in JATS

This commit is contained in:
Hamish Mackenzie 2017-12-20 23:55:48 +13:00
parent 5d3c9e5646
commit d853571397
4 changed files with 59 additions and 21 deletions

View file

@ -15,6 +15,8 @@ import Text.Pandoc.Options
import Text.Pandoc.Shared (underlineSpan, crFilter, safeRead) import Text.Pandoc.Shared (underlineSpan, crFilter, safeRead)
import Text.TeXMath (readMathML, writeTeX) import Text.TeXMath (readMathML, writeTeX)
import Text.XML.Light import Text.XML.Light
import qualified Data.Set as S (fromList, member)
import Data.Set ((\\))
type JATS m = StateT JATSState m type JATS m = StateT JATSState m
@ -98,8 +100,8 @@ instance HasMeta JATSState where
deleteMeta field s = s {jatsMeta = deleteMeta field (jatsMeta s)} deleteMeta field s = s {jatsMeta = deleteMeta field (jatsMeta s)}
isBlockElement :: Content -> Bool isBlockElement :: Content -> Bool
isBlockElement (Elem e) = qName (elName e) `elem` blocktags isBlockElement (Elem e) = qName (elName e) `S.member` blocktags
where blocktags = paragraphLevel ++ lists ++ mathML ++ other where blocktags = S.fromList (paragraphLevel ++ lists ++ mathML ++ other) \\ S.fromList inlinetags
paragraphLevel = ["address", "array", "boxed-text", "chem-struct-wrap", paragraphLevel = ["address", "array", "boxed-text", "chem-struct-wrap",
"code", "fig", "fig-group", "graphic", "media", "preformat", "code", "fig", "fig-group", "graphic", "media", "preformat",
"supplementary-material", "table-wrap", "table-wrap-group", "supplementary-material", "table-wrap", "table-wrap-group",
@ -108,6 +110,21 @@ isBlockElement (Elem e) = qName (elName e) `elem` blocktags
mathML = ["tex-math", "mml:math"] mathML = ["tex-math", "mml:math"]
other = ["p", "related-article", "related-object", "ack", "disp-quote", other = ["p", "related-article", "related-object", "ack", "disp-quote",
"speech", "statement", "verse-group", "x"] "speech", "statement", "verse-group", "x"]
inlinetags = ["email", "ext-link", "uri", "inline-supplementary-material",
"related-article", "related-object", "hr", "bold", "fixed-case",
"italic", "monospace", "overline", "overline-start", "overline-end",
"roman", "sans-serif", "sc", "strike", "underline", "underline-start",
"underline-end", "ruby", "alternatives", "inline-graphic", "private-char",
"chem-struct", "inline-formula", "tex-math", "mml:math", "abbrev",
"milestone-end", "milestone-start", "named-content", "styled-content",
"fn", "target", "xref", "sub", "sup", "x", "address", "array",
"boxed-text", "chem-struct-wrap", "code", "fig", "fig-group", "graphic",
"media", "preformat", "supplementary-material", "table-wrap",
"table-wrap-group", "disp-formula", "disp-formula-group",
"citation-alternatives", "element-citation", "mixed-citation",
"nlm-citation", "award-id", "funding-source", "open-access",
"def-list", "list", "ack", "disp-quote", "speech", "statement",
"verse-group"]
isBlockElement _ = False isBlockElement _ = False
-- Trim leading and trailing newline characters -- Trim leading and trailing newline characters

View file

@ -170,6 +170,28 @@ imageMimeType src kvs =
((drop 1 . dropWhile (/='/')) <$> mbMT) ((drop 1 . dropWhile (/='/')) <$> mbMT)
in (maintype, subtype) in (maintype, subtype)
languageFor :: [String] -> String
languageFor classes =
case langs of
(l:_) -> escapeStringForXML l
[] -> ""
where isLang l = map toLower l `elem` map (map toLower) languages
langsFrom s = if isLang s
then [s]
else languagesByExtension . map toLower $ s
langs = concatMap langsFrom classes
codeAttr :: Attr -> (String, [(String, String)])
codeAttr (ident,classes,kvs) = (lang, attr)
where
attr = [("id",ident) | not (null ident)] ++
[("language",lang) | not (null lang)] ++
[(k,v) | (k,v) <- kvs, k `elem` ["code-type",
"code-version", "executable",
"language-version", "orientation",
"platforms", "position", "specific-use"]]
lang = languageFor classes
-- | Convert a Pandoc block element to JATS. -- | Convert a Pandoc block element to JATS.
blockToJATS :: PandocMonad m => WriterOptions -> Block -> JATS m Doc blockToJATS :: PandocMonad m => WriterOptions -> Block -> JATS m Doc
blockToJATS _ Null = return empty blockToJATS _ Null = return empty
@ -233,23 +255,10 @@ blockToJATS opts (LineBlock lns) =
blockToJATS opts $ linesToPara lns blockToJATS opts $ linesToPara lns
blockToJATS opts (BlockQuote blocks) = blockToJATS opts (BlockQuote blocks) =
inTagsIndented "disp-quote" <$> blocksToJATS opts blocks inTagsIndented "disp-quote" <$> blocksToJATS opts blocks
blockToJATS _ (CodeBlock (ident,classes,kvs) str) = return $ blockToJATS _ (CodeBlock a str) = return $
inTags False tag attr (flush (text (escapeStringForXML str))) inTags False tag attr (flush (text (escapeStringForXML str)))
where attr = [("id",ident) | not (null ident)] ++ where (lang, attr) = codeAttr a
[("language",lang) | not (null lang)] ++ tag = if null lang then "preformat" else "code"
[(k,v) | (k,v) <- kvs, k `elem` ["code-type",
"code-version", "executable",
"language-version", "orientation",
"platforms", "position", "specific-use"]]
tag = if null lang then "preformat" else "code"
lang = case langs of
(l:_) -> escapeStringForXML l
[] -> ""
isLang l = map toLower l `elem` map (map toLower) languages
langsFrom s = if isLang s
then [s]
else languagesByExtension . map toLower $ s
langs = concatMap langsFrom classes
blockToJATS _ (BulletList []) = return empty blockToJATS _ (BulletList []) = return empty
blockToJATS opts (BulletList lst) = blockToJATS opts (BulletList lst) =
inTags True "list" [("list-type", "bullet")] <$> inTags True "list" [("list-type", "bullet")] <$>
@ -346,8 +355,10 @@ inlineToJATS opts (Quoted SingleQuote lst) = do
inlineToJATS opts (Quoted DoubleQuote lst) = do inlineToJATS opts (Quoted DoubleQuote lst) = do
contents <- inlinesToJATS opts lst contents <- inlinesToJATS opts lst
return $ char '“' <> contents <> char '”' return $ char '“' <> contents <> char '”'
inlineToJATS _ (Code _ str) = inlineToJATS _ (Code a str) =
return $ inTagsSimple "monospace" $ text (escapeStringForXML str) return $ inTags False tag attr $ text (escapeStringForXML str)
where (lang, attr) = codeAttr a
tag = if null lang then "monospace" else "code"
inlineToJATS _ il@(RawInline f x) inlineToJATS _ il@(RawInline f x)
| f == "jats" = return $ text x | f == "jats" = return $ text x
| otherwise = do | otherwise = do

View file

@ -14,6 +14,11 @@ jats = purely $ readJATS def
tests :: [TestTree] tests :: [TestTree]
tests = [ testGroup "inline code" tests = [ testGroup "inline code"
[ test jats "basic" $ "<p>\n <monospace>@&amp;</monospace>\n</p>" =?> para (code "@&") [ test jats "basic" $ "<p>\n <monospace>@&amp;</monospace>\n</p>" =?> para (code "@&")
, test jats "lang" $ "<p>\n <code language=\"c\">@&amp;</code>\n</p>" =?> para (codeWith ("", ["c"], []) "@&")
]
, testGroup "block code"
[ test jats "basic" $ "<preformat>@&amp;</preformat>" =?> codeBlock "@&"
, test jats "lang" $ "<code language=\"c\">@&amp;</code>" =?> codeBlockWith ("", ["c"], []) "@&"
] ]
, testGroup "images" , testGroup "images"
[ test jats "basic" $ "<graphic mimetype=\"image\" mime-subtype=\"\" xlink:href=\"/url\" xlink:title=\"title\" />" [ test jats "basic" $ "<graphic mimetype=\"image\" mime-subtype=\"\" xlink:href=\"/url\" xlink:title=\"title\" />"

View file

@ -31,6 +31,11 @@ infix 4 =:
tests :: [TestTree] tests :: [TestTree]
tests = [ testGroup "inline code" tests = [ testGroup "inline code"
[ "basic" =: code "@&" =?> "<p>\n <monospace>@&amp;</monospace>\n</p>" [ "basic" =: code "@&" =?> "<p>\n <monospace>@&amp;</monospace>\n</p>"
, "lang" =: codeWith ("", ["c"], []) "@&" =?> "<p>\n <code language=\"c\">@&amp;</code>\n</p>"
]
, testGroup "block code"
[ "basic" =: codeBlock "@&" =?> "<preformat>@&amp;</preformat>"
, "lang" =: codeBlockWith ("", ["c"], []) "@&" =?> "<code language=\"c\">@&amp;</code>"
] ]
, testGroup "images" , testGroup "images"
[ "basic" =: [ "basic" =:
@ -38,7 +43,7 @@ tests = [ testGroup "inline code"
=?> "<graphic mimetype=\"image\" mime-subtype=\"\" xlink:href=\"/url\" xlink:title=\"title\" />" =?> "<graphic mimetype=\"image\" mime-subtype=\"\" xlink:href=\"/url\" xlink:title=\"title\" />"
] ]
, testGroup "inlines" , testGroup "inlines"
[ "Emphasis" =: emph ("emphasized") [ "Emphasis" =: emph "emphasized"
=?> "<p>\n <italic>emphasized</italic>\n</p>" =?> "<p>\n <italic>emphasized</italic>\n</p>"
] ]
, "bullet list" =: bulletList [ plain $ text "first" , "bullet list" =: bulletList [ plain $ text "first"