Extend fix to #6719 to JATS reader

This commit is contained in:
John MacFarlane 2020-10-08 21:36:08 -07:00
parent f19286cf12
commit 2d4214fa31
2 changed files with 23 additions and 13 deletions

View file

@ -26,7 +26,7 @@ import Text.HTML.TagSoup.Entity (lookupEntity)
import Text.Pandoc.Builder import Text.Pandoc.Builder
import Text.Pandoc.Class.PandocMonad (PandocMonad) import Text.Pandoc.Class.PandocMonad (PandocMonad)
import Text.Pandoc.Options import Text.Pandoc.Options
import Text.Pandoc.Shared (crFilter, safeRead) import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
import Text.TeXMath (readMathML, writeTeX) import Text.TeXMath (readMathML, writeTeX)
import Text.XML.Light import Text.XML.Light
import qualified Data.Set as S (fromList, member) import qualified Data.Set as S (fromList, member)
@ -460,14 +460,14 @@ parseInline (CRef ref) =
return . text . maybe (T.toUpper $ T.pack ref) T.pack $ lookupEntity ref return . text . maybe (T.toUpper $ T.pack ref) T.pack $ lookupEntity ref
parseInline (Elem e) = parseInline (Elem e) =
case qName (elName e) of case qName (elName e) of
"italic" -> emph <$> innerInlines "italic" -> innerInlines emph
"bold" -> strong <$> innerInlines "bold" -> innerInlines strong
"strike" -> strikeout <$> innerInlines "strike" -> innerInlines strikeout
"sub" -> subscript <$> innerInlines "sub" -> innerInlines subscript
"sup" -> superscript <$> innerInlines "sup" -> innerInlines superscript
"underline" -> underline <$> innerInlines "underline" -> innerInlines underline
"break" -> return linebreak "break" -> return linebreak
"sc" -> smallcaps <$> innerInlines "sc" -> innerInlines smallcaps
"code" -> codeWithLang "code" -> codeWithLang
"monospace" -> codeWithLang "monospace" -> codeWithLang
@ -477,14 +477,14 @@ parseInline (Elem e) =
qt <- gets jatsQuoteType qt <- gets jatsQuoteType
let qt' = if qt == SingleQuote then DoubleQuote else SingleQuote let qt' = if qt == SingleQuote then DoubleQuote else SingleQuote
modify $ \st -> st{ jatsQuoteType = qt' } modify $ \st -> st{ jatsQuoteType = qt' }
contents <- innerInlines contents <- innerInlines id
modify $ \st -> st{ jatsQuoteType = qt } modify $ \st -> st{ jatsQuoteType = qt }
return $ if qt == SingleQuote return $ if qt == SingleQuote
then singleQuoted contents then singleQuoted contents
else doubleQuoted contents else doubleQuoted contents
"xref" -> do "xref" -> do
ils <- innerInlines ils <- innerInlines id
let rid = attrValue "rid" e let rid = attrValue "rid" e
let rids = T.words rid let rids = T.words rid
let refType = ("ref-type",) <$> maybeAttrValue "ref-type" e let refType = ("ref-type",) <$> maybeAttrValue "ref-type" e
@ -501,7 +501,7 @@ parseInline (Elem e) =
ils ils
else linkWith attr ("#" <> rid) "" ils else linkWith attr ("#" <> rid) "" ils
"ext-link" -> do "ext-link" -> do
ils <- innerInlines ils <- innerInlines id
let title = fromMaybe "" $ findAttrText (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e let title = fromMaybe "" $ findAttrText (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e
let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
Just h -> T.pack h Just h -> T.pack h
@ -520,8 +520,8 @@ parseInline (Elem e) =
"uri" -> return $ link (textContent e) "" $ str $ textContent e "uri" -> return $ link (textContent e) "" $ str $ textContent e
"fn" -> note . mconcat <$> "fn" -> note . mconcat <$>
mapM parseBlock (elContent e) mapM parseBlock (elContent e)
_ -> innerInlines _ -> innerInlines id
where innerInlines = trimInlines . mconcat <$> where innerInlines f = extractSpaces f . mconcat <$>
mapM parseInline (elContent e) mapM parseInline (elContent e)
mathML x = mathML x =
case readMathML . T.pack . showElement $ everywhere (mkT removePrefix) x of case readMathML . T.pack . showElement $ everywhere (mkT removePrefix) x of

View file

@ -6,3 +6,13 @@
^D ^D
[Para [Emph [Str "emphasized"],Space,Str "text"]] [Para [Emph [Str "emphasized"],Space,Str "text"]]
``` ```
```
% pandoc -f jats -t native
<p>
<italic> hi </italic>there
</p>
^D
[Para [Emph [Str "hi"],Space,Str "there"]]
```