RST reader: improve parsing of inline interpreted text roles.
* Use a Span with class "title-ref" for the default title-reference role.
* Use B.text to split up contents into Spaces, SoftBreaks, and Strs for title-reference.
* Use Code with class "interpreted-text" instead of Span and Str for unknown roles. (The RST writer has also been modified to round-trip this properly.)
* Disallow blank lines in interpreted text.
* Backslash-escape now works in interpreted text.
* Backticks followed by alphanumerics no longer end interpreted text.

Closes #4811.
This commit is contained in:
6 changed files with 86 additions and 19 deletions
@ -37,7 +37,7 @@ import Control.Arrow (second)
import Control.Monad (forM_, guard, liftM, mplus, mzero, when)
import Control.Monad.Except (throwError)
import Control.Monad.Identity (Identity (..))
import Data.Char (isHexDigit, isSpace, toLower, toUpper)
import Data.Char (isHexDigit, isSpace, toLower, toUpper, isAlphaNum)
import Data.List (deleteFirstsBy, elemIndex, intercalate, isInfixOf, isSuffixOf,
nub, sort, transpose, union)
import qualified Data.Map as M
@ -1385,7 +1385,6 @@ strong = B.strong . trimInlines . mconcat <$>
-- TODO:
-- - Classes are silently discarded in addNewRole
-- - Lacks sensible implementation for title-reference (which is the default)
-- - Allows direct use of the :raw: role, rST only allows inherited use.
interpretedRole :: PandocMonad m => RSTParser m Inlines
interpretedRole = try $ do
@ -1395,12 +1394,12 @@ interpretedRole = try $ do
renderRole :: PandocMonad m
=> String -> Maybe String -> String -> Attr -> RSTParser m Inlines
renderRole contents fmt role attr = case role of
"sup" -> return $ B.superscript $ B.str contents
"superscript" -> return $ B.superscript $ B.str contents
"sub" -> return $ B.subscript $ B.str contents
"subscript" -> return $ B.subscript $ B.str contents
"emphasis" -> return $ B.emph $ B.str contents
"strong" -> return $ B.strong $ B.str contents
"sup" -> return $ B.superscript $ treatAsText contents
"superscript" -> return $ B.superscript $ treatAsText contents
"sub" -> return $ B.subscript $ treatAsText contents
"subscript" -> return $ B.subscript $ treatAsText contents
"emphasis" -> return $ B.emph $ treatAsText contents
"strong" -> return $ B.strong $ treatAsText contents
"rfc-reference" -> return $ rfcLink contents
"RFC" -> return $ rfcLink contents
"pep-reference" -> return $ pepLink contents
@ -1411,7 +1410,7 @@ renderRole contents fmt role attr = case role of
"title" -> titleRef contents
"t" -> titleRef contents
"code" -> return $ B.codeWith (addClass "sourceCode" attr) contents
"span" -> return $ B.spanWith attr $ B.str contents
"span" -> return $ B.spanWith attr $ treatAsText contents
"raw" -> return $ B.rawInline (fromMaybe "" fmt) contents
custom -> do
customRoles <- stateRstCustomRoles <$> getState
@ -1419,14 +1418,20 @@ renderRole contents fmt role attr = case role of
Just (newRole, newFmt, newAttr) ->
renderRole contents newFmt newRole newAttr
Nothing -> -- undefined role
return $ B.spanWith ("",[],[("role",role)]) (B.str contents)
return $ B.codeWith ("",["interpreted-text"],[("role",role)])
titleRef ref = return $ B.str ref -- FIXME: Not a sensible behaviour
titleRef ref = return $ B.spanWith ("",["title-ref"],[]) $ treatAsText ref
-- Build a link for an RFC number; the link title and the link text are
-- both "RFC <n>".
-- NOTE(review): the base URL is assumed (faqs.org RFC mirror) -- confirm
-- against the upstream module before relying on it.
rfcLink rfcNo = B.link rfcUrl ("RFC " ++ rfcNo) $ B.str ("RFC " ++ rfcNo)
  where rfcUrl = "http://www.faqs.org/rfcs/rfc" ++ rfcNo ++ ".html"
-- Build a link for a PEP number; the number is left-padded with zeros to
-- four digits in the URL only, while title and text keep it as written.
-- NOTE(review): the base URL is assumed (python.org PEP index) -- confirm
-- against the upstream module before relying on it.
pepLink pepNo = B.link pepUrl ("PEP " ++ pepNo) $ B.str ("PEP " ++ pepNo)
  where padNo = replicate (4 - length pepNo) '0' ++ pepNo
        pepUrl = "http://www.python.org/dev/peps/pep-" ++ padNo ++ "/"
-- Convert raw role contents to inlines: backslash escapes are resolved
-- first, then the result is handed to B.text.
treatAsText raw = B.text (handleEscapes raw)
-- Resolve backslash escapes in raw role contents:
--   * a backslash followed by a space removes both characters;
--   * a backslash followed by any other character keeps only that character;
--   * a lone trailing backslash is passed through unchanged.
handleEscapes str = case str of
  '\\' : ' ' : rest -> handleEscapes rest
  '\\' : ch : rest  -> ch : handleEscapes rest
  ch : rest         -> ch : handleEscapes rest
  []                -> []
-- | Add a class name to an attribute triple. The identifier and the
-- key-value pairs are returned untouched; the class is appended to the
-- class list only when it is not already a member.
addClass :: String -> Attr -> Attr
addClass cls (ident, classes, kvs) = (ident, extended, kvs)
  where extended = union classes [cls]
@ -1450,7 +1455,18 @@ roleAfter = try $ do
return (role,contents)
unmarkedInterpretedText :: PandocMonad m => RSTParser m [Char]
unmarkedInterpretedText = enclosed (atStart $ char '`') (char '`') anyChar
-- Parse backtick-delimited interpreted text and return its raw contents.
-- Backslash escapes are kept verbatim in the result (both the backslash
-- and the escaped character). The whole parser is wrapped in 'try', so a
-- failure consumes no input.
unmarkedInterpretedText = try $ do
  atStart (char '`')
  contents <- mconcat <$> (many1
       (  -- a run of ordinary characters
          many1 (noneOf "`\\\n")
          -- a backslash plus the following non-newline character
      <|> (char '\\' >> ((\c -> ['\\',c]) <$> noneOf "\n"))
          -- a single newline, as long as no blank line follows
      <|> (string "\n" <* notFollowedBy blankline)
          -- an inner backtick: not followed by a role marker, and
          -- followed by an alphanumeric character
      <|> try (string "`" <*
                notFollowedBy (() <$ roleMarker) <*
                lookAhead (satisfy isAlphaNum))
       ))
  -- closing delimiter
  char '`'
  return contents
-- | Consume one or more 'spaceChar' matches and yield a single space
-- inline; labelled "whitespace" in parse errors.
-- NOTE(review): the value on the left of '<$' is assumed to be the
-- builder's space inline -- confirm against the upstream module.
whitespace :: PandocMonad m => RSTParser m Inlines
whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace"
@ -574,13 +574,16 @@ inlineToRST (Quoted DoubleQuote lst) = do
else return $ "“" <> contents <> "”"
inlineToRST (Cite _ lst) =
writeInlines lst
inlineToRST (Code (_,["interpreted-text"],[("role",role)]) str) = do
return $ ":" <> text role <> ":`" <> text str <> "`"
inlineToRST (Code _ str) = do
opts <- gets stOptions
-- we trim the string because the delimiters must adjoin a
-- non-space character; see #3496
-- we use :literal: when the code contains backticks, since
-- :literal: allows backslash-escapes; see #3974
return $ if '`' `elem` str
return $
if '`' `elem` str
then ":literal:`" <> text (escapeString opts (trim str)) <> "`"
else "``" <> text (trim str) <> "``"
inlineToRST (Str str) = do
@ -177,7 +177,7 @@ tests = [ "line block with blank line" =:
=: ".. role:: haskell(code)\n.. role:: lhs(haskell)\n\n:lhs:`text`"
=?> para (codeWith ("", ["lhs", "haskell", "sourceCode"], []) "text")
, "unknown role" =: ":unknown:`text`" =?>
para (spanWith ("",[],[("role","unknown")]) (str "text"))
para (codeWith ("",["interpreted-text"],[("role","unknown")]) "text")
, testGroup "footnotes"
[ "remove space before note" =: T.unlines
@ -1,6 +1,6 @@
% pandoc -f native -t rst
[Para [Span ("",[],[("role","foo")]) [Str "text"]]]
[Para [Code ("",["interpreted-text"],[("role","foo")]) "text"]]
@ -9,5 +9,5 @@
% pandoc -f rst -t native
[Para [Span ("",[],[("role","foo")]) [Str "text"]]]
[Para [Code ("",["interpreted-text"],[("role","foo")]) "text"]]
Normal file
Normal file
@ -0,0 +1,48 @@
No blank lines in inline interpreted roles:
% pandoc -f rst -t native
[Para [Str "`no"]
,Para [Str "blank`:myrole:"]]
Backslash escape behaves properly in interpreted roles:
% pandoc -f rst -t native
`hi\ there`:sup:
`hi\ there`:code:
[Para [Superscript [Str "hithere"]]
,Para [Code ("",["sourceCode"],[]) "hi\\ there"]]
Backtick followed by alphanumeric doesn't end the span:
% pandoc -f rst -t native
[Para [Code ("",["interpreted-text"],[("role","myrole")]) "hi`there"]]
Newline is okay, as long as not blank:
% pandoc -f rst -t native
[Para [Code ("",["interpreted-text"],[("role","myrole")]) "hi\nthere"]]
Use span for title-reference:
% pandoc -f rst -t native
[Para [Span ("",["title-ref"],[]) [Str "default"]]]
@ -326,7 +326,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa
,Para [Math DisplayMath "\\alpha = beta",Math DisplayMath "E = mc^2"]
,Para [Str "Some",Space,Superscript [Str "of"],Space,Str "these",Space,Superscript [Str "words"],Space,Str "are",Space,Str "in",Space,Superscript [Str "superscript"],Str "."]
,Para [Str "Reset",Space,Str "default-role",Space,Str "to",Space,Str "the",Space,Str "default",Space,Str "default."]
,Para [Str "And",Space,Str "now",Space,Str "some-invalid-string-3231231",Space,Str "is",Space,Str "nonsense."]
,Para [Str "And",Space,Str "now",Space,Span ("",["title-ref"],[]) [Str "some-invalid-string-3231231"],Space,Str "is",Space,Str "nonsense."]
,Para [Str "And",Space,Str "now",Space,Str "with",Space,RawInline (Format "html") "<b>inline</b> <span id=\"test\">HTML</span>",Str "."]
,Para [Str "And",Space,Str "some",Space,Str "inline",Space,Str "haskell",Space,Code ("",["haskell","sourceCode"],[]) "fmap id [1,2..10]",Str "."]
,Para [Str "Indirect",Space,Str "python",Space,Str "role",Space,Code ("",["py","python","indirect","sourceCode"],[]) "[x*x for x in [1,2,3,4,5]]",Str "."]
Add table
Reference in a new issue