RST reader: improve parsing of inline interpreted text roles.
* Use a Span with class "title-reference" for the default title-reference role. * Use B.text to split up contents into Spaces, SoftBreaks, and Strs for title-reference. * Use Code with class "interpreted-text" instead of Span and Str for unknown roles. (The RST writer has also been modified to round-trip this properly.) * Disallow blank lines in interpreted text. * Backslash-escape now works in interpreted text. * Backticks followed by alphanumerics no longer end interpreted text. Closes #4811.
This commit is contained in:
parent
f7dc3e7487
commit
581a3514ca
6 changed files with 86 additions and 19 deletions
|
@ -37,7 +37,7 @@ import Control.Arrow (second)
|
|||
import Control.Monad (forM_, guard, liftM, mplus, mzero, when)
|
||||
import Control.Monad.Except (throwError)
|
||||
import Control.Monad.Identity (Identity (..))
|
||||
import Data.Char (isHexDigit, isSpace, toLower, toUpper)
|
||||
import Data.Char (isHexDigit, isSpace, toLower, toUpper, isAlphaNum)
|
||||
import Data.List (deleteFirstsBy, elemIndex, intercalate, isInfixOf, isSuffixOf,
|
||||
nub, sort, transpose, union)
|
||||
import qualified Data.Map as M
|
||||
|
@ -1385,7 +1385,6 @@ strong = B.strong . trimInlines . mconcat <$>
|
|||
--
|
||||
-- TODO:
|
||||
-- - Classes are silently discarded in addNewRole
|
||||
-- - Lacks sensible implementation for title-reference (which is the default)
|
||||
-- - Allows direct use of the :raw: role, rST only allows inherited use.
|
||||
interpretedRole :: PandocMonad m => RSTParser m Inlines
|
||||
interpretedRole = try $ do
|
||||
|
@ -1395,12 +1394,12 @@ interpretedRole = try $ do
|
|||
renderRole :: PandocMonad m
|
||||
=> String -> Maybe String -> String -> Attr -> RSTParser m Inlines
|
||||
renderRole contents fmt role attr = case role of
|
||||
"sup" -> return $ B.superscript $ B.str contents
|
||||
"superscript" -> return $ B.superscript $ B.str contents
|
||||
"sub" -> return $ B.subscript $ B.str contents
|
||||
"subscript" -> return $ B.subscript $ B.str contents
|
||||
"emphasis" -> return $ B.emph $ B.str contents
|
||||
"strong" -> return $ B.strong $ B.str contents
|
||||
"sup" -> return $ B.superscript $ treatAsText contents
|
||||
"superscript" -> return $ B.superscript $ treatAsText contents
|
||||
"sub" -> return $ B.subscript $ treatAsText contents
|
||||
"subscript" -> return $ B.subscript $ treatAsText contents
|
||||
"emphasis" -> return $ B.emph $ treatAsText contents
|
||||
"strong" -> return $ B.strong $ treatAsText contents
|
||||
"rfc-reference" -> return $ rfcLink contents
|
||||
"RFC" -> return $ rfcLink contents
|
||||
"pep-reference" -> return $ pepLink contents
|
||||
|
@ -1411,7 +1410,7 @@ renderRole contents fmt role attr = case role of
|
|||
"title" -> titleRef contents
|
||||
"t" -> titleRef contents
|
||||
"code" -> return $ B.codeWith (addClass "sourceCode" attr) contents
|
||||
"span" -> return $ B.spanWith attr $ B.str contents
|
||||
"span" -> return $ B.spanWith attr $ treatAsText contents
|
||||
"raw" -> return $ B.rawInline (fromMaybe "" fmt) contents
|
||||
custom -> do
|
||||
customRoles <- stateRstCustomRoles <$> getState
|
||||
|
@ -1419,14 +1418,20 @@ renderRole contents fmt role attr = case role of
|
|||
Just (newRole, newFmt, newAttr) ->
|
||||
renderRole contents newFmt newRole newAttr
|
||||
Nothing -> -- undefined role
|
||||
return $ B.spanWith ("",[],[("role",role)]) (B.str contents)
|
||||
return $ B.codeWith ("",["interpreted-text"],[("role",role)])
|
||||
contents
|
||||
where
|
||||
titleRef ref = return $ B.str ref -- FIXME: Not a sensible behaviour
|
||||
titleRef ref = return $ B.spanWith ("",["title-ref"],[]) $ treatAsText ref
|
||||
rfcLink rfcNo = B.link rfcUrl ("RFC " ++ rfcNo) $ B.str ("RFC " ++ rfcNo)
|
||||
where rfcUrl = "http://www.faqs.org/rfcs/rfc" ++ rfcNo ++ ".html"
|
||||
pepLink pepNo = B.link pepUrl ("PEP " ++ pepNo) $ B.str ("PEP " ++ pepNo)
|
||||
where padNo = replicate (4 - length pepNo) '0' ++ pepNo
|
||||
pepUrl = "http://www.python.org/dev/peps/pep-" ++ padNo ++ "/"
|
||||
treatAsText = B.text . handleEscapes
|
||||
handleEscapes [] = []
|
||||
handleEscapes ('\\':' ':cs) = handleEscapes cs
|
||||
handleEscapes ('\\':c:cs) = c : handleEscapes cs
|
||||
handleEscapes (c:cs) = c : handleEscapes cs
|
||||
|
||||
addClass :: String -> Attr -> Attr
|
||||
addClass c (ident, classes, keyValues) = (ident, classes `union` [c], keyValues)
|
||||
|
@ -1450,7 +1455,18 @@ roleAfter = try $ do
|
|||
return (role,contents)
|
||||
|
||||
unmarkedInterpretedText :: PandocMonad m => RSTParser m [Char]
|
||||
unmarkedInterpretedText = enclosed (atStart $ char '`') (char '`') anyChar
|
||||
unmarkedInterpretedText = try $ do
|
||||
atStart (char '`')
|
||||
contents <- mconcat <$> (many1
|
||||
( many1 (noneOf "`\\\n")
|
||||
<|> (char '\\' >> ((\c -> ['\\',c]) <$> noneOf "\n"))
|
||||
<|> (string "\n" <* notFollowedBy blankline)
|
||||
<|> try (string "`" <*
|
||||
notFollowedBy (() <$ roleMarker) <*
|
||||
lookAhead (satisfy isAlphaNum))
|
||||
))
|
||||
char '`'
|
||||
return contents
|
||||
|
||||
whitespace :: PandocMonad m => RSTParser m Inlines
|
||||
whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace"
|
||||
|
|
|
@ -574,15 +574,18 @@ inlineToRST (Quoted DoubleQuote lst) = do
|
|||
else return $ "“" <> contents <> "”"
|
||||
inlineToRST (Cite _ lst) =
|
||||
writeInlines lst
|
||||
inlineToRST (Code (_,["interpreted-text"],[("role",role)]) str) = do
|
||||
return $ ":" <> text role <> ":`" <> text str <> "`"
|
||||
inlineToRST (Code _ str) = do
|
||||
opts <- gets stOptions
|
||||
-- we trim the string because the delimiters must adjoin a
|
||||
-- non-space character; see #3496
|
||||
-- we use :literal: when the code contains backticks, since
|
||||
-- :literal: allows backslash-escapes; see #3974
|
||||
return $ if '`' `elem` str
|
||||
then ":literal:`" <> text (escapeString opts (trim str)) <> "`"
|
||||
else "``" <> text (trim str) <> "``"
|
||||
return $
|
||||
if '`' `elem` str
|
||||
then ":literal:`" <> text (escapeString opts (trim str)) <> "`"
|
||||
else "``" <> text (trim str) <> "``"
|
||||
inlineToRST (Str str) = do
|
||||
opts <- gets stOptions
|
||||
return $ text $
|
||||
|
|
|
@ -177,7 +177,7 @@ tests = [ "line block with blank line" =:
|
|||
=: ".. role:: haskell(code)\n.. role:: lhs(haskell)\n\n:lhs:`text`"
|
||||
=?> para (codeWith ("", ["lhs", "haskell", "sourceCode"], []) "text")
|
||||
, "unknown role" =: ":unknown:`text`" =?>
|
||||
para (spanWith ("",[],[("role","unknown")]) (str "text"))
|
||||
para (codeWith ("",["interpreted-text"],[("role","unknown")]) "text")
|
||||
]
|
||||
, testGroup "footnotes"
|
||||
[ "remove space before note" =: T.unlines
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
```
|
||||
% pandoc -f native -t rst
|
||||
[Para [Span ("",[],[("role","foo")]) [Str "text"]]]
|
||||
[Para [Code ("",["interpreted-text"],[("role","foo")]) "text"]]
|
||||
^D
|
||||
:foo:`text`
|
||||
```
|
||||
|
@ -9,5 +9,5 @@
|
|||
% pandoc -f rst -t native
|
||||
:foo:`text`
|
||||
^D
|
||||
[Para [Span ("",[],[("role","foo")]) [Str "text"]]]
|
||||
[Para [Code ("",["interpreted-text"],[("role","foo")]) "text"]]
|
||||
```
|
||||
|
|
48
test/command/4811.md
Normal file
48
test/command/4811.md
Normal file
|
@ -0,0 +1,48 @@
|
|||
No blank lines in inline interpreted roles:
|
||||
|
||||
```
|
||||
% pandoc -f rst -t native
|
||||
`no
|
||||
|
||||
blank`:myrole:
|
||||
^D
|
||||
[Para [Str "`no"]
|
||||
,Para [Str "blank`:myrole:"]]
|
||||
```
|
||||
|
||||
Backslash escape behaves properly in interpreted roles:
|
||||
|
||||
```
|
||||
% pandoc -f rst -t native
|
||||
`hi\ there`:sup:
|
||||
|
||||
`hi\ there`:code:
|
||||
^D
|
||||
[Para [Superscript [Str "hithere"]]
|
||||
,Para [Code ("",["sourceCode"],[]) "hi\\ there"]]
|
||||
```
|
||||
|
||||
Backtick followed by alphanumeric doesn't end the span:
|
||||
```
|
||||
% pandoc -f rst -t native
|
||||
`hi`there`:myrole:
|
||||
^D
|
||||
[Para [Code ("",["interpreted-text"],[("role","myrole")]) "hi`there"]]
|
||||
```
|
||||
|
||||
Newline is okay, as long as not blank:
|
||||
```
|
||||
% pandoc -f rst -t native
|
||||
`hi
|
||||
there`:myrole:
|
||||
^D
|
||||
[Para [Code ("",["interpreted-text"],[("role","myrole")]) "hi\nthere"]]
|
||||
```
|
||||
|
||||
Use span for title-reference:
|
||||
```
|
||||
% pandoc -f rst -t native
|
||||
`default`
|
||||
^D
|
||||
[Para [Span ("",["title-ref"],[]) [Str "default"]]]
|
||||
```
|
|
@ -326,7 +326,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa
|
|||
,Para [Math DisplayMath "\\alpha = beta",Math DisplayMath "E = mc^2"]
|
||||
,Para [Str "Some",Space,Superscript [Str "of"],Space,Str "these",Space,Superscript [Str "words"],Space,Str "are",Space,Str "in",Space,Superscript [Str "superscript"],Str "."]
|
||||
,Para [Str "Reset",Space,Str "default-role",Space,Str "to",Space,Str "the",Space,Str "default",Space,Str "default."]
|
||||
,Para [Str "And",Space,Str "now",Space,Str "some-invalid-string-3231231",Space,Str "is",Space,Str "nonsense."]
|
||||
,Para [Str "And",Space,Str "now",Space,Span ("",["title-ref"],[]) [Str "some-invalid-string-3231231"],Space,Str "is",Space,Str "nonsense."]
|
||||
,Para [Str "And",Space,Str "now",Space,Str "with",Space,RawInline (Format "html") "<b>inline</b> <span id=\"test\">HTML</span>",Str "."]
|
||||
,Para [Str "And",Space,Str "some",Space,Str "inline",Space,Str "haskell",Space,Code ("",["haskell","sourceCode"],[]) "fmap id [1,2..10]",Str "."]
|
||||
,Para [Str "Indirect",Space,Str "python",Space,Str "role",Space,Code ("",["py","python","indirect","sourceCode"],[]) "[x*x for x in [1,2,3,4,5]]",Str "."]
|
||||
|
|
Loading…
Reference in a new issue