RST reader: improve parsing of inline interpreted text roles.

* Use a Span with class "title-reference" for the default
  title-reference role.
* Use B.text to split up contents into Spaces, SoftBreaks, and Strs
  for title-reference.
* Use Code with class "interpreted-text" instead of Span and Str for
  unknown roles.  (The RST writer has also been modified to round-trip
  this properly.)
* Disallow blank lines in interpreted text.
* Backslash-escape now works in interpreted text.
* Backticks followed by alphanumerics no longer end interpreted text.

Closes #4811.
This commit is contained in:
John MacFarlane 2018-08-05 09:15:06 -07:00
parent f7dc3e7487
commit 581a3514ca
6 changed files with 86 additions and 19 deletions

View file

@ -37,7 +37,7 @@ import Control.Arrow (second)
import Control.Monad (forM_, guard, liftM, mplus, mzero, when)
import Control.Monad.Except (throwError)
import Control.Monad.Identity (Identity (..))
import Data.Char (isHexDigit, isSpace, toLower, toUpper)
import Data.Char (isHexDigit, isSpace, toLower, toUpper, isAlphaNum)
import Data.List (deleteFirstsBy, elemIndex, intercalate, isInfixOf, isSuffixOf,
nub, sort, transpose, union)
import qualified Data.Map as M
@ -1385,7 +1385,6 @@ strong = B.strong . trimInlines . mconcat <$>
--
-- TODO:
-- - Classes are silently discarded in addNewRole
-- - Lacks sensible implementation for title-reference (which is the default)
-- - Allows direct use of the :raw: role, rST only allows inherited use.
interpretedRole :: PandocMonad m => RSTParser m Inlines
interpretedRole = try $ do
@ -1395,12 +1394,12 @@ interpretedRole = try $ do
renderRole :: PandocMonad m
=> String -> Maybe String -> String -> Attr -> RSTParser m Inlines
renderRole contents fmt role attr = case role of
"sup" -> return $ B.superscript $ B.str contents
"superscript" -> return $ B.superscript $ B.str contents
"sub" -> return $ B.subscript $ B.str contents
"subscript" -> return $ B.subscript $ B.str contents
"emphasis" -> return $ B.emph $ B.str contents
"strong" -> return $ B.strong $ B.str contents
"sup" -> return $ B.superscript $ treatAsText contents
"superscript" -> return $ B.superscript $ treatAsText contents
"sub" -> return $ B.subscript $ treatAsText contents
"subscript" -> return $ B.subscript $ treatAsText contents
"emphasis" -> return $ B.emph $ treatAsText contents
"strong" -> return $ B.strong $ treatAsText contents
"rfc-reference" -> return $ rfcLink contents
"RFC" -> return $ rfcLink contents
"pep-reference" -> return $ pepLink contents
@ -1411,7 +1410,7 @@ renderRole contents fmt role attr = case role of
"title" -> titleRef contents
"t" -> titleRef contents
"code" -> return $ B.codeWith (addClass "sourceCode" attr) contents
"span" -> return $ B.spanWith attr $ B.str contents
"span" -> return $ B.spanWith attr $ treatAsText contents
"raw" -> return $ B.rawInline (fromMaybe "" fmt) contents
custom -> do
customRoles <- stateRstCustomRoles <$> getState
@ -1419,14 +1418,20 @@ renderRole contents fmt role attr = case role of
Just (newRole, newFmt, newAttr) ->
renderRole contents newFmt newRole newAttr
Nothing -> -- undefined role
return $ B.spanWith ("",[],[("role",role)]) (B.str contents)
return $ B.codeWith ("",["interpreted-text"],[("role",role)])
contents
where
titleRef ref = return $ B.str ref -- FIXME: Not a sensible behaviour
titleRef ref = return $ B.spanWith ("",["title-ref"],[]) $ treatAsText ref
rfcLink rfcNo = B.link rfcUrl ("RFC " ++ rfcNo) $ B.str ("RFC " ++ rfcNo)
where rfcUrl = "http://www.faqs.org/rfcs/rfc" ++ rfcNo ++ ".html"
pepLink pepNo = B.link pepUrl ("PEP " ++ pepNo) $ B.str ("PEP " ++ pepNo)
where padNo = replicate (4 - length pepNo) '0' ++ pepNo
pepUrl = "http://www.python.org/dev/peps/pep-" ++ padNo ++ "/"
treatAsText = B.text . handleEscapes
handleEscapes [] = []
handleEscapes ('\\':' ':cs) = handleEscapes cs
handleEscapes ('\\':c:cs) = c : handleEscapes cs
handleEscapes (c:cs) = c : handleEscapes cs
addClass :: String -> Attr -> Attr
addClass c (ident, classes, keyValues) = (ident, classes `union` [c], keyValues)
@ -1450,7 +1455,18 @@ roleAfter = try $ do
return (role,contents)
unmarkedInterpretedText :: PandocMonad m => RSTParser m [Char]
unmarkedInterpretedText = enclosed (atStart $ char '`') (char '`') anyChar
unmarkedInterpretedText = try $ do
atStart (char '`')
contents <- mconcat <$> (many1
( many1 (noneOf "`\\\n")
<|> (char '\\' >> ((\c -> ['\\',c]) <$> noneOf "\n"))
<|> (string "\n" <* notFollowedBy blankline)
<|> try (string "`" <*
notFollowedBy (() <$ roleMarker) <*
lookAhead (satisfy isAlphaNum))
))
char '`'
return contents
whitespace :: PandocMonad m => RSTParser m Inlines
whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace"

View file

@ -574,15 +574,18 @@ inlineToRST (Quoted DoubleQuote lst) = do
else return $ "" <> contents <> ""
inlineToRST (Cite _ lst) =
writeInlines lst
inlineToRST (Code (_,["interpreted-text"],[("role",role)]) str) = do
return $ ":" <> text role <> ":`" <> text str <> "`"
inlineToRST (Code _ str) = do
opts <- gets stOptions
-- we trim the string because the delimiters must adjoin a
-- non-space character; see #3496
-- we use :literal: when the code contains backticks, since
-- :literal: allows backslash-escapes; see #3974
return $ if '`' `elem` str
then ":literal:`" <> text (escapeString opts (trim str)) <> "`"
else "``" <> text (trim str) <> "``"
return $
if '`' `elem` str
then ":literal:`" <> text (escapeString opts (trim str)) <> "`"
else "``" <> text (trim str) <> "``"
inlineToRST (Str str) = do
opts <- gets stOptions
return $ text $

View file

@ -177,7 +177,7 @@ tests = [ "line block with blank line" =:
=: ".. role:: haskell(code)\n.. role:: lhs(haskell)\n\n:lhs:`text`"
=?> para (codeWith ("", ["lhs", "haskell", "sourceCode"], []) "text")
, "unknown role" =: ":unknown:`text`" =?>
para (spanWith ("",[],[("role","unknown")]) (str "text"))
para (codeWith ("",["interpreted-text"],[("role","unknown")]) "text")
]
, testGroup "footnotes"
[ "remove space before note" =: T.unlines

View file

@ -1,6 +1,6 @@
```
% pandoc -f native -t rst
[Para [Span ("",[],[("role","foo")]) [Str "text"]]]
[Para [Code ("",["interpreted-text"],[("role","foo")]) "text"]]
^D
:foo:`text`
```
@ -9,5 +9,5 @@
% pandoc -f rst -t native
:foo:`text`
^D
[Para [Span ("",[],[("role","foo")]) [Str "text"]]]
[Para [Code ("",["interpreted-text"],[("role","foo")]) "text"]]
```

48
test/command/4811.md Normal file
View file

@ -0,0 +1,48 @@
No blank lines in inline interpreted roles:
```
% pandoc -f rst -t native
`no
blank`:myrole:
^D
[Para [Str "`no"]
,Para [Str "blank`:myrole:"]]
```
Backslash escape behaves properly in interpreted roles:
```
% pandoc -f rst -t native
`hi\ there`:sup:
`hi\ there`:code:
^D
[Para [Superscript [Str "hithere"]]
,Para [Code ("",["sourceCode"],[]) "hi\\ there"]]
```
Backtick followed by alphanumeric doesn't end the span:
```
% pandoc -f rst -t native
`hi`there`:myrole:
^D
[Para [Code ("",["interpreted-text"],[("role","myrole")]) "hi`there"]]
```
Newline is okay, as long as not blank:
```
% pandoc -f rst -t native
`hi
there`:myrole:
^D
[Para [Code ("",["interpreted-text"],[("role","myrole")]) "hi\nthere"]]
```
Use span for title-reference:
```
% pandoc -f rst -t native
`default`
^D
[Para [Span ("",["title-ref"],[]) [Str "default"]]]
```

View file

@ -326,7 +326,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa
,Para [Math DisplayMath "\\alpha = beta",Math DisplayMath "E = mc^2"]
,Para [Str "Some",Space,Superscript [Str "of"],Space,Str "these",Space,Superscript [Str "words"],Space,Str "are",Space,Str "in",Space,Superscript [Str "superscript"],Str "."]
,Para [Str "Reset",Space,Str "default-role",Space,Str "to",Space,Str "the",Space,Str "default",Space,Str "default."]
,Para [Str "And",Space,Str "now",Space,Str "some-invalid-string-3231231",Space,Str "is",Space,Str "nonsense."]
,Para [Str "And",Space,Str "now",Space,Span ("",["title-ref"],[]) [Str "some-invalid-string-3231231"],Space,Str "is",Space,Str "nonsense."]
,Para [Str "And",Space,Str "now",Space,Str "with",Space,RawInline (Format "html") "<b>inline</b> <span id=\"test\">HTML</span>",Str "."]
,Para [Str "And",Space,Str "some",Space,Str "inline",Space,Str "haskell",Space,Code ("",["haskell","sourceCode"],[]) "fmap id [1,2..10]",Str "."]
,Para [Str "Indirect",Space,Str "python",Space,Str "role",Space,Code ("",["py","python","indirect","sourceCode"],[]) "[x*x for x in [1,2,3,4,5]]",Str "."]