From 581a3514ca266e20e70f05d9ffe314515a0a7bb9 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 5 Aug 2018 09:15:06 -0700 Subject: [PATCH] RST reader: improve parsing of inline interpreted text roles. * Use a Span with class "title-reference" for the default title-reference role. * Use B.text to split up contents into Spaces, SoftBreaks, and Strs for title-reference. * Use Code with class "interpreted-text" instead of Span and Str for unknown roles. (The RST writer has also been modified to round-trip this properly.) * Disallow blank lines in interpreted text. * Backslash-escape now works in interpreted text. * Backticks followed by alphanumerics no longer end interpreted text. Closes #4811. --- src/Text/Pandoc/Readers/RST.hs | 40 +++++++++++++++++++--------- src/Text/Pandoc/Writers/RST.hs | 9 ++++--- test/Tests/Readers/RST.hs | 2 +- test/command/3407.md | 4 +-- test/command/4811.md | 48 ++++++++++++++++++++++++++++++++++ test/rst-reader.native | 2 +- 6 files changed, 86 insertions(+), 19 deletions(-) create mode 100644 test/command/4811.md diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index f9752a83c..576c3b77c 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -37,7 +37,7 @@ import Control.Arrow (second) import Control.Monad (forM_, guard, liftM, mplus, mzero, when) import Control.Monad.Except (throwError) import Control.Monad.Identity (Identity (..)) -import Data.Char (isHexDigit, isSpace, toLower, toUpper) +import Data.Char (isHexDigit, isSpace, toLower, toUpper, isAlphaNum) import Data.List (deleteFirstsBy, elemIndex, intercalate, isInfixOf, isSuffixOf, nub, sort, transpose, union) import qualified Data.Map as M @@ -1385,7 +1385,6 @@ strong = B.strong . trimInlines . mconcat <$> -- -- TODO: -- - Classes are silently discarded in addNewRole --- - Lacks sensible implementation for title-reference (which is the default) -- - Allows direct use of the :raw: role, rST only allows inherited use. interpretedRole :: PandocMonad m => RSTParser m Inlines interpretedRole = try $ do @@ -1395,12 +1394,12 @@ interpretedRole = try $ do renderRole :: PandocMonad m => String -> Maybe String -> String -> Attr -> RSTParser m Inlines renderRole contents fmt role attr = case role of - "sup" -> return $ B.superscript $ B.str contents - "superscript" -> return $ B.superscript $ B.str contents - "sub" -> return $ B.subscript $ B.str contents - "subscript" -> return $ B.subscript $ B.str contents - "emphasis" -> return $ B.emph $ B.str contents - "strong" -> return $ B.strong $ B.str contents + "sup" -> return $ B.superscript $ treatAsText contents + "superscript" -> return $ B.superscript $ treatAsText contents + "sub" -> return $ B.subscript $ treatAsText contents + "subscript" -> return $ B.subscript $ treatAsText contents + "emphasis" -> return $ B.emph $ treatAsText contents + "strong" -> return $ B.strong $ treatAsText contents "rfc-reference" -> return $ rfcLink contents "RFC" -> return $ rfcLink contents "pep-reference" -> return $ pepLink contents @@ -1411,7 +1410,7 @@ renderRole contents fmt role attr = case role of "title" -> titleRef contents "t" -> titleRef contents "code" -> return $ B.codeWith (addClass "sourceCode" attr) contents - "span" -> return $ B.spanWith attr $ B.str contents + "span" -> return $ B.spanWith attr $ treatAsText contents "raw" -> return $ B.rawInline (fromMaybe "" fmt) contents custom -> do customRoles <- stateRstCustomRoles <$> getState @@ -1419,14 +1418,20 @@ renderRole contents fmt role attr = case role of Just (newRole, newFmt, newAttr) -> renderRole contents newFmt newRole newAttr Nothing -> -- undefined role - return $ B.spanWith ("",[],[("role",role)]) (B.str contents) + return $ B.codeWith ("",["interpreted-text"],[("role",role)]) + contents where - titleRef ref = return $ B.str ref -- FIXME: Not a sensible behaviour + titleRef ref = return $ B.spanWith ("",["title-ref"],[]) $ treatAsText ref rfcLink rfcNo = B.link rfcUrl ("RFC " ++ rfcNo) $ B.str ("RFC " ++ rfcNo) where rfcUrl = "http://www.faqs.org/rfcs/rfc" ++ rfcNo ++ ".html" pepLink pepNo = B.link pepUrl ("PEP " ++ pepNo) $ B.str ("PEP " ++ pepNo) where padNo = replicate (4 - length pepNo) '0' ++ pepNo pepUrl = "http://www.python.org/dev/peps/pep-" ++ padNo ++ "/" + treatAsText = B.text . handleEscapes + handleEscapes [] = [] + handleEscapes ('\\':' ':cs) = handleEscapes cs + handleEscapes ('\\':c:cs) = c : handleEscapes cs + handleEscapes (c:cs) = c : handleEscapes cs addClass :: String -> Attr -> Attr addClass c (ident, classes, keyValues) = (ident, classes `union` [c], keyValues) @@ -1450,7 +1455,18 @@ roleAfter = try $ do return (role,contents) unmarkedInterpretedText :: PandocMonad m => RSTParser m [Char] -unmarkedInterpretedText = enclosed (atStart $ char '`') (char '`') anyChar +unmarkedInterpretedText = try $ do + atStart (char '`') + contents <- mconcat <$> (many1 + ( many1 (noneOf "`\\\n") + <|> (char '\\' >> ((\c -> ['\\',c]) <$> noneOf "\n")) + <|> (string "\n" <* notFollowedBy blankline) + <|> try (string "`" <* + notFollowedBy (() <$ roleMarker) <* + lookAhead (satisfy isAlphaNum)) + )) + char '`' + return contents whitespace :: PandocMonad m => RSTParser m Inlines whitespace = B.space <$ skipMany1 spaceChar "whitespace" diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index f355a8f5b..566bcbeef 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -574,15 +574,18 @@ inlineToRST (Quoted DoubleQuote lst) = do else return $ "“" <> contents <> "”" inlineToRST (Cite _ lst) = writeInlines lst +inlineToRST (Code (_,["interpreted-text"],[("role",role)]) str) = do + return $ ":" <> text role <> ":`" <> text str <> "`" inlineToRST (Code _ str) = do opts <- gets stOptions -- we trim the string because the delimiters must adjoin a -- non-space character; see #3496 -- we use :literal: when the code contains backticks, since -- :literal: allows backslash-escapes; see #3974 - return $ if '`' `elem` str - then ":literal:`" <> text (escapeString opts (trim str)) <> "`" - else "``" <> text (trim str) <> "``" + return $ + if '`' `elem` str + then ":literal:`" <> text (escapeString opts (trim str)) <> "`" + else "``" <> text (trim str) <> "``" inlineToRST (Str str) = do opts <- gets stOptions return $ text $ diff --git a/test/Tests/Readers/RST.hs b/test/Tests/Readers/RST.hs index 540c5d45a..8916eed6f 100644 --- a/test/Tests/Readers/RST.hs +++ b/test/Tests/Readers/RST.hs @@ -177,7 +177,7 @@ tests = [ "line block with blank line" =: =: ".. role:: haskell(code)\n.. role:: lhs(haskell)\n\n:lhs:`text`" =?> para (codeWith ("", ["lhs", "haskell", "sourceCode"], []) "text") , "unknown role" =: ":unknown:`text`" =?> - para (spanWith ("",[],[("role","unknown")]) (str "text")) + para (codeWith ("",["interpreted-text"],[("role","unknown")]) "text") ] , testGroup "footnotes" [ "remove space before note" =: T.unlines diff --git a/test/command/3407.md b/test/command/3407.md index 3160d1263..aec253ff5 100644 --- a/test/command/3407.md +++ b/test/command/3407.md @@ -1,6 +1,6 @@ ``` % pandoc -f native -t rst -[Para [Span ("",[],[("role","foo")]) [Str "text"]]] +[Para [Code ("",["interpreted-text"],[("role","foo")]) "text"]] ^D :foo:`text` ``` @@ -9,5 +9,5 @@ % pandoc -f rst -t native :foo:`text` ^D -[Para [Span ("",[],[("role","foo")]) [Str "text"]]] +[Para [Code ("",["interpreted-text"],[("role","foo")]) "text"]] ``` diff --git a/test/command/4811.md b/test/command/4811.md new file mode 100644 index 000000000..9c8bea7ce --- /dev/null +++ b/test/command/4811.md @@ -0,0 +1,48 @@ +No blank lines in inline interpreted roles: + +``` +% pandoc -f rst -t native +`no + +blank`:myrole: +^D +[Para [Str "`no"] +,Para [Str "blank`:myrole:"]] +``` + +Backslash escape behaves properly in interpreted roles: + +``` +% pandoc -f rst -t native +`hi\ there`:sup: + +`hi\ there`:code: +^D +[Para [Superscript [Str "hithere"]] +,Para [Code ("",["sourceCode"],[]) "hi\\ there"]] +``` + +Backtick followed by alphanumeric doesn't end the span: +``` +% pandoc -f rst -t native +`hi`there`:myrole: +^D +[Para [Code ("",["interpreted-text"],[("role","myrole")]) "hi`there"]] +``` + +Newline is okay, as long as not blank: +``` +% pandoc -f rst -t native +`hi +there`:myrole: +^D +[Para [Code ("",["interpreted-text"],[("role","myrole")]) "hi\nthere"]] +``` + +Use span for title-reference: +``` +% pandoc -f rst -t native +`default` +^D +[Para [Span ("",["title-ref"],[]) [Str "default"]]] +``` diff --git a/test/rst-reader.native b/test/rst-reader.native index b0e51bd3f..89dde7396 100644 --- a/test/rst-reader.native +++ b/test/rst-reader.native @@ -326,7 +326,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa ,Para [Math DisplayMath "\\alpha = beta",Math DisplayMath "E = mc^2"] ,Para [Str "Some",Space,Superscript [Str "of"],Space,Str "these",Space,Superscript [Str "words"],Space,Str "are",Space,Str "in",Space,Superscript [Str "superscript"],Str "."] ,Para [Str "Reset",Space,Str "default-role",Space,Str "to",Space,Str "the",Space,Str "default",Space,Str "default."] -,Para [Str "And",Space,Str "now",Space,Str "some-invalid-string-3231231",Space,Str "is",Space,Str "nonsense."] +,Para [Str "And",Space,Str "now",Space,Span ("",["title-ref"],[]) [Str "some-invalid-string-3231231"],Space,Str "is",Space,Str "nonsense."] ,Para [Str "And",Space,Str "now",Space,Str "with",Space,RawInline (Format "html") "inline HTML",Str "."] ,Para [Str "And",Space,Str "some",Space,Str "inline",Space,Str "haskell",Space,Code ("",["haskell","sourceCode"],[]) "fmap id [1,2..10]",Str "."] ,Para [Str "Indirect",Space,Str "python",Space,Str "role",Space,Code ("",["py","python","indirect","sourceCode"],[]) "[x*x for x in [1,2,3,4,5]]",Str "."]