From 26748da6d4fc6912f1ef314fb01f9969d8a5fc59 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 16 Jul 2012 10:14:43 -0700 Subject: [PATCH] Don't recognize inline-markup starts inside words. For example, "2*2 = 4*1" should not contain an emphasized section. Added test case for "Literal symbols". Closes #569. --- src/Text/Pandoc/Readers/RST.hs | 25 +++++++++++++++++-------- tests/rst-reader.native | 4 +++- tests/rst-reader.rst | 4 ++++ 3 files changed, 24 insertions(+), 9 deletions(-) diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index d1010a736..df063ffd5 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -34,7 +34,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Pandoc.Parsing import Text.ParserCombinators.Parsec -import Control.Monad ( when, liftM ) +import Control.Monad ( when, liftM, guard ) import Data.List ( findIndex, intercalate, transpose, sort, deleteFirstsBy ) import qualified Data.Map as M import Text.Printf ( printf ) @@ -58,7 +58,7 @@ underlineChars = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" -- treat these as potentially non-text when parsing inline: specialChars :: [Char] -specialChars = "\\`|*_<>$:[]()-.\"'\8216\8217\8220\8221" +specialChars = "\\`|*_<>$:/[]{}()-.\"'\8216\8217\8220\8221" -- -- parsing documents @@ -831,12 +831,21 @@ code = try $ do return $ Code nullAttr $ removeLeadingTrailingSpace $ intercalate " " $ lines result +-- succeeds only if we're not right after a str (ie. in middle of word) +atStart :: GenParser Char ParserState a -> GenParser Char ParserState a +atStart p = do + pos <- getPosition + st <- getState + -- single quote start can't be right after str + guard $ stateLastStrPos st /= Just pos + p + emph :: GenParser Char ParserState Inline -emph = enclosed (char '*') (char '*') inline >>= +emph = enclosed (atStart $ char '*') (char '*') inline >>= return . Emph . normalizeSpaces strong :: GenParser Char ParserState Inline -strong = enclosed (string "**") (try $ string "**") inline >>= +strong = enclosed (atStart $ string "**") (try $ string "**") inline >>= return . Strong . normalizeSpaces -- Parses inline interpreted text which is required to have the given role. @@ -856,7 +865,7 @@ interpreted role = try $ do -- http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules -- but it should be good enough for most purposes unmarkedInterpretedText = do - result <- enclosed (char '`') (char '`') anyChar + result <- enclosed (atStart $ char '`') (char '`') anyChar return result superscript :: GenParser Char ParserState Inline @@ -873,9 +882,9 @@ whitespace = many1 spaceChar >> return Space "whitespace" str :: GenParser Char ParserState Inline str = do - result <- many1 (noneOf (specialChars ++ "\t\n ")) - pos <- getPosition - updateState $ \s -> s{ stateLastStrPos = Just pos } + let strChar = noneOf ("\t\n " ++ specialChars) + result <- many1 strChar + updateLastStrPos return $ Str result -- an endline character that can be treated as a space, not a structural break diff --git a/tests/rst-reader.native b/tests/rst-reader.native index bf794c849..c99e73f07 100644 --- a/tests/rst-reader.native +++ b/tests/rst-reader.native @@ -320,4 +320,6 @@ Pandoc (Meta {docTitle = [Str "Pandoc",Space,Str "Test",Space,Str "Suite",Str ": ,Para [Math DisplayMath "\\alpha = beta",Math DisplayMath "E = mc^2"] ,Para [Str "Some",Space,Superscript [Str "of"],Space,Str "these",Space,Superscript [Str "words"],Space,Str "are",Space,Str "in",Space,Superscript [Str "superscript"],Str "."] ,Para [Str "Reset",Space,Str "default",Str "-",Str "role",Space,Str "to",Space,Str "the",Space,Str "default",Space,Str "default",Str "."] -,Para [Str "And",Space,Str "now",Space,Str "`",Str "some",Str "-",Str "invalid",Str "-",Str "string",Str "-",Str "3231231",Str "`",Space,Str "is",Space,Str "nonsense",Str "."]] +,Para [Str "And",Space,Str "now",Space,Str "`",Str "some",Str "-",Str "invalid",Str "-",Str "string",Str "-",Str "3231231",Str "`",Space,Str "is",Space,Str "nonsense",Str "."] +,Header 2 [Str "Literal",Space,Str "symbols"] +,Para [Str "2",Str "*",Str "2",Space,Str "=",Space,Str "4",Str "*",Str "1"]] diff --git a/tests/rst-reader.rst b/tests/rst-reader.rst index abe6d4f69..4d81ccb85 100644 --- a/tests/rst-reader.rst +++ b/tests/rst-reader.rst @@ -593,3 +593,7 @@ Reset default-role to the default default. And now `some-invalid-string-3231231` is nonsense. +Literal symbols +--------------- + +2*2 = 4*1