Don't recognize inline-markup starts inside words.

For example, "2*2 = 4*1" should not contain an emphasized
section.  Added test case for "Literal symbols".  Closes #569.
This commit is contained in:
John MacFarlane 2012-07-16 10:14:43 -07:00
parent f68b05e74b
commit 26748da6d4
3 changed files with 24 additions and 9 deletions

View file

@ -34,7 +34,7 @@ import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Pandoc.Parsing
import Text.ParserCombinators.Parsec
import Control.Monad ( when, liftM )
import Control.Monad ( when, liftM, guard )
import Data.List ( findIndex, intercalate, transpose, sort, deleteFirstsBy )
import qualified Data.Map as M
import Text.Printf ( printf )
@ -58,7 +58,7 @@ underlineChars = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
-- treat these as potentially non-text when parsing inline:
specialChars :: [Char]
specialChars = "\\`|*_<>$:[]()-.\"'\8216\8217\8220\8221"
specialChars = "\\`|*_<>$:/[]{}()-.\"'\8216\8217\8220\8221"
--
-- parsing documents
@ -831,12 +831,21 @@ code = try $ do
return $ Code nullAttr
$ removeLeadingTrailingSpace $ intercalate " " $ lines result
-- succeeds only if we're not right after a str (ie. in middle of word)
atStart :: GenParser Char ParserState a -> GenParser Char ParserState a
atStart p = do
pos <- getPosition
st <- getState
-- single quote start can't be right after str
guard $ stateLastStrPos st /= Just pos
p
emph :: GenParser Char ParserState Inline
emph = enclosed (char '*') (char '*') inline >>=
emph = enclosed (atStart $ char '*') (char '*') inline >>=
return . Emph . normalizeSpaces
strong :: GenParser Char ParserState Inline
strong = enclosed (string "**") (try $ string "**") inline >>=
strong = enclosed (atStart $ string "**") (try $ string "**") inline >>=
return . Strong . normalizeSpaces
-- Parses inline interpreted text which is required to have the given role.
@ -856,7 +865,7 @@ interpreted role = try $ do
-- http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
-- but it should be good enough for most purposes
unmarkedInterpretedText = do
result <- enclosed (char '`') (char '`') anyChar
result <- enclosed (atStart $ char '`') (char '`') anyChar
return result
superscript :: GenParser Char ParserState Inline
@ -873,9 +882,9 @@ whitespace = many1 spaceChar >> return Space <?> "whitespace"
str :: GenParser Char ParserState Inline
str = do
result <- many1 (noneOf (specialChars ++ "\t\n "))
pos <- getPosition
updateState $ \s -> s{ stateLastStrPos = Just pos }
let strChar = noneOf ("\t\n " ++ specialChars)
result <- many1 strChar
updateLastStrPos
return $ Str result
-- an endline character that can be treated as a space, not a structural break

View file

@ -320,4 +320,6 @@ Pandoc (Meta {docTitle = [Str "Pandoc",Space,Str "Test",Space,Str "Suite",Str ":
,Para [Math DisplayMath "\\alpha = beta",Math DisplayMath "E = mc^2"]
,Para [Str "Some",Space,Superscript [Str "of"],Space,Str "these",Space,Superscript [Str "words"],Space,Str "are",Space,Str "in",Space,Superscript [Str "superscript"],Str "."]
,Para [Str "Reset",Space,Str "default",Str "-",Str "role",Space,Str "to",Space,Str "the",Space,Str "default",Space,Str "default",Str "."]
,Para [Str "And",Space,Str "now",Space,Str "`",Str "some",Str "-",Str "invalid",Str "-",Str "string",Str "-",Str "3231231",Str "`",Space,Str "is",Space,Str "nonsense",Str "."]]
,Para [Str "And",Space,Str "now",Space,Str "`",Str "some",Str "-",Str "invalid",Str "-",Str "string",Str "-",Str "3231231",Str "`",Space,Str "is",Space,Str "nonsense",Str "."]
,Header 2 [Str "Literal",Space,Str "symbols"]
,Para [Str "2",Str "*",Str "2",Space,Str "=",Space,Str "4",Str "*",Str "1"]]

View file

@ -593,3 +593,7 @@ Reset default-role to the default default.
And now `some-invalid-string-3231231` is nonsense.
Literal symbols
---------------
2*2 = 4*1