Better smart quote parsing.
* Added stateLastStrPos to ParserState. This lets us keep track of whether we're parsing the position immediately after a 'str'. If we encounter a ' in such a location, it must be an apostrophe, and can't be a single quote start. * Set this in the markdown, textile, html, and rst str parsers. * Closes #360.
This commit is contained in:
parent
600c22e7bf
commit
925a4c5164
5 changed files with 22 additions and 4 deletions
|
@ -603,6 +603,7 @@ data ParserState = ParserState
|
|||
{ stateParseRaw :: Bool, -- ^ Parse raw HTML and LaTeX?
|
||||
stateParserContext :: ParserContext, -- ^ Inside list?
|
||||
stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment?
|
||||
stateLastStrPos :: Maybe SourcePos, -- ^ Position after last str parsed
|
||||
stateKeys :: KeyTable, -- ^ List of reference keys
|
||||
stateCitations :: [String], -- ^ List of available citations
|
||||
stateNotes :: NoteTable, -- ^ List of notes
|
||||
|
@ -630,6 +631,7 @@ defaultParserState =
|
|||
ParserState { stateParseRaw = False,
|
||||
stateParserContext = NullState,
|
||||
stateQuoteContext = NoQuote,
|
||||
stateLastStrPos = Nothing,
|
||||
stateKeys = M.empty,
|
||||
stateCitations = [],
|
||||
stateNotes = [],
|
||||
|
@ -751,8 +753,12 @@ charOrRef cs =
|
|||
return c)
|
||||
|
||||
singleQuoteStart :: GenParser Char ParserState ()
|
||||
singleQuoteStart = do
|
||||
singleQuoteStart = do
|
||||
failIfInQuoteContext InSingleQuote
|
||||
pos <- getPosition
|
||||
st <- getState
|
||||
-- single quote start can't be right after str
|
||||
guard $ stateLastStrPos st /= Just pos
|
||||
try $ do charOrRef "'\8216\145"
|
||||
notFollowedBy (oneOf ")!],;:-? \t\n")
|
||||
notFollowedBy (char '.') <|> lookAhead (string "..." >> return ())
|
||||
|
|
|
@ -421,8 +421,12 @@ pTagContents =
|
|||
pStr <|> pSpace <|> smartPunctuation pTagContents <|> pSymbol <|> pBad
|
||||
|
||||
pStr :: GenParser Char ParserState Inline
|
||||
pStr = liftM Str $ many1 $ satisfy $ \c ->
|
||||
not (isSpace c) && not (isSpecial c) && not (isBad c)
|
||||
pStr = do
|
||||
result <- many1 $ satisfy $ \c ->
|
||||
not (isSpace c) && not (isSpecial c) && not (isBad c)
|
||||
pos <- getPosition
|
||||
updateState $ \s -> s{ stateLastStrPos = Just pos }
|
||||
return $ Str result
|
||||
|
||||
isSpecial :: Char -> Bool
|
||||
isSpecial '"' = True
|
||||
|
|
|
@ -1096,6 +1096,8 @@ str = do
|
|||
lookAhead alphaNum >> return '\x2019')
|
||||
-- for things like l'aide
|
||||
else mzero
|
||||
pos <- getPosition
|
||||
updateState $ \s -> s{ stateLastStrPos = Just pos }
|
||||
let result = a:as
|
||||
let spacesToNbr = map (\c -> if c == ' ' then '\160' else c)
|
||||
if smart
|
||||
|
|
|
@ -791,7 +791,11 @@ whitespace :: GenParser Char ParserState Inline
|
|||
whitespace = many1 spaceChar >> return Space <?> "whitespace"
|
||||
|
||||
str :: GenParser Char ParserState Inline
|
||||
str = many1 (noneOf (specialChars ++ "\t\n ")) >>= return . Str
|
||||
str = do
|
||||
result <- many1 (noneOf (specialChars ++ "\t\n "))
|
||||
pos <- getPosition
|
||||
updateState $ \s -> s{ stateLastStrPos = Just pos }
|
||||
return $ Str result
|
||||
|
||||
-- an endline character that can be treated as a space, not a structural break
|
||||
endline :: GenParser Char ParserState Inline
|
||||
|
|
|
@ -436,6 +436,8 @@ str = do
|
|||
next <- lookAhead letter
|
||||
guard $ isLetter (last xs) || isLetter next
|
||||
return $ xs ++ "-"
|
||||
pos <- getPosition
|
||||
updateState $ \s -> s{ stateLastStrPos = Just pos }
|
||||
return $ Str result
|
||||
|
||||
-- | Textile allows HTML span infos, we discard them
|
||||
|
|
Loading…
Add table
Reference in a new issue