From dd96267626e6f52e49c912cfe90380de2df469e5 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 26 Dec 2011 23:04:45 -0800 Subject: [PATCH] Modified str parser to capture apostrophes in smart mode. This solves a problem stemming from the fact that a parser doesn't know what came *before* in the input stream. Previously pandoc would parse D'oh l'*aide* as containing a single quoted "oh l", when both `'`s should be apostrophes. (Issue #360.) There are two issues here. (a) It is obvious that the first `'` is not an open quote, because of the preceding `D`. This patch solves the problem. (b) It is obvious to us that the second `'` is not an open quote, because we see that *aide* is some text. But getting a good algorithm that has good performance is a bit tricky. You can't assume that `'` followed by `*` is always an apostrophe: *'this is quoted'* This patch does not fix (b). --- src/Text/Pandoc/Readers/Markdown.hs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index cc9b8a23d..b80d3871a 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -44,7 +44,7 @@ import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockT isTextTag, isCommentTag ) import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) import Text.ParserCombinators.Parsec -import Control.Monad (when, liftM, guard) +import Control.Monad (when, liftM, guard, mzero) import Text.HTML.TagSoup import Text.HTML.TagSoup.Match (tagOpen) @@ -1087,8 +1087,15 @@ nonEndline = satisfy (/='\n') str :: GenParser Char ParserState Inline str = do + st <- getState a <- alphaNum - as <- many $ alphaNum <|> (try $ char '_' >>~ lookAhead alphaNum) + as <- many $ alphaNum + <|> (try $ char '_' >>~ lookAhead alphaNum) + <|> if stateStrict st + then mzero + else (try $ char '\'' >> lookAhead alphaNum >> return '\x2019') + -- for things like l'aide - 
would be better to return + -- an Apostrophe, but we can't in this context let result = a:as state <- getState let spacesToNbr = map (\c -> if c == ' ' then '\160' else c)