Modified str parser to capture apostrophes in smart mode.
This solves a problem stemming from the fact that a parser doesn't know what came *before* in the input stream. Previously pandoc would parse `D'oh l'*aide*` as containing a single-quoted "oh l", when both `'`s should be apostrophes. (Issue #360.) There are two issues here. (a) It is obvious that the first `'` is not an open quote, because of the preceding `D`. This patch solves that problem. (b) It is obvious to us that the second `'` is not an open quote, because we see that *aide* is some text. But getting a good algorithm that has good performance is a bit tricky. You can't assume that `'` followed by `*` is always an apostrophe: *'this is quoted'* This patch does not fix (b).
This commit is contained in:
parent
765a777582
commit
dd96267626
1 changed file with 9 additions and 2 deletions
|
@ -44,7 +44,7 @@ import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockT
|
||||||
isTextTag, isCommentTag )
|
isTextTag, isCommentTag )
|
||||||
import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
|
import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
|
||||||
import Text.ParserCombinators.Parsec
|
import Text.ParserCombinators.Parsec
|
||||||
import Control.Monad (when, liftM, guard)
|
import Control.Monad (when, liftM, guard, mzero)
|
||||||
import Text.HTML.TagSoup
|
import Text.HTML.TagSoup
|
||||||
import Text.HTML.TagSoup.Match (tagOpen)
|
import Text.HTML.TagSoup.Match (tagOpen)
|
||||||
|
|
||||||
|
@ -1087,8 +1087,15 @@ nonEndline = satisfy (/='\n')
|
||||||
|
|
||||||
str :: GenParser Char ParserState Inline
|
str :: GenParser Char ParserState Inline
|
||||||
str = do
|
str = do
|
||||||
|
st <- getState
|
||||||
a <- alphaNum
|
a <- alphaNum
|
||||||
as <- many $ alphaNum <|> (try $ char '_' >>~ lookAhead alphaNum)
|
as <- many $ alphaNum
|
||||||
|
<|> (try $ char '_' >>~ lookAhead alphaNum)
|
||||||
|
<|> if stateStrict st
|
||||||
|
then mzero
|
||||||
|
else (try $ char '\'' >> lookAhead alphaNum >> return '\x2019')
|
||||||
|
-- for things like l'aide - would be better to return
|
||||||
|
-- an Apostrophe, but we can't in this context
|
||||||
let result = a:as
|
let result = a:as
|
||||||
state <- getState
|
state <- getState
|
||||||
let spacesToNbr = map (\c -> if c == ' ' then '\160' else c)
|
let spacesToNbr = map (\c -> if c == ' ' then '\160' else c)
|
||||||
|
|
Loading…
Reference in a new issue