Markdown reader: handle curly quotes better.

Previously, curly quotes were just parsed literally, leading
to problems in some output formats.  Now they are parsed as
Quoted inlines, if --smart is specified.

Resolves Issue #270.
This commit is contained in:
John MacFarlane 2010-12-06 20:36:58 -08:00
parent 5a4609584c
commit 7864f30717
4 changed files with 25 additions and 17 deletions

View file

@ -71,7 +71,7 @@ setextHChars = "=-"
-- treat these as potentially non-text when parsing inline:
specialChars :: [Char]
specialChars = "\\[]*_~`<>$!^-.&@'\";"
specialChars = "\\[]*_~`<>$!^-.&@'\";\8216\8217\8220\8221"
--
-- auxiliary functions
@ -1076,10 +1076,11 @@ singleQuoted = try $ do
return . Quoted SingleQuote . normalizeSpaces
doubleQuoted :: GenParser Char ParserState Inline
doubleQuoted = try $ do
doubleQuoted = try $ do
doubleQuoteStart
withQuoteContext InDoubleQuote $ many1Till inline doubleQuoteEnd >>=
return . Quoted DoubleQuote . normalizeSpaces
withQuoteContext InDoubleQuote $ do
contents <- manyTill inline doubleQuoteEnd
return . Quoted DoubleQuote . normalizeSpaces $ contents
failIfInQuoteContext :: QuoteContext -> GenParser tok ParserState ()
failIfInQuoteContext context = do
@ -1088,31 +1089,29 @@ failIfInQuoteContext context = do
then fail "already inside quotes"
else return ()
singleQuoteStart :: GenParser Char ParserState Char
singleQuoteStart :: GenParser Char ParserState ()
singleQuoteStart = do
failIfInQuoteContext InSingleQuote
try $ do char '\''
try $ do oneOf "'\8216"
notFollowedBy (oneOf ")!],.;:-? \t\n")
notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >>
satisfy (not . isAlphaNum)))
-- possess/contraction
return '\''
return ()
singleQuoteEnd :: GenParser Char st Char
singleQuoteEnd :: GenParser Char st ()
singleQuoteEnd = try $ do
char '\''
oneOf "'\8217"
notFollowedBy alphaNum
return '\''
doubleQuoteStart :: GenParser Char ParserState Char
doubleQuoteStart :: GenParser Char ParserState ()
doubleQuoteStart = do
failIfInQuoteContext InDoubleQuote
try $ do char '"'
try $ do oneOf "\"\8220"
notFollowedBy (oneOf " \t\n")
return '"'
doubleQuoteEnd :: GenParser Char st Char
doubleQuoteEnd = char '"'
doubleQuoteEnd :: GenParser Char st ()
doubleQuoteEnd = oneOf "\"\8221" >> return ()
ellipses :: GenParser Char st Inline
ellipses = oneOfStrings ["...", " . . . ", ". . .", " . . ."] >> return Ellipses

View file

@ -96,7 +96,7 @@ main = do
"testsuite.txt" "testsuite.native"
r7 <- runTest "markdown reader (tables)" ["-r", "markdown", "-w", "native"]
"tables.txt" "tables.native"
r7a <- runTest "markdown reader (more)" ["-r", "markdown", "-w", "native"]
r7a <- runTest "markdown reader (more)" ["-r", "markdown", "-w", "native", "-S"]
"markdown-reader-more.txt" "markdown-reader-more.native"
r8 <- runTest "rst reader" ["-r", "rst", "-w", "native", "-s", "-S"]
"rst-reader.rst" "rst-reader.native"

View file

@ -45,4 +45,7 @@ Pandoc (Meta {docTitle = [Str "Title",Space,Str "spanning",Space,Str "multiple",
, Header 2 [Str "Case",Str "-",Str "insensitive",Space,Str "references"]
, Para [Link [Str "Fum"] ("/fum","")]
, Para [Link [Str "FUM"] ("/fum","")]
, Para [Link [Str "bat"] ("/bat","")] ]
, Para [Link [Str "bat"] ("/bat","")]
, Header 2 [Str "Curly",Space,Str "smart",Space,Str "quotes"]
, Para [Quoted DoubleQuote [Str "Hi"]]
, Para [Quoted SingleQuote [Str "Hi"]] ]

View file

@ -116,3 +116,9 @@ $\tuple{x,y}$
[fum]: /fum
[BAT]: /bat
## Curly smart quotes
“Hi”
‘Hi’