Parsing: Simplified dash and ellipsis.

This originated with @dubiousjim's observation in #1419
that there was a typo in the definition of enDash.
It returned an em dash character instead of an en dash.

I thought about why this had not been noticed before, and
realized that en dashes were just being parsed as regular
symbols.

That made me realize that, now that we no longer have
dedicated EnDash, EmDash, and Ellipses inline elements, as
we used to in pandoc, we no longer need to parse the
unicode characters specially.  This allowed a considerable
simplification of the code.

Partially resolves #1419.
This commit is contained in:
John MacFarlane 2014-07-12 22:59:35 -07:00
parent 4676bfdf82
commit 47a5f04761

View file

@ -169,7 +169,7 @@ import Text.Pandoc.XML (fromEntities)
import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn) import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
import Text.Parsec import Text.Parsec
import Text.Parsec.Pos (newPos) import Text.Parsec.Pos (newPos)
import Data.Char ( toLower, toUpper, ord, chr, isAscii, isAlphaNum, isDigit, import Data.Char ( toLower, toUpper, ord, chr, isAscii, isAlphaNum,
isHexDigit, isSpace ) isHexDigit, isSpace )
import Data.List ( intercalate, transpose ) import Data.List ( intercalate, transpose )
import Text.Pandoc.Shared import Text.Pandoc.Shared
@ -1124,48 +1124,21 @@ doubleQuoteEnd = void (charOrRef "\"\8221\148")
ellipses :: Stream s m Char ellipses :: Stream s m Char
=> ParserT s st m Inlines => ParserT s st m Inlines
ellipses = do ellipses = try (string "..." >> return (B.str "\8230"))
try (charOrRef "\8230\133") <|> try (string "..." >> return '…')
return (B.str "\8230")
dash :: Stream s m Char => ParserT s ParserState m Inlines dash :: (HasReaderOptions st, Stream s m Char)
dash = do => ParserT s st m Inlines
dash = try $ do
oldDashes <- getOption readerOldDashes oldDashes <- getOption readerOldDashes
if oldDashes if oldDashes
then emDashOld <|> enDashOld then do
else B.str <$> (hyphenDash <|> emDash <|> enDash) char '-'
(char '-' >> return (B.str "\8212"))
-- Two hyphens = en-dash, three = em-dash <|> (lookAhead digit >> return (B.str "\8211"))
hyphenDash :: Stream s m Char else do
=> ParserT s st m String string "--"
hyphenDash = do (char '-' >> return (B.str "\8212"))
try $ string "--" <|> return (B.str "\8211")
option "\8211" (char '-' >> return "\8212")
emDash :: Stream s m Char
=> ParserT s st m String
emDash = do
try (charOrRef "\8212\151")
return "\8212"
enDash :: Stream s m Char
=> ParserT s st m String
enDash = do
try (charOrRef "\8212\151")
return "\8211"
enDashOld :: Stream s m Char
=> ParserT s st m Inlines
enDashOld = do
try (charOrRef "\8211\150") <|>
try (char '-' >> lookAhead (satisfy isDigit) >> return '–')
return (B.str "\8211")
emDashOld :: Stream s m Char
=> ParserT s st m Inlines
emDashOld = do
try (charOrRef "\8212\151") <|> (try $ string "--" >> optional (char '-') >> return '-')
return (B.str "\8212")
-- This is used to prevent exponential blowups for things like: -- This is used to prevent exponential blowups for things like:
-- a**a*a**a*a**a*a**a*a**a*a**a*a** -- a**a*a**a*a**a*a**a*a**a*a**a*a**