Parsing: Simplified dash and ellipsis.
This originated with @dubiousjim's observation in #1419 that there was a typo in the definition of enDash: it returned an em dash character instead of an en dash. I thought about why this had not been noticed before, and realized that en dashes were just being parsed as regular symbols. That made me realize that, now that we no longer have dedicated EnDash, EmDash, and Ellipses inline elements, as we used to in pandoc, we no longer need to parse the Unicode characters specially. This allowed a considerable simplification of the code. Partially resolves #1419.
This commit is contained in:
parent
4676bfdf82
commit
47a5f04761
1 changed file with 13 additions and 40 deletions
|
@ -169,7 +169,7 @@ import Text.Pandoc.XML (fromEntities)
|
|||
import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
|
||||
import Text.Parsec
|
||||
import Text.Parsec.Pos (newPos)
|
||||
import Data.Char ( toLower, toUpper, ord, chr, isAscii, isAlphaNum, isDigit,
|
||||
import Data.Char ( toLower, toUpper, ord, chr, isAscii, isAlphaNum,
|
||||
isHexDigit, isSpace )
|
||||
import Data.List ( intercalate, transpose )
|
||||
import Text.Pandoc.Shared
|
||||
|
@ -1124,48 +1124,21 @@ doubleQuoteEnd = void (charOrRef "\"\8221\148")
|
|||
|
||||
ellipses :: Stream s m Char
|
||||
=> ParserT s st m Inlines
|
||||
ellipses = do
|
||||
try (charOrRef "\8230\133") <|> try (string "..." >> return '…')
|
||||
return (B.str "\8230")
|
||||
ellipses = try (string "..." >> return (B.str "\8230"))
|
||||
|
||||
dash :: Stream s m Char => ParserT s ParserState m Inlines
|
||||
dash = do
|
||||
dash :: (HasReaderOptions st, Stream s m Char)
|
||||
=> ParserT s st m Inlines
|
||||
dash = try $ do
|
||||
oldDashes <- getOption readerOldDashes
|
||||
if oldDashes
|
||||
then emDashOld <|> enDashOld
|
||||
else B.str <$> (hyphenDash <|> emDash <|> enDash)
|
||||
|
||||
-- Two hyphens = en-dash, three = em-dash
|
||||
hyphenDash :: Stream s m Char
|
||||
=> ParserT s st m String
|
||||
hyphenDash = do
|
||||
try $ string "--"
|
||||
option "\8211" (char '-' >> return "\8212")
|
||||
|
||||
emDash :: Stream s m Char
|
||||
=> ParserT s st m String
|
||||
emDash = do
|
||||
try (charOrRef "\8212\151")
|
||||
return "\8212"
|
||||
|
||||
enDash :: Stream s m Char
|
||||
=> ParserT s st m String
|
||||
enDash = do
|
||||
try (charOrRef "\8212\151")
|
||||
return "\8211"
|
||||
|
||||
enDashOld :: Stream s m Char
|
||||
=> ParserT s st m Inlines
|
||||
enDashOld = do
|
||||
try (charOrRef "\8211\150") <|>
|
||||
try (char '-' >> lookAhead (satisfy isDigit) >> return '–')
|
||||
return (B.str "\8211")
|
||||
|
||||
emDashOld :: Stream s m Char
|
||||
=> ParserT s st m Inlines
|
||||
emDashOld = do
|
||||
try (charOrRef "\8212\151") <|> (try $ string "--" >> optional (char '-') >> return '-')
|
||||
return (B.str "\8212")
|
||||
then do
|
||||
char '-'
|
||||
(char '-' >> return (B.str "\8212"))
|
||||
<|> (lookAhead digit >> return (B.str "\8211"))
|
||||
else do
|
||||
string "--"
|
||||
(char '-' >> return (B.str "\8212"))
|
||||
<|> return (B.str "\8211")
|
||||
|
||||
-- This is used to prevent exponential blowups for things like:
|
||||
-- a**a*a**a*a**a*a**a*a**a*a**a*a**
|
||||
|
|
Loading…
Reference in a new issue