New treatment of dashes in --smart mode.
* `---` is always em-dash, `--` is always en-dash.
* pandoc no longer tries to guess when `-` should be en-dash.
* A new option, `--old-dashes`, is provided for legacy documents.

Rationale: The rules for en-dash are too complex and language-dependent for a guesser to work reliably. This change gives users greater control. The alternative of using Unicode isn't very good, since Unicode em- and en-dashes are barely distinguishable in a monospace font.
This commit is contained in:
parent
3cf60c7306
commit
da8425598a
6 changed files with 56 additions and 15 deletions
16
README
16
README
|
@ -206,11 +206,17 @@ Options
|
|||
|
||||
`-S`, `--smart`
|
||||
: Produce typographically correct output, converting straight quotes
|
||||
to curly quotes, `---` and `--` to dashes, and `...` to ellipses.
|
||||
Nonbreaking spaces are inserted after certain abbreviations, such
|
||||
as "Mr." (Note: This option is significant only when the input format is
|
||||
`markdown` or `textile`. It is selected automatically when the input
|
||||
format is `textile` or the output format is `latex` or `context`.)
|
||||
to curly quotes, `---` to em-dashes, `--` to en-dashes, and
|
||||
`...` to ellipses. Nonbreaking spaces are inserted after certain
|
||||
abbreviations, such as "Mr." (Note: This option is significant only when
|
||||
the input format is `markdown` or `textile`. It is selected automatically
|
||||
when the input format is `textile` or the output format is `latex` or
|
||||
`context`.)
|
||||
|
||||
`--old-dashes`
|
||||
: Selects the pandoc <= 1.8.2.1 behavior for parsing smart dashes: `-` before
|
||||
a numeral is an en-dash, and `--` is an em-dash. This option is selected
|
||||
automatically for `textile` input.
|
||||
|
||||
`-5`, `--html5`
|
||||
: Produce HTML5 instead of HTML4. This option has no effect for writers
|
||||
|
|
|
@ -614,6 +614,9 @@ data ParserState = ParserState
|
|||
stateDate :: [Inline], -- ^ Date of document
|
||||
stateStrict :: Bool, -- ^ Use strict markdown syntax?
|
||||
stateSmart :: Bool, -- ^ Use smart typography?
|
||||
stateOldDashes :: Bool, -- ^ Use pandoc <= 1.8.2.1 behavior
|
||||
-- in parsing dashes: "--" is em-dash;
|
||||
-- "-" before a numeral is en-dash
|
||||
stateLiterateHaskell :: Bool, -- ^ Treat input as literate haskell
|
||||
stateColumns :: Int, -- ^ Number of columns in terminal
|
||||
stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used
|
||||
|
@ -642,6 +645,7 @@ defaultParserState =
|
|||
stateDate = [],
|
||||
stateStrict = False,
|
||||
stateSmart = False,
|
||||
stateOldDashes = False,
|
||||
stateLiterateHaskell = False,
|
||||
stateColumns = 80,
|
||||
stateHeaderTable = [],
|
||||
|
@ -788,17 +792,37 @@ ellipses = do
|
|||
try (charOrRef "\8230\133") <|> try (string "..." >> return '…')
|
||||
return (Str "\8230")
|
||||
|
||||
dash :: GenParser Char st Inline
|
||||
dash = enDash <|> emDash
|
||||
-- | Parse a dash and return it as an 'Inline'.
--
-- The 'stateOldDashes' flag in the parser state selects the rule set:
--
-- * when set, the legacy parsers are tried ('emDashOld', then 'enDashOld');
-- * otherwise 'hyphenDash', 'emDash' and 'enDash' are tried in that order
--   and the resulting string is wrapped in 'Str'.
dash :: GenParser Char ParserState Inline
dash = getState >>= chooseParser . stateOldDashes
  where
    chooseParser True  = emDashOld <|> enDashOld
    chooseParser False = fmap Str (hyphenDash <|> emDash <|> enDash)
|
||||
|
||||
enDash :: GenParser Char st Inline
|
||||
-- | Parse ASCII hyphens as a dash: two hyphens give an en-dash ("\8211"),
-- and a third hyphen directly after them turns the result into an em-dash
-- ("\8212"). The leading "--" is wrapped in 'try', so a lone hyphen is not
-- consumed when this parser fails.
hyphenDash :: GenParser Char st String
hyphenDash =
  try (string "--") >> option "\8211" (char '-' >> return "\8212")
|
||||
|
||||
-- | Parse a literal em-dash and return it normalised to "\8212".
--
-- The accepted characters are U+2014 (decimal 8211 + 1 = 8212) and code
-- point 151; matching is delegated to 'charOrRef' (presumably a literal
-- character or an equivalent character reference -- confirm against its
-- definition).
emDash :: GenParser Char st String
emDash = try (charOrRef "\8212\151") >> return "\8212"
|
||||
|
||||
-- | Parse a literal en-dash and return it normalised to "\8211".
--
-- The accepted characters are U+2013 (decimal 8211) and code point 150,
-- the same set 'enDashOld' uses; matching is delegated to 'charOrRef'
-- (presumably a literal character or an equivalent character reference --
-- confirm against its definition).
--
-- This set must not be the em-dash set "\8212\151": 'dash' tries 'emDash'
-- before 'enDash', so with an identical set this parser could never succeed
-- and literal en-dashes would go unrecognised.
enDash :: GenParser Char st String
enDash = do
  try (charOrRef "\8211\150")
  return "\8211"
|
||||
|
||||
-- | Legacy en-dash parser (used when old-style dashes are selected).
--
-- Succeeds on either a literal en-dash (characters "\8211\150", matched via
-- 'charOrRef') or a single hyphen that is immediately followed by a digit.
-- The digit is only inspected with 'lookAhead', not consumed. In both cases
-- the result is @Str "\8211"@.
enDashOld :: GenParser Char st Inline
enDashOld = (literalEnDash <|> hyphenBeforeDigit) >> return (Str "\8211")
  where
    literalEnDash     = try (charOrRef "\8211\150")
    hyphenBeforeDigit =
      try (char '-' >> lookAhead (satisfy isDigit) >> return '–')
|
||||
|
||||
emDash :: GenParser Char st Inline
|
||||
emDash = do
|
||||
-- | Legacy em-dash parser (used when old-style dashes are selected).
--
-- Succeeds on either a literal em-dash (characters "\8212\151", matched via
-- 'charOrRef') or two ASCII hyphens optionally followed by a third. In both
-- cases the result is @Str "\8212"@.
emDashOld :: GenParser Char st Inline
emDashOld = (literalEmDash <|> hyphenRun) >> return (Str "\8212")
  where
    literalEmDash = try (charOrRef "\8212\151")
    hyphenRun     = try (string "--" >> optional (char '-') >> return '-')
|
||||
|
||||
|
|
|
@ -68,7 +68,8 @@ import Control.Monad ( guard, liftM )
|
|||
readTextile :: ParserState -- ^ Parser state, including options for parser
|
||||
-> String -- ^ String to parse (assuming @'\n'@ line endings)
|
||||
-> Pandoc
|
||||
readTextile state s = (readWith parseTextile) state (s ++ "\n\n")
|
||||
-- Textile input always uses the legacy dash rules, so 'stateOldDashes' is
-- forced on here regardless of the value in the supplied state.
readTextile state s = readWith parseTextile textileState (s ++ "\n\n")
  where
    textileState = state { stateOldDashes = True }
|
||||
|
||||
|
||||
--
|
||||
|
|
|
@ -103,6 +103,7 @@ data Opt = Opt
|
|||
, optSelfContained :: Bool -- ^ Make HTML accessible offline
|
||||
, optXeTeX :: Bool -- ^ Format latex for xetex
|
||||
, optSmart :: Bool -- ^ Use smart typography
|
||||
, optOldDashes :: Bool -- ^ Parse dashes like pandoc <=1.8.2.1
|
||||
, optHtml5 :: Bool -- ^ Produce HTML5 in HTML
|
||||
, optHighlight :: Bool -- ^ Highlight source code
|
||||
, optHighlightStyle :: Style -- ^ Style to use for highlighted code
|
||||
|
@ -149,6 +150,7 @@ defaultOpts = Opt
|
|||
, optSelfContained = False
|
||||
, optXeTeX = False
|
||||
, optSmart = False
|
||||
, optOldDashes = False
|
||||
, optHtml5 = False
|
||||
, optHighlight = True
|
||||
, optHighlightStyle = pygments
|
||||
|
@ -245,6 +247,12 @@ options =
|
|||
(\opt -> return opt { optSmart = True }))
|
||||
"" -- "Use smart quotes, dashes, and ellipses"
|
||||
|
||||
, Option "" ["old-dashes"]
|
||||
(NoArg
|
||||
(\opt -> return opt { optSmart = True
|
||||
, optOldDashes = True }))
|
||||
"" -- "Use pandoc <= 1.8.2.1 behavior for parsing smart dashes (implies smart)"
|
||||
|
||||
, Option "5" ["html5"]
|
||||
(NoArg
|
||||
(\opt -> do
|
||||
|
@ -735,6 +743,7 @@ main = do
|
|||
, optIncremental = incremental
|
||||
, optSelfContained = selfContained
|
||||
, optSmart = smart
|
||||
, optOldDashes = oldDashes
|
||||
, optHtml5 = html5
|
||||
, optHighlight = highlight
|
||||
, optHighlightStyle = highlightStyle
|
||||
|
@ -858,6 +867,7 @@ main = do
|
|||
stateCitations = map CSL.refId refs,
|
||||
stateSmart = smart || writerName' `elem`
|
||||
["latex", "context", "latex+lhs", "beamer"],
|
||||
stateOldDashes = oldDashes,
|
||||
stateColumns = columns,
|
||||
stateStrict = strict,
|
||||
stateIndentedCodeClasses = codeBlockClasses,
|
||||
|
|
|
@ -165,14 +165,14 @@ Pandoc (Meta {docTitle = [Str "Pandoc",Space,Str "Test",Space,Str "Suite",Str ":
|
|||
,([Str "city"],
|
||||
[[Para [Emph [Str "Nowhere"],Str ",",Space,Str "MA,",Space,Str "USA"]]])
|
||||
,([Str "phone"],
|
||||
[[Para [Str "123",Str "\8211",Str "4567"]]])]]
|
||||
[[Para [Str "123",Str "-",Str "4567"]]])]]
|
||||
,DefinitionList
|
||||
[([Str "address"],
|
||||
[[Para [Str "61",Space,Str "Main",Space,Str "St",Str "."]]])
|
||||
,([Str "city"],
|
||||
[[Para [Emph [Str "Nowhere"],Str ",",Space,Str "MA,",Space,Str "USA"]]])
|
||||
,([Str "phone"],
|
||||
[[Para [Str "123",Str "\8211",Str "4567"]]])]
|
||||
[[Para [Str "123",Str "-",Str "4567"]]])]
|
||||
,Header 1 [Str "HTML",Space,Str "Blocks"]
|
||||
,Para [Str "Simple",Space,Str "block",Space,Str "on",Space,Str "one",Space,Str "line",Str ":"]
|
||||
,RawBlock "html" "<div>foo</div>\n"
|
||||
|
|
|
@ -492,9 +492,9 @@ So is 'pine.'
|
|||
|
||||
Here is some quoted '`code`' and a "[quoted link][1]".
|
||||
|
||||
Some dashes: one---two --- three--four -- five.
|
||||
Some dashes: one---two --- three---four --- five.
|
||||
|
||||
Dashes between numbers: 5-7, 255-66, 1987-1999.
|
||||
Dashes between numbers: 5--7, 255--66, 1987--1999.
|
||||
|
||||
Ellipses...and...and....
|
||||
|
||||
|
|
Loading…
Reference in a new issue