New treatment of dashes in --smart mode.
* `---` is always em-dash, `--` is always en-dash. * pandoc no longer tries to guess when `-` should be en-dash. * A new option, `--old-dashes`, is provided for legacy documents. Rationale: The rules for en-dash are too complex and language-dependent for a guesser to work reliably. This change gives users greater control. The alternative of using Unicode characters directly isn't very good, since Unicode em- and en-dashes are barely distinguishable in a monospace font.
This commit is contained in:
parent
3cf60c7306
commit
da8425598a
6 changed files with 56 additions and 15 deletions
16
README
16
README
|
@ -206,11 +206,17 @@ Options
|
||||||
|
|
||||||
`-S`, `--smart`
|
`-S`, `--smart`
|
||||||
: Produce typographically correct output, converting straight quotes
|
: Produce typographically correct output, converting straight quotes
|
||||||
to curly quotes, `---` and `--` to dashes, and `...` to ellipses.
|
to curly quotes, `---` to em-dashes, `--` to en-dashes, and
|
||||||
Nonbreaking spaces are inserted after certain abbreviations, such
|
`...` to ellipses. Nonbreaking spaces are inserted after certain
|
||||||
as "Mr." (Note: This option is significant only when the input format is
|
abbreviations, such as "Mr." (Note: This option is significant only when
|
||||||
`markdown` or `textile`. It is selected automatically when the input
|
the input format is `markdown` or `textile`. It is selected automatically
|
||||||
format is `textile` or the output format is `latex` or `context`.)
|
when the input format is `textile` or the output format is `latex` or
|
||||||
|
`context`.)
|
||||||
|
|
||||||
|
`--old-dashes`
|
||||||
|
: Selects the pandoc <= 1.8.2.1 behavior for parsing smart dashes: `-` before
|
||||||
|
a numeral is an en-dash, and `--` is an em-dash. This option is selected
|
||||||
|
automatically for `textile` input.
|
||||||
|
|
||||||
`-5`, `--html5`
|
`-5`, `--html5`
|
||||||
: Produce HTML5 instead of HTML4. This option has no effect for writers
|
: Produce HTML5 instead of HTML4. This option has no effect for writers
|
||||||
|
|
|
@ -614,6 +614,9 @@ data ParserState = ParserState
|
||||||
stateDate :: [Inline], -- ^ Date of document
|
stateDate :: [Inline], -- ^ Date of document
|
||||||
stateStrict :: Bool, -- ^ Use strict markdown syntax?
|
stateStrict :: Bool, -- ^ Use strict markdown syntax?
|
||||||
stateSmart :: Bool, -- ^ Use smart typography?
|
stateSmart :: Bool, -- ^ Use smart typography?
|
||||||
|
stateOldDashes :: Bool, -- ^ Use pandoc <= 1.8.2.1 behavior
|
||||||
|
-- in parsing dashes; -- is em-dash;
|
||||||
|
-- a single hyphen before a numeral is en-dash
|
||||||
stateLiterateHaskell :: Bool, -- ^ Treat input as literate haskell
|
stateLiterateHaskell :: Bool, -- ^ Treat input as literate haskell
|
||||||
stateColumns :: Int, -- ^ Number of columns in terminal
|
stateColumns :: Int, -- ^ Number of columns in terminal
|
||||||
stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used
|
stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used
|
||||||
|
@ -642,6 +645,7 @@ defaultParserState =
|
||||||
stateDate = [],
|
stateDate = [],
|
||||||
stateStrict = False,
|
stateStrict = False,
|
||||||
stateSmart = False,
|
stateSmart = False,
|
||||||
|
stateOldDashes = False,
|
||||||
stateLiterateHaskell = False,
|
stateLiterateHaskell = False,
|
||||||
stateColumns = 80,
|
stateColumns = 80,
|
||||||
stateHeaderTable = [],
|
stateHeaderTable = [],
|
||||||
|
@ -788,17 +792,37 @@ ellipses = do
|
||||||
try (charOrRef "\8230\133") <|> try (string "..." >> return '…')
|
try (charOrRef "\8230\133") <|> try (string "..." >> return '…')
|
||||||
return (Str "\8230")
|
return (Str "\8230")
|
||||||
|
|
||||||
dash :: GenParser Char st Inline
|
dash :: GenParser Char ParserState Inline
|
||||||
dash = enDash <|> emDash
|
dash = do
|
||||||
|
oldDashes <- stateOldDashes `fmap` getState
|
||||||
|
if oldDashes
|
||||||
|
then emDashOld <|> enDashOld
|
||||||
|
else Str `fmap` (hyphenDash <|> emDash <|> enDash)
|
||||||
|
|
||||||
enDash :: GenParser Char st Inline
|
-- Two hyphens = en-dash, three = em-dash
|
||||||
|
hyphenDash :: GenParser Char st String
|
||||||
|
hyphenDash = do
|
||||||
|
try $ string "--"
|
||||||
|
option "\8211" (char '-' >> return "\8212")
|
||||||
|
|
||||||
|
emDash :: GenParser Char st String
|
||||||
|
emDash = do
|
||||||
|
try (charOrRef "\8212\151")
|
||||||
|
return "\8212"
|
||||||
|
|
||||||
|
enDash :: GenParser Char st String
|
||||||
enDash = do
|
enDash = do
|
||||||
|
try (charOrRef "\8211\150") -- en-dash code points, matching the "\8211" returned below (was the em-dash set "\8212\151")
|
||||||
|
return "\8211"
|
||||||
|
|
||||||
|
enDashOld :: GenParser Char st Inline
|
||||||
|
enDashOld = do
|
||||||
try (charOrRef "\8211\150") <|>
|
try (charOrRef "\8211\150") <|>
|
||||||
try (char '-' >> lookAhead (satisfy isDigit) >> return '–')
|
try (char '-' >> lookAhead (satisfy isDigit) >> return '–')
|
||||||
return (Str "\8211")
|
return (Str "\8211")
|
||||||
|
|
||||||
emDash :: GenParser Char st Inline
|
emDashOld :: GenParser Char st Inline
|
||||||
emDash = do
|
emDashOld = do
|
||||||
try (charOrRef "\8212\151") <|> (try $ string "--" >> optional (char '-') >> return '-')
|
try (charOrRef "\8212\151") <|> (try $ string "--" >> optional (char '-') >> return '-')
|
||||||
return (Str "\8212")
|
return (Str "\8212")
|
||||||
|
|
||||||
|
|
|
@ -68,7 +68,8 @@ import Control.Monad ( guard, liftM )
|
||||||
readTextile :: ParserState -- ^ Parser state, including options for parser
|
readTextile :: ParserState -- ^ Parser state, including options for parser
|
||||||
-> String -- ^ String to parse (assuming @'\n'@ line endings)
|
-> String -- ^ String to parse (assuming @'\n'@ line endings)
|
||||||
-> Pandoc
|
-> Pandoc
|
||||||
readTextile state s = (readWith parseTextile) state (s ++ "\n\n")
|
readTextile state s =
|
||||||
|
(readWith parseTextile) state{ stateOldDashes = True } (s ++ "\n\n")
|
||||||
|
|
||||||
|
|
||||||
--
|
--
|
||||||
|
|
|
@ -103,6 +103,7 @@ data Opt = Opt
|
||||||
, optSelfContained :: Bool -- ^ Make HTML accessible offline
|
, optSelfContained :: Bool -- ^ Make HTML accessible offline
|
||||||
, optXeTeX :: Bool -- ^ Format latex for xetex
|
, optXeTeX :: Bool -- ^ Format latex for xetex
|
||||||
, optSmart :: Bool -- ^ Use smart typography
|
, optSmart :: Bool -- ^ Use smart typography
|
||||||
|
, optOldDashes :: Bool -- ^ Parse dashes like pandoc <=1.8.2.1
|
||||||
, optHtml5 :: Bool -- ^ Produce HTML5 in HTML
|
, optHtml5 :: Bool -- ^ Produce HTML5 in HTML
|
||||||
, optHighlight :: Bool -- ^ Highlight source code
|
, optHighlight :: Bool -- ^ Highlight source code
|
||||||
, optHighlightStyle :: Style -- ^ Style to use for highlighted code
|
, optHighlightStyle :: Style -- ^ Style to use for highlighted code
|
||||||
|
@ -149,6 +150,7 @@ defaultOpts = Opt
|
||||||
, optSelfContained = False
|
, optSelfContained = False
|
||||||
, optXeTeX = False
|
, optXeTeX = False
|
||||||
, optSmart = False
|
, optSmart = False
|
||||||
|
, optOldDashes = False
|
||||||
, optHtml5 = False
|
, optHtml5 = False
|
||||||
, optHighlight = True
|
, optHighlight = True
|
||||||
, optHighlightStyle = pygments
|
, optHighlightStyle = pygments
|
||||||
|
@ -245,6 +247,12 @@ options =
|
||||||
(\opt -> return opt { optSmart = True }))
|
(\opt -> return opt { optSmart = True }))
|
||||||
"" -- "Use smart quotes, dashes, and ellipses"
|
"" -- "Use smart quotes, dashes, and ellipses"
|
||||||
|
|
||||||
|
, Option "" ["old-dashes"]
|
||||||
|
(NoArg
|
||||||
|
(\opt -> return opt { optSmart = True
|
||||||
|
, optOldDashes = True }))
|
||||||
|
"" -- "Use pandoc <= 1.8.2.1 behavior for parsing smart dashes (implies --smart)"
|
||||||
|
|
||||||
, Option "5" ["html5"]
|
, Option "5" ["html5"]
|
||||||
(NoArg
|
(NoArg
|
||||||
(\opt -> do
|
(\opt -> do
|
||||||
|
@ -735,6 +743,7 @@ main = do
|
||||||
, optIncremental = incremental
|
, optIncremental = incremental
|
||||||
, optSelfContained = selfContained
|
, optSelfContained = selfContained
|
||||||
, optSmart = smart
|
, optSmart = smart
|
||||||
|
, optOldDashes = oldDashes
|
||||||
, optHtml5 = html5
|
, optHtml5 = html5
|
||||||
, optHighlight = highlight
|
, optHighlight = highlight
|
||||||
, optHighlightStyle = highlightStyle
|
, optHighlightStyle = highlightStyle
|
||||||
|
@ -858,6 +867,7 @@ main = do
|
||||||
stateCitations = map CSL.refId refs,
|
stateCitations = map CSL.refId refs,
|
||||||
stateSmart = smart || writerName' `elem`
|
stateSmart = smart || writerName' `elem`
|
||||||
["latex", "context", "latex+lhs", "beamer"],
|
["latex", "context", "latex+lhs", "beamer"],
|
||||||
|
stateOldDashes = oldDashes,
|
||||||
stateColumns = columns,
|
stateColumns = columns,
|
||||||
stateStrict = strict,
|
stateStrict = strict,
|
||||||
stateIndentedCodeClasses = codeBlockClasses,
|
stateIndentedCodeClasses = codeBlockClasses,
|
||||||
|
|
|
@ -165,14 +165,14 @@ Pandoc (Meta {docTitle = [Str "Pandoc",Space,Str "Test",Space,Str "Suite",Str ":
|
||||||
,([Str "city"],
|
,([Str "city"],
|
||||||
[[Para [Emph [Str "Nowhere"],Str ",",Space,Str "MA,",Space,Str "USA"]]])
|
[[Para [Emph [Str "Nowhere"],Str ",",Space,Str "MA,",Space,Str "USA"]]])
|
||||||
,([Str "phone"],
|
,([Str "phone"],
|
||||||
[[Para [Str "123",Str "\8211",Str "4567"]]])]]
|
[[Para [Str "123",Str "-",Str "4567"]]])]]
|
||||||
,DefinitionList
|
,DefinitionList
|
||||||
[([Str "address"],
|
[([Str "address"],
|
||||||
[[Para [Str "61",Space,Str "Main",Space,Str "St",Str "."]]])
|
[[Para [Str "61",Space,Str "Main",Space,Str "St",Str "."]]])
|
||||||
,([Str "city"],
|
,([Str "city"],
|
||||||
[[Para [Emph [Str "Nowhere"],Str ",",Space,Str "MA,",Space,Str "USA"]]])
|
[[Para [Emph [Str "Nowhere"],Str ",",Space,Str "MA,",Space,Str "USA"]]])
|
||||||
,([Str "phone"],
|
,([Str "phone"],
|
||||||
[[Para [Str "123",Str "\8211",Str "4567"]]])]
|
[[Para [Str "123",Str "-",Str "4567"]]])]
|
||||||
,Header 1 [Str "HTML",Space,Str "Blocks"]
|
,Header 1 [Str "HTML",Space,Str "Blocks"]
|
||||||
,Para [Str "Simple",Space,Str "block",Space,Str "on",Space,Str "one",Space,Str "line",Str ":"]
|
,Para [Str "Simple",Space,Str "block",Space,Str "on",Space,Str "one",Space,Str "line",Str ":"]
|
||||||
,RawBlock "html" "<div>foo</div>\n"
|
,RawBlock "html" "<div>foo</div>\n"
|
||||||
|
|
|
@ -492,9 +492,9 @@ So is 'pine.'
|
||||||
|
|
||||||
Here is some quoted '`code`' and a "[quoted link][1]".
|
Here is some quoted '`code`' and a "[quoted link][1]".
|
||||||
|
|
||||||
Some dashes: one---two --- three--four -- five.
|
Some dashes: one---two --- three---four --- five.
|
||||||
|
|
||||||
Dashes between numbers: 5-7, 255-66, 1987-1999.
|
Dashes between numbers: 5--7, 255--66, 1987--1999.
|
||||||
|
|
||||||
Ellipses...and...and....
|
Ellipses...and...and....
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue