CommonMark writer: respect --ascii (#5043)

This commit is contained in:
quasicomputational 2018-11-05 17:33:10 +00:00 committed by John MacFarlane
parent 249fd40838
commit a747268823
3 changed files with 60 additions and 8 deletions

View file

@ -859,13 +859,12 @@ Options affecting specific writers {.options}
`--ascii`
: Use only ASCII characters in output. Currently supported for
XML and HTML formats (which use entities instead of
UTF-8 when this option is selected), Markdown (which uses
entities), roff ms (which use hexadecimal escapes), and to
a limited degree LaTeX (which uses standard commands for
accented characters when possible). roff man output uses
ASCII by default.
: Use only ASCII characters in output. Currently supported for XML
and HTML formats (which use entities instead of UTF-8 when this
option is selected), CommonMark and Markdown (which uses
entities), roff ms (which use hexadecimal escapes), and to a
limited degree LaTeX (which uses standard commands for accented
characters when possible). roff man output uses ASCII by default.
`--reference-links`

View file

@ -36,6 +36,7 @@ module Text.Pandoc.Writers.CommonMark (writeCommonMark) where
import Prelude
import CMarkGFM
import Control.Monad.State.Strict (State, get, modify, runState)
import Data.Char (isAscii)
import Data.Foldable (foldrM)
import Data.List (transpose)
import Data.Monoid (Any (..))
@ -50,6 +51,7 @@ import Text.Pandoc.Templates (renderTemplate')
import Text.Pandoc.Walk (query, walk, walkM)
import Text.Pandoc.Writers.HTML (writeHtml5String, tagWithAttributes)
import Text.Pandoc.Writers.Shared
import Text.Pandoc.XML (toHtml5Entities)
-- | Convert Pandoc to CommonMark.
writeCommonMark :: PandocMonad m => WriterOptions -> Pandoc -> m Text
@ -238,7 +240,7 @@ inlinesToNodes :: WriterOptions -> [Inline] -> [Node]
inlinesToNodes opts = foldr (inlineToNodes opts) []
inlineToNodes :: WriterOptions -> Inline -> [Node] -> [Node]
inlineToNodes opts (Str s) = (node (TEXT (T.pack s')) [] :)
inlineToNodes opts (Str s) = stringToNodes opts s'
where s' = if isEnabled Ext_smart opts
then unsmartify opts s
else s
@ -336,6 +338,21 @@ inlineToNodes opts (Cite _ ils) = (inlinesToNodes opts ils ++)
inlineToNodes _ (Note _) = id -- should not occur
-- we remove Note elements in preprocessing
stringToNodes :: WriterOptions -> String -> [Node] -> [Node]
stringToNodes opts s
| not (writerPreferAscii opts) = (node (TEXT (T.pack s)) [] :)
| otherwise = step s
where
step input =
let (ascii, rest) = span isAscii input
this = node (TEXT (T.pack ascii)) []
nodes = case rest of
[] -> id
(nonAscii : rest') ->
let escaped = toHtml5Entities (T.singleton nonAscii)
in (node (HTML_INLINE escaped) [] :) . step rest'
in (this :) . nodes
toSubscriptInline :: Inline -> Maybe Inline
toSubscriptInline Space = Just Space
toSubscriptInline (Span attr ils) = Span attr <$> traverse toSubscriptInline ils

View file

@ -51,3 +51,39 @@ pandoc -t markdown-smart --ascii
&ldquo;&auml;&eacute;&imath;&aring;&rdquo;
```
# CommonMark tests
```
% pandoc -f commonmark -t commonmark --ascii
hello … ok? … bye
^D
hello &mldr; ok? &mldr; bye
```
```
% pandoc -f commonmark+smart -t commonmark-smart --ascii --wrap=none
"hi"...dog's breath---cat 5--6
^D
&ldquo;hi&rdquo;&mldr;dog&rsquo;s breath&mdash;cat 5&ndash;6
```
```
% pandoc -f commonmark+smart -t commonmark+smart --ascii
"hi"...dog's breath---cat 5--6
^D
"hi"...dog's breath---cat 5--6
```
```
% pandoc -f commonmark -t commonmark --ascii
foo &#1234; bar
^D
foo &#1234; bar
```
```
% pandoc -f commonmark -t commonmark --ascii
\[foo\](bar)
^D
\[foo\](bar)
```