CommonMark writer: respect --ascii ()

This commit is contained in:
quasicomputational 2018-11-05 17:33:10 +00:00 committed by John MacFarlane
parent 249fd40838
commit a747268823
3 changed files with 60 additions and 8 deletions
MANUAL.txt
src/Text/Pandoc/Writers
test/command

View file

@ -859,13 +859,12 @@ Options affecting specific writers {.options}
`--ascii` `--ascii`
: Use only ASCII characters in output. Currently supported for : Use only ASCII characters in output. Currently supported for XML
XML and HTML formats (which use entities instead of and HTML formats (which use entities instead of UTF-8 when this
UTF-8 when this option is selected), Markdown (which uses option is selected), CommonMark and Markdown (which uses
entities), roff ms (which use hexadecimal escapes), and to entities), roff ms (which use hexadecimal escapes), and to a
a limited degree LaTeX (which uses standard commands for limited degree LaTeX (which uses standard commands for accented
accented characters when possible). roff man output uses characters when possible). roff man output uses ASCII by default.
ASCII by default.
`--reference-links` `--reference-links`

View file

@ -36,6 +36,7 @@ module Text.Pandoc.Writers.CommonMark (writeCommonMark) where
import Prelude import Prelude
import CMarkGFM import CMarkGFM
import Control.Monad.State.Strict (State, get, modify, runState) import Control.Monad.State.Strict (State, get, modify, runState)
import Data.Char (isAscii)
import Data.Foldable (foldrM) import Data.Foldable (foldrM)
import Data.List (transpose) import Data.List (transpose)
import Data.Monoid (Any (..)) import Data.Monoid (Any (..))
@ -50,6 +51,7 @@ import Text.Pandoc.Templates (renderTemplate')
import Text.Pandoc.Walk (query, walk, walkM) import Text.Pandoc.Walk (query, walk, walkM)
import Text.Pandoc.Writers.HTML (writeHtml5String, tagWithAttributes) import Text.Pandoc.Writers.HTML (writeHtml5String, tagWithAttributes)
import Text.Pandoc.Writers.Shared import Text.Pandoc.Writers.Shared
import Text.Pandoc.XML (toHtml5Entities)
-- | Convert Pandoc to CommonMark. -- | Convert Pandoc to CommonMark.
writeCommonMark :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeCommonMark :: PandocMonad m => WriterOptions -> Pandoc -> m Text
@ -238,7 +240,7 @@ inlinesToNodes :: WriterOptions -> [Inline] -> [Node]
inlinesToNodes opts = foldr (inlineToNodes opts) [] inlinesToNodes opts = foldr (inlineToNodes opts) []
inlineToNodes :: WriterOptions -> Inline -> [Node] -> [Node] inlineToNodes :: WriterOptions -> Inline -> [Node] -> [Node]
inlineToNodes opts (Str s) = (node (TEXT (T.pack s')) [] :) inlineToNodes opts (Str s) = stringToNodes opts s'
where s' = if isEnabled Ext_smart opts where s' = if isEnabled Ext_smart opts
then unsmartify opts s then unsmartify opts s
else s else s
@ -336,6 +338,21 @@ inlineToNodes opts (Cite _ ils) = (inlinesToNodes opts ils ++)
inlineToNodes _ (Note _) = id -- should not occur inlineToNodes _ (Note _) = id -- should not occur
-- we remove Note elements in preprocessing -- we remove Note elements in preprocessing
stringToNodes :: WriterOptions -> String -> [Node] -> [Node]
stringToNodes opts s
| not (writerPreferAscii opts) = (node (TEXT (T.pack s)) [] :)
| otherwise = step s
where
step input =
let (ascii, rest) = span isAscii input
this = node (TEXT (T.pack ascii)) []
nodes = case rest of
[] -> id
(nonAscii : rest') ->
let escaped = toHtml5Entities (T.singleton nonAscii)
in (node (HTML_INLINE escaped) [] :) . step rest'
in (this :) . nodes
toSubscriptInline :: Inline -> Maybe Inline toSubscriptInline :: Inline -> Maybe Inline
toSubscriptInline Space = Just Space toSubscriptInline Space = Just Space
toSubscriptInline (Span attr ils) = Span attr <$> traverse toSubscriptInline ils toSubscriptInline (Span attr ils) = Span attr <$> traverse toSubscriptInline ils

View file

@ -51,3 +51,39 @@ pandoc -t markdown-smart --ascii
&ldquo;&auml;&eacute;&imath;&aring;&rdquo; &ldquo;&auml;&eacute;&imath;&aring;&rdquo;
``` ```
# CommonMark tests
```
% pandoc -f commonmark -t commonmark --ascii
hello … ok? … bye
^D
hello &mldr; ok? &mldr; bye
```
```
% pandoc -f commonmark+smart -t commonmark-smart --ascii --wrap=none
"hi"...dog's breath---cat 5--6
^D
&ldquo;hi&rdquo;&mldr;dog&rsquo;s breath&mdash;cat 5&ndash;6
```
```
% pandoc -f commonmark+smart -t commonmark+smart --ascii
"hi"...dog's breath---cat 5--6
^D
"hi"...dog's breath---cat 5--6
```
```
% pandoc -f commonmark -t commonmark --ascii
foo &#1234; bar
^D
foo &#1234; bar
```
```
% pandoc -f commonmark -t commonmark --ascii
\[foo\](bar)
^D
\[foo\](bar)
```