CommonMark writer: respect --ascii (#5043)
This commit is contained in:
parent
249fd40838
commit
a747268823
3 changed files with 60 additions and 8 deletions
13
MANUAL.txt
13
MANUAL.txt
|
@ -859,13 +859,12 @@ Options affecting specific writers {.options}
|
||||||
|
|
||||||
`--ascii`
|
`--ascii`
|
||||||
|
|
||||||
: Use only ASCII characters in output. Currently supported for
|
: Use only ASCII characters in output. Currently supported for XML
|
||||||
XML and HTML formats (which use entities instead of
|
and HTML formats (which use entities instead of UTF-8 when this
|
||||||
UTF-8 when this option is selected), Markdown (which uses
|
option is selected), CommonMark and Markdown (which uses
|
||||||
entities), roff ms (which use hexadecimal escapes), and to
|
entities), roff ms (which use hexadecimal escapes), and to a
|
||||||
a limited degree LaTeX (which uses standard commands for
|
limited degree LaTeX (which uses standard commands for accented
|
||||||
accented characters when possible). roff man output uses
|
characters when possible). roff man output uses ASCII by default.
|
||||||
ASCII by default.
|
|
||||||
|
|
||||||
`--reference-links`
|
`--reference-links`
|
||||||
|
|
||||||
|
|
|
@ -36,6 +36,7 @@ module Text.Pandoc.Writers.CommonMark (writeCommonMark) where
|
||||||
import Prelude
|
import Prelude
|
||||||
import CMarkGFM
|
import CMarkGFM
|
||||||
import Control.Monad.State.Strict (State, get, modify, runState)
|
import Control.Monad.State.Strict (State, get, modify, runState)
|
||||||
|
import Data.Char (isAscii)
|
||||||
import Data.Foldable (foldrM)
|
import Data.Foldable (foldrM)
|
||||||
import Data.List (transpose)
|
import Data.List (transpose)
|
||||||
import Data.Monoid (Any (..))
|
import Data.Monoid (Any (..))
|
||||||
|
@ -50,6 +51,7 @@ import Text.Pandoc.Templates (renderTemplate')
|
||||||
import Text.Pandoc.Walk (query, walk, walkM)
|
import Text.Pandoc.Walk (query, walk, walkM)
|
||||||
import Text.Pandoc.Writers.HTML (writeHtml5String, tagWithAttributes)
|
import Text.Pandoc.Writers.HTML (writeHtml5String, tagWithAttributes)
|
||||||
import Text.Pandoc.Writers.Shared
|
import Text.Pandoc.Writers.Shared
|
||||||
|
import Text.Pandoc.XML (toHtml5Entities)
|
||||||
|
|
||||||
-- | Convert Pandoc to CommonMark.
|
-- | Convert Pandoc to CommonMark.
|
||||||
writeCommonMark :: PandocMonad m => WriterOptions -> Pandoc -> m Text
|
writeCommonMark :: PandocMonad m => WriterOptions -> Pandoc -> m Text
|
||||||
|
@ -238,7 +240,7 @@ inlinesToNodes :: WriterOptions -> [Inline] -> [Node]
|
||||||
inlinesToNodes opts = foldr (inlineToNodes opts) []
|
inlinesToNodes opts = foldr (inlineToNodes opts) []
|
||||||
|
|
||||||
inlineToNodes :: WriterOptions -> Inline -> [Node] -> [Node]
|
inlineToNodes :: WriterOptions -> Inline -> [Node] -> [Node]
|
||||||
inlineToNodes opts (Str s) = (node (TEXT (T.pack s')) [] :)
|
inlineToNodes opts (Str s) = stringToNodes opts s'
|
||||||
where s' = if isEnabled Ext_smart opts
|
where s' = if isEnabled Ext_smart opts
|
||||||
then unsmartify opts s
|
then unsmartify opts s
|
||||||
else s
|
else s
|
||||||
|
@ -336,6 +338,21 @@ inlineToNodes opts (Cite _ ils) = (inlinesToNodes opts ils ++)
|
||||||
inlineToNodes _ (Note _) = id -- should not occur
|
inlineToNodes _ (Note _) = id -- should not occur
|
||||||
-- we remove Note elements in preprocessing
|
-- we remove Note elements in preprocessing
|
||||||
|
|
||||||
|
stringToNodes :: WriterOptions -> String -> [Node] -> [Node]
|
||||||
|
stringToNodes opts s
|
||||||
|
| not (writerPreferAscii opts) = (node (TEXT (T.pack s)) [] :)
|
||||||
|
| otherwise = step s
|
||||||
|
where
|
||||||
|
step input =
|
||||||
|
let (ascii, rest) = span isAscii input
|
||||||
|
this = node (TEXT (T.pack ascii)) []
|
||||||
|
nodes = case rest of
|
||||||
|
[] -> id
|
||||||
|
(nonAscii : rest') ->
|
||||||
|
let escaped = toHtml5Entities (T.singleton nonAscii)
|
||||||
|
in (node (HTML_INLINE escaped) [] :) . step rest'
|
||||||
|
in (this :) . nodes
|
||||||
|
|
||||||
toSubscriptInline :: Inline -> Maybe Inline
|
toSubscriptInline :: Inline -> Maybe Inline
|
||||||
toSubscriptInline Space = Just Space
|
toSubscriptInline Space = Just Space
|
||||||
toSubscriptInline (Span attr ils) = Span attr <$> traverse toSubscriptInline ils
|
toSubscriptInline (Span attr ils) = Span attr <$> traverse toSubscriptInline ils
|
||||||
|
|
|
@ -51,3 +51,39 @@ pandoc -t markdown-smart --ascii
|
||||||
“äéıå”
|
“äéıå”
|
||||||
```
|
```
|
||||||
|
|
||||||
|
# CommonMark tests
|
||||||
|
|
||||||
|
```
|
||||||
|
% pandoc -f commonmark -t commonmark --ascii
|
||||||
|
hello … ok? … bye
|
||||||
|
^D
|
||||||
|
hello … ok? … bye
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
% pandoc -f commonmark+smart -t commonmark-smart --ascii --wrap=none
|
||||||
|
"hi"...dog's breath---cat 5--6
|
||||||
|
^D
|
||||||
|
“hi”…dog’s breath—cat 5–6
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
% pandoc -f commonmark+smart -t commonmark+smart --ascii
|
||||||
|
"hi"...dog's breath---cat 5--6
|
||||||
|
^D
|
||||||
|
"hi"...dog's breath---cat 5--6
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
% pandoc -f commonmark -t commonmark --ascii
|
||||||
|
foo Ӓ bar
|
||||||
|
^D
|
||||||
|
foo Ӓ bar
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
% pandoc -f commonmark -t commonmark --ascii
|
||||||
|
\[foo\](bar)
|
||||||
|
^D
|
||||||
|
\[foo\](bar)
|
||||||
|
```
|
||||||
|
|
Loading…
Add table
Reference in a new issue