CommonMark writer: respect --ascii (#5043)
This commit is contained in:
parent
249fd40838
commit
a747268823
3 changed files with 60 additions and 8 deletions
13
MANUAL.txt
13
MANUAL.txt
|
@ -859,13 +859,12 @@ Options affecting specific writers {.options}
|
|||
|
||||
`--ascii`
|
||||
|
||||
: Use only ASCII characters in output. Currently supported for
|
||||
XML and HTML formats (which use entities instead of
|
||||
UTF-8 when this option is selected), Markdown (which uses
|
||||
entities), roff ms (which use hexadecimal escapes), and to
|
||||
a limited degree LaTeX (which uses standard commands for
|
||||
accented characters when possible). roff man output uses
|
||||
ASCII by default.
|
||||
: Use only ASCII characters in output. Currently supported for XML
|
||||
and HTML formats (which use entities instead of UTF-8 when this
|
||||
option is selected), CommonMark and Markdown (which uses
|
||||
entities), roff ms (which use hexadecimal escapes), and to a
|
||||
limited degree LaTeX (which uses standard commands for accented
|
||||
characters when possible). roff man output uses ASCII by default.
|
||||
|
||||
`--reference-links`
|
||||
|
||||
|
|
|
@ -36,6 +36,7 @@ module Text.Pandoc.Writers.CommonMark (writeCommonMark) where
|
|||
import Prelude
|
||||
import CMarkGFM
|
||||
import Control.Monad.State.Strict (State, get, modify, runState)
|
||||
import Data.Char (isAscii)
|
||||
import Data.Foldable (foldrM)
|
||||
import Data.List (transpose)
|
||||
import Data.Monoid (Any (..))
|
||||
|
@ -50,6 +51,7 @@ import Text.Pandoc.Templates (renderTemplate')
|
|||
import Text.Pandoc.Walk (query, walk, walkM)
|
||||
import Text.Pandoc.Writers.HTML (writeHtml5String, tagWithAttributes)
|
||||
import Text.Pandoc.Writers.Shared
|
||||
import Text.Pandoc.XML (toHtml5Entities)
|
||||
|
||||
-- | Convert Pandoc to CommonMark.
|
||||
writeCommonMark :: PandocMonad m => WriterOptions -> Pandoc -> m Text
|
||||
|
@ -238,7 +240,7 @@ inlinesToNodes :: WriterOptions -> [Inline] -> [Node]
|
|||
inlinesToNodes opts = foldr (inlineToNodes opts) []
|
||||
|
||||
inlineToNodes :: WriterOptions -> Inline -> [Node] -> [Node]
|
||||
inlineToNodes opts (Str s) = (node (TEXT (T.pack s')) [] :)
|
||||
inlineToNodes opts (Str s) = stringToNodes opts s'
|
||||
where s' = if isEnabled Ext_smart opts
|
||||
then unsmartify opts s
|
||||
else s
|
||||
|
@ -336,6 +338,21 @@ inlineToNodes opts (Cite _ ils) = (inlinesToNodes opts ils ++)
|
|||
inlineToNodes _ (Note _) = id -- should not occur
|
||||
-- we remove Note elements in preprocessing
|
||||
|
||||
stringToNodes :: WriterOptions -> String -> [Node] -> [Node]
|
||||
stringToNodes opts s
|
||||
| not (writerPreferAscii opts) = (node (TEXT (T.pack s)) [] :)
|
||||
| otherwise = step s
|
||||
where
|
||||
step input =
|
||||
let (ascii, rest) = span isAscii input
|
||||
this = node (TEXT (T.pack ascii)) []
|
||||
nodes = case rest of
|
||||
[] -> id
|
||||
(nonAscii : rest') ->
|
||||
let escaped = toHtml5Entities (T.singleton nonAscii)
|
||||
in (node (HTML_INLINE escaped) [] :) . step rest'
|
||||
in (this :) . nodes
|
||||
|
||||
toSubscriptInline :: Inline -> Maybe Inline
|
||||
toSubscriptInline Space = Just Space
|
||||
toSubscriptInline (Span attr ils) = Span attr <$> traverse toSubscriptInline ils
|
||||
|
|
|
@ -51,3 +51,39 @@ pandoc -t markdown-smart --ascii
|
|||
“äéıå”
|
||||
```
|
||||
|
||||
# CommonMark tests
|
||||
|
||||
```
|
||||
% pandoc -f commonmark -t commonmark --ascii
|
||||
hello … ok? … bye
|
||||
^D
|
||||
hello … ok? … bye
|
||||
```
|
||||
|
||||
```
|
||||
% pandoc -f commonmark+smart -t commonmark-smart --ascii --wrap=none
|
||||
"hi"...dog's breath---cat 5--6
|
||||
^D
|
||||
“hi”…dog’s breath—cat 5–6
|
||||
```
|
||||
|
||||
```
|
||||
% pandoc -f commonmark+smart -t commonmark+smart --ascii
|
||||
"hi"...dog's breath---cat 5--6
|
||||
^D
|
||||
"hi"...dog's breath---cat 5--6
|
||||
```
|
||||
|
||||
```
|
||||
% pandoc -f commonmark -t commonmark --ascii
|
||||
foo Ӓ bar
|
||||
^D
|
||||
foo Ӓ bar
|
||||
```
|
||||
|
||||
```
|
||||
% pandoc -f commonmark -t commonmark --ascii
|
||||
\[foo\](bar)
|
||||
^D
|
||||
\[foo\](bar)
|
||||
```
|
||||
|
|
Loading…
Add table
Reference in a new issue