CommonMark writer: add plain text fallbacks. (#4531)
Previously, the writer would unconditionally emit HTMLish output for subscripts, superscripts, strikeouts (if the strikeout extension is disabled) and small caps, even with raw_html disabled. Now there are plain-text (and, where possible, fancy Unicode) fallbacks for all of these corresponding (mostly) to the Markdown fallbacks, and the HTMLish output is only used when raw_html is enabled. This commit adds exported functions `toSuperscript` and `toSubscript` to `Text.Pandoc.Writers.Shared`. [API change] Closes #4528.
This commit is contained in:
parent
a26b3a2d6a
commit
6207bdeb68
5 changed files with 237 additions and 41 deletions
10
MANUAL.txt
10
MANUAL.txt
|
@ -3381,8 +3381,14 @@ Markdown allows it, but it has been made an extension so that it can
|
|||
be disabled if desired.)
|
||||
|
||||
The raw HTML is passed through unchanged in HTML, S5, Slidy, Slideous,
|
||||
DZSlides, EPUB, Markdown, Emacs Org mode, and Textile output, and suppressed
|
||||
in other formats.
|
||||
DZSlides, EPUB, Markdown, CommonMark, Emacs Org mode, and Textile
|
||||
output, and suppressed in other formats.
|
||||
|
||||
In the CommonMark format, if `raw_html` is enabled, superscripts,
|
||||
subscripts, strikeouts and small capitals will be represented as HTML.
|
||||
Otherwise, plain-text fallbacks will be used. Note that even if
|
||||
`raw_html` is disabled, tables will be rendered with HTML syntax if
|
||||
they cannot use pipe syntax.
|
||||
|
||||
#### Extension: `markdown_in_html_blocks` ####
|
||||
|
||||
|
|
|
@ -45,7 +45,7 @@ import Network.HTTP (urlEncode)
|
|||
import Text.Pandoc.Class (PandocMonad)
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Options
|
||||
import Text.Pandoc.Shared (isTightList, linesToPara, substitute)
|
||||
import Text.Pandoc.Shared (isTightList, linesToPara, substitute, capitalize)
|
||||
import Text.Pandoc.Templates (renderTemplate')
|
||||
import Text.Pandoc.Walk (query, walk, walkM)
|
||||
import Text.Pandoc.Writers.HTML (writeHtml5String, tagWithAttributes)
|
||||
|
@ -253,18 +253,34 @@ inlineToNodes opts (Strong xs) = (node STRONG (inlinesToNodes opts xs) :)
|
|||
inlineToNodes opts (Strikeout xs) =
|
||||
if isEnabled Ext_strikeout opts
|
||||
then (node (CUSTOM_INLINE "~~" "~~") (inlinesToNodes opts xs) :)
|
||||
else ((node (HTML_INLINE (T.pack "<s>")) [] : inlinesToNodes opts xs ++
|
||||
[node (HTML_INLINE (T.pack "</s>")) []]) ++ )
|
||||
else if isEnabled Ext_raw_html opts
|
||||
then ((node (HTML_INLINE (T.pack "<s>")) [] : inlinesToNodes opts xs ++
|
||||
[node (HTML_INLINE (T.pack "</s>")) []]) ++ )
|
||||
else (inlinesToNodes opts xs ++)
|
||||
inlineToNodes opts (Superscript xs) =
|
||||
((node (HTML_INLINE (T.pack "<sup>")) [] : inlinesToNodes opts xs ++
|
||||
[node (HTML_INLINE (T.pack "</sup>")) []]) ++ )
|
||||
if isEnabled Ext_raw_html opts
|
||||
then ((node (HTML_INLINE (T.pack "<sup>")) [] : inlinesToNodes opts xs ++
|
||||
[node (HTML_INLINE (T.pack "</sup>")) []]) ++ )
|
||||
else case traverse toSuperscriptInline xs of
|
||||
Nothing ->
|
||||
((node (TEXT (T.pack "^(")) [] : inlinesToNodes opts xs ++
|
||||
[node (TEXT (T.pack ")")) []]) ++ )
|
||||
Just xs' -> (inlinesToNodes opts xs' ++)
|
||||
inlineToNodes opts (Subscript xs) =
|
||||
((node (HTML_INLINE (T.pack "<sub>")) [] : inlinesToNodes opts xs ++
|
||||
[node (HTML_INLINE (T.pack "</sub>")) []]) ++ )
|
||||
if isEnabled Ext_raw_html opts
|
||||
then ((node (HTML_INLINE (T.pack "<sub>")) [] : inlinesToNodes opts xs ++
|
||||
[node (HTML_INLINE (T.pack "</sub>")) []]) ++ )
|
||||
else case traverse toSubscriptInline xs of
|
||||
Nothing ->
|
||||
((node (TEXT (T.pack "_(")) [] : inlinesToNodes opts xs ++
|
||||
[node (TEXT (T.pack ")")) []]) ++ )
|
||||
Just xs' -> (inlinesToNodes opts xs' ++)
|
||||
inlineToNodes opts (SmallCaps xs) =
|
||||
((node (HTML_INLINE (T.pack "<span class=\"smallcaps\">")) []
|
||||
: inlinesToNodes opts xs ++
|
||||
[node (HTML_INLINE (T.pack "</span>")) []]) ++ )
|
||||
if isEnabled Ext_raw_html opts
|
||||
then ((node (HTML_INLINE (T.pack "<span class=\"smallcaps\">")) []
|
||||
: inlinesToNodes opts xs ++
|
||||
[node (HTML_INLINE (T.pack "</span>")) []]) ++ )
|
||||
else (inlinesToNodes opts (capitalize xs) ++)
|
||||
inlineToNodes opts (Link _ ils (url,tit)) =
|
||||
(node (LINK (T.pack url) (T.pack tit)) (inlinesToNodes opts ils) :)
|
||||
-- title beginning with fig: indicates implicit figure
|
||||
|
@ -319,3 +335,19 @@ inlineToNodes opts (Span attr ils) =
|
|||
inlineToNodes opts (Cite _ ils) = (inlinesToNodes opts ils ++)
|
||||
inlineToNodes _ (Note _) = id -- should not occur
|
||||
-- we remove Note elements in preprocessing
|
||||
|
||||
toSubscriptInline :: Inline -> Maybe Inline
|
||||
toSubscriptInline Space = Just Space
|
||||
toSubscriptInline (Span attr ils) = Span attr <$> traverse toSubscriptInline ils
|
||||
toSubscriptInline (Str s) = Str <$> traverse toSubscript s
|
||||
toSubscriptInline LineBreak = Just LineBreak
|
||||
toSubscriptInline SoftBreak = Just SoftBreak
|
||||
toSubscriptInline _ = Nothing
|
||||
|
||||
toSuperscriptInline :: Inline -> Maybe Inline
|
||||
toSuperscriptInline Space = Just Space
|
||||
toSuperscriptInline (Span attr ils) = Span attr <$> traverse toSuperscriptInline ils
|
||||
toSuperscriptInline (Str s) = Str <$> traverse toSuperscript s
|
||||
toSuperscriptInline LineBreak = Just LineBreak
|
||||
toSuperscriptInline SoftBreak = Just SoftBreak
|
||||
toSuperscriptInline _ = Nothing
|
||||
|
|
|
@ -38,7 +38,7 @@ module Text.Pandoc.Writers.Markdown (writeMarkdown, writePlain) where
|
|||
import Prelude
|
||||
import Control.Monad.Reader
|
||||
import Control.Monad.State.Strict
|
||||
import Data.Char (chr, isPunctuation, isSpace, ord, isAlphaNum)
|
||||
import Data.Char (isPunctuation, isSpace, isAlphaNum)
|
||||
import Data.Default
|
||||
import qualified Data.HashMap.Strict as H
|
||||
import Data.List (find, group, intersperse, sortBy, stripPrefix, transpose)
|
||||
|
@ -1249,33 +1249,6 @@ makeMathPlainer = walk go
|
|||
go (Emph xs) = Span nullAttr xs
|
||||
go x = x
|
||||
|
||||
toSuperscript :: Char -> Maybe Char
|
||||
toSuperscript '1' = Just '\x00B9'
|
||||
toSuperscript '2' = Just '\x00B2'
|
||||
toSuperscript '3' = Just '\x00B3'
|
||||
toSuperscript '+' = Just '\x207A'
|
||||
toSuperscript '-' = Just '\x207B'
|
||||
toSuperscript '=' = Just '\x207C'
|
||||
toSuperscript '(' = Just '\x207D'
|
||||
toSuperscript ')' = Just '\x207E'
|
||||
toSuperscript c
|
||||
| c >= '0' && c <= '9' =
|
||||
Just $ chr (0x2070 + (ord c - 48))
|
||||
| isSpace c = Just c
|
||||
| otherwise = Nothing
|
||||
|
||||
toSubscript :: Char -> Maybe Char
|
||||
toSubscript '+' = Just '\x208A'
|
||||
toSubscript '-' = Just '\x208B'
|
||||
toSubscript '=' = Just '\x208C'
|
||||
toSubscript '(' = Just '\x208D'
|
||||
toSubscript ')' = Just '\x208E'
|
||||
toSubscript c
|
||||
| c >= '0' && c <= '9' =
|
||||
Just $ chr (0x2080 + (ord c - 48))
|
||||
| isSpace c = Just c
|
||||
| otherwise = Nothing
|
||||
|
||||
lineBreakToSpace :: Inline -> Inline
|
||||
lineBreakToSpace LineBreak = Space
|
||||
lineBreakToSpace SoftBreak = Space
|
||||
|
|
|
@ -48,12 +48,15 @@ module Text.Pandoc.Writers.Shared (
|
|||
, lookupMetaString
|
||||
, stripLeadingTrailingSpace
|
||||
, groffEscape
|
||||
, toSubscript
|
||||
, toSuperscript
|
||||
)
|
||||
where
|
||||
import Prelude
|
||||
import Control.Monad (zipWithM)
|
||||
import Data.Aeson (FromJSON (..), Result (..), ToJSON (..), Value (Object),
|
||||
encode, fromJSON)
|
||||
import Data.Char (chr, ord, isAscii, isSpace)
|
||||
import qualified Data.HashMap.Strict as H
|
||||
import Data.List (groupBy, intersperse, transpose)
|
||||
import qualified Data.Map as M
|
||||
|
@ -68,7 +71,6 @@ import Text.Pandoc.Shared (stringify)
|
|||
import Text.Pandoc.UTF8 (toStringLazy)
|
||||
import Text.Pandoc.XML (escapeStringForXML)
|
||||
import Text.Printf (printf)
|
||||
import Data.Char (isAscii, ord)
|
||||
|
||||
-- | Create JSON value for template from a 'Meta' and an association list
|
||||
-- of variables, specified at the command line or in the writer.
|
||||
|
@ -392,3 +394,30 @@ groffEscape = T.concatMap toUchar
|
|||
| isAscii c = T.singleton c
|
||||
| otherwise = T.pack $ printf "\\[u%04X]" (ord c)
|
||||
|
||||
|
||||
toSuperscript :: Char -> Maybe Char
|
||||
toSuperscript '1' = Just '\x00B9'
|
||||
toSuperscript '2' = Just '\x00B2'
|
||||
toSuperscript '3' = Just '\x00B3'
|
||||
toSuperscript '+' = Just '\x207A'
|
||||
toSuperscript '-' = Just '\x207B'
|
||||
toSuperscript '=' = Just '\x207C'
|
||||
toSuperscript '(' = Just '\x207D'
|
||||
toSuperscript ')' = Just '\x207E'
|
||||
toSuperscript c
|
||||
| c >= '0' && c <= '9' =
|
||||
Just $ chr (0x2070 + (ord c - 48))
|
||||
| isSpace c = Just c
|
||||
| otherwise = Nothing
|
||||
|
||||
toSubscript :: Char -> Maybe Char
|
||||
toSubscript '+' = Just '\x208A'
|
||||
toSubscript '-' = Just '\x208B'
|
||||
toSubscript '=' = Just '\x208C'
|
||||
toSubscript '(' = Just '\x208D'
|
||||
toSubscript ')' = Just '\x208E'
|
||||
toSubscript c
|
||||
| c >= '0' && c <= '9' =
|
||||
Just $ chr (0x2080 + (ord c - 48))
|
||||
| isSpace c = Just c
|
||||
| otherwise = Nothing
|
||||
|
|
156
test/command/4528.md
Normal file
156
test/command/4528.md
Normal file
|
@ -0,0 +1,156 @@
|
|||
# Rendering small caps, superscripts and subscripts with and without `raw_html`
|
||||
|
||||
## Small caps
|
||||
|
||||
```
|
||||
% pandoc --wrap=none -f latex -t commonmark-raw_html
|
||||
This has \textsc{small caps} in it.
|
||||
^D
|
||||
This has SMALL CAPS in it.
|
||||
```
|
||||
|
||||
```
|
||||
% pandoc --wrap=none -f latex -t commonmark+raw_html
|
||||
This has \textsc{small caps} in it.
|
||||
^D
|
||||
This has <span class="smallcaps">small caps</span> in it.
|
||||
```
|
||||
```
|
||||
|
||||
```
|
||||
% pandoc --wrap=none -f latex -t markdown_strict+raw_html
|
||||
This has \textsc{small caps} in it.
|
||||
^D
|
||||
This has <span class="smallcaps">small caps</span> in it.
|
||||
```
|
||||
|
||||
## Strikeout
|
||||
|
||||
```
|
||||
% pandoc --wrap=none -f html -t commonmark-raw_html-strikeout
|
||||
This has <s>strikeout</s> in it.
|
||||
^D
|
||||
This has strikeout in it.
|
||||
|
||||
```
|
||||
% pandoc --wrap=none -f html -t commonmark+raw_html-strikeout
|
||||
This has <s>strikeout</s> in it.
|
||||
^D
|
||||
This has <s>strikeout</s> in it.
|
||||
```
|
||||
|
||||
```
|
||||
% pandoc --wrap=none -f html -t commonmark-raw_html+strikeout
|
||||
This has <s>strikeout</s> in it.
|
||||
^D
|
||||
This has ~~strikeout~~ in it.
|
||||
```
|
||||
|
||||
```
|
||||
% pandoc --wrap=none -f html -t commonmark+raw_html+strikeout
|
||||
This has <s>strikeout</s> in it.
|
||||
^D
|
||||
This has ~~strikeout~~ in it.
|
||||
```
|
||||
|
||||
```
|
||||
% pandoc --wrap=none -f html -t markdown_strict-raw_html-strikeout
|
||||
This has <s>strikeout</s> in it.
|
||||
^D
|
||||
This has strikeout in it.
|
||||
```
|
||||
|
||||
```
|
||||
% pandoc --wrap=none -f html -t markdown_strict+raw_html-strikeout
|
||||
This has <s>strikeout</s> in it.
|
||||
^D
|
||||
This has <s>strikeout</s> in it.
|
||||
```
|
||||
|
||||
```
|
||||
% pandoc --wrap=none -f html -t markdown_strict-raw_html+strikeout
|
||||
This has <s>strikeout</s> in it.
|
||||
^D
|
||||
This has ~~strikeout~~ in it.
|
||||
```
|
||||
|
||||
```
|
||||
% pandoc --wrap=none -f html -t markdown_strict+raw_html+strikeout
|
||||
This has <s>strikeout</s> in it.
|
||||
^D
|
||||
This has ~~strikeout~~ in it.
|
||||
```
|
||||
|
||||
## Superscript
|
||||
|
||||
```
|
||||
% pandoc --wrap=none -f html -t commonmark-raw_html
|
||||
This has <sup>superscript</sup> in it and <sup>2 3</sup> again. With emphasis: <sup><em>2</em> 3</sup>. With letters: <sup>foo</sup>. With a span: <sup><span class=foo>2</span></sup>.
|
||||
^D
|
||||
This has ^(superscript) in it and ² ³ again. With emphasis: ^(*2* 3). With letters: ^(foo). With a span: ².
|
||||
```
|
||||
|
||||
```
|
||||
% pandoc --wrap=none -f html -t commonmark+raw_html
|
||||
This has <sup>superscript</sup> in it and <sup>2</sup> again.
|
||||
^D
|
||||
This has <sup>superscript</sup> in it and <sup>2</sup> again.
|
||||
```
|
||||
|
||||
```
|
||||
% pandoc --wrap=none -f html -t markdown_strict-raw_html-superscript
|
||||
This has <sup>superscript</sup> in it and <sup>2</sup> again.
|
||||
^D
|
||||
This has ^(superscript) in it and ² again.
|
||||
```
|
||||
|
||||
```
|
||||
% pandoc --wrap=none -f html -t markdown_strict+raw_html-superscript
|
||||
This has <sup>superscript</sup> in it and <sup>2</sup> again.
|
||||
^D
|
||||
This has <sup>superscript</sup> in it and <sup>2</sup> again.
|
||||
```
|
||||
|
||||
```
|
||||
% pandoc --wrap=none -f html -t markdown_strict+raw_html+superscript
|
||||
This has <sup>superscript</sup> in it and <sup>2</sup> again.
|
||||
^D
|
||||
This has ^superscript^ in it and ^2^ again.
|
||||
```
|
||||
|
||||
## Subscript
|
||||
|
||||
```
|
||||
% pandoc --wrap=none -f html -t commonmark-raw_html
|
||||
This has <sub>subscript</sub> in it and <sub>2 3</sub> again. With emphasis: <sub><em>2</em> 3</sub>. With letters: <sub>foo</sub>. With a span: <sub><span class=foo>2</span></sub>.
|
||||
^D
|
||||
This has \_(subscript) in it and ₂ ₃ again. With emphasis: \_(*2* 3). With letters: \_(foo). With a span: ₂.
|
||||
```
|
||||
|
||||
```
|
||||
% pandoc --wrap=none -f html -t commonmark+raw_html
|
||||
This has <sub>subscript</sub> in it and <sub>2</sub> again.
|
||||
^D
|
||||
This has <sub>subscript</sub> in it and <sub>2</sub> again.
|
||||
```
|
||||
|
||||
```
|
||||
% pandoc --wrap=none -f html -t markdown_strict-raw_html-subscript
|
||||
This has <sub>subscript</sub> in it and <sub>2</sub> again.
|
||||
^D
|
||||
This has _(subscript) in it and ₂ again.
|
||||
```
|
||||
|
||||
```
|
||||
% pandoc --wrap=none -f html -t markdown_strict+raw_html-subscript
|
||||
This has <sub>subscript</sub> in it and <sub>2</sub> again.
|
||||
^D
|
||||
This has <sub>subscript</sub> in it and <sub>2</sub> again.
|
||||
```
|
||||
|
||||
```
|
||||
% pandoc --wrap=none -f html -t markdown_strict+raw_html+subscript
|
||||
This has <sub>subscript</sub> in it and <sub>2</sub> again.
|
||||
^D
|
||||
This has ~subscript~ in it and ~2~ again.
|
||||
```
|
Loading…
Reference in a new issue