CommonMark writer: add plain text fallbacks. (#4531)
Previously, the writer would unconditionally emit HTMLish output for subscripts, superscripts, strikeouts (if the strikeout extension is disabled) and small caps, even with raw_html disabled. Now there are plain-text (and, where possible, fancy Unicode) fallbacks for all of these corresponding (mostly) to the Markdown fallbacks, and the HTMLish output is only used when raw_html is enabled. This commit adds exported functions `toSuperscript` and `toSubscript` to `Text.Pandoc.Writers.Shared`. [API change] Closes #4528.
This commit is contained in:
parent
a26b3a2d6a
commit
6207bdeb68
5 changed files with 237 additions and 41 deletions
10
MANUAL.txt
10
MANUAL.txt
|
@ -3381,8 +3381,14 @@ Markdown allows it, but it has been made an extension so that it can
|
||||||
be disabled if desired.)
|
be disabled if desired.)
|
||||||
|
|
||||||
The raw HTML is passed through unchanged in HTML, S5, Slidy, Slideous,
|
The raw HTML is passed through unchanged in HTML, S5, Slidy, Slideous,
|
||||||
DZSlides, EPUB, Markdown, Emacs Org mode, and Textile output, and suppressed
|
DZSlides, EPUB, Markdown, CommonMark, Emacs Org mode, and Textile
|
||||||
in other formats.
|
output, and suppressed in other formats.
|
||||||
|
|
||||||
|
In the CommonMark format, if `raw_html` is enabled, superscripts,
|
||||||
|
subscripts, strikeouts and small capitals will be represented as HTML.
|
||||||
|
Otherwise, plain-text fallbacks will be used. Note that even if
|
||||||
|
`raw_html` is disabled, tables will be rendered with HTML syntax if
|
||||||
|
they cannot use pipe syntax.
|
||||||
|
|
||||||
#### Extension: `markdown_in_html_blocks` ####
|
#### Extension: `markdown_in_html_blocks` ####
|
||||||
|
|
||||||
|
|
|
@ -45,7 +45,7 @@ import Network.HTTP (urlEncode)
|
||||||
import Text.Pandoc.Class (PandocMonad)
|
import Text.Pandoc.Class (PandocMonad)
|
||||||
import Text.Pandoc.Definition
|
import Text.Pandoc.Definition
|
||||||
import Text.Pandoc.Options
|
import Text.Pandoc.Options
|
||||||
import Text.Pandoc.Shared (isTightList, linesToPara, substitute)
|
import Text.Pandoc.Shared (isTightList, linesToPara, substitute, capitalize)
|
||||||
import Text.Pandoc.Templates (renderTemplate')
|
import Text.Pandoc.Templates (renderTemplate')
|
||||||
import Text.Pandoc.Walk (query, walk, walkM)
|
import Text.Pandoc.Walk (query, walk, walkM)
|
||||||
import Text.Pandoc.Writers.HTML (writeHtml5String, tagWithAttributes)
|
import Text.Pandoc.Writers.HTML (writeHtml5String, tagWithAttributes)
|
||||||
|
@ -253,18 +253,34 @@ inlineToNodes opts (Strong xs) = (node STRONG (inlinesToNodes opts xs) :)
|
||||||
inlineToNodes opts (Strikeout xs) =
|
inlineToNodes opts (Strikeout xs) =
|
||||||
if isEnabled Ext_strikeout opts
|
if isEnabled Ext_strikeout opts
|
||||||
then (node (CUSTOM_INLINE "~~" "~~") (inlinesToNodes opts xs) :)
|
then (node (CUSTOM_INLINE "~~" "~~") (inlinesToNodes opts xs) :)
|
||||||
else ((node (HTML_INLINE (T.pack "<s>")) [] : inlinesToNodes opts xs ++
|
else if isEnabled Ext_raw_html opts
|
||||||
|
then ((node (HTML_INLINE (T.pack "<s>")) [] : inlinesToNodes opts xs ++
|
||||||
[node (HTML_INLINE (T.pack "</s>")) []]) ++ )
|
[node (HTML_INLINE (T.pack "</s>")) []]) ++ )
|
||||||
|
else (inlinesToNodes opts xs ++)
|
||||||
inlineToNodes opts (Superscript xs) =
|
inlineToNodes opts (Superscript xs) =
|
||||||
((node (HTML_INLINE (T.pack "<sup>")) [] : inlinesToNodes opts xs ++
|
if isEnabled Ext_raw_html opts
|
||||||
|
then ((node (HTML_INLINE (T.pack "<sup>")) [] : inlinesToNodes opts xs ++
|
||||||
[node (HTML_INLINE (T.pack "</sup>")) []]) ++ )
|
[node (HTML_INLINE (T.pack "</sup>")) []]) ++ )
|
||||||
|
else case traverse toSuperscriptInline xs of
|
||||||
|
Nothing ->
|
||||||
|
((node (TEXT (T.pack "^(")) [] : inlinesToNodes opts xs ++
|
||||||
|
[node (TEXT (T.pack ")")) []]) ++ )
|
||||||
|
Just xs' -> (inlinesToNodes opts xs' ++)
|
||||||
inlineToNodes opts (Subscript xs) =
|
inlineToNodes opts (Subscript xs) =
|
||||||
((node (HTML_INLINE (T.pack "<sub>")) [] : inlinesToNodes opts xs ++
|
if isEnabled Ext_raw_html opts
|
||||||
|
then ((node (HTML_INLINE (T.pack "<sub>")) [] : inlinesToNodes opts xs ++
|
||||||
[node (HTML_INLINE (T.pack "</sub>")) []]) ++ )
|
[node (HTML_INLINE (T.pack "</sub>")) []]) ++ )
|
||||||
|
else case traverse toSubscriptInline xs of
|
||||||
|
Nothing ->
|
||||||
|
((node (TEXT (T.pack "_(")) [] : inlinesToNodes opts xs ++
|
||||||
|
[node (TEXT (T.pack ")")) []]) ++ )
|
||||||
|
Just xs' -> (inlinesToNodes opts xs' ++)
|
||||||
inlineToNodes opts (SmallCaps xs) =
|
inlineToNodes opts (SmallCaps xs) =
|
||||||
((node (HTML_INLINE (T.pack "<span class=\"smallcaps\">")) []
|
if isEnabled Ext_raw_html opts
|
||||||
|
then ((node (HTML_INLINE (T.pack "<span class=\"smallcaps\">")) []
|
||||||
: inlinesToNodes opts xs ++
|
: inlinesToNodes opts xs ++
|
||||||
[node (HTML_INLINE (T.pack "</span>")) []]) ++ )
|
[node (HTML_INLINE (T.pack "</span>")) []]) ++ )
|
||||||
|
else (inlinesToNodes opts (capitalize xs) ++)
|
||||||
inlineToNodes opts (Link _ ils (url,tit)) =
|
inlineToNodes opts (Link _ ils (url,tit)) =
|
||||||
(node (LINK (T.pack url) (T.pack tit)) (inlinesToNodes opts ils) :)
|
(node (LINK (T.pack url) (T.pack tit)) (inlinesToNodes opts ils) :)
|
||||||
-- title beginning with fig: indicates implicit figure
|
-- title beginning with fig: indicates implicit figure
|
||||||
|
@ -319,3 +335,19 @@ inlineToNodes opts (Span attr ils) =
|
||||||
inlineToNodes opts (Cite _ ils) = (inlinesToNodes opts ils ++)
|
inlineToNodes opts (Cite _ ils) = (inlinesToNodes opts ils ++)
|
||||||
inlineToNodes _ (Note _) = id -- should not occur
|
inlineToNodes _ (Note _) = id -- should not occur
|
||||||
-- we remove Note elements in preprocessing
|
-- we remove Note elements in preprocessing
|
||||||
|
|
||||||
|
toSubscriptInline :: Inline -> Maybe Inline
|
||||||
|
toSubscriptInline Space = Just Space
|
||||||
|
toSubscriptInline (Span attr ils) = Span attr <$> traverse toSubscriptInline ils
|
||||||
|
toSubscriptInline (Str s) = Str <$> traverse toSubscript s
|
||||||
|
toSubscriptInline LineBreak = Just LineBreak
|
||||||
|
toSubscriptInline SoftBreak = Just SoftBreak
|
||||||
|
toSubscriptInline _ = Nothing
|
||||||
|
|
||||||
|
toSuperscriptInline :: Inline -> Maybe Inline
|
||||||
|
toSuperscriptInline Space = Just Space
|
||||||
|
toSuperscriptInline (Span attr ils) = Span attr <$> traverse toSuperscriptInline ils
|
||||||
|
toSuperscriptInline (Str s) = Str <$> traverse toSuperscript s
|
||||||
|
toSuperscriptInline LineBreak = Just LineBreak
|
||||||
|
toSuperscriptInline SoftBreak = Just SoftBreak
|
||||||
|
toSuperscriptInline _ = Nothing
|
||||||
|
|
|
@ -38,7 +38,7 @@ module Text.Pandoc.Writers.Markdown (writeMarkdown, writePlain) where
|
||||||
import Prelude
|
import Prelude
|
||||||
import Control.Monad.Reader
|
import Control.Monad.Reader
|
||||||
import Control.Monad.State.Strict
|
import Control.Monad.State.Strict
|
||||||
import Data.Char (chr, isPunctuation, isSpace, ord, isAlphaNum)
|
import Data.Char (isPunctuation, isSpace, isAlphaNum)
|
||||||
import Data.Default
|
import Data.Default
|
||||||
import qualified Data.HashMap.Strict as H
|
import qualified Data.HashMap.Strict as H
|
||||||
import Data.List (find, group, intersperse, sortBy, stripPrefix, transpose)
|
import Data.List (find, group, intersperse, sortBy, stripPrefix, transpose)
|
||||||
|
@ -1249,33 +1249,6 @@ makeMathPlainer = walk go
|
||||||
go (Emph xs) = Span nullAttr xs
|
go (Emph xs) = Span nullAttr xs
|
||||||
go x = x
|
go x = x
|
||||||
|
|
||||||
toSuperscript :: Char -> Maybe Char
|
|
||||||
toSuperscript '1' = Just '\x00B9'
|
|
||||||
toSuperscript '2' = Just '\x00B2'
|
|
||||||
toSuperscript '3' = Just '\x00B3'
|
|
||||||
toSuperscript '+' = Just '\x207A'
|
|
||||||
toSuperscript '-' = Just '\x207B'
|
|
||||||
toSuperscript '=' = Just '\x207C'
|
|
||||||
toSuperscript '(' = Just '\x207D'
|
|
||||||
toSuperscript ')' = Just '\x207E'
|
|
||||||
toSuperscript c
|
|
||||||
| c >= '0' && c <= '9' =
|
|
||||||
Just $ chr (0x2070 + (ord c - 48))
|
|
||||||
| isSpace c = Just c
|
|
||||||
| otherwise = Nothing
|
|
||||||
|
|
||||||
toSubscript :: Char -> Maybe Char
|
|
||||||
toSubscript '+' = Just '\x208A'
|
|
||||||
toSubscript '-' = Just '\x208B'
|
|
||||||
toSubscript '=' = Just '\x208C'
|
|
||||||
toSubscript '(' = Just '\x208D'
|
|
||||||
toSubscript ')' = Just '\x208E'
|
|
||||||
toSubscript c
|
|
||||||
| c >= '0' && c <= '9' =
|
|
||||||
Just $ chr (0x2080 + (ord c - 48))
|
|
||||||
| isSpace c = Just c
|
|
||||||
| otherwise = Nothing
|
|
||||||
|
|
||||||
lineBreakToSpace :: Inline -> Inline
|
lineBreakToSpace :: Inline -> Inline
|
||||||
lineBreakToSpace LineBreak = Space
|
lineBreakToSpace LineBreak = Space
|
||||||
lineBreakToSpace SoftBreak = Space
|
lineBreakToSpace SoftBreak = Space
|
||||||
|
|
|
@ -48,12 +48,15 @@ module Text.Pandoc.Writers.Shared (
|
||||||
, lookupMetaString
|
, lookupMetaString
|
||||||
, stripLeadingTrailingSpace
|
, stripLeadingTrailingSpace
|
||||||
, groffEscape
|
, groffEscape
|
||||||
|
, toSubscript
|
||||||
|
, toSuperscript
|
||||||
)
|
)
|
||||||
where
|
where
|
||||||
import Prelude
|
import Prelude
|
||||||
import Control.Monad (zipWithM)
|
import Control.Monad (zipWithM)
|
||||||
import Data.Aeson (FromJSON (..), Result (..), ToJSON (..), Value (Object),
|
import Data.Aeson (FromJSON (..), Result (..), ToJSON (..), Value (Object),
|
||||||
encode, fromJSON)
|
encode, fromJSON)
|
||||||
|
import Data.Char (chr, ord, isAscii, isSpace)
|
||||||
import qualified Data.HashMap.Strict as H
|
import qualified Data.HashMap.Strict as H
|
||||||
import Data.List (groupBy, intersperse, transpose)
|
import Data.List (groupBy, intersperse, transpose)
|
||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
|
@ -68,7 +71,6 @@ import Text.Pandoc.Shared (stringify)
|
||||||
import Text.Pandoc.UTF8 (toStringLazy)
|
import Text.Pandoc.UTF8 (toStringLazy)
|
||||||
import Text.Pandoc.XML (escapeStringForXML)
|
import Text.Pandoc.XML (escapeStringForXML)
|
||||||
import Text.Printf (printf)
|
import Text.Printf (printf)
|
||||||
import Data.Char (isAscii, ord)
|
|
||||||
|
|
||||||
-- | Create JSON value for template from a 'Meta' and an association list
|
-- | Create JSON value for template from a 'Meta' and an association list
|
||||||
-- of variables, specified at the command line or in the writer.
|
-- of variables, specified at the command line or in the writer.
|
||||||
|
@ -392,3 +394,30 @@ groffEscape = T.concatMap toUchar
|
||||||
| isAscii c = T.singleton c
|
| isAscii c = T.singleton c
|
||||||
| otherwise = T.pack $ printf "\\[u%04X]" (ord c)
|
| otherwise = T.pack $ printf "\\[u%04X]" (ord c)
|
||||||
|
|
||||||
|
|
||||||
|
toSuperscript :: Char -> Maybe Char
|
||||||
|
toSuperscript '1' = Just '\x00B9'
|
||||||
|
toSuperscript '2' = Just '\x00B2'
|
||||||
|
toSuperscript '3' = Just '\x00B3'
|
||||||
|
toSuperscript '+' = Just '\x207A'
|
||||||
|
toSuperscript '-' = Just '\x207B'
|
||||||
|
toSuperscript '=' = Just '\x207C'
|
||||||
|
toSuperscript '(' = Just '\x207D'
|
||||||
|
toSuperscript ')' = Just '\x207E'
|
||||||
|
toSuperscript c
|
||||||
|
| c >= '0' && c <= '9' =
|
||||||
|
Just $ chr (0x2070 + (ord c - 48))
|
||||||
|
| isSpace c = Just c
|
||||||
|
| otherwise = Nothing
|
||||||
|
|
||||||
|
toSubscript :: Char -> Maybe Char
|
||||||
|
toSubscript '+' = Just '\x208A'
|
||||||
|
toSubscript '-' = Just '\x208B'
|
||||||
|
toSubscript '=' = Just '\x208C'
|
||||||
|
toSubscript '(' = Just '\x208D'
|
||||||
|
toSubscript ')' = Just '\x208E'
|
||||||
|
toSubscript c
|
||||||
|
| c >= '0' && c <= '9' =
|
||||||
|
Just $ chr (0x2080 + (ord c - 48))
|
||||||
|
| isSpace c = Just c
|
||||||
|
| otherwise = Nothing
|
||||||
|
|
156
test/command/4528.md
Normal file
156
test/command/4528.md
Normal file
|
@ -0,0 +1,156 @@
|
||||||
|
# Rendering small caps, superscripts and subscripts with and without `raw_html`
|
||||||
|
|
||||||
|
## Small caps
|
||||||
|
|
||||||
|
```
|
||||||
|
% pandoc --wrap=none -f latex -t commonmark-raw_html
|
||||||
|
This has \textsc{small caps} in it.
|
||||||
|
^D
|
||||||
|
This has SMALL CAPS in it.
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
% pandoc --wrap=none -f latex -t commonmark+raw_html
|
||||||
|
This has \textsc{small caps} in it.
|
||||||
|
^D
|
||||||
|
This has <span class="smallcaps">small caps</span> in it.
|
||||||
|
```
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
% pandoc --wrap=none -f latex -t markdown_strict+raw_html
|
||||||
|
This has \textsc{small caps} in it.
|
||||||
|
^D
|
||||||
|
This has <span class="smallcaps">small caps</span> in it.
|
||||||
|
```
|
||||||
|
|
||||||
|
## Strikeout
|
||||||
|
|
||||||
|
```
|
||||||
|
% pandoc --wrap=none -f html -t commonmark-raw_html-strikeout
|
||||||
|
This has <s>strikeout</s> in it.
|
||||||
|
^D
|
||||||
|
This has strikeout in it.
|
||||||
|
|
||||||
|
```
|
||||||
|
% pandoc --wrap=none -f html -t commonmark+raw_html-strikeout
|
||||||
|
This has <s>strikeout</s> in it.
|
||||||
|
^D
|
||||||
|
This has <s>strikeout</s> in it.
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
% pandoc --wrap=none -f html -t commonmark-raw_html+strikeout
|
||||||
|
This has <s>strikeout</s> in it.
|
||||||
|
^D
|
||||||
|
This has ~~strikeout~~ in it.
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
% pandoc --wrap=none -f html -t commonmark+raw_html+strikeout
|
||||||
|
This has <s>strikeout</s> in it.
|
||||||
|
^D
|
||||||
|
This has ~~strikeout~~ in it.
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
% pandoc --wrap=none -f html -t markdown_strict-raw_html-strikeout
|
||||||
|
This has <s>strikeout</s> in it.
|
||||||
|
^D
|
||||||
|
This has strikeout in it.
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
% pandoc --wrap=none -f html -t markdown_strict+raw_html-strikeout
|
||||||
|
This has <s>strikeout</s> in it.
|
||||||
|
^D
|
||||||
|
This has <s>strikeout</s> in it.
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
% pandoc --wrap=none -f html -t markdown_strict-raw_html+strikeout
|
||||||
|
This has <s>strikeout</s> in it.
|
||||||
|
^D
|
||||||
|
This has ~~strikeout~~ in it.
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
% pandoc --wrap=none -f html -t markdown_strict+raw_html+strikeout
|
||||||
|
This has <s>strikeout</s> in it.
|
||||||
|
^D
|
||||||
|
This has ~~strikeout~~ in it.
|
||||||
|
```
|
||||||
|
|
||||||
|
## Superscript
|
||||||
|
|
||||||
|
```
|
||||||
|
% pandoc --wrap=none -f html -t commonmark-raw_html
|
||||||
|
This has <sup>superscript</sup> in it and <sup>2 3</sup> again. With emphasis: <sup><em>2</em> 3</sup>. With letters: <sup>foo</sup>. With a span: <sup><span class=foo>2</span></sup>.
|
||||||
|
^D
|
||||||
|
This has ^(superscript) in it and ² ³ again. With emphasis: ^(*2* 3). With letters: ^(foo). With a span: ².
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
% pandoc --wrap=none -f html -t commonmark+raw_html
|
||||||
|
This has <sup>superscript</sup> in it and <sup>2</sup> again.
|
||||||
|
^D
|
||||||
|
This has <sup>superscript</sup> in it and <sup>2</sup> again.
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
% pandoc --wrap=none -f html -t markdown_strict-raw_html-superscript
|
||||||
|
This has <sup>superscript</sup> in it and <sup>2</sup> again.
|
||||||
|
^D
|
||||||
|
This has ^(superscript) in it and ² again.
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
% pandoc --wrap=none -f html -t markdown_strict+raw_html-superscript
|
||||||
|
This has <sup>superscript</sup> in it and <sup>2</sup> again.
|
||||||
|
^D
|
||||||
|
This has <sup>superscript</sup> in it and <sup>2</sup> again.
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
% pandoc --wrap=none -f html -t markdown_strict+raw_html+superscript
|
||||||
|
This has <sup>superscript</sup> in it and <sup>2</sup> again.
|
||||||
|
^D
|
||||||
|
This has ^superscript^ in it and ^2^ again.
|
||||||
|
```
|
||||||
|
|
||||||
|
## Subscript
|
||||||
|
|
||||||
|
```
|
||||||
|
% pandoc --wrap=none -f html -t commonmark-raw_html
|
||||||
|
This has <sub>subscript</sub> in it and <sub>2 3</sub> again. With emphasis: <sub><em>2</em> 3</sub>. With letters: <sub>foo</sub>. With a span: <sub><span class=foo>2</span></sub>.
|
||||||
|
^D
|
||||||
|
This has \_(subscript) in it and ₂ ₃ again. With emphasis: \_(*2* 3). With letters: \_(foo). With a span: ₂.
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
% pandoc --wrap=none -f html -t commonmark+raw_html
|
||||||
|
This has <sub>subscript</sub> in it and <sub>2</sub> again.
|
||||||
|
^D
|
||||||
|
This has <sub>subscript</sub> in it and <sub>2</sub> again.
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
% pandoc --wrap=none -f html -t markdown_strict-raw_html-subscript
|
||||||
|
This has <sub>subscript</sub> in it and <sub>2</sub> again.
|
||||||
|
^D
|
||||||
|
This has _(subscript) in it and ₂ again.
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
% pandoc --wrap=none -f html -t markdown_strict+raw_html-subscript
|
||||||
|
This has <sub>subscript</sub> in it and <sub>2</sub> again.
|
||||||
|
^D
|
||||||
|
This has <sub>subscript</sub> in it and <sub>2</sub> again.
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
% pandoc --wrap=none -f html -t markdown_strict+raw_html+subscript
|
||||||
|
This has <sub>subscript</sub> in it and <sub>2</sub> again.
|
||||||
|
^D
|
||||||
|
This has ~subscript~ in it and ~2~ again.
|
||||||
|
```
|
Loading…
Reference in a new issue