Multimarkdown sub- and superscripts (#5512) (#7188)

Added an extension `short_subsuperscripts` which modifies the behavior
of `subscript` and `superscript`, allowing subscripts or superscripts containing only
alphanumerics to end with a space character (e.g. `x^2 = 4` or `H~2 is
combustible`).  This improves support for multimarkdown. Closes #5512.

Add `Ext_short_subsuperscripts` constructor to `Extension` [API change].
This is enabled by default for `markdown_mmd`.
This commit is contained in:
OCzarnecki 2021-08-16 06:57:57 +02:00 committed by GitHub
parent 72447a563c
commit e37cf4484d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 80 additions and 15 deletions

View file

@ -5304,6 +5304,18 @@ For elements that accept attributes, a `data-pos` attribute
is added; other elements are placed in a surrounding
Div or Span element with a `data-pos` attribute.
#### Extension: `short_subsuperscripts` ####
Parse multimarkdown style subscripts and superscripts, which start with
a '~' or '^' character, respectively, and include the alphanumeric sequence
that follows. For example:
x^2 = 4
or
Oxygen is O~2.
## Markdown variants
In addition to pandoc's extended Markdown, the following Markdown

View file

@ -124,6 +124,7 @@ data Extension =
| Ext_mmd_header_identifiers -- ^ Multimarkdown style header identifiers [myid]
| Ext_mmd_link_attributes -- ^ MMD style reference link attributes
| Ext_mmd_title_block -- ^ Multimarkdown metadata block
| Ext_short_subsuperscripts -- ^ sub-&superscripts w/o closing char (v~i)
| Ext_multiline_tables -- ^ Pandoc-style multiline tables
| Ext_native_divs -- ^ Use Div blocks for contents of <div> tags
| Ext_native_spans -- ^ Use Span inlines for contents of <span>
@ -286,14 +287,9 @@ multimarkdownExtensions = extensionsFromList
, Ext_auto_identifiers
, Ext_mmd_header_identifiers
, Ext_implicit_figures
-- Note: MMD's syntax for superscripts and subscripts
-- is a bit more permissive than pandoc's, allowing
-- e^2 and a~1 instead of e^2^ and a~1~, so even with
-- these options we don't have full support for MMD
-- superscripts and subscripts, but there's no reason
-- not to include these:
, Ext_superscript
, Ext_short_subsuperscripts
, Ext_subscript
, Ext_superscript
, Ext_backtick_code_blocks
, Ext_spaced_reference_links
-- So far only in dev version of mmd:
@ -464,6 +460,7 @@ getAllExtensions f = universalExtensions <> getAll f
, Ext_gutenberg
, Ext_smart
, Ext_literate_haskell
, Ext_short_subsuperscripts
, Ext_rebase_relative_paths
]
getAll "markdown_strict" = allMarkdownExtensions

View file

@ -1692,21 +1692,29 @@ strikeout = fmap B.strikeout <$>
-- | Parse a superscript introduced by a @^@ character, wrapping the parsed
-- content with 'B.superscript'.
--
-- NOTE(review): this span appears to be a diff hunk rendered without its
-- +/- markers, so lines from the pre-change and post-change bodies sit side
-- by side (there are two @mconcat <$>@ lines). Read it as a record of the
-- change, not as one compilable definition -- confirm against the real file.
superscript :: PandocMonad m => MarkdownParser m (F Inlines)
superscript = do
guardEnabled Ext_superscript
-- The whole attempt is wrapped in 'try', so a failure after the opening
-- '^' has been read does not leave input consumed.
fmap B.superscript <$> try (do
char '^'
mconcat <$> many1Till (do notFollowedBy spaceChar
notFollowedBy newline
inline) (char '^'))
-- Try the closed form first, then fall back to the short form.
mconcat <$> (try regularSuperscript <|> try mmdShortSuperscript))
-- Closed form, gated on Ext_superscript: one or more inlines, none of which
-- may begin at a space or newline, up to a closing '^'.
where regularSuperscript = many1Till (do guardEnabled Ext_superscript
notFollowedBy spaceChar
notFollowedBy newline
inline) (char '^')
-- Short form, gated on Ext_short_subsuperscripts: a run of alphanumeric
-- characters (presumably non-empty, going by the name take1WhileP) with no
-- closing delimiter, kept as a single Str.
mmdShortSuperscript = do guardEnabled Ext_short_subsuperscripts
result <- take1WhileP isAlphaNum
-- Three layers of 'return': into the list that 'mconcat' consumes, into
-- that list's element type (F Inlines per the signature), and into the
-- parser.
return $ return $ return $ B.str result
-- | Parse a subscript introduced by a @~@ character, wrapping the parsed
-- content with 'B.subscript'.
--
-- NOTE(review): this span appears to be a diff hunk rendered without its
-- +/- markers, so lines from the pre-change and post-change bodies sit side
-- by side (there are two @mconcat <$>@ lines). Read it as a record of the
-- change, not as one compilable definition -- confirm against the real file.
subscript :: PandocMonad m => MarkdownParser m (F Inlines)
subscript = do
guardEnabled Ext_subscript
-- The whole attempt is wrapped in 'try', so a failure after the opening
-- '~' has been read does not leave input consumed.
fmap B.subscript <$> try (do
char '~'
mconcat <$> many1Till (do notFollowedBy spaceChar
notFollowedBy newline
inline) (char '~'))
-- Try the closed form first, then fall back to the short form.
-- NOTE(review): the short alternative has no 'try' of its own here, whereas
-- the matching line in 'superscript' wraps both alternatives; the enclosing
-- 'try' above still covers it, so this looks like a cosmetic difference.
mconcat <$> (try regularSubscript <|> mmdShortSubscript))
-- Closed form, gated on Ext_subscript: one or more inlines, none of which
-- may begin at a space or newline, up to a closing '~'.
where regularSubscript = many1Till (do guardEnabled Ext_subscript
notFollowedBy spaceChar
notFollowedBy newline
inline) (char '~')
-- Short form, gated on Ext_short_subsuperscripts: a run of alphanumeric
-- characters (presumably non-empty, going by the name take1WhileP) with no
-- closing delimiter, kept as a single Str.
mmdShortSubscript = do guardEnabled Ext_short_subsuperscripts
result <- take1WhileP isAlphaNum
-- Three layers of 'return': into the list that 'mconcat' consumes, into
-- that list's element type (F Inlines per the signature), and into the
-- parser.
return $ return $ return $ B.str result
whitespace :: PandocMonad m => MarkdownParser m (F Inlines)
whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace"

View file

@ -36,6 +36,9 @@ markdownGH :: Text -> Pandoc
markdownGH = purely $ readMarkdown def {
readerExtensions = githubMarkdownExtensions }
-- | Read Markdown text purely, with the reader's extension set replaced by
-- 'multimarkdownExtensions' and every other option left at its default.
markdownMMD :: Text -> Pandoc
markdownMMD = purely (readMarkdown mmdOptions)
  where
    mmdOptions = def { readerExtensions = multimarkdownExtensions }
infix 4 =:
(=:) :: ToString c
=> String -> (Text, c) -> TestTree
@ -360,6 +363,51 @@ tests = [ testGroup "inline code"
("**this should \"be bold**"
=?> para (strong "this should \8220be bold"))
]
-- Sub- and superscripts as read by 'markdownMMD': the closed forms
-- (H~2~, x^3^) plus the short forms that need no closing delimiter.
, testGroup "sub- and superscripts"
[
test markdownMMD "normal subscript"
("H~2~"
=?> para ("H" <> subscript "2"))
, test markdownMMD "normal superscript"
("x^3^"
=?> para ("x" <> superscript "3"))
-- Short subscripts: the subscript ends where the alphanumeric run ends.
, test markdownMMD "short subscript delimited by space"
("O~2 is dangerous"
=?> para ("O" <> subscript "2" <> space <> "is dangerous"))
, test markdownMMD "short subscript delimited by newline"
("O~2\n"
=?> para ("O" <> subscript "2"))
, test markdownMMD "short subscript delimited by EOF"
("O~2"
=?> para ("O" <> subscript "2"))
, test markdownMMD "short subscript delimited by punctuation"
("O~2."
=?> para ("O" <> subscript "2" <> "."))
, test markdownMMD "short subscript delimited by emph"
("O~2*combustible!*"
=?> para ("O" <> subscript "2" <> emph "combustible!"))
-- Markup directly after the marker is not taken as short-form content;
-- the marker stays literal.
, test markdownMMD "no nesting in short subscripts"
("y~*2*"
=?> para ("y~" <> emph "2"))
-- Short superscripts: same cases as the subscript tests above.
, test markdownMMD "short superscript delimited by space"
("x^2 = y"
=?> para ("x" <> superscript "2" <> space <> "= y"))
, test markdownMMD "short superscript delimited by newline"
("x^2\n"
=?> para ("x" <> superscript "2"))
, test markdownMMD "short superscript delimited by EOF"
("x^2"
=?> para ("x" <> superscript "2"))
, test markdownMMD "short superscript delimited by punctuation"
("x^2."
=?> para ("x" <> superscript "2" <> "."))
, test markdownMMD "short superscript delimited by emph"
("x^2*combustible!*"
=?> para ("x" <> superscript "2" <> emph "combustible!"))
, test markdownMMD "no nesting in short superscripts"
("y^*2*"
=?> para ("y^" <> emph "2"))
]
, testGroup "footnotes"
[ "indent followed by newline and flush-left text" =:
"[^1]\n\n[^1]: my note\n\n \nnot in note\n"