From 82cc7fb0d462401b54bfe5172e7e49ab7b7302d9 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 6 May 2017 22:56:16 +0200 Subject: [PATCH] Markdown reader: improved parsing of indented raw HTML blocks. Previously we inadvertently interpreted indented HTML as code blocks. This was a regression. We now seek to determine the indentation level of the contents of an HTML block, and (optionally) skip that much indentation. As a side effect, indentation may be stripped off of raw HTML blocks, if `markdown_in_html_blocks` is used. This is better than having things interpreted as indented code blocks. Closes #1841. --- src/Text/Pandoc/Readers/Markdown.hs | 8 +++++- test/command/1841.md | 42 +++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 test/command/1841.md diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 5515c735b..691d4d5cf 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1088,13 +1088,19 @@ rawTeXBlock = do rawHtmlBlocks :: PandocMonad m => MarkdownParser m (F Blocks) rawHtmlBlocks = do (TagOpen tagtype _, raw) <- htmlTag isBlockTag + -- we don't want '<td> text' to be a code block: + skipMany spaceChar + indentlevel <- (blankline >> length <$> many (char ' ')) <|> return 0 -- try to find closing tag -- we set stateInHtmlBlock so that closing tags that can be either block or -- inline will not be parsed as inline tags oldInHtmlBlock <- stateInHtmlBlock <$> getState updateState $ \st -> st{ stateInHtmlBlock = Just tagtype } let closer = htmlTag (\x -> x ~== TagClose tagtype) - contents <- mconcat <$> many (notFollowedBy' closer >> block) + let block' = do notFollowedBy' closer + atMostSpaces indentlevel + block + contents <- mconcat <$> many block' result <- (closer >>= \(_, rawcloser) -> return ( return (B.rawBlock "html" $ stripMarkdownAttribute raw) <> diff --git a/test/command/1841.md b/test/command/1841.md new file mode 100644 index 000000000..408f224bd --- /dev/null +++ b/test/command/1841.md @@ -0,0 +1,42 @@ +``` +% pandoc +<table> +<tr> +<td> *one*</td> +<td> [a link](http://google.com)</td> +</tr> +</table> +^D +<table> +<tr> +<td> +<em>one</em> +</td> +<td> +<a href="http://google.com">a link</a> +</td> +</tr> +</table> +``` + +``` +% pandoc +<table> + <tr> + <td>*one*</td> + <td>[a link](http://google.com)</td> + </tr> +</table> +^D +<table> +<tr> +<td> +<em>one</em> +</td> +<td> +<a href="http://google.com">a link</a> +</td> +</tr> +</table> +``` +