Markdown reader: improved parsing of indented raw HTML blocks.

Previously we inadvertently interpreted indented HTML as
code blocks.  This was a regression.

We now determine the indentation level of the contents
of an HTML block, and skip up to that much indentation
before each block inside it.

As a side effect, indentation may be stripped from raw
HTML blocks when `markdown_in_html_blocks` is enabled. This
is better than having their contents interpreted as indented
code blocks.

Closes #1841.
This commit is contained in:
John MacFarlane 2017-05-06 22:56:16 +02:00
parent f20c89e243
commit 82cc7fb0d4
2 changed files with 49 additions and 1 deletion

View file

@ -1088,13 +1088,19 @@ rawTeXBlock = do
rawHtmlBlocks :: PandocMonad m => MarkdownParser m (F Blocks)
rawHtmlBlocks = do
(TagOpen tagtype _, raw) <- htmlTag isBlockTag
-- we don't want '<td> text' to be a code block:
skipMany spaceChar
indentlevel <- (blankline >> length <$> many (char ' ')) <|> return 0
-- try to find closing tag
-- we set stateInHtmlBlock so that closing tags that can be either block or
-- inline will not be parsed as inline tags
oldInHtmlBlock <- stateInHtmlBlock <$> getState
updateState $ \st -> st{ stateInHtmlBlock = Just tagtype }
let closer = htmlTag (\x -> x ~== TagClose tagtype)
contents <- mconcat <$> many (notFollowedBy' closer >> block)
let block' = do notFollowedBy' closer
atMostSpaces indentlevel
block
contents <- mconcat <$> many block'
result <-
(closer >>= \(_, rawcloser) -> return (
return (B.rawBlock "html" $ stripMarkdownAttribute raw) <>

42
test/command/1841.md Normal file
View file

@ -0,0 +1,42 @@
```
% pandoc
<table>
<tr>
<td> *one*</td>
<td> [a link](http://google.com)</td>
</tr>
</table>
^D
<table>
<tr>
<td>
<em>one</em>
</td>
<td>
<a href="http://google.com">a link</a>
</td>
</tr>
</table>
```
```
% pandoc
<table>
<tr>
<td>*one*</td>
<td>[a link](http://google.com)</td>
</tr>
</table>
^D
<table>
<tr>
<td>
<em>one</em>
</td>
<td>
<a href="http://google.com">a link</a>
</td>
</tr>
</table>
```