Markdown reader: improved parsing of indented raw HTML blocks.
Previously we inadvertently interpreted indented HTML as code blocks; this was a regression. We now determine the indentation level of the contents of an HTML block and (optionally) skip up to that much indentation before each contained block. As a side effect, indentation may be stripped from raw HTML blocks if `markdown_in_html_blocks` is used. This is better than having their contents interpreted as indented code blocks. Closes #1841.
This commit is contained in:
parent
f20c89e243
commit
82cc7fb0d4
2 changed files with 49 additions and 1 deletion
|
@ -1088,13 +1088,19 @@ rawTeXBlock = do
|
|||
rawHtmlBlocks :: PandocMonad m => MarkdownParser m (F Blocks)
|
||||
rawHtmlBlocks = do
|
||||
(TagOpen tagtype _, raw) <- htmlTag isBlockTag
|
||||
-- we don't want '<td> text' to be a code block:
|
||||
skipMany spaceChar
|
||||
indentlevel <- (blankline >> length <$> many (char ' ')) <|> return 0
|
||||
-- try to find closing tag
|
||||
-- we set stateInHtmlBlock so that closing tags that can be either block or
|
||||
-- inline will not be parsed as inline tags
|
||||
oldInHtmlBlock <- stateInHtmlBlock <$> getState
|
||||
updateState $ \st -> st{ stateInHtmlBlock = Just tagtype }
|
||||
let closer = htmlTag (\x -> x ~== TagClose tagtype)
|
||||
contents <- mconcat <$> many (notFollowedBy' closer >> block)
|
||||
let block' = do notFollowedBy' closer
|
||||
atMostSpaces indentlevel
|
||||
block
|
||||
contents <- mconcat <$> many block'
|
||||
result <-
|
||||
(closer >>= \(_, rawcloser) -> return (
|
||||
return (B.rawBlock "html" $ stripMarkdownAttribute raw) <>
|
||||
|
|
42
test/command/1841.md
Normal file
42
test/command/1841.md
Normal file
|
@ -0,0 +1,42 @@
|
|||
```
|
||||
% pandoc
|
||||
<table>
|
||||
<tr>
|
||||
<td> *one*</td>
|
||||
<td> [a link](http://google.com)</td>
|
||||
</tr>
|
||||
</table>
|
||||
^D
|
||||
<table>
|
||||
<tr>
|
||||
<td>
|
||||
<em>one</em>
|
||||
</td>
|
||||
<td>
|
||||
<a href="http://google.com">a link</a>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
```
|
||||
|
||||
```
|
||||
% pandoc
|
||||
<table>
|
||||
<tr>
|
||||
<td>*one*</td>
|
||||
<td>[a link](http://google.com)</td>
|
||||
</tr>
|
||||
</table>
|
||||
^D
|
||||
<table>
|
||||
<tr>
|
||||
<td>
|
||||
<em>one</em>
|
||||
</td>
|
||||
<td>
|
||||
<a href="http://google.com">a link</a>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
```
|
||||
|
Loading…
Add table
Reference in a new issue