From 82cc7fb0d462401b54bfe5172e7e49ab7b7302d9 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 6 May 2017 22:56:16 +0200
Subject: [PATCH] Markdown reader: improved parsing of indented raw HTML
 blocks.

Previously we inadvertently interpreted indented HTML as
code blocks.  This was a regression.

We now seek to determine the indentation level of the contents
of an HTML block, and (optionally) skip that much indentation.

As a side effect, indentation may be stripped off of raw
HTML blocks, if `markdown_in_html_blocks` is used. This
is better than having things interpreted as indented code
blocks.

Closes #1841.
---
 src/Text/Pandoc/Readers/Markdown.hs |  8 +++++-
 test/command/1841.md                | 42 +++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 1 deletion(-)
 create mode 100644 test/command/1841.md

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 5515c735b..691d4d5cf 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1088,13 +1088,19 @@ rawTeXBlock = do
 rawHtmlBlocks :: PandocMonad m => MarkdownParser m (F Blocks)
 rawHtmlBlocks = do
   (TagOpen tagtype _, raw) <- htmlTag isBlockTag
+  -- we don't want '<td>    text' to be a code block:
+  skipMany spaceChar
+  indentlevel <- (blankline >> length <$> many (char ' ')) <|> return 0
   -- try to find closing tag
   -- we set stateInHtmlBlock so that closing tags that can be either block or
   -- inline will not be parsed as inline tags
   oldInHtmlBlock <- stateInHtmlBlock <$> getState
   updateState $ \st -> st{ stateInHtmlBlock = Just tagtype }
   let closer = htmlTag (\x -> x ~== TagClose tagtype)
-  contents <- mconcat <$> many (notFollowedBy' closer >> block)
+  let block' = do notFollowedBy' closer
+                  atMostSpaces indentlevel
+                  block
+  contents <- mconcat <$> many block'
   result <-
     (closer >>= \(_, rawcloser) -> return (
                 return (B.rawBlock "html" $ stripMarkdownAttribute raw) <>
diff --git a/test/command/1841.md b/test/command/1841.md
new file mode 100644
index 000000000..408f224bd
--- /dev/null
+++ b/test/command/1841.md
@@ -0,0 +1,42 @@
+```
+% pandoc
+<table>
+<tr>
+<td>    *one*</td>
+<td>    [a link](http://google.com)</td>
+</tr>
+</table>
+^D
+<table>
+<tr>
+<td>
+<em>one</em>
+</td>
+<td>
+<a href="http://google.com">a link</a>
+</td>
+</tr>
+</table>
+```
+
+```
+% pandoc
+<table>
+    <tr>
+        <td>*one*</td>
+        <td>[a link](http://google.com)</td>
+    </tr>
+</table>
+^D
+<table>
+<tr>
+<td>
+<em>one</em>
+</td>
+<td>
+<a href="http://google.com">a link</a>
+</td>
+</tr>
+</table>
+```
+