From 97c9691696744a6e56a28dea0221e6230b029ce4 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 2 Jan 2016 22:28:07 -0800
Subject: [PATCH] Textile reader: don't allow block HTML tags in inline
 contexts.

The reader previously did allow this, following redcloth,
which happily parses

    Html blocks can be <div>inlined</div> as well.

as

    <p>Html blocks can be <div>inlined</div> as well.</p>

This is invalid HTML, and this kind of thing can lead
to parsing problems (stack overflows) as well.  So this
commit undoes this behavior.  The above sample now produces:

    <p>Html blocks can be</p>
    <div>
    <p>inlined</p>
    </div>
    <p>as well.</p>
---
 src/Text/Pandoc/Readers/Textile.hs | 4 ++--
 tests/textile-reader.native        | 9 +++++----
 tests/textile-reader.textile       | 5 +----
 3 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 355285f54..dd1d289a3 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -57,7 +57,7 @@ import Text.Pandoc.Builder (Inlines, Blocks, trimInlines)
 import qualified Text.Pandoc.Builder as B
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing
-import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag )
+import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag, isInlineTag )
 import Text.Pandoc.Shared (trim)
 import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
 import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..))
@@ -504,7 +504,7 @@ endline = try $ do
   return B.linebreak
 
 rawHtmlInline :: Parser [Char] ParserState Inlines
-rawHtmlInline = B.rawInline "html" . snd <$> htmlTag (const True)
+rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag
 
 -- | Raw LaTeX Inline
 rawLaTeXInline' :: Parser [Char] ParserState Inlines
diff --git a/tests/textile-reader.native b/tests/textile-reader.native
index df727a8bb..fe2c7be24 100644
--- a/tests/textile-reader.native
+++ b/tests/textile-reader.native
@@ -150,10 +150,11 @@ Pandoc (Meta {unMeta = fromList []})
 ,RawBlock (Format "html") "<div class=\"foobar\">"
 ,Para [Str "any",Space,Strong [Str "Raw",Space,Str "HTML",Space,Str "Block"],Space,Str "with",Space,Str "bold"]
 ,RawBlock (Format "html") "</div>"
-,Para [Str "Html",Space,Str "blocks",Space,Str "can",Space,Str "be",Space,RawInline (Format "html") "<div>",Str "inlined",RawInline (Format "html") "</div>",Space,Str "as",Space,Str "well."]
-,BulletList
- [[Plain [Str "this",Space,RawInline (Format "html") "<div>",Space,Str "won't",Space,Str "produce",Space,Str "raw",Space,Str "html",Space,Str "blocks",Space,RawInline (Format "html") "</div>"]]
- ,[Plain [Str "but",Space,Str "this",Space,RawInline (Format "html") "<strong>",Space,Str "will",Space,Str "produce",Space,Str "inline",Space,Str "html",Space,RawInline (Format "html") "</strong>"]]]
+,Para [Str "Html",Space,Str "blocks",Space,Str "can"]
+,RawBlock (Format "html") "<div>"
+,Para [Str "interrupt",Space,Str "paragraphs"]
+,RawBlock (Format "html") "</div>"
+,Para [Str "as",Space,Str "well."]
 ,Para [Str "Can",Space,Str "you",Space,Str "prove",Space,Str "that",Space,Str "2",Space,Str "<",Space,Str "3",Space,Str "?"]
 ,Header 1 ("acronyms-and-marks",[],[]) [Str "Acronyms",Space,Str "and",Space,Str "marks"]
 ,Para [Str "PBS (Public Broadcasting System)"]
diff --git a/tests/textile-reader.textile b/tests/textile-reader.textile
index dab73b39f..d5d7378b0 100644
--- a/tests/textile-reader.textile
+++ b/tests/textile-reader.textile
@@ -228,10 +228,7 @@ However, <strong> raw HTML inlines </strong> are accepted, as well as :
   any *Raw HTML Block* with bold
 </div>
 
-Html blocks can be <div>inlined</div> as well. 
-
-* this <div> won't produce raw html blocks </div>
-* but this <strong> will produce inline html </strong>
+Html blocks can <div>interrupt paragraphs</div> as well.
 
 Can you prove that 2 < 3 ?