Textile reader: don't allow block HTML tags in inline contexts.

The reader previously allowed this, following RedCloth,
which happily parses

    Html blocks can be <div>inlined</div> as well.

as

    <p>Html blocks can be <div>inlined</div> as well.</p>

This is invalid HTML, and this kind of thing can lead
to parsing problems (stack overflows) as well.  So this
commit undoes this behavior.  The above sample now produces:

    <p>Html blocks can be</p>
    <div>
    <p>inlined</p>
    </div>
    <p>as well.</p>
This commit is contained in:
John MacFarlane 2016-01-02 22:28:07 -08:00
parent 90a2df3f7c
commit 97c9691696
3 changed files with 8 additions and 10 deletions

View file

@ -57,7 +57,7 @@ import Text.Pandoc.Builder (Inlines, Blocks, trimInlines)
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag )
import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag, isInlineTag )
import Text.Pandoc.Shared (trim)
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..))
@ -504,7 +504,7 @@ endline = try $ do
return B.linebreak
rawHtmlInline :: Parser [Char] ParserState Inlines
rawHtmlInline = B.rawInline "html" . snd <$> htmlTag (const True)
rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag
-- | Raw LaTeX Inline
rawLaTeXInline' :: Parser [Char] ParserState Inlines

View file

@ -150,10 +150,11 @@ Pandoc (Meta {unMeta = fromList []})
,RawBlock (Format "html") "<div class=\"foobar\">"
,Para [Str "any",Space,Strong [Str "Raw",Space,Str "HTML",Space,Str "Block"],Space,Str "with",Space,Str "bold"]
,RawBlock (Format "html") "</div>"
,Para [Str "Html",Space,Str "blocks",Space,Str "can",Space,Str "be",Space,RawInline (Format "html") "<div>",Str "inlined",RawInline (Format "html") "</div>",Space,Str "as",Space,Str "well."]
,BulletList
[[Plain [Str "this",Space,RawInline (Format "html") "<div>",Space,Str "won't",Space,Str "produce",Space,Str "raw",Space,Str "html",Space,Str "blocks",Space,RawInline (Format "html") "</div>"]]
,[Plain [Str "but",Space,Str "this",Space,RawInline (Format "html") "<strong>",Space,Str "will",Space,Str "produce",Space,Str "inline",Space,Str "html",Space,RawInline (Format "html") "</strong>"]]]
,Para [Str "Html",Space,Str "blocks",Space,Str "can"]
,RawBlock (Format "html") "<div>"
,Para [Str "interrupt",Space,Str "paragraphs"]
,RawBlock (Format "html") "</div>"
,Para [Str "as",Space,Str "well."]
,Para [Str "Can",Space,Str "you",Space,Str "prove",Space,Str "that",Space,Str "2",Space,Str "<",Space,Str "3",Space,Str "?"]
,Header 1 ("acronyms-and-marks",[],[]) [Str "Acronyms",Space,Str "and",Space,Str "marks"]
,Para [Str "PBS (Public Broadcasting System)"]

View file

@ -228,10 +228,7 @@ However, <strong> raw HTML inlines </strong> are accepted, as well as :
any *Raw HTML Block* with bold
</div>
Html blocks can be <div>inlined</div> as well.
* this <div> won't produce raw html blocks </div>
* but this <strong> will produce inline html </strong>
Html blocks can <div>interrupt paragraphs</div> as well.
Can you prove that 2 < 3 ?