Textile reader: don't allow block HTML tags in inline contexts.
The reader previously allowed this, following RedCloth, which happily parses "Html blocks can be <div>inlined</div> as well." as "<p>Html blocks can be <div>inlined</div> as well.</p>". This is invalid HTML, and this kind of thing can also lead to parsing problems (stack overflows). So this commit undoes this behavior. The above sample now produces: "<p>Html blocks can be</p> <div> <p>inlined</p> </div> <p>as well.</p>"
This commit is contained in:
parent
90a2df3f7c
commit
97c9691696
3 changed files with 8 additions and 10 deletions
|
@ -57,7 +57,7 @@ import Text.Pandoc.Builder (Inlines, Blocks, trimInlines)
|
|||
import qualified Text.Pandoc.Builder as B
|
||||
import Text.Pandoc.Options
|
||||
import Text.Pandoc.Parsing
|
||||
import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag )
|
||||
import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag, isInlineTag )
|
||||
import Text.Pandoc.Shared (trim)
|
||||
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
|
||||
import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..))
|
||||
|
@ -504,7 +504,7 @@ endline = try $ do
|
|||
return B.linebreak
|
||||
|
||||
rawHtmlInline :: Parser [Char] ParserState Inlines
|
||||
rawHtmlInline = B.rawInline "html" . snd <$> htmlTag (const True)
|
||||
rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag
|
||||
|
||||
-- | Raw LaTeX Inline
|
||||
rawLaTeXInline' :: Parser [Char] ParserState Inlines
|
||||
|
|
|
@ -150,10 +150,11 @@ Pandoc (Meta {unMeta = fromList []})
|
|||
,RawBlock (Format "html") "<div class=\"foobar\">"
|
||||
,Para [Str "any",Space,Strong [Str "Raw",Space,Str "HTML",Space,Str "Block"],Space,Str "with",Space,Str "bold"]
|
||||
,RawBlock (Format "html") "</div>"
|
||||
,Para [Str "Html",Space,Str "blocks",Space,Str "can",Space,Str "be",Space,RawInline (Format "html") "<div>",Str "inlined",RawInline (Format "html") "</div>",Space,Str "as",Space,Str "well."]
|
||||
,BulletList
|
||||
[[Plain [Str "this",Space,RawInline (Format "html") "<div>",Space,Str "won't",Space,Str "produce",Space,Str "raw",Space,Str "html",Space,Str "blocks",Space,RawInline (Format "html") "</div>"]]
|
||||
,[Plain [Str "but",Space,Str "this",Space,RawInline (Format "html") "<strong>",Space,Str "will",Space,Str "produce",Space,Str "inline",Space,Str "html",Space,RawInline (Format "html") "</strong>"]]]
|
||||
,Para [Str "Html",Space,Str "blocks",Space,Str "can"]
|
||||
,RawBlock (Format "html") "<div>"
|
||||
,Para [Str "interrupt",Space,Str "paragraphs"]
|
||||
,RawBlock (Format "html") "</div>"
|
||||
,Para [Str "as",Space,Str "well."]
|
||||
,Para [Str "Can",Space,Str "you",Space,Str "prove",Space,Str "that",Space,Str "2",Space,Str "<",Space,Str "3",Space,Str "?"]
|
||||
,Header 1 ("acronyms-and-marks",[],[]) [Str "Acronyms",Space,Str "and",Space,Str "marks"]
|
||||
,Para [Str "PBS (Public Broadcasting System)"]
|
||||
|
|
|
@ -228,10 +228,7 @@ However, <strong> raw HTML inlines </strong> are accepted, as well as :
|
|||
any *Raw HTML Block* with bold
|
||||
</div>
|
||||
|
||||
Html blocks can be <div>inlined</div> as well.
|
||||
|
||||
* this <div> won't produce raw html blocks </div>
|
||||
* but this <strong> will produce inline html </strong>
|
||||
Html blocks can <div>interrupt paragraphs</div> as well.
|
||||
|
||||
Can you prove that 2 < 3 ?
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue