HTML reader: improved handling of tags that can be block or inline.

Previously a section like this would be enclosed in a paragraph,
with RawInline for the video tags (since video is a tag that can
be either block or inline):

    <video controls="controls">
       <source src="../videos/test.mp4" type="video/mp4" />
       <source src="../videos/test.webm" type="video/webm" />
       <p>
          The videos can not be played back on your system.<br/>
          Try viewing on Youtube (requires Internet connection):
          <a href="http://youtu.be/etE5urBps_w">Relative Velocity on
    Youtube</a>.
       </p>
    </video>

This change will cause the video and source tags to be parsed
as RawBlock instead, giving better output.

The general change is this: when we're parsing a "plain" sequence
of inlines, we no longer accept as raw inline HTML any tag that COULD
be a block-level tag (for example video).
This commit is contained in:
John MacFarlane 2014-08-18 12:41:09 -07:00
parent ee88f5662b
commit 6dce8c6760

View file

@ -91,16 +91,20 @@ replaceNotes' x = return x
-- | User state threaded through the HTML reader's parsers.
data HTMLState =
  HTMLState
  { -- | The wrapped generic 'ParserState'; class instances for
    -- 'HTMLState' in this module read and update it through this field.
    parserState :: ParserState,
    -- | Association list from a 'String' key to 'Blocks'.
    -- NOTE(review): presumably note identifiers paired with their
    -- contents -- confirm against the code that fills this table.
    noteTable   :: [(String, Blocks)]
  }
-- | Read-only environment carried by the 'Reader' layer of 'HTMLParser'.
-- Fields are overridden for a sub-parser with 'local' (see 'setInChapter'
-- and 'setInPlain').
data HTMLLocal = HTMLLocal
  { -- | Quote context currently in effect.
    quoteContext :: QuoteContext
    -- | Set if in chapter section
  , inChapter    :: Bool
    -- | Set if in pPlain
  , inPlain      :: Bool
  }
-- | Run the given parser in a local environment whose 'inChapter' flag
-- is 'True'; the flag is only changed for the duration of that parser.
setInChapter :: HTMLParser s a -> HTMLParser s a
setInChapter parser = local enterChapter parser
  where
    -- Copy of the environment with the chapter flag switched on.
    enterChapter env = env { inChapter = True }
-- | Run the given parser in a local environment whose 'inPlain' flag
-- is 'True'; the flag is only changed for the duration of that parser.
setInPlain :: HTMLParser s a -> HTMLParser s a
setInPlain parser = local enterPlain parser
  where
    -- Copy of the environment with the plain flag switched on.
    enterPlain env = env { inPlain = True }
-- | Parser type of the HTML reader: 'ParserT' applied to an input type
-- @s@, the 'HTMLState' user state, and a 'Reader' monad that carries the
-- 'HTMLLocal' environment.
type HTMLParser s = ParserT s HTMLState (Reader HTMLLocal)
-- | 'HTMLParser' specialised to input of type @[Tag String]@.
type TagParser = HTMLParser [Tag String]
@ -141,8 +145,8 @@ block = do
, pTable
, pHead
, pBody
, pPlain
, pDiv
, pPlain
, pRawHtmlBlock
]
when tr $ trace (printf "line %d: %s" (sourceLine pos)
@ -422,7 +426,7 @@ pBlockQuote = do
-- | Parse one or more consecutive inlines and wrap them in a plain block.
--
-- Returns 'mempty' when the trimmed inline content is empty, otherwise
-- @B.plain@ of that content. Fails if not even one inline can be parsed
-- (because of 'many1').
pPlain :: TagParser Blocks
pPlain = do
  -- The inlines are parsed exactly once, with 'inPlain' set. With the flag
  -- set, 'pRawHtmlInline' accepts only tags that are not block tags, so a
  -- tag that could be block-level is left for the block parsers.
  contents <- setInPlain $ trimInlines . mconcat <$> many1 inline
  if B.isNull contents
     then return mempty
     else return $ B.plain contents
@ -579,7 +583,11 @@ pSpan = try $ do
pRawHtmlInline :: TagParser Inlines
pRawHtmlInline = do
result <- pSatisfy (tagComment (const True)) <|> pSatisfy isInlineTag
inplain <- asks inPlain
result <- pSatisfy (tagComment (const True))
<|> if inplain
then pSatisfy (not . isBlockTag)
else pSatisfy isInlineTag
parseRaw <- getOption readerParseRaw
if parseRaw
then return $ B.rawInline "html" $ renderTags' [result]
@ -919,7 +927,7 @@ instance HasMeta HTMLState where
deleteMeta s st = st {parserState = deleteMeta s $ parserState st}
-- | Default local environment: 'NoQuote' as the quote context, with both
-- 'inChapter' and 'inPlain' unset.
instance Default HTMLLocal where
  -- One argument per field of 'HTMLLocal', in declaration order:
  -- quoteContext, inChapter, inPlain.
  def = HTMLLocal NoQuote False False
instance HasLastStrPosition HTMLState where
setLastStrPos s st = st {parserState = setLastStrPos s (parserState st)}