Modified rawHtmlBlock in HTML reader so it parses </html> and </body> tags.

This allows these tags to be handled correctly in Markdown. HTML reader now uses rawHtmlBlock', which excludes </html> and </body>, since these are handled in parseHtml. (Resolves Issue #38.)

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1152 788f1e2b-df1e-0410-8736-df70ead52e1b
2007-12-23 03:46:12 +00:00 · 2007-12-23 03:46:12 +00:00 · 6802d287cf
commit 6802d287cf
parent fbecb49790
1 changed files with 7 additions and 2 deletions
--- a/Text/Pandoc/Readers/HTML.hs
+++ b/Text/Pandoc/Readers/HTML.hs
@@ -199,12 +199,16 @@ htmlScript = try $ do
 htmlBlockElement = choice [ htmlScript, htmlComment, xmlDec, definition ]

 rawHtmlBlock = try $ do
-  notFollowedBy' (htmlTag "/body" <|> htmlTag "/html")
  body <- htmlBlockElement <|> anyHtmlTag <|> anyHtmlEndTag
  sp <- many space
  state <- getState
  if stateParseRaw state then return (RawHtml (body ++ sp)) else return Null

+-- We don't want to parse </body> or </html> as raw HTML, since these
+-- are handled in parseHtml.
+rawHtmlBlock' = do notFollowedBy' (htmlTag "/body" <|> htmlTag "/html")
+                   rawHtmlBlock
+
 -- | Parses an HTML comment.
 htmlComment = try $ do
  string "<!--"
@@ -284,7 +288,8 @@ block = choice [ codeBlock
               , blockQuote
               , para
               , plain
-               , rawHtmlBlock ] <?> "block"
+               , rawHtmlBlock'
+               ] <?> "block"

 --
 -- header blocks