Changed parsing of code blocks in HTML reader:

+ The <code> tag is no longer needed; <pre> suffices.
+ All HTML tags in the code block (e.g. for syntax highlighting) are skipped, because they are not portable to other output formats.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1022 788f1e2b-df1e-0410-8736-df70ead52e1b
2007-09-17 02:49:28 +00:00 · 2007-09-17 02:49:28 +00:00 · 6f16d52c11
commit 6f16d52c11
parent 2094534b3c
1 changed files with 8 additions and 7 deletions
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@ -100,7 +100,7 @@ extractTagType ('<':rest) =
  map toLower $ takeWhile isAlphaNum $ dropWhile isSpaceOrSlash rest
 extractTagType _ = ""

-- | Parse any HTML tag (closing or opening) and return text of tag
+-- | Parse any HTML tag (opening or self-closing) and return text of tag
 anyHtmlTag = try $ do
  char '<'
  spaces
@ -313,15 +313,16 @@ hrule = try  $ do
 -- code blocks
 --

+-- Note:  HTML tags in code blocks (e.g. for syntax highlighting) are 
+-- skipped, because they are not portable to output formats other than HTML.
 codeBlock = try $ do
    htmlTag "pre" 
-    spaces
-    htmlTag "code"
-    result <- manyTill anyChar (htmlEndTag "code")
-    spaces
-    htmlEndTag "pre"
+    result <- manyTill 
+              (many1 (satisfy (/= '<')) <|> 
+               ((anyHtmlTag <|> anyHtmlEndTag) >> return ""))
+              (htmlEndTag "pre")
    return $ CodeBlock $ stripTrailingNewlines $ 
-             decodeCharacterReferences result
+             decodeCharacterReferences $ concat result

 --
 -- block quotes