Changed parsing of code blocks in HTML reader:

+ The <code> tag is no longer needed; <pre> alone suffices.
+ All HTML tags inside the code block (e.g. for syntax highlighting)
  are skipped, because they are not portable to other output formats.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1022 788f1e2b-df1e-0410-8736-df70ead52e1b
This commit is contained in:
fiddlosopher 2007-09-17 02:49:28 +00:00
parent 2094534b3c
commit 6f16d52c11

View file

@ -100,7 +100,7 @@ extractTagType ('<':rest) =
map toLower $ takeWhile isAlphaNum $ dropWhile isSpaceOrSlash rest
extractTagType _ = ""
-- | Parse any HTML tag (closing or opening) and return text of tag
-- | Parse any HTML tag (opening or self-closing) and return text of tag
anyHtmlTag = try $ do
char '<'
spaces
@ -313,15 +313,16 @@ hrule = try $ do
-- code blocks
--
-- Note: HTML tags in code blocks (e.g. for syntax highlighting) are
-- skipped, because they are not portable to output formats other than HTML.
-- Parser for an HTML <pre> element. The collected text is passed through
-- decodeCharacterReferences and stripTrailingNewlines and wrapped in CodeBlock.
-- NOTE(review): this listing looks like a diff whose +/- markers were lost, so
-- lines from before and after the change appear interleaved. The steps that
-- require a nested <code> element are presumably the removed ones, since the
-- commit message says <pre> alone suffices -- confirm against the repository.
codeBlock = try $ do
htmlTag "pre"
spaces
htmlTag "code"
result <- manyTill anyChar (htmlEndTag "code")
spaces
htmlEndTag "pre"
-- Collect chunks until the closing </pre>: runs of characters other than '<'
-- are kept, while anything matched by anyHtmlTag or anyHtmlEndTag contributes
-- the empty string, so tags are dropped from the result.
result <- manyTill
(many1 (satisfy (/= '<')) <|>
((anyHtmlTag <|> anyHtmlEndTag) >> return ""))
(htmlEndTag "pre")
return $ CodeBlock $ stripTrailingNewlines $
decodeCharacterReferences result
-- The chunks form a list of strings, so they are concatenated before decoding.
decodeCharacterReferences $ concat result
--
-- block quotes