From c08ca6fa6d58c6a52c93b126d1a704b8202f9a36 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 22 Dec 2010 19:20:27 -0800 Subject: [PATCH] HTML reader: Simplified parsing of <script> sections. Previously we tried to skip over </script> occurring in a string literal or javascript comment. It turns out, though, that browsers aren't that smart. --- src/Text/Pandoc/Readers/HTML.hs | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 1bbb11e62..c25a73418 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -288,32 +288,9 @@ htmlScript :: GenParser Char ParserState [Char] htmlScript = try $ do lookAhead $ htmlOpenTag "script" open <- anyHtmlTag - rest <- liftM concat $ manyTill scriptChunk (htmlEndTag "script") + rest <- manyTill anyChar (htmlEndTag "script") return $ open ++ rest ++ "</script>" -scriptChunk :: GenParser Char ParserState [Char] -scriptChunk = jsComment <|> jsString <|> jsChars - where jsComment = jsEndlineComment <|> jsMultilineComment - jsString = jsSingleQuoteString <|> jsDoubleQuoteString - jsChars = many1 (noneOf "<\"'*/") <|> count 1 anyChar - jsEndlineComment = try $ do - string "//" - res <- manyTill anyChar newline - return ("//" ++ res) - jsMultilineComment = try $ do - string "/*" - res <- manyTill anyChar (try $ string "*/") - return ("/*" ++ res ++ "*/") - jsSingleQuoteString = stringwith '\'' - jsDoubleQuoteString = stringwith '"' - charWithEsc escapable = try $ - (try $ char '\\' >> oneOf ('\\':escapable) >>= \x -> return ['\\',x]) - <|> count 1 anyChar - stringwith c = try $ do - char c - res <- liftM concat $ manyTill (charWithEsc [c]) (char c) - return (c : (res ++ [c])) - -- | Parses material between style tags. -- Style tags must be treated differently, because they can contain CSS htmlStyle :: GenParser Char ParserState [Char]