HTML reader: Simplified parsing of <script> sections.

I had previously assumed that we needed to ignore
</script> occurring in a string literal or JavaScript
comment.  It turns out, though, that browsers aren't
that smart.
This commit is contained in:
John MacFarlane 2010-12-22 19:20:27 -08:00
parent 4bfe140ed1
commit c08ca6fa6d

View file

@ -288,32 +288,9 @@ htmlScript :: GenParser Char ParserState [Char]
-- Parses a whole <script> element and returns it as raw text.
--
-- The body is taken verbatim up to the first closing script tag, even when
-- that tag sits inside a JavaScript string literal or comment; no attempt
-- is made to tokenize the script.  The closing tag is re-emitted as the
-- literal "</script>" regardless of how it was spelled in the input.
htmlScript = try $ do
  -- Check for a script opening tag without consuming it, so that the
  -- generic tag parser below can capture the tag text.
  lookAhead $ htmlOpenTag "script"
  open <- anyHtmlTag
  -- Single pass over the body: the earlier chunk-based binding of `rest`
  -- is dropped so the input is not consumed twice.
  rest <- manyTill anyChar (htmlEndTag "script")
  return $ open ++ rest ++ "</script>"
-- | Parses one chunk of JavaScript source and returns its text.
--
-- A chunk is, in order of preference: a comment (@//@ to end of line, or
-- @/* ... */@), a single- or double-quoted string literal, or a run of
-- ordinary characters.  Comments and string literals are consumed as a
-- unit, so markup-like text inside them does not end the chunk early.
scriptChunk :: GenParser Char ParserState [Char]
scriptChunk = jsComment <|> jsString <|> jsChars
  where
    jsComment = jsEndlineComment <|> jsMultilineComment
    jsString = jsSingleQuoteString <|> jsDoubleQuoteString
    -- A run of characters that cannot start a comment, string or tag;
    -- failing that, any single character so the parser always advances.
    jsChars = many1 (noneOf "<\"'*/") <|> count 1 anyChar
    jsEndlineComment = try $ do
      string "//"
      res <- manyTill anyChar newline
      -- 'manyTill' consumes the terminating newline but does not return
      -- it; put it back so the following line is not merged into the
      -- comment when the script text is re-emitted.
      return ("//" ++ res ++ "\n")
    jsMultilineComment = try $ do
      string "/*"
      res <- manyTill anyChar (try $ string "*/")
      return ("/*" ++ res ++ "*/")
    jsSingleQuoteString = stringwith '\''
    jsDoubleQuoteString = stringwith '"'
    -- Either a backslash escape of a backslash or one of the @escapable@
    -- characters (returned as the two-character sequence), or any single
    -- character.
    charWithEsc escapable = try $
      (try $ char '\\' >> oneOf ('\\':escapable) >>= \x -> return ['\\',x])
      <|> count 1 anyChar
    -- A string literal delimited by @c@; an escaped delimiter inside the
    -- literal does not terminate it.  The delimiters are kept in the result.
    stringwith c = try $ do
      char c
      res <- liftM concat $ manyTill (charWithEsc [c]) (char c)
      return (c : (res ++ [c]))
-- | Parses material between style tags.
-- Style tags must be treated differently, because they can contain CSS
htmlStyle :: GenParser Char ParserState [Char]