HTML reader: Simplified parsing of <script> sections.
I had previously assumed that we needed to ignore </script> occurring in a string literal or JavaScript comment. It turns out, though, that browsers aren't that smart.
This commit is contained in:
parent
4bfe140ed1
commit
c08ca6fa6d
1 changed files with 1 additions and 24 deletions
|
@ -288,32 +288,9 @@ htmlScript :: GenParser Char ParserState [Char]
|
|||
-- Parses a complete <script> element and returns it as raw text.
--
-- The whole parser is wrapped in 'try', so a failure part-way through
-- consumes no input. Everything between the opening tag and the first
-- closing script tag is taken verbatim: no attempt is made to recognise
-- JavaScript strings or comments, so a closing tag appearing inside one
-- of them still ends the element.
htmlScript = try $ do
  -- Check that a <script> opening tag comes next without consuming it, ...
  lookAhead $ htmlOpenTag "script"
  -- ... then capture that tag's source text.
  open <- anyHtmlTag
  -- The end-tag parser's own result is discarded by 'manyTill', so the
  -- closing tag is re-emitted below as the literal "</script>".
  rest <- manyTill anyChar (htmlEndTag "script")
  return $ open ++ rest ++ "</script>"
|
||||
|
||||
-- | Parses one lexical chunk of JavaScript: a comment, a quoted string
-- literal, or a run of ordinary characters. Comments and string literals
-- are consumed as a unit and returned with their delimiters, so text inside
-- them that looks like markup is passed through as part of the chunk.
scriptChunk :: GenParser Char ParserState [Char]
scriptChunk = jsComment <|> jsString <|> jsChars
  where
    jsComment = jsEndlineComment <|> jsMultilineComment
    jsString = jsSingleQuoteString <|> jsDoubleQuoteString
    -- Either a run of characters that cannot begin a tag, string or
    -- comment, or (when the parsers above did not match) exactly one
    -- character of any kind.
    jsChars = many1 (noneOf "<\"'*/") <|> count 1 anyChar
    -- A @//@ comment running to the end of the line.
    jsEndlineComment = try $ do
      string "//"
      res <- manyTill anyChar newline
      -- 'manyTill' consumes the terminating newline but does not return
      -- it; put it back so the next source line is not merged into the
      -- comment text.
      return ("//" ++ res ++ "\n")
    -- A @/* ... */@ comment, possibly spanning several lines.
    jsMultilineComment = try $ do
      string "/*"
      res <- manyTill anyChar (try $ string "*/")
      return ("/*" ++ res ++ "*/")
    jsSingleQuoteString = stringwith '\''
    jsDoubleQuoteString = stringwith '"'
    -- One unit inside a string literal: a backslash followed by a
    -- backslash or one of the @escapable@ characters (returned as the
    -- two-character sequence), otherwise any single character.
    charWithEsc escapable = try $
      (try $ char '\\' >> oneOf ('\\':escapable) >>= \x -> return ['\\',x])
      <|> count 1 anyChar
    -- A string literal delimited by @c@ on both sides; an escaped
    -- delimiter does not end the literal. The result includes both
    -- delimiters.
    stringwith c = try $ do
      char c
      res <- liftM concat $ manyTill (charWithEsc [c]) (char c)
      return (c : (res ++ [c]))
|
||||
|
||||
-- | Parses material between style tags.
|
||||
-- Style tags must be treated differently, because they can contain CSS
|
||||
htmlStyle :: GenParser Char ParserState [Char]
|
||||
|
|
Loading…
Reference in a new issue