HTML reader: Simplified parsing of <script> sections.

I had previously assumed that we needed to ignore </script> occurring in a string literal or JavaScript comment. It turns out, though, that browsers aren't that smart.
2010-12-22 19:20:27 -08:00 · 2010-12-22 19:20:27 -08:00 · c08ca6fa6d
commit c08ca6fa6d
parent 4bfe140ed1
1 changed files with 1 additions and 24 deletions
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -288,32 +288,9 @@ htmlScript :: GenParser Char ParserState [Char]
 htmlScript = try $ do
  lookAhead $ htmlOpenTag "script"
  open <- anyHtmlTag
-  rest <- liftM concat $ manyTill scriptChunk (htmlEndTag "script")
+  rest <- manyTill anyChar (htmlEndTag "script")
  return $ open ++ rest ++ "</script>"

-scriptChunk :: GenParser Char ParserState [Char]
-scriptChunk = jsComment <|> jsString <|> jsChars
-  where jsComment = jsEndlineComment <|> jsMultilineComment
-        jsString  = jsSingleQuoteString <|> jsDoubleQuoteString
-        jsChars   = many1 (noneOf "<\"'*/") <|> count 1 anyChar
-        jsEndlineComment = try $ do
-           string "//"
-           res <- manyTill anyChar newline
-           return ("//" ++ res)
-        jsMultilineComment = try $ do
-           string "/*"
-           res <- manyTill anyChar (try $ string "*/")
-           return ("/*" ++ res ++ "*/")
-        jsSingleQuoteString = stringwith '\''
-        jsDoubleQuoteString = stringwith '"'
-        charWithEsc escapable = try $
-           (try $ char '\\' >> oneOf ('\\':escapable) >>= \x -> return ['\\',x])
-          <|> count 1 anyChar
-        stringwith c = try $ do
-           char c
-           res <- liftM concat $ manyTill (charWithEsc [c]) (char c)
-           return (c : (res ++ [c]))
-
 -- | Parses material between style tags.
 -- Style tags must be treated differently, because they can contain CSS
 htmlStyle :: GenParser Char ParserState [Char]