More principled fix for #1820.

If the tag parses as a comment, we check to see if the
input starts with `<!--`. If not, it's bogus comment mode
and we fail htmlTag.

Includes test case.  Closes #1820.
This commit is contained in:
John MacFarlane 2015-04-17 22:55:39 -07:00
parent aaf5e67624
commit 10e28ef750
2 changed files with 10 additions and 5 deletions

View file

@ -51,7 +51,7 @@ import Text.Pandoc.Options (ReaderOptions(readerParseRaw, readerTrace)
import Text.Pandoc.Parsing hiding ((<|>))
import Text.Pandoc.Walk
import Data.Maybe ( fromMaybe, isJust)
import Data.List ( intercalate, isInfixOf )
import Data.List ( intercalate, isInfixOf, isPrefixOf )
import Data.Char ( isDigit )
import Control.Monad ( liftM, guard, when, mzero, void, unless )
import Control.Arrow ((***))
@ -887,16 +887,18 @@ htmlTag :: Monad m
=> (Tag String -> Bool)
-> ParserT [Char] st m (Tag String, String)
htmlTag f = try $ do
lookAhead $ char '<' >> ((oneOf "/!?" >> nonspaceChar) <|> letter)
(next : _) <- getInput >>= return . canonicalizeTags . parseTags
lookAhead (char '<')
inp <- getInput
let (next : _) = canonicalizeTags $ parseTags inp
guard $ f next
-- advance the parser
case next of
TagComment s -> do
TagComment s
| "<!--" `isPrefixOf` inp -> do
count (length s + 4) anyChar
skipMany (satisfy (/='>'))
char '>'
return (next, "<!--" ++ s ++ "-->")
| otherwise -> fail "bogus comment mode, HTML5 parse error"
_ -> do
rendered <- manyTill anyChar (char '>')
return (next, rendered ++ ">")

View file

@ -169,6 +169,9 @@ tests = [ testGroup "inline code"
"<del>test</del>" =?>
rawBlock "html" "<del>" <> plain (str "test") <>
rawBlock "html" "</del>"
, "invalid tag (issue #1820" =:
"</ div></.div>" =?>
para (text "</ div></.div>")
]
, "unbalanced brackets" =:
"[[[[[[[[[[[[[[[hi" =?> para (text "[[[[[[[[[[[[[[[hi")