diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 52358e51e..9eeab620b 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -51,7 +51,7 @@ import Text.Pandoc.Options (ReaderOptions(readerParseRaw, readerTrace) import Text.Pandoc.Parsing hiding ((<|>)) import Text.Pandoc.Walk import Data.Maybe ( fromMaybe, isJust) -import Data.List ( intercalate, isInfixOf ) +import Data.List ( intercalate, isInfixOf, isPrefixOf ) import Data.Char ( isDigit ) import Control.Monad ( liftM, guard, when, mzero, void, unless ) import Control.Arrow ((***)) @@ -887,16 +887,18 @@ htmlTag :: Monad m => (Tag String -> Bool) -> ParserT [Char] st m (Tag String, String) htmlTag f = try $ do - lookAhead $ char '<' >> ((oneOf "/!?" >> nonspaceChar) <|> letter) - (next : _) <- getInput >>= return . canonicalizeTags . parseTags + lookAhead (char '<') + inp <- getInput + let (next : _) = canonicalizeTags $ parseTags inp guard $ f next - -- advance the parser case next of - TagComment s -> do + TagComment s + | "<!--" `isPrefixOf` inp -> do count (length s + 4) anyChar skipMany (satisfy (/='>')) char '>' return (next, "<!--" ++ s ++ "-->") + | otherwise -> fail "bogus comment mode, HTML5 parse error" _ -> do rendered <- manyTill anyChar (char '>') return (next, rendered ++ ">") diff --git a/tests/Tests/Readers/Markdown.hs b/tests/Tests/Readers/Markdown.hs index 1cc00fd5e..8965d1d6e 100644 --- a/tests/Tests/Readers/Markdown.hs +++ b/tests/Tests/Readers/Markdown.hs @@ -169,6 +169,9 @@ tests = [ testGroup "inline code" "<del>test</del>" =?> rawBlock "html" "<del>" <> plain (str "test") <> rawBlock "html" "</del>" + , "invalid tag (issue #1820" =: + "</ div></.div>" =?> + para (text "</ div></.div>") ] , "unbalanced brackets" =: "[[[[[[[[[[[[[[[hi" =?> para (text "[[[[[[[[[[[[[[[hi")