HTML reader: Modified htmlTag for fewer false positives.

A tag must start with `<` followed by `!`, `?`, `/`, or a letter.

This makes it more useful in the MediaWiki and Markdown parsers.
This commit is contained in:
John MacFarlane 2012-09-15 15:46:16 -04:00
parent c5de3c411f
commit 887fc14f3d

View file

@ -599,7 +599,7 @@ htmlInBalanced f = try $ do
-- | Matches a tag meeting a certain condition. -- | Matches a tag meeting a certain condition.
htmlTag :: (Tag String -> Bool) -> Parser [Char] st (Tag String, String) htmlTag :: (Tag String -> Bool) -> Parser [Char] st (Tag String, String)
htmlTag f = try $ do htmlTag f = try $ do
lookAhead (char '<') lookAhead $ char '<' >> (oneOf "/!?" <|> letter)
(next : _) <- getInput >>= return . canonicalizeTags . parseTags (next : _) <- getInput >>= return . canonicalizeTags . parseTags
guard $ f next guard $ f next
-- advance the parser -- advance the parser