Markdown reader: Fixed link parser to avoid exponential slowdowns.

Previously the parser would hang on input like this:

    [[[[[[[[[[[[[[[[[[hi

We fixed this by making the link parser parse characters
between balanced brackets (skipping brackets in inline code spans),
then parsing the result as an inline list.

One change is that

    [hi *there]* bud](/url)

is no longer parsed as a link.  But in this respect pandoc behaved
differently from most other implementations anyway, so that seems okay.
All current tests pass.  Added a test for the unbalanced-bracket input shown above.

Closes #620.
This commit is contained in:
John MacFarlane 2012-09-22 13:59:30 -07:00
parent 1038606036
commit 22bd34fa33
2 changed files with 14 additions and 8 deletions

View file

@ -132,15 +132,19 @@ litChar = escapedChar'
-- | Parse a sequence of inline elements between square brackets,
-- including inlines between balanced pairs of square brackets.
inlinesInBalancedBrackets :: Parser [Char] ParserState (F Inlines)
inlinesInBalancedBrackets = try $ do
inlinesInBalancedBrackets = charsInBalancedBrackets >>=
parseFromString (trimInlinesF . mconcat <$> many inline)
charsInBalancedBrackets :: Parser [Char] ParserState [Char]
charsInBalancedBrackets = do
char '['
result <- manyTill ( (do lookAhead $ try $ do x <- inline
guard (runF x def == B.str "[")
bal <- inlinesInBalancedBrackets
return $ (\x -> B.str "[" <> x <> B.str "]") <$> bal)
<|> inline)
(char ']')
return $ mconcat result
result <- manyTill ( many1 (noneOf "`[]\n")
<|> (snd <$> withRaw code)
<|> ((\xs -> '[' : xs ++ "]") <$> charsInBalancedBrackets)
<|> count 1 (satisfy (/='\n'))
<|> (newline >> notFollowedBy blankline >> return "\n")
) (char ']')
return $ concat result
--
-- document structure

View file

@ -48,6 +48,8 @@ tests = [ testGroup "inline code"
[ "in URL" =:
"\\begin\n" =?> para (text "\\begin")
]
, "unbalanced brackets" =:
"[[[[[[[[[[[[[[[hi" =?> para (text "[[[[[[[[[[[[[[[hi")
, testGroup "backslash escapes"
[ "in URL" =:
"[hi](/there\\))"