From 22bd34fa338edc5fb38cce5adfa11ffbc451b0d4 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 22 Sep 2012 13:59:30 -0700 Subject: [PATCH] Markdown reader: Fixed link parser to avoid exponential slowdowns. Previously the parser would hang on input like this: [[[[[[[[[[[[[[[[[[hi We fixed this by making the link parser parse characters between balanced brackets (skipping brackets in inline code spans), then parsing the result as an inline list. One change is that [hi *there]* bud](/url) is now no longer parsed as a link. But in this respect pandoc behaved differently from most other implementations anyway, so that seems okay. All current tests pass. Added test for this case. Closes #620. --- src/Text/Pandoc/Readers/Markdown.hs | 20 ++++++++++++-------- tests/Tests/Readers/Markdown.hs | 2 ++ 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 1c2cc12f1..d69348e30 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -132,15 +132,19 @@ litChar = escapedChar' -- | Parse a sequence of inline elements between square brackets, -- including inlines between balanced pairs of square brackets. inlinesInBalancedBrackets :: Parser [Char] ParserState (F Inlines) -inlinesInBalancedBrackets = try $ do +inlinesInBalancedBrackets = charsInBalancedBrackets >>= + parseFromString (trimInlinesF . 
mconcat <$> many inline) + +charsInBalancedBrackets :: Parser [Char] ParserState [Char] +charsInBalancedBrackets = do char '[' - result <- manyTill ( (do lookAhead $ try $ do x <- inline - guard (runF x def == B.str "[") - bal <- inlinesInBalancedBrackets - return $ (\x -> B.str "[" <> x <> B.str "]") <$> bal) - <|> inline) - (char ']') - return $ mconcat result + result <- manyTill ( many1 (noneOf "`[]\n") + <|> (snd <$> withRaw code) + <|> ((\xs -> '[' : xs ++ "]") <$> charsInBalancedBrackets) + <|> count 1 (satisfy (/='\n')) + <|> (newline >> notFollowedBy blankline >> return "\n") + ) (char ']') + return $ concat result -- -- document structure diff --git a/tests/Tests/Readers/Markdown.hs b/tests/Tests/Readers/Markdown.hs index 9a79f12f4..1eb2ce31e 100644 --- a/tests/Tests/Readers/Markdown.hs +++ b/tests/Tests/Readers/Markdown.hs @@ -48,6 +48,8 @@ tests = [ testGroup "inline code" [ "in URL" =: "\\begin\n" =?> para (text "\\begin") ] + , "unbalanced brackets" =: + "[[[[[[[[[[[[[[[hi" =?> para (text "[[[[[[[[[[[[[[[hi") , testGroup "backslash escapes" [ "in URL" =: "[hi](/there\\))"