From 22bd34fa338edc5fb38cce5adfa11ffbc451b0d4 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 22 Sep 2012 13:59:30 -0700
Subject: [PATCH] Markdown reader: Fixed link parser to avoid exponential
 slowdowns.

Previously the parser would hang on input like this:

    [[[[[[[[[[[[[[[[[[hi

We fixed this by making the link parser parse characters
between balanced brackets (skipping brackets in inline code spans),
then parsing the result as an inline list.

One change is that

    [hi *there]* bud](/url)

is now no longer parsed as a link.  But in this respect pandoc behaved
differently from most other implementations anyway, so that seems okay.
All current tests pass.  Added test for this case.

Closes #620.
---
 src/Text/Pandoc/Readers/Markdown.hs | 20 ++++++++++++--------
 tests/Tests/Readers/Markdown.hs     |  2 ++
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 1c2cc12f1..d69348e30 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -132,15 +132,19 @@ litChar = escapedChar'
 -- | Parse a sequence of inline elements between square brackets,
 -- including inlines between balanced pairs of square brackets.
 inlinesInBalancedBrackets :: Parser [Char] ParserState (F Inlines)
-inlinesInBalancedBrackets = try $ do
+inlinesInBalancedBrackets = charsInBalancedBrackets >>=
+  parseFromString (trimInlinesF . mconcat <$> many inline)
+
+charsInBalancedBrackets :: Parser [Char] ParserState [Char]
+charsInBalancedBrackets = do
   char '['
-  result <- manyTill ( (do lookAhead $ try $ do x <- inline
-                                                guard (runF x def == B.str "[")
-                           bal <- inlinesInBalancedBrackets
-                           return $ (\x -> B.str "[" <> x <> B.str "]") <$> bal)
-                       <|> inline)
-                     (char ']')
-  return $ mconcat result
+  result <- manyTill (  many1 (noneOf "`[]\n")
+                    <|> (snd <$> withRaw code)
+                    <|> ((\xs -> '[' : xs ++ "]") <$> charsInBalancedBrackets)
+                    <|> count 1 (satisfy (/='\n'))
+                    <|> (newline >> notFollowedBy blankline >> return "\n")
+                     ) (char ']')
+  return $ concat result
 
 --
 -- document structure
diff --git a/tests/Tests/Readers/Markdown.hs b/tests/Tests/Readers/Markdown.hs
index 9a79f12f4..1eb2ce31e 100644
--- a/tests/Tests/Readers/Markdown.hs
+++ b/tests/Tests/Readers/Markdown.hs
@@ -48,6 +48,8 @@ tests = [ testGroup "inline code"
           [ "in URL" =:
             "\\begin\n" =?> para (text "\\begin")
           ]
+        , "unbalanced brackets" =:
+            "[[[[[[[[[[[[[[[hi" =?> para (text "[[[[[[[[[[[[[[[hi")
         , testGroup "backslash escapes"
           [ "in URL" =:
             "[hi](/there\\))"