Changed list parser so that only the starting list marker matters:

    1. one
    - two
    (b) three

produces an ordered list with 1., 2., 3. This is the behavior of Markdown.pl. Modified README to document the new behavior.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1438 788f1e2b-df1e-0410-8736-df70ead52e1b
2008-09-12 00:05:32 +00:00 · 2008-09-12 00:05:32 +00:00 · 943c2f353d
commit 943c2f353d
parent c39a3fe7c4
2 changed files with 36 additions and 64 deletions
--- a/23
+++ b/23
@@ -525,12 +525,13 @@ roman numerals:
          ii. subtwo
         iii. subthree
-Note that Pandoc pays attention only to the *starting* number in a list.
+Note that Pandoc pays attention only to the *starting* marker in a list.
 So, the following yields a list numbered sequentially starting from 2:
    (2) Two
    (5) Three
-    (2) Four
+    1.  Four
    *   Five
 If default list markers are desired, use '`#.`':
@@ -538,24 +539,6 @@ If default list markers are desired, use '`#.`':
    #.  two
    #.  three
 If you change list style in mid-list, Pandoc will notice and assume you
 are starting a sublist. So,
    1.  One
    2.  Two
    A.  Sub
    B.  Sub
    3.  Three
 gets treated as if it were
    1.  One
    2.  Two
        A.  Sub
        B.  Sub
    3.  Three
 Definition lists
 ----------------
--- a/Text/Pandoc/Readers/Markdown.hs
+++ b/Text/Pandoc/Readers/Markdown.hs
@@ -34,7 +34,7 @@ module Text.Pandoc.Readers.Markdown (
 import Data.List ( transpose, isPrefixOf, isSuffixOf, lookup, sortBy, findIndex, intercalate )
 import Data.Ord ( comparing )
-import Data.Char ( isAlphaNum, isAlpha, isLower, isDigit )
+import Data.Char ( isAlphaNum, isAlpha, isLower, isDigit, isUpper )
 import Data.Maybe
 import Text.Pandoc.Definition
 import Text.Pandoc.Shared 
@@ -441,71 +441,59 @@ anyOrderedListStart = try $ do
             char '.'
             spaceChar
             return (1, DefaultStyle, DefaultDelim)
-     else anyOrderedListMarker >>~ spaceChar
+     else do (num, style, delim) <- anyOrderedListMarker
-
+             -- if it could be an abbreviated first name, insist on more than one space
 orderedListStart :: ListNumberStyle
                 -> ListNumberDelim
                 -> GenParser Char ParserState ()
 orderedListStart style delim = try $ do
  optional newline -- if preceded by a Plain block in a list context
  nonindentSpaces
  state <- getState
  num <- if stateStrict state
            then do many1 digit
                    char '.'
                    return 1
            else orderedListMarker style delim 
             if delim == Period && (style == UpperAlpha || (style == UpperRoman &&
                num `elem` [1, 5, 10, 50, 100, 500, 1000]))
-     then char '\t' <|> (spaceChar >> spaceChar)
+                then char '\t' <|> (char ' ' >>~ notFollowedBy (satisfy isUpper))
                else spaceChar
             skipSpaces
             return (num, style, delim)
 listStart :: GenParser Char ParserState ()
 listStart = bulletListStart <|> (anyOrderedListStart >> return ())
 -- parse a line of a list item (start = parser for beginning of list item)
-listLine :: GenParser Char ParserState ()
+listLine :: GenParser Char ParserState [Char]
-         -> GenParser Char ParserState [Char]
+listLine = try $ do
-listLine start = try $ do
+  notFollowedBy' listStart
  notFollowedBy' start
  notFollowedBy blankline
  notFollowedBy' (do indentSpaces
                     many (spaceChar)
-                     bulletListStart <|> (anyOrderedListStart >> return ()))
+                     listStart)
  line <- manyTill anyChar newline
  return $ line ++ "\n"
 -- parse raw text for one list item, excluding start marker and continuations
-rawListItem :: GenParser Char ParserState () 
+rawListItem :: GenParser Char ParserState [Char]
-            -> GenParser Char ParserState [Char]
+rawListItem = try $ do
-rawListItem start = try $ do
+  listStart
-  start
+  result <- many1 listLine
  result <- many1 (listLine start)
  blanks <- many blankline
  return $ concat result ++ blanks
 -- continuation of a list item - indented and separated by blankline 
 -- or (in compact lists) endline.
 -- note: nested lists are parsed as continuations
-listContinuation :: GenParser Char ParserState () -> GenParser Char ParserState [Char]
+listContinuation :: GenParser Char ParserState [Char]
-listContinuation start = try $ do
+listContinuation = try $ do
  lookAhead indentSpaces
-  result <- many1 (listContinuationLine start)
+  result <- many1 listContinuationLine
  blanks <- many blankline
  return $ concat result ++ blanks
-listContinuationLine :: GenParser Char ParserState ()
+listContinuationLine :: GenParser Char ParserState [Char]
-                     -> GenParser Char ParserState [Char]
+listContinuationLine = try $ do
 listContinuationLine start = try $ do
  notFollowedBy blankline
-  notFollowedBy' start
+  notFollowedBy' listStart
  optional indentSpaces
  result <- manyTill anyChar newline
  return $ result ++ "\n"
-listItem :: GenParser Char ParserState ()
+listItem :: GenParser Char ParserState [Block]
-         -> GenParser Char ParserState [Block]
+listItem = try $ do 
-listItem start = try $ do 
+  first <- rawListItem
-  first <- rawListItem start
+  continuations <- many listContinuation
  continuations <- many (listContinuation start)
  -- parsing with ListItemState forces markers at beginning of lines to
  -- count as list item markers, even if not separated by blank space.
  -- see definition of "endline"
@@ -521,12 +509,13 @@ listItem start = try $ do
 orderedList :: GenParser Char ParserState Block
 orderedList = try $ do
  (start, style, delim) <- lookAhead anyOrderedListStart
-  items <- many1 (listItem (orderedListStart style delim))
+  items <- many1 listItem
  return $ OrderedList (start, style, delim) $ compactify items
 bulletList :: GenParser Char ParserState Block
-bulletList = many1 (listItem bulletListStart) >>= 
+bulletList = try $ do
-             return . BulletList . compactify
+  lookAhead bulletListStart
  many1 listItem >>= return . BulletList . compactify
 -- definition lists