From 943c2f353d20da99286e6487736cbccbaf1d2c72 Mon Sep 17 00:00:00 2001 From: fiddlosopher Date: Fri, 12 Sep 2008 00:05:32 +0000 Subject: [PATCH] Changed list parser so that only the starting list marker matters: 1. one - two (b) three produces an ordered list with 1., 2., 3. This is the behavior of Markdown.pl. Modified README to document the new behavior. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1438 788f1e2b-df1e-0410-8736-df70ead52e1b --- README | 23 ++-------- Text/Pandoc/Readers/Markdown.hs | 77 ++++++++++++++------------------- 2 files changed, 36 insertions(+), 64 deletions(-) diff --git a/README b/README index d5cbcb340..5f26c8a13 100644 --- a/README +++ b/README @@ -525,12 +525,13 @@ roman numerals: ii. subtwo iii. subthree -Note that Pandoc pays attention only to the *starting* number in a list. +Note that Pandoc pays attention only to the *starting* marker in a list. So, the following yields a list numbered sequentially starting from 2: (2) Two (5) Three - (2) Four + 1. Four + * Five If default list markers are desired, use '`#.`': @@ -538,24 +539,6 @@ If default list markers are desired, use '`#.`': #. two #. three -If you change list style in mid-list, Pandoc will notice and assume you -are starting a sublist. So, - - 1. One - 2. Two - A. Sub - B. Sub - 3. Three - -gets treated as if it were - - 1. One - 2. Two - A. Sub - B. Sub - 3. Three - - Definition lists ---------------- diff --git a/Text/Pandoc/Readers/Markdown.hs b/Text/Pandoc/Readers/Markdown.hs index e2a98dd6d..29e47e37b 100644 --- a/Text/Pandoc/Readers/Markdown.hs +++ b/Text/Pandoc/Readers/Markdown.hs @@ -34,7 +34,7 @@ module Text.Pandoc.Readers.Markdown ( import Data.List ( transpose, isPrefixOf, isSuffixOf, lookup, sortBy, findIndex, intercalate ) import Data.Ord ( comparing ) -import Data.Char ( isAlphaNum, isAlpha, isLower, isDigit ) +import Data.Char ( isAlphaNum, isAlpha, isLower, isDigit, isUpper ) import Data.Maybe import Text.Pandoc.Definition import Text.Pandoc.Shared @@ -441,71 +441,59 @@ anyOrderedListStart = try $ do char '.' spaceChar return (1, DefaultStyle, DefaultDelim) - else anyOrderedListMarker >>~ spaceChar + else do (num, style, delim) <- anyOrderedListMarker + -- if it could be an abbreviated first name, insist on more than one space + if delim == Period && (style == UpperAlpha || (style == UpperRoman && + num `elem` [1, 5, 10, 50, 100, 500, 1000])) + then char '\t' <|> (char ' ' >>~ notFollowedBy (satisfy isUpper)) + else spaceChar + skipSpaces + return (num, style, delim) -orderedListStart :: ListNumberStyle - -> ListNumberDelim - -> GenParser Char ParserState () -orderedListStart style delim = try $ do - optional newline -- if preceded by a Plain block in a list context - nonindentSpaces - state <- getState - num <- if stateStrict state - then do many1 digit - char '.' - return 1 - else orderedListMarker style delim - if delim == Period && (style == UpperAlpha || (style == UpperRoman && - num `elem` [1, 5, 10, 50, 100, 500, 1000])) - then char '\t' <|> (spaceChar >> spaceChar) - else spaceChar - skipSpaces +listStart :: GenParser Char ParserState () +listStart = bulletListStart <|> (anyOrderedListStart >> return ()) -- parse a line of a list item (start = parser for beginning of list item) -listLine :: GenParser Char ParserState () - -> GenParser Char ParserState [Char] -listLine start = try $ do - notFollowedBy' start +listLine :: GenParser Char ParserState [Char] +listLine = try $ do + notFollowedBy' listStart notFollowedBy blankline notFollowedBy' (do indentSpaces many (spaceChar) - bulletListStart <|> (anyOrderedListStart >> return ())) + listStart) line <- manyTill anyChar newline return $ line ++ "\n" -- parse raw text for one list item, excluding start marker and continuations -rawListItem :: GenParser Char ParserState () - -> GenParser Char ParserState [Char] -rawListItem start = try $ do - start - result <- many1 (listLine start) +rawListItem :: GenParser Char ParserState [Char] +rawListItem = try $ do + listStart + result <- many1 listLine blanks <- many blankline return $ concat result ++ blanks -- continuation of a list item - indented and separated by blankline -- or (in compact lists) endline. -- note: nested lists are parsed as continuations -listContinuation :: GenParser Char ParserState () -> GenParser Char ParserState [Char] -listContinuation start = try $ do +listContinuation :: GenParser Char ParserState [Char] +listContinuation = try $ do lookAhead indentSpaces - result <- many1 (listContinuationLine start) + result <- many1 listContinuationLine blanks <- many blankline return $ concat result ++ blanks -listContinuationLine :: GenParser Char ParserState () - -> GenParser Char ParserState [Char] -listContinuationLine start = try $ do +listContinuationLine :: GenParser Char ParserState [Char] +listContinuationLine = try $ do notFollowedBy blankline - notFollowedBy' start + notFollowedBy' listStart optional indentSpaces result <- manyTill anyChar newline return $ result ++ "\n" -listItem :: GenParser Char ParserState () - -> GenParser Char ParserState [Block] -listItem start = try $ do - first <- rawListItem start - continuations <- many (listContinuation start) +listItem :: GenParser Char ParserState [Block] +listItem = try $ do + first <- rawListItem + continuations <- many listContinuation -- parsing with ListItemState forces markers at beginning of lines to -- count as list item markers, even if not separated by blank space. -- see definition of "endline" @@ -521,12 +509,13 @@ listItem start = try $ do orderedList :: GenParser Char ParserState Block orderedList = try $ do (start, style, delim) <- lookAhead anyOrderedListStart - items <- many1 (listItem (orderedListStart style delim)) + items <- many1 listItem return $ OrderedList (start, style, delim) $ compactify items bulletList :: GenParser Char ParserState Block -bulletList = many1 (listItem bulletListStart) >>= - return . BulletList . compactify +bulletList = try $ do + lookAhead bulletListStart + many1 listItem >>= return . BulletList . compactify -- definition lists