Allow markdown tables without headers.

Resolves Issue #50. The new syntax is described in README. Also allow optional line of dashes at bottom of simple tables. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1652 788f1e2b-df1e-0410-8736-df70ead52e1b
2009-12-05 21:34:46 +00:00 · 2009-12-05 21:34:46 +00:00 · ad5450266c
commit ad5450266c
parent de38adc7f3
2 changed files with 87 additions and 31 deletions
--- a/40
+++ b/40
@ -705,9 +705,23 @@ to the dashed line below it:[^3]
 [^3]:  This scheme is due to Michel Fortin, who proposed it on the
       [Markdown discussion list](http://six.pairlist.net/pipermail/markdown-discuss/2005-March/001097.html).
-The table must end with a blank line.  Optionally, a caption may be
+The table must end with a blank line, or a line of dashes followed by
-provided (as illustrated in the example above).  A caption is a paragraph
+a blank line. A caption may optionally be provided (as illustrated in
-beginning with the string `Table:`, which will be stripped off.
+the example above). A caption is a paragraph beginning with the string
 `Table:`, which will be stripped off.
 The column headers may be omitted, provided a dashed line is used
 to end the table. For example:
    -------     ------ ----------   -------
         12     12        12             12
        123     123       123           123
          1     1          1              1
    -------     ------ ----------   -------
 When headers are omitted, column alignments are determined on the basis
 of the first line of the table body. So, in the tables above, the columns
 would be right, left, center, and right aligned, respectively.
 Multiline tables allow headers and table rows to span multiple lines
 of text.  Here is an example:
@ -729,7 +743,8 @@ of text.  Here is an example:
 These work like simple tables, but with the following differences:
-  - They must begin with a row of dashes, before the header text.
+  - They must begin with a row of dashes, before the header text
    (unless the headers are omitted).
  - They must end with a row of dashes, then a blank line.
  - The rows must be separated by blank lines.
@ -738,6 +753,23 @@ the columns, and the writers try to reproduce these relative widths in
 the output. So, if you find that one of the columns is too narrow in the
 output, try widening it in the markdown source.
 Headers may be omitted in multiline tables as well as simple tables:
    ----------- ------- --------------- -------------------------
       First    row                12.0 Example of a row that
                                        spans multiple lines.
      Second    row                 5.0 Here's another one. Note
                                        the blank line between
                                        rows.
    -------------------------------------------------------------
    Table: Here's a multiline table without headers.
 It is possible for a multiline table to have just one row, but the row
 should be followed by a blank line (and then the row of dashes that ends
 the table), or the table may be interpreted as a simple table.
 Delimited Code blocks
 ---------------------
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@ -45,7 +45,7 @@ import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag,
                                  htmlBlockElement, htmlComment, unsanitaryURI )
 import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
 import Text.ParserCombinators.Parsec
-import Control.Monad (when, liftM)
+import Control.Monad (when, liftM, unless)
 -- | Read markdown from an input string and return a Pandoc document.
 readMarkdown :: ParserState -- ^ Parser state, including options for parser
@ -109,7 +109,7 @@ failUnlessBeginningOfLine = do
 failUnlessSmart :: GenParser tok ParserState ()
 failUnlessSmart = do
  state <- getState
-  if stateSmart state then return () else fail "Smart typography feature"
+  if stateSmart state then return () else pzero
 -- | Parse a sequence of inline elements between square brackets,
 -- including inlines between balanced pairs of square brackets.
@ -118,9 +118,7 @@ inlinesInBalancedBrackets :: GenParser Char ParserState Inline
 inlinesInBalancedBrackets parser = try $ do
  char '['
  result <- manyTill ( (do lookAhead $ try $ do (Str res) <- parser
-                                                if res == "["
+                                                unless (res == "[") pzero
                                                   then return ()
                                                   else pzero
                           bal <- inlinesInBalancedBrackets parser
                           return $ [Str "["] ++ bal ++ [Str "]"])
                       <|> (count 1 parser))
@ -678,26 +676,36 @@ dashedLine ch = do
  return $ (length dashes, length $ dashes ++ sp)
 -- Parse a table header with dashed lines of '-' preceded by 
-- one line of text.
+-- one (or zero) line of text.
-simpleTableHeader :: GenParser Char ParserState ([[Char]], [Alignment], [Int])
+simpleTableHeader :: Bool  -- ^ Headerless table 
-simpleTableHeader = try $ do
+                  -> GenParser Char ParserState ([[Char]], [Alignment], [Int])
-  rawContent  <- anyLine
+simpleTableHeader headless = try $ do
  rawContent  <- if headless
                    then return ""
                    else anyLine
  initSp      <- nonindentSpaces
  dashes      <- many1 (dashedLine '-')
  newline
  let (lengths, lines') = unzip dashes
  let indices  = scanl (+) (length initSp) lines'
-  let rawHeads = tail $ splitByIndices (init indices) rawContent
+  -- If no header, calculate alignment on basis of first row of text
  rawHeads <- liftM (tail . splitByIndices (init indices)) $
              if headless
                 then lookAhead anyLine 
                 else return rawContent
  let aligns   = zipWith alignType (map (\a -> [a]) rawHeads) lengths
-  return (rawHeads, aligns, indices)
+  let rawHeads' = if headless
                     then replicate (length dashes) ""
                     else rawHeads 
  return (rawHeads', aligns, indices)
 -- Parse a table footer - dashed lines followed by blank line.
 tableFooter :: GenParser Char ParserState [Char]
 tableFooter = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> blanklines
 -- Parse a table separator - dashed line.
-tableSep :: GenParser Char ParserState String
+tableSep :: GenParser Char ParserState Char
-tableSep = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> string "\n"
+tableSep = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> char '\n'
 -- Parse a raw line and split it into chunks by indices.
 rawTableLine :: [Int]
@ -772,9 +780,11 @@ tableWith headerParser lineParser footerParser = try $ do
    return $ Table caption aligns widths heads lines'
 -- Parse a simple table with '---' header and one line per row.
-simpleTable :: GenParser Char ParserState Block
+simpleTable :: Bool  -- ^ Headerless table
-simpleTable = do
+            -> GenParser Char ParserState Block
-  Table c a _w h l <- tableWith simpleTableHeader tableLine blanklines
+simpleTable headless = do
  Table c a _w h l <- tableWith (simpleTableHeader headless) tableLine
               (if headless then tableFooter else tableFooter <|> blanklines)
  -- Simple tables get 0s for relative column widths (i.e., use default)
  return $ Table c a (replicate (length a) 0) h l
@ -782,23 +792,36 @@ simpleTable = do
 -- (which may be multiline), then the rows,
 -- which may be multiline, separated by blank lines, and
 -- ending with a footer (dashed line followed by blank line).
-multilineTable :: GenParser Char ParserState Block
+multilineTable :: Bool -- ^ Headerless table
-multilineTable = tableWith multilineTableHeader multilineRow tableFooter
+               -> GenParser Char ParserState Block
 multilineTable headless =
  tableWith (multilineTableHeader headless) multilineRow tableFooter
-multilineTableHeader :: GenParser Char ParserState ([String], [Alignment], [Int])
+multilineTableHeader :: Bool -- ^ Headerless table
-multilineTableHeader = try $ do
+                     -> GenParser Char ParserState ([String], [Alignment], [Int])
-  tableSep 
+multilineTableHeader headless = try $ do
-  rawContent  <- many1 (notFollowedBy' tableSep >> many1Till anyChar newline)
+  if headless
     then return '\n'
     else tableSep
  rawContent  <- if headless
                    then return $ repeat "" 
                    else many1
                         (notFollowedBy tableSep >> many1Till anyChar newline)
  initSp      <- nonindentSpaces
  dashes      <- many1 (dashedLine '-')
  newline
  let (lengths, lines') = unzip dashes
  let indices  = scanl (+) (length initSp) lines'
-  let rawHeadsList = transpose $ map 
+  rawHeadsList <- if headless
                     then liftM (map (:[]) . tail .
                              splitByIndices (init indices)) $ lookAhead anyLine
                     else return $ transpose $ map 
                           (\ln -> tail $ splitByIndices (init indices) ln)
                           rawContent
  let rawHeads = map (intercalate " ") rawHeadsList
  let aligns   = zipWith alignType rawHeadsList lengths
  let rawHeads = if headless
                    then replicate (length dashes) ""
                    else map (intercalate " ") rawHeadsList
  return ((map removeLeadingTrailingSpace rawHeads), aligns, indices)
 -- Returns an alignment type for a table, based on a list of strings
@ -820,7 +843,8 @@ alignType strLst len =
        (False, False)   -> AlignDefault
 table :: GenParser Char ParserState Block
-table = simpleTable <|> multilineTable <?> "table"
+table = multilineTable False <|> simpleTable True <|>
        simpleTable False <|> multilineTable True <?> "table"
 -- 
 -- inline