diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index b663f7fa9..fdf5aa332 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -59,9 +59,14 @@ module Text.Pandoc.Parsing ( module Text.Pandoc.Sources, charRef, lineBlockLines, tableWith, + tableWith', widthsFromIndices, gridTableWith, gridTableWith', + TableComponents (..), + TableNormalization (..), + toTableComponents, + toTableComponents', readWith, readWithM, testStringWith, diff --git a/src/Text/Pandoc/Parsing/GridTable.hs b/src/Text/Pandoc/Parsing/GridTable.hs index bdfcb2bb3..1c029df8a 100644 --- a/src/Text/Pandoc/Parsing/GridTable.hs +++ b/src/Text/Pandoc/Parsing/GridTable.hs @@ -1,4 +1,5 @@ {-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Parsing.GridTable @@ -14,6 +15,11 @@ module Text.Pandoc.Parsing.GridTable , tableWith , tableWith' , widthsFromIndices + -- * Components of a plain-text table + , TableComponents (..) + , TableNormalization (..) + , toTableComponents + , toTableComponents' ) where @@ -34,6 +40,65 @@ import Text.Parsec import qualified Data.Text as T import qualified Text.Pandoc.Builder as B +-- | Collection of components making up a Table block. +data TableComponents = TableComponents + { tableAttr :: Attr + , tableCaption :: Caption + , tableColSpecs :: [ColSpec] + , tableHead :: TableHead + , tableBodies :: [TableBody] + , tableFoot :: TableFoot + } + +-- | Creates a table block from the collection of table parts. +tableFromComponents :: TableComponents -> Blocks +tableFromComponents (TableComponents attr capt colspecs th tb tf) = + B.tableWith attr capt colspecs th tb tf + +-- | Bundles basic table components into a single value. +toTableComponents :: [Alignment] -> [Double] -> [Blocks] -> [[Blocks]] + -> TableComponents +toTableComponents = toTableComponents' NoNormalization + +-- | Bundles basic table components into a single value, performing +-- normalizations as necessary. +toTableComponents' :: TableNormalization + -> [Alignment] -> [Double] -> [Blocks] -> [[Blocks]] + -> TableComponents +toTableComponents' normalization aligns widths heads rows = + let th = TableHead nullAttr (toHeaderRow normalization heads) + tb = TableBody nullAttr 0 [] (map toRow rows) + tf = TableFoot nullAttr [] + colspecs = toColSpecs aligns widths + in TableComponents nullAttr B.emptyCaption colspecs th [tb] tf + +-- | Combine a list of column alignments and column widths into a list +-- of column specifiers. Both input lists should have the same length. +toColSpecs :: [Alignment] -- ^ column alignments + -> [Double] -- ^ column widths + -> [ColSpec] +toColSpecs aligns widths = zip aligns (map fromWidth widths') + where + fromWidth n + | n > 0 = ColWidth n + | otherwise = ColWidthDefault + + -- renormalize widths if greater than 100%: + totalWidth = sum widths + widths' = if totalWidth < 1 + then widths + else map (/ totalWidth) widths + +-- | Whether the table header should be normalized, i.e., whether an header row +-- with only empty cells should be omitted. +data TableNormalization + = NoNormalization + | NormalizeHeader + +-- +-- Grid Tables +-- + -- | Parse a grid table: starts with row of '-' on top, then header -- (which may be grid), then the rows, which may be grid, separated by -- blank lines, and ending with a footer (dashed line followed by blank @@ -50,11 +115,13 @@ gridTableWith blocks headless = -- Table. gridTableWith' :: (Monad m, Monad mf, HasReaderOptions st, HasLastStrPosition st) - => ParserT Sources st m (mf Blocks) -- ^ Block list parser + => TableNormalization + -> ParserT Sources st m (mf Blocks) -- ^ Block list parser -> Bool -- ^ Headerless table - -> ParserT Sources st m (TableComponents mf) -gridTableWith' blocks headless = - tableWith' (gridTableHeader headless blocks) (gridTableRow blocks) + -> ParserT Sources st m (mf TableComponents) +gridTableWith' normalization blocks headless = + tableWith' normalization + (gridTableHeader headless blocks) (gridTableRow blocks) (gridTableSep '-') gridTableFooter gridTableSplitLine :: [Int] -> Text -> [Text] @@ -162,44 +229,39 @@ gridTableFooter = optional blanklines -- 'lineParser', and 'footerParser'. tableWith :: (Stream s m Char, UpdateSourcePos s Char, HasReaderOptions st, Monad mf) - => ParserT s st m (mf [Blocks], [Alignment], [Int]) - -> ([Int] -> ParserT s st m (mf [Blocks])) - -> ParserT s st m sep - -> ParserT s st m end + => ParserT s st m (mf [Blocks], [Alignment], [Int]) -- ^ header parser + -> ([Int] -> ParserT s st m (mf [Blocks])) -- ^ row parser + -> ParserT s st m sep -- ^ line parser + -> ParserT s st m end -- ^ footer parser -> ParserT s st m (mf Blocks) -tableWith headerParser rowParser lineParser footerParser = try $ do - (aligns, widths, heads, rows) <- tableWith' headerParser rowParser - lineParser footerParser - let th = TableHead nullAttr <$> heads - tb = (:[]) . TableBody nullAttr 0 [] <$> rows - tf = pure $ TableFoot nullAttr [] - colspecs = zip aligns (map fromWidth widths) - return $ B.table B.emptyCaption colspecs <$> th <*> tb <*> tf - where - fromWidth n - | n > 0 = ColWidth n - | otherwise = ColWidthDefault - -type TableComponents mf = ([Alignment], [Double], mf [Row], mf [Row]) +tableWith hp rp lp fp = fmap tableFromComponents <$> + tableWith' NoNormalization hp rp lp fp tableWith' :: (Stream s m Char, UpdateSourcePos s Char, HasReaderOptions st, Monad mf) - => ParserT s st m (mf [Blocks], [Alignment], [Int]) - -> ([Int] -> ParserT s st m (mf [Blocks])) - -> ParserT s st m sep - -> ParserT s st m end - -> ParserT s st m (TableComponents mf) -tableWith' headerParser rowParser lineParser footerParser = try $ do - (heads, aligns, indices) <- headerParser - lines' <- sequence <$> rowParser indices `sepEndBy1` lineParser - footerParser - numColumns <- getOption readerColumns - let widths = if null indices - then replicate (length aligns) 0.0 - else widthsFromIndices numColumns indices - let toRow = Row nullAttr . map B.simpleCell - toHeaderRow l = [toRow l | not (null l)] - return (aligns, widths, toHeaderRow <$> heads, map toRow <$> lines') + => TableNormalization + -> ParserT s st m (mf [Blocks], [Alignment], [Int]) -- ^ header parser + -> ([Int] -> ParserT s st m (mf [Blocks])) -- ^ row parser + -> ParserT s st m sep -- ^ line parser + -> ParserT s st m end -- ^ footer parser + -> ParserT s st m (mf TableComponents) +tableWith' n11n headerParser rowParser lineParser footerParser = try $ do + (heads, aligns, indices) <- headerParser + lines' <- sequence <$> rowParser indices `sepEndBy1` lineParser + footerParser + numColumns <- getOption readerColumns + let widths = if null indices + then replicate (length aligns) 0.0 + else widthsFromIndices numColumns indices + return $ toTableComponents' n11n aligns widths <$> heads <*> lines' + +toRow :: [Blocks] -> Row +toRow = Row nullAttr . map B.simpleCell + +toHeaderRow :: TableNormalization -> [Blocks] -> [Row] +toHeaderRow = \case + NoNormalization -> \l -> [toRow l | not (null l)] + NormalizeHeader -> \l -> [toRow l | not (null l) && not (all null l)] -- Calculate relative widths of table columns, based on indices widthsFromIndices :: Int -- Number of columns on terminal diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 307d09a12..4e0a1fa6a 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -21,6 +21,7 @@ module Text.Pandoc.Readers.Markdown ( import Control.Monad import Control.Monad.Except (throwError) +import Data.Bifunctor (second) import Data.Char (isAlphaNum, isPunctuation, isSpace) import Text.DocLayout (realLength) import Data.List (transpose, elemIndex, sortOn, foldl') @@ -44,7 +45,7 @@ import Safe.Foldable (maximumBounded) import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Walk (walk) -import Text.Pandoc.Parsing hiding (tableWith) +import Text.Pandoc.Parsing hiding (tableCaption) import Text.Pandoc.Readers.HTML (htmlInBalanced, htmlTag, isBlockTag, isCommentTag, isInlineTag, isTextTag) import Text.Pandoc.Readers.LaTeX (applyMacros, rawLaTeXBlock, rawLaTeXInline) @@ -1298,14 +1299,18 @@ tableCaption = do -- Parse a simple table with '---' header and one line per row. simpleTable :: PandocMonad m => Bool -- ^ Headerless table - -> MarkdownParser m ([Alignment], [Double], F [Row], F [Row]) + -> MarkdownParser m (F TableComponents) simpleTable headless = do - (aligns, _widths, heads', lines') <- - tableWith (simpleTableHeader headless) tableLine + tableComponents <- + tableWith' NormalizeHeader + (simpleTableHeader headless) tableLine (return ()) (if headless then tableFooter else tableFooter <|> blanklines') - -- Simple tables get 0s for relative column widths (i.e., use default) - return (aligns, replicate (length aligns) 0, heads', lines') + -- All columns in simple tables have default widths. + let useDefaultColumnWidths tc = + let cs' = map (second (const ColWidthDefault)) $ tableColSpecs tc + in tc {tableColSpecs = cs'} + return $ useDefaultColumnWidths <$> tableComponents -- Parse a multiline table: starts with row of '-' on top, then header -- (which may be multiline), then the rows, @@ -1313,9 +1318,10 @@ simpleTable headless = do -- ending with a footer (dashed line followed by blank line). multilineTable :: PandocMonad m => Bool -- ^ Headerless table - -> MarkdownParser m ([Alignment], [Double], F [Row], F [Row]) + -> MarkdownParser m (F TableComponents) multilineTable headless = - tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter + tableWith' NormalizeHeader (multilineTableHeader headless) + multilineRow blanklines tableFooter multilineTableHeader :: PandocMonad m => Bool -- ^ Headerless table @@ -1355,8 +1361,8 @@ multilineTableHeader headless = try $ do -- which may be grid, separated by blank lines, and -- ending with a footer (dashed line followed by blank line). gridTable :: PandocMonad m => Bool -- ^ Headerless table - -> MarkdownParser m ([Alignment], [Double], F [Row], F [Row]) -gridTable headless = gridTableWith' parseBlocks headless + -> MarkdownParser m (F TableComponents) +gridTable headless = gridTableWith' NormalizeHeader parseBlocks headless pipeBreak :: PandocMonad m => MarkdownParser m ([Alignment], [Int]) pipeBreak = try $ do @@ -1370,7 +1376,7 @@ pipeBreak = try $ do blankline return $ unzip (first:rest) -pipeTable :: PandocMonad m => MarkdownParser m ([Alignment], [Double], F [Row], F [Row]) +pipeTable :: PandocMonad m => MarkdownParser m (F TableComponents) pipeTable = try $ do nonindentSpaces lookAhead nonspaceChar @@ -1390,7 +1396,8 @@ pipeTable = try $ do else replicate (length aligns) 0.0 (headCells :: F [Blocks]) <- sequence <$> mapM cellContents heads' (rows :: F [[Blocks]]) <- sequence <$> mapM (fmap sequence . mapM cellContents) lines'' - return (aligns, widths, toHeaderRow <$> headCells, map toRow <$> rows) + return $ + toTableComponents' NormalizeHeader aligns widths <$> headCells <*> rows sepPipe :: PandocMonad m => MarkdownParser m () sepPipe = try $ do @@ -1446,29 +1453,10 @@ scanForPipe = do (_, T.uncons -> Just ('|', _)) -> return () _ -> mzero --- | Parse a table using 'headerParser', 'rowParser', --- 'lineParser', and 'footerParser'. Variant of the version in --- Text.Pandoc.Parsing. -tableWith :: PandocMonad m - => MarkdownParser m (F [Blocks], [Alignment], [Int]) - -> ([Int] -> MarkdownParser m (F [Blocks])) - -> MarkdownParser m sep - -> MarkdownParser m end - -> MarkdownParser m ([Alignment], [Double], F [Row], F [Row]) -tableWith headerParser rowParser lineParser footerParser = try $ do - (heads, aligns, indices) <- headerParser - lines' <- fmap sequence $ rowParser indices `sepEndBy1` lineParser - footerParser - numColumns <- getOption readerColumns - let widths = if null indices - then replicate (length aligns) 0.0 - else widthsFromIndices numColumns indices - return (aligns, widths, toHeaderRow <$> heads, map toRow <$> lines') - table :: PandocMonad m => MarkdownParser m (F Blocks) table = try $ do frontCaption <- option Nothing (Just <$> tableCaption) - (aligns, widths, heads, lns) <- + tableComponents <- (guardEnabled Ext_pipe_tables >> try (scanForPipe >> pipeTable)) <|> (guardEnabled Ext_multiline_tables >> try (multilineTable False)) <|> (guardEnabled Ext_simple_tables >> @@ -1481,23 +1469,10 @@ table = try $ do caption <- case frontCaption of Nothing -> option (return mempty) tableCaption Just c -> return c - -- renormalize widths if greater than 100%: - let totalWidth = sum widths - let widths' = if totalWidth < 1 - then widths - else map (/ totalWidth) widths - let strictPos w - | w > 0 = ColWidth w - | otherwise = ColWidthDefault return $ do caption' <- caption - heads' <- heads - lns' <- lns - return $ B.table (B.simpleCaption $ B.plain caption') - (zip aligns (strictPos <$> widths')) - (TableHead nullAttr heads') - [TableBody nullAttr 0 [] lns'] - (TableFoot nullAttr []) + (TableComponents _attr _capt colspecs th tb tf) <- tableComponents + return $ B.table (B.simpleCaption $ B.plain caption') colspecs th tb tf -- -- inline @@ -2283,9 +2258,3 @@ doubleQuoted = do fmap B.doubleQuoted . trimInlinesF . mconcat <$> many1Till inline doubleQuoteEnd)) <|> (return (return (B.str "\8220"))) - -toRow :: [Blocks] -> Row -toRow = Row nullAttr . map B.simpleCell - -toHeaderRow :: [Blocks] -> [Row] -toHeaderRow l = [toRow l | not (null l) && not (all null l)] diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index b0101213b..7a406ec4b 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -34,7 +34,7 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad (..)) import Text.Pandoc.Definition import Text.Pandoc.Logging import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding (nested) +import Text.Pandoc.Parsing hiding (nested, tableCaption) import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag) import Text.Pandoc.Shared (safeRead, stringify, stripTrailingNewlines, trim, splitTextBy, tshow) diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs index 276d28aaa..7ce4e593c 100644 --- a/src/Text/Pandoc/Readers/TWiki.hs +++ b/src/Text/Pandoc/Readers/TWiki.hs @@ -221,9 +221,9 @@ listItemLine prefix marker = mconcat <$> (lineContent >>= parseContent) table :: PandocMonad m => TWParser m B.Blocks table = try $ do - tableHead <- optionMaybe (unzip <$> many1Till tableParseHeader newline) + thead <- optionMaybe (unzip <$> many1Till tableParseHeader newline) rows <- many1 tableParseRow - return $ buildTable mempty rows $ fromMaybe (align rows, columns rows) tableHead + return $ buildTable mempty rows $ fromMaybe (align rows, columns rows) thead where buildTable caption rows (aligns, heads) = B.table (B.simpleCaption $ B.plain caption)