[API change] Unify grid table parsing (#7971)

Grid table parsing in Markdown and rst are updated use the same
functions. Functions are generalized to meet requirements for both
formats.

This change also lays the ground for further generalizations in table
parsers, including support for advanced table features.

API changes in Text.Pandoc.Parsing:

- Parse results of functions `tableWith'` and `gridTableWith'` are now a
  `mf TableComponents` instead of a quadruple of alignments, column
  widths, header rows and body rows.

Additional exports from Text.Pandoc.Parsing:

- `tableWith'`
- `TableComponents`
- `TableNormalization`
- `toTableComponents`
- `toTableComponents'`
This commit is contained in:
Albert Krewinkel 2022-03-24 19:59:20 +01:00 committed by GitHub
parent 9fa2aeb489
commit b9eeb77df5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 130 additions and 94 deletions

View file

@ -59,9 +59,14 @@ module Text.Pandoc.Parsing ( module Text.Pandoc.Sources,
charRef, charRef,
lineBlockLines, lineBlockLines,
tableWith, tableWith,
tableWith',
widthsFromIndices, widthsFromIndices,
gridTableWith, gridTableWith,
gridTableWith', gridTableWith',
TableComponents (..),
TableNormalization (..),
toTableComponents,
toTableComponents',
readWith, readWith,
readWithM, readWithM,
testStringWith, testStringWith,

View file

@ -1,4 +1,5 @@
{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{- | {- |
Module : Text.Pandoc.Parsing.GridTable Module : Text.Pandoc.Parsing.GridTable
@ -14,6 +15,11 @@ module Text.Pandoc.Parsing.GridTable
, tableWith , tableWith
, tableWith' , tableWith'
, widthsFromIndices , widthsFromIndices
-- * Components of a plain-text table
, TableComponents (..)
, TableNormalization (..)
, toTableComponents
, toTableComponents'
) )
where where
@ -34,6 +40,65 @@ import Text.Parsec
import qualified Data.Text as T import qualified Data.Text as T
import qualified Text.Pandoc.Builder as B import qualified Text.Pandoc.Builder as B
-- | Collection of components making up a Table block.
data TableComponents = TableComponents
{ tableAttr :: Attr
, tableCaption :: Caption
, tableColSpecs :: [ColSpec]
, tableHead :: TableHead
, tableBodies :: [TableBody]
, tableFoot :: TableFoot
}
-- | Creates a table block from the collection of table parts.
tableFromComponents :: TableComponents -> Blocks
tableFromComponents (TableComponents attr capt colspecs th tb tf) =
B.tableWith attr capt colspecs th tb tf
-- | Bundles basic table components into a single value.
toTableComponents :: [Alignment] -> [Double] -> [Blocks] -> [[Blocks]]
-> TableComponents
toTableComponents = toTableComponents' NoNormalization
-- | Bundles basic table components into a single value, performing
-- normalizations as necessary.
toTableComponents' :: TableNormalization
-> [Alignment] -> [Double] -> [Blocks] -> [[Blocks]]
-> TableComponents
toTableComponents' normalization aligns widths heads rows =
let th = TableHead nullAttr (toHeaderRow normalization heads)
tb = TableBody nullAttr 0 [] (map toRow rows)
tf = TableFoot nullAttr []
colspecs = toColSpecs aligns widths
in TableComponents nullAttr B.emptyCaption colspecs th [tb] tf
-- | Combine a list of column alignments and column widths into a list
-- of column specifiers. Both input lists should have the same length.
toColSpecs :: [Alignment] -- ^ column alignments
-> [Double] -- ^ column widths
-> [ColSpec]
toColSpecs aligns widths = zip aligns (map fromWidth widths')
where
fromWidth n
| n > 0 = ColWidth n
| otherwise = ColWidthDefault
-- renormalize widths if greater than 100%:
totalWidth = sum widths
widths' = if totalWidth < 1
then widths
else map (/ totalWidth) widths
-- | Whether the table header should be normalized, i.e., whether an header row
-- with only empty cells should be omitted.
data TableNormalization
= NoNormalization
| NormalizeHeader
--
-- Grid Tables
--
-- | Parse a grid table: starts with row of '-' on top, then header -- | Parse a grid table: starts with row of '-' on top, then header
-- (which may be grid), then the rows, which may be grid, separated by -- (which may be grid), then the rows, which may be grid, separated by
-- blank lines, and ending with a footer (dashed line followed by blank -- blank lines, and ending with a footer (dashed line followed by blank
@ -50,11 +115,13 @@ gridTableWith blocks headless =
-- Table. -- Table.
gridTableWith' :: (Monad m, Monad mf, gridTableWith' :: (Monad m, Monad mf,
HasReaderOptions st, HasLastStrPosition st) HasReaderOptions st, HasLastStrPosition st)
=> ParserT Sources st m (mf Blocks) -- ^ Block list parser => TableNormalization
-> ParserT Sources st m (mf Blocks) -- ^ Block list parser
-> Bool -- ^ Headerless table -> Bool -- ^ Headerless table
-> ParserT Sources st m (TableComponents mf) -> ParserT Sources st m (mf TableComponents)
gridTableWith' blocks headless = gridTableWith' normalization blocks headless =
tableWith' (gridTableHeader headless blocks) (gridTableRow blocks) tableWith' normalization
(gridTableHeader headless blocks) (gridTableRow blocks)
(gridTableSep '-') gridTableFooter (gridTableSep '-') gridTableFooter
gridTableSplitLine :: [Int] -> Text -> [Text] gridTableSplitLine :: [Int] -> Text -> [Text]
@ -162,44 +229,39 @@ gridTableFooter = optional blanklines
-- 'lineParser', and 'footerParser'. -- 'lineParser', and 'footerParser'.
tableWith :: (Stream s m Char, UpdateSourcePos s Char, tableWith :: (Stream s m Char, UpdateSourcePos s Char,
HasReaderOptions st, Monad mf) HasReaderOptions st, Monad mf)
=> ParserT s st m (mf [Blocks], [Alignment], [Int]) => ParserT s st m (mf [Blocks], [Alignment], [Int]) -- ^ header parser
-> ([Int] -> ParserT s st m (mf [Blocks])) -> ([Int] -> ParserT s st m (mf [Blocks])) -- ^ row parser
-> ParserT s st m sep -> ParserT s st m sep -- ^ line parser
-> ParserT s st m end -> ParserT s st m end -- ^ footer parser
-> ParserT s st m (mf Blocks) -> ParserT s st m (mf Blocks)
tableWith headerParser rowParser lineParser footerParser = try $ do tableWith hp rp lp fp = fmap tableFromComponents <$>
(aligns, widths, heads, rows) <- tableWith' headerParser rowParser tableWith' NoNormalization hp rp lp fp
lineParser footerParser
let th = TableHead nullAttr <$> heads
tb = (:[]) . TableBody nullAttr 0 [] <$> rows
tf = pure $ TableFoot nullAttr []
colspecs = zip aligns (map fromWidth widths)
return $ B.table B.emptyCaption colspecs <$> th <*> tb <*> tf
where
fromWidth n
| n > 0 = ColWidth n
| otherwise = ColWidthDefault
type TableComponents mf = ([Alignment], [Double], mf [Row], mf [Row])
tableWith' :: (Stream s m Char, UpdateSourcePos s Char, tableWith' :: (Stream s m Char, UpdateSourcePos s Char,
HasReaderOptions st, Monad mf) HasReaderOptions st, Monad mf)
=> ParserT s st m (mf [Blocks], [Alignment], [Int]) => TableNormalization
-> ([Int] -> ParserT s st m (mf [Blocks])) -> ParserT s st m (mf [Blocks], [Alignment], [Int]) -- ^ header parser
-> ParserT s st m sep -> ([Int] -> ParserT s st m (mf [Blocks])) -- ^ row parser
-> ParserT s st m end -> ParserT s st m sep -- ^ line parser
-> ParserT s st m (TableComponents mf) -> ParserT s st m end -- ^ footer parser
tableWith' headerParser rowParser lineParser footerParser = try $ do -> ParserT s st m (mf TableComponents)
(heads, aligns, indices) <- headerParser tableWith' n11n headerParser rowParser lineParser footerParser = try $ do
lines' <- sequence <$> rowParser indices `sepEndBy1` lineParser (heads, aligns, indices) <- headerParser
footerParser lines' <- sequence <$> rowParser indices `sepEndBy1` lineParser
numColumns <- getOption readerColumns footerParser
let widths = if null indices numColumns <- getOption readerColumns
then replicate (length aligns) 0.0 let widths = if null indices
else widthsFromIndices numColumns indices then replicate (length aligns) 0.0
let toRow = Row nullAttr . map B.simpleCell else widthsFromIndices numColumns indices
toHeaderRow l = [toRow l | not (null l)] return $ toTableComponents' n11n aligns widths <$> heads <*> lines'
return (aligns, widths, toHeaderRow <$> heads, map toRow <$> lines')
toRow :: [Blocks] -> Row
toRow = Row nullAttr . map B.simpleCell
toHeaderRow :: TableNormalization -> [Blocks] -> [Row]
toHeaderRow = \case
NoNormalization -> \l -> [toRow l | not (null l)]
NormalizeHeader -> \l -> [toRow l | not (null l) && not (all null l)]
-- Calculate relative widths of table columns, based on indices -- Calculate relative widths of table columns, based on indices
widthsFromIndices :: Int -- Number of columns on terminal widthsFromIndices :: Int -- Number of columns on terminal

View file

@ -21,6 +21,7 @@ module Text.Pandoc.Readers.Markdown (
import Control.Monad import Control.Monad
import Control.Monad.Except (throwError) import Control.Monad.Except (throwError)
import Data.Bifunctor (second)
import Data.Char (isAlphaNum, isPunctuation, isSpace) import Data.Char (isAlphaNum, isPunctuation, isSpace)
import Text.DocLayout (realLength) import Text.DocLayout (realLength)
import Data.List (transpose, elemIndex, sortOn, foldl') import Data.List (transpose, elemIndex, sortOn, foldl')
@ -44,7 +45,7 @@ import Safe.Foldable (maximumBounded)
import Text.Pandoc.Logging import Text.Pandoc.Logging
import Text.Pandoc.Options import Text.Pandoc.Options
import Text.Pandoc.Walk (walk) import Text.Pandoc.Walk (walk)
import Text.Pandoc.Parsing hiding (tableWith) import Text.Pandoc.Parsing hiding (tableCaption)
import Text.Pandoc.Readers.HTML (htmlInBalanced, htmlTag, isBlockTag, import Text.Pandoc.Readers.HTML (htmlInBalanced, htmlTag, isBlockTag,
isCommentTag, isInlineTag, isTextTag) isCommentTag, isInlineTag, isTextTag)
import Text.Pandoc.Readers.LaTeX (applyMacros, rawLaTeXBlock, rawLaTeXInline) import Text.Pandoc.Readers.LaTeX (applyMacros, rawLaTeXBlock, rawLaTeXInline)
@ -1298,14 +1299,18 @@ tableCaption = do
-- Parse a simple table with '---' header and one line per row. -- Parse a simple table with '---' header and one line per row.
simpleTable :: PandocMonad m simpleTable :: PandocMonad m
=> Bool -- ^ Headerless table => Bool -- ^ Headerless table
-> MarkdownParser m ([Alignment], [Double], F [Row], F [Row]) -> MarkdownParser m (F TableComponents)
simpleTable headless = do simpleTable headless = do
(aligns, _widths, heads', lines') <- tableComponents <-
tableWith (simpleTableHeader headless) tableLine tableWith' NormalizeHeader
(simpleTableHeader headless) tableLine
(return ()) (return ())
(if headless then tableFooter else tableFooter <|> blanklines') (if headless then tableFooter else tableFooter <|> blanklines')
-- Simple tables get 0s for relative column widths (i.e., use default) -- All columns in simple tables have default widths.
return (aligns, replicate (length aligns) 0, heads', lines') let useDefaultColumnWidths tc =
let cs' = map (second (const ColWidthDefault)) $ tableColSpecs tc
in tc {tableColSpecs = cs'}
return $ useDefaultColumnWidths <$> tableComponents
-- Parse a multiline table: starts with row of '-' on top, then header -- Parse a multiline table: starts with row of '-' on top, then header
-- (which may be multiline), then the rows, -- (which may be multiline), then the rows,
@ -1313,9 +1318,10 @@ simpleTable headless = do
-- ending with a footer (dashed line followed by blank line). -- ending with a footer (dashed line followed by blank line).
multilineTable :: PandocMonad m multilineTable :: PandocMonad m
=> Bool -- ^ Headerless table => Bool -- ^ Headerless table
-> MarkdownParser m ([Alignment], [Double], F [Row], F [Row]) -> MarkdownParser m (F TableComponents)
multilineTable headless = multilineTable headless =
tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter tableWith' NormalizeHeader (multilineTableHeader headless)
multilineRow blanklines tableFooter
multilineTableHeader :: PandocMonad m multilineTableHeader :: PandocMonad m
=> Bool -- ^ Headerless table => Bool -- ^ Headerless table
@ -1355,8 +1361,8 @@ multilineTableHeader headless = try $ do
-- which may be grid, separated by blank lines, and -- which may be grid, separated by blank lines, and
-- ending with a footer (dashed line followed by blank line). -- ending with a footer (dashed line followed by blank line).
gridTable :: PandocMonad m => Bool -- ^ Headerless table gridTable :: PandocMonad m => Bool -- ^ Headerless table
-> MarkdownParser m ([Alignment], [Double], F [Row], F [Row]) -> MarkdownParser m (F TableComponents)
gridTable headless = gridTableWith' parseBlocks headless gridTable headless = gridTableWith' NormalizeHeader parseBlocks headless
pipeBreak :: PandocMonad m => MarkdownParser m ([Alignment], [Int]) pipeBreak :: PandocMonad m => MarkdownParser m ([Alignment], [Int])
pipeBreak = try $ do pipeBreak = try $ do
@ -1370,7 +1376,7 @@ pipeBreak = try $ do
blankline blankline
return $ unzip (first:rest) return $ unzip (first:rest)
pipeTable :: PandocMonad m => MarkdownParser m ([Alignment], [Double], F [Row], F [Row]) pipeTable :: PandocMonad m => MarkdownParser m (F TableComponents)
pipeTable = try $ do pipeTable = try $ do
nonindentSpaces nonindentSpaces
lookAhead nonspaceChar lookAhead nonspaceChar
@ -1390,7 +1396,8 @@ pipeTable = try $ do
else replicate (length aligns) 0.0 else replicate (length aligns) 0.0
(headCells :: F [Blocks]) <- sequence <$> mapM cellContents heads' (headCells :: F [Blocks]) <- sequence <$> mapM cellContents heads'
(rows :: F [[Blocks]]) <- sequence <$> mapM (fmap sequence . mapM cellContents) lines'' (rows :: F [[Blocks]]) <- sequence <$> mapM (fmap sequence . mapM cellContents) lines''
return (aligns, widths, toHeaderRow <$> headCells, map toRow <$> rows) return $
toTableComponents' NormalizeHeader aligns widths <$> headCells <*> rows
sepPipe :: PandocMonad m => MarkdownParser m () sepPipe :: PandocMonad m => MarkdownParser m ()
sepPipe = try $ do sepPipe = try $ do
@ -1446,29 +1453,10 @@ scanForPipe = do
(_, T.uncons -> Just ('|', _)) -> return () (_, T.uncons -> Just ('|', _)) -> return ()
_ -> mzero _ -> mzero
-- | Parse a table using 'headerParser', 'rowParser',
-- 'lineParser', and 'footerParser'. Variant of the version in
-- Text.Pandoc.Parsing.
tableWith :: PandocMonad m
=> MarkdownParser m (F [Blocks], [Alignment], [Int])
-> ([Int] -> MarkdownParser m (F [Blocks]))
-> MarkdownParser m sep
-> MarkdownParser m end
-> MarkdownParser m ([Alignment], [Double], F [Row], F [Row])
tableWith headerParser rowParser lineParser footerParser = try $ do
(heads, aligns, indices) <- headerParser
lines' <- fmap sequence $ rowParser indices `sepEndBy1` lineParser
footerParser
numColumns <- getOption readerColumns
let widths = if null indices
then replicate (length aligns) 0.0
else widthsFromIndices numColumns indices
return (aligns, widths, toHeaderRow <$> heads, map toRow <$> lines')
table :: PandocMonad m => MarkdownParser m (F Blocks) table :: PandocMonad m => MarkdownParser m (F Blocks)
table = try $ do table = try $ do
frontCaption <- option Nothing (Just <$> tableCaption) frontCaption <- option Nothing (Just <$> tableCaption)
(aligns, widths, heads, lns) <- tableComponents <-
(guardEnabled Ext_pipe_tables >> try (scanForPipe >> pipeTable)) <|> (guardEnabled Ext_pipe_tables >> try (scanForPipe >> pipeTable)) <|>
(guardEnabled Ext_multiline_tables >> try (multilineTable False)) <|> (guardEnabled Ext_multiline_tables >> try (multilineTable False)) <|>
(guardEnabled Ext_simple_tables >> (guardEnabled Ext_simple_tables >>
@ -1481,23 +1469,10 @@ table = try $ do
caption <- case frontCaption of caption <- case frontCaption of
Nothing -> option (return mempty) tableCaption Nothing -> option (return mempty) tableCaption
Just c -> return c Just c -> return c
-- renormalize widths if greater than 100%:
let totalWidth = sum widths
let widths' = if totalWidth < 1
then widths
else map (/ totalWidth) widths
let strictPos w
| w > 0 = ColWidth w
| otherwise = ColWidthDefault
return $ do return $ do
caption' <- caption caption' <- caption
heads' <- heads (TableComponents _attr _capt colspecs th tb tf) <- tableComponents
lns' <- lns return $ B.table (B.simpleCaption $ B.plain caption') colspecs th tb tf
return $ B.table (B.simpleCaption $ B.plain caption')
(zip aligns (strictPos <$> widths'))
(TableHead nullAttr heads')
[TableBody nullAttr 0 [] lns']
(TableFoot nullAttr [])
-- --
-- inline -- inline
@ -2283,9 +2258,3 @@ doubleQuoted = do
fmap B.doubleQuoted . trimInlinesF . mconcat <$> fmap B.doubleQuoted . trimInlinesF . mconcat <$>
many1Till inline doubleQuoteEnd)) many1Till inline doubleQuoteEnd))
<|> (return (return (B.str "\8220"))) <|> (return (return (B.str "\8220")))
toRow :: [Blocks] -> Row
toRow = Row nullAttr . map B.simpleCell
toHeaderRow :: [Blocks] -> [Row]
toHeaderRow l = [toRow l | not (null l) && not (all null l)]

View file

@ -34,7 +34,7 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad (..))
import Text.Pandoc.Definition import Text.Pandoc.Definition
import Text.Pandoc.Logging import Text.Pandoc.Logging
import Text.Pandoc.Options import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (nested) import Text.Pandoc.Parsing hiding (nested, tableCaption)
import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag) import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag)
import Text.Pandoc.Shared (safeRead, stringify, stripTrailingNewlines, import Text.Pandoc.Shared (safeRead, stringify, stripTrailingNewlines,
trim, splitTextBy, tshow) trim, splitTextBy, tshow)

View file

@ -221,9 +221,9 @@ listItemLine prefix marker = mconcat <$> (lineContent >>= parseContent)
table :: PandocMonad m => TWParser m B.Blocks table :: PandocMonad m => TWParser m B.Blocks
table = try $ do table = try $ do
tableHead <- optionMaybe (unzip <$> many1Till tableParseHeader newline) thead <- optionMaybe (unzip <$> many1Till tableParseHeader newline)
rows <- many1 tableParseRow rows <- many1 tableParseRow
return $ buildTable mempty rows $ fromMaybe (align rows, columns rows) tableHead return $ buildTable mempty rows $ fromMaybe (align rows, columns rows) thead
where where
buildTable caption rows (aligns, heads) buildTable caption rows (aligns, heads)
= B.table (B.simpleCaption $ B.plain caption) = B.table (B.simpleCaption $ B.plain caption)