[API change] Unify grid table parsing (#7971)

Grid table parsing in Markdown and rst are updated use the same
functions. Functions are generalized to meet requirements for both
formats.

This change also lays the ground for further generalizations in table
parsers, including support for advanced table features.

API changes in Text.Pandoc.Parsing:

- Parse results of functions `tableWith'` and `gridTableWith'` are now a
  `mf TableComponents` instead of a quadruple of alignments, column
  widths, header rows and body rows.

Additional exports from Text.Pandoc.Parsing:

- `tableWith'`
- `TableComponents`
- `TableNormalization`
- `toTableComponents`
- `toTableComponents'`
This commit is contained in:
Albert Krewinkel 2022-03-24 19:59:20 +01:00 committed by GitHub
parent 9fa2aeb489
commit b9eeb77df5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 130 additions and 94 deletions

View file

@ -59,9 +59,14 @@ module Text.Pandoc.Parsing ( module Text.Pandoc.Sources,
charRef,
lineBlockLines,
tableWith,
tableWith',
widthsFromIndices,
gridTableWith,
gridTableWith',
TableComponents (..),
TableNormalization (..),
toTableComponents,
toTableComponents',
readWith,
readWithM,
testStringWith,

View file

@ -1,4 +1,5 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Parsing.GridTable
@ -14,6 +15,11 @@ module Text.Pandoc.Parsing.GridTable
, tableWith
, tableWith'
, widthsFromIndices
-- * Components of a plain-text table
, TableComponents (..)
, TableNormalization (..)
, toTableComponents
, toTableComponents'
)
where
@ -34,6 +40,65 @@ import Text.Parsec
import qualified Data.Text as T
import qualified Text.Pandoc.Builder as B
-- | Collection of components making up a Table block.
data TableComponents = TableComponents
{ tableAttr :: Attr
, tableCaption :: Caption
, tableColSpecs :: [ColSpec]
, tableHead :: TableHead
, tableBodies :: [TableBody]
, tableFoot :: TableFoot
}
-- | Creates a table block from the collection of table parts.
tableFromComponents :: TableComponents -> Blocks
tableFromComponents (TableComponents attr capt colspecs th tb tf) =
B.tableWith attr capt colspecs th tb tf
-- | Bundles basic table components into a single value.
toTableComponents :: [Alignment] -> [Double] -> [Blocks] -> [[Blocks]]
-> TableComponents
toTableComponents = toTableComponents' NoNormalization
-- | Bundles basic table components into a single value, performing
-- normalizations as necessary.
toTableComponents' :: TableNormalization
-> [Alignment] -> [Double] -> [Blocks] -> [[Blocks]]
-> TableComponents
toTableComponents' normalization aligns widths heads rows =
let th = TableHead nullAttr (toHeaderRow normalization heads)
tb = TableBody nullAttr 0 [] (map toRow rows)
tf = TableFoot nullAttr []
colspecs = toColSpecs aligns widths
in TableComponents nullAttr B.emptyCaption colspecs th [tb] tf
-- | Combine a list of column alignments and column widths into a list
-- of column specifiers. Both input lists should have the same length.
toColSpecs :: [Alignment] -- ^ column alignments
-> [Double] -- ^ column widths
-> [ColSpec]
toColSpecs aligns widths = zip aligns (map fromWidth widths')
where
fromWidth n
| n > 0 = ColWidth n
| otherwise = ColWidthDefault
-- renormalize widths if greater than 100%:
totalWidth = sum widths
widths' = if totalWidth < 1
then widths
else map (/ totalWidth) widths
-- | Whether the table header should be normalized, i.e., whether an header row
-- with only empty cells should be omitted.
data TableNormalization
= NoNormalization
| NormalizeHeader
--
-- Grid Tables
--
-- | Parse a grid table: starts with row of '-' on top, then header
-- (which may be grid), then the rows, which may be grid, separated by
-- blank lines, and ending with a footer (dashed line followed by blank
@ -50,11 +115,13 @@ gridTableWith blocks headless =
-- Table.
gridTableWith' :: (Monad m, Monad mf,
HasReaderOptions st, HasLastStrPosition st)
=> ParserT Sources st m (mf Blocks) -- ^ Block list parser
=> TableNormalization
-> ParserT Sources st m (mf Blocks) -- ^ Block list parser
-> Bool -- ^ Headerless table
-> ParserT Sources st m (TableComponents mf)
gridTableWith' blocks headless =
tableWith' (gridTableHeader headless blocks) (gridTableRow blocks)
-> ParserT Sources st m (mf TableComponents)
gridTableWith' normalization blocks headless =
tableWith' normalization
(gridTableHeader headless blocks) (gridTableRow blocks)
(gridTableSep '-') gridTableFooter
gridTableSplitLine :: [Int] -> Text -> [Text]
@ -162,44 +229,39 @@ gridTableFooter = optional blanklines
-- 'lineParser', and 'footerParser'.
tableWith :: (Stream s m Char, UpdateSourcePos s Char,
HasReaderOptions st, Monad mf)
=> ParserT s st m (mf [Blocks], [Alignment], [Int])
-> ([Int] -> ParserT s st m (mf [Blocks]))
-> ParserT s st m sep
-> ParserT s st m end
=> ParserT s st m (mf [Blocks], [Alignment], [Int]) -- ^ header parser
-> ([Int] -> ParserT s st m (mf [Blocks])) -- ^ row parser
-> ParserT s st m sep -- ^ line parser
-> ParserT s st m end -- ^ footer parser
-> ParserT s st m (mf Blocks)
tableWith headerParser rowParser lineParser footerParser = try $ do
(aligns, widths, heads, rows) <- tableWith' headerParser rowParser
lineParser footerParser
let th = TableHead nullAttr <$> heads
tb = (:[]) . TableBody nullAttr 0 [] <$> rows
tf = pure $ TableFoot nullAttr []
colspecs = zip aligns (map fromWidth widths)
return $ B.table B.emptyCaption colspecs <$> th <*> tb <*> tf
where
fromWidth n
| n > 0 = ColWidth n
| otherwise = ColWidthDefault
type TableComponents mf = ([Alignment], [Double], mf [Row], mf [Row])
tableWith hp rp lp fp = fmap tableFromComponents <$>
tableWith' NoNormalization hp rp lp fp
tableWith' :: (Stream s m Char, UpdateSourcePos s Char,
HasReaderOptions st, Monad mf)
=> ParserT s st m (mf [Blocks], [Alignment], [Int])
-> ([Int] -> ParserT s st m (mf [Blocks]))
-> ParserT s st m sep
-> ParserT s st m end
-> ParserT s st m (TableComponents mf)
tableWith' headerParser rowParser lineParser footerParser = try $ do
(heads, aligns, indices) <- headerParser
lines' <- sequence <$> rowParser indices `sepEndBy1` lineParser
footerParser
numColumns <- getOption readerColumns
let widths = if null indices
then replicate (length aligns) 0.0
else widthsFromIndices numColumns indices
let toRow = Row nullAttr . map B.simpleCell
toHeaderRow l = [toRow l | not (null l)]
return (aligns, widths, toHeaderRow <$> heads, map toRow <$> lines')
=> TableNormalization
-> ParserT s st m (mf [Blocks], [Alignment], [Int]) -- ^ header parser
-> ([Int] -> ParserT s st m (mf [Blocks])) -- ^ row parser
-> ParserT s st m sep -- ^ line parser
-> ParserT s st m end -- ^ footer parser
-> ParserT s st m (mf TableComponents)
tableWith' n11n headerParser rowParser lineParser footerParser = try $ do
(heads, aligns, indices) <- headerParser
lines' <- sequence <$> rowParser indices `sepEndBy1` lineParser
footerParser
numColumns <- getOption readerColumns
let widths = if null indices
then replicate (length aligns) 0.0
else widthsFromIndices numColumns indices
return $ toTableComponents' n11n aligns widths <$> heads <*> lines'
toRow :: [Blocks] -> Row
toRow = Row nullAttr . map B.simpleCell
toHeaderRow :: TableNormalization -> [Blocks] -> [Row]
toHeaderRow = \case
NoNormalization -> \l -> [toRow l | not (null l)]
NormalizeHeader -> \l -> [toRow l | not (null l) && not (all null l)]
-- Calculate relative widths of table columns, based on indices
widthsFromIndices :: Int -- Number of columns on terminal

View file

@ -21,6 +21,7 @@ module Text.Pandoc.Readers.Markdown (
import Control.Monad
import Control.Monad.Except (throwError)
import Data.Bifunctor (second)
import Data.Char (isAlphaNum, isPunctuation, isSpace)
import Text.DocLayout (realLength)
import Data.List (transpose, elemIndex, sortOn, foldl')
@ -44,7 +45,7 @@ import Safe.Foldable (maximumBounded)
import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Walk (walk)
import Text.Pandoc.Parsing hiding (tableWith)
import Text.Pandoc.Parsing hiding (tableCaption)
import Text.Pandoc.Readers.HTML (htmlInBalanced, htmlTag, isBlockTag,
isCommentTag, isInlineTag, isTextTag)
import Text.Pandoc.Readers.LaTeX (applyMacros, rawLaTeXBlock, rawLaTeXInline)
@ -1298,14 +1299,18 @@ tableCaption = do
-- Parse a simple table with '---' header and one line per row.
simpleTable :: PandocMonad m
=> Bool -- ^ Headerless table
-> MarkdownParser m ([Alignment], [Double], F [Row], F [Row])
-> MarkdownParser m (F TableComponents)
simpleTable headless = do
(aligns, _widths, heads', lines') <-
tableWith (simpleTableHeader headless) tableLine
tableComponents <-
tableWith' NormalizeHeader
(simpleTableHeader headless) tableLine
(return ())
(if headless then tableFooter else tableFooter <|> blanklines')
-- Simple tables get 0s for relative column widths (i.e., use default)
return (aligns, replicate (length aligns) 0, heads', lines')
-- All columns in simple tables have default widths.
let useDefaultColumnWidths tc =
let cs' = map (second (const ColWidthDefault)) $ tableColSpecs tc
in tc {tableColSpecs = cs'}
return $ useDefaultColumnWidths <$> tableComponents
-- Parse a multiline table: starts with row of '-' on top, then header
-- (which may be multiline), then the rows,
@ -1313,9 +1318,10 @@ simpleTable headless = do
-- ending with a footer (dashed line followed by blank line).
multilineTable :: PandocMonad m
=> Bool -- ^ Headerless table
-> MarkdownParser m ([Alignment], [Double], F [Row], F [Row])
-> MarkdownParser m (F TableComponents)
multilineTable headless =
tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter
tableWith' NormalizeHeader (multilineTableHeader headless)
multilineRow blanklines tableFooter
multilineTableHeader :: PandocMonad m
=> Bool -- ^ Headerless table
@ -1355,8 +1361,8 @@ multilineTableHeader headless = try $ do
-- which may be grid, separated by blank lines, and
-- ending with a footer (dashed line followed by blank line).
gridTable :: PandocMonad m => Bool -- ^ Headerless table
-> MarkdownParser m ([Alignment], [Double], F [Row], F [Row])
gridTable headless = gridTableWith' parseBlocks headless
-> MarkdownParser m (F TableComponents)
gridTable headless = gridTableWith' NormalizeHeader parseBlocks headless
pipeBreak :: PandocMonad m => MarkdownParser m ([Alignment], [Int])
pipeBreak = try $ do
@ -1370,7 +1376,7 @@ pipeBreak = try $ do
blankline
return $ unzip (first:rest)
pipeTable :: PandocMonad m => MarkdownParser m ([Alignment], [Double], F [Row], F [Row])
pipeTable :: PandocMonad m => MarkdownParser m (F TableComponents)
pipeTable = try $ do
nonindentSpaces
lookAhead nonspaceChar
@ -1390,7 +1396,8 @@ pipeTable = try $ do
else replicate (length aligns) 0.0
(headCells :: F [Blocks]) <- sequence <$> mapM cellContents heads'
(rows :: F [[Blocks]]) <- sequence <$> mapM (fmap sequence . mapM cellContents) lines''
return (aligns, widths, toHeaderRow <$> headCells, map toRow <$> rows)
return $
toTableComponents' NormalizeHeader aligns widths <$> headCells <*> rows
sepPipe :: PandocMonad m => MarkdownParser m ()
sepPipe = try $ do
@ -1446,29 +1453,10 @@ scanForPipe = do
(_, T.uncons -> Just ('|', _)) -> return ()
_ -> mzero
-- | Parse a table using 'headerParser', 'rowParser',
-- 'lineParser', and 'footerParser'. Variant of the version in
-- Text.Pandoc.Parsing.
tableWith :: PandocMonad m
=> MarkdownParser m (F [Blocks], [Alignment], [Int])
-> ([Int] -> MarkdownParser m (F [Blocks]))
-> MarkdownParser m sep
-> MarkdownParser m end
-> MarkdownParser m ([Alignment], [Double], F [Row], F [Row])
tableWith headerParser rowParser lineParser footerParser = try $ do
(heads, aligns, indices) <- headerParser
lines' <- fmap sequence $ rowParser indices `sepEndBy1` lineParser
footerParser
numColumns <- getOption readerColumns
let widths = if null indices
then replicate (length aligns) 0.0
else widthsFromIndices numColumns indices
return (aligns, widths, toHeaderRow <$> heads, map toRow <$> lines')
table :: PandocMonad m => MarkdownParser m (F Blocks)
table = try $ do
frontCaption <- option Nothing (Just <$> tableCaption)
(aligns, widths, heads, lns) <-
tableComponents <-
(guardEnabled Ext_pipe_tables >> try (scanForPipe >> pipeTable)) <|>
(guardEnabled Ext_multiline_tables >> try (multilineTable False)) <|>
(guardEnabled Ext_simple_tables >>
@ -1481,23 +1469,10 @@ table = try $ do
caption <- case frontCaption of
Nothing -> option (return mempty) tableCaption
Just c -> return c
-- renormalize widths if greater than 100%:
let totalWidth = sum widths
let widths' = if totalWidth < 1
then widths
else map (/ totalWidth) widths
let strictPos w
| w > 0 = ColWidth w
| otherwise = ColWidthDefault
return $ do
caption' <- caption
heads' <- heads
lns' <- lns
return $ B.table (B.simpleCaption $ B.plain caption')
(zip aligns (strictPos <$> widths'))
(TableHead nullAttr heads')
[TableBody nullAttr 0 [] lns']
(TableFoot nullAttr [])
(TableComponents _attr _capt colspecs th tb tf) <- tableComponents
return $ B.table (B.simpleCaption $ B.plain caption') colspecs th tb tf
--
-- inline
@ -2283,9 +2258,3 @@ doubleQuoted = do
fmap B.doubleQuoted . trimInlinesF . mconcat <$>
many1Till inline doubleQuoteEnd))
<|> (return (return (B.str "\8220")))
toRow :: [Blocks] -> Row
toRow = Row nullAttr . map B.simpleCell
toHeaderRow :: [Blocks] -> [Row]
toHeaderRow l = [toRow l | not (null l) && not (all null l)]

View file

@ -34,7 +34,7 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad (..))
import Text.Pandoc.Definition
import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (nested)
import Text.Pandoc.Parsing hiding (nested, tableCaption)
import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag)
import Text.Pandoc.Shared (safeRead, stringify, stripTrailingNewlines,
trim, splitTextBy, tshow)

View file

@ -221,9 +221,9 @@ listItemLine prefix marker = mconcat <$> (lineContent >>= parseContent)
table :: PandocMonad m => TWParser m B.Blocks
table = try $ do
tableHead <- optionMaybe (unzip <$> many1Till tableParseHeader newline)
thead <- optionMaybe (unzip <$> many1Till tableParseHeader newline)
rows <- many1 tableParseRow
return $ buildTable mempty rows $ fromMaybe (align rows, columns rows) tableHead
return $ buildTable mempty rows $ fromMaybe (align rows, columns rows) thead
where
buildTable caption rows (aligns, heads)
= B.table (B.simpleCaption $ B.plain caption)