LaTeX reader: fix improper empty cell filtering (#6689)

This commit is contained in:
Christian Despres 2020-09-15 16:36:11 -04:00 committed by GitHub
parent 6ef38e9ab3
commit a2d343420f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 108 additions and 14 deletions

View file

@ -1,3 +1,4 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
@ -2144,6 +2145,8 @@ parseAligns = try $ do
toColWidth _ = ColWidthDefault
toSpec (x, y, z) = (x, toColWidth y, z)
-- N.B. this parser returns a Row that may have erroneous empty cells
-- in it. See the note above fixTableHead for details.
parseTableRow :: PandocMonad m
=> Text -- ^ table environment name
-> [([Tok], [Tok])] -- ^ pref/suffixes
@ -2168,9 +2171,7 @@ parseTableRow envname prefsufs = do
cells <- mapM (\ts -> setInput ts >> parseTableCell) rawcells
setInput oldInput
spaces
-- Because of table normalization performed by Text.Pandoc.Builder.table,
-- we need to remove empty cells
return $ Row nullAttr $ filter (\c -> c /= emptyCell) cells
return $ Row nullAttr cells
parseTableCell :: PandocMonad m => LP m Cell
parseTableCell = do
@ -2246,6 +2247,80 @@ multicolumnCell = controlSeq "multicolumn" >> do
parseSimpleCell :: PandocMonad m => LP m Cell
parseSimpleCell = simpleCell <$> (plainify <$> blocks)
-- LaTeX tables are stored with empty cells underneath multirow cells
-- denoting the grid spaces taken up by them. More specifically, if a
-- cell spans m rows, then it will overwrite all the cells in the
-- columns it spans for (m-1) rows underneath it, requiring padding
-- cells in these places. These padding cells need to be removed for
-- proper table reading. See #6603.
--
-- These fixTable functions do not otherwise fix up malformed
-- input tables: that is left to the table builder.
fixTableHead :: TableHead -> TableHead
fixTableHead (TableHead attr rows) = TableHead attr rows'
where
rows' = fixTableRows rows
fixTableBody :: TableBody -> TableBody
fixTableBody (TableBody attr rhc th tb)
= TableBody attr rhc th' tb'
where
th' = fixTableRows th
tb' = fixTableRows tb
fixTableRows :: [Row] -> [Row]
fixTableRows = fixTableRows' $ repeat Nothing
where
fixTableRows' oldHang (Row attr cells : rs)
= let (newHang, cells') = fixTableRow oldHang cells
rs' = fixTableRows' newHang rs
in Row attr cells' : rs'
fixTableRows' _ [] = []
-- The overhang is represented as Just (relative cell dimensions) or
-- Nothing for an empty grid space.
fixTableRow :: [Maybe (ColSpan, RowSpan)] -> [Cell] -> ([Maybe (ColSpan, RowSpan)], [Cell])
fixTableRow oldHang cells
-- If there's overhang, drop cells until their total width meets the
-- width of the occupied grid spaces (or we run out)
| (n, prefHang, restHang) <- splitHang oldHang
, n > 0
= let cells' = dropToWidth getCellW n cells
(restHang', cells'') = fixTableRow restHang cells'
in (prefHang restHang', cells'')
-- Otherwise record the overhang of a pending cell and fix the rest
-- of the row
| c@(Cell _ _ h w _):cells' <- cells
= let h' = max 1 h
w' = max 1 w
oldHang' = dropToWidth getHangW w' oldHang
(newHang, cells'') = fixTableRow oldHang' cells'
in (toHang w' h' <> newHang, c : cells'')
| otherwise
= (oldHang, [])
where
getCellW (Cell _ _ _ w _) = w
getHangW = maybe 1 fst
getCS (ColSpan n) = n
toHang c r
| r > 1 = [Just (c, r)]
| otherwise = replicate (getCS c) Nothing
-- Take the prefix of the overhang list representing filled grid
-- spaces. Also return the remainder and the length of this prefix.
splitHang = splitHang' 0 id
splitHang' !n l (Just (c, r):xs)
= splitHang' (n + c) (l . (toHang c (r-1) ++)) xs
splitHang' n l xs = (n, l, xs)
-- Drop list items until the total width of the dropped items
-- exceeds the passed width.
dropToWidth _ n l | n < 1 = l
dropToWidth wproj n (c:cs) = dropToWidth wproj (n - wproj c) cs
dropToWidth _ _ [] = []
simpTable :: PandocMonad m => Text -> Bool -> LP m Blocks
simpTable envname hasWidthParameter = try $ do
when hasWidthParameter $ () <$ (spaces >> tok)
@ -2273,11 +2348,10 @@ simpTable envname hasWidthParameter = try $ do
optional lbreak
spaces
lookAhead $ controlSeq "end" -- make sure we're at end
return $ table emptyCaption
(zip aligns widths)
(TableHead nullAttr header')
[TableBody nullAttr 0 [] rows]
(TableFoot nullAttr [])
let th = fixTableHead $ TableHead nullAttr header'
let tbs = [fixTableBody $ TableBody nullAttr 0 [] rows]
let tf = TableFoot nullAttr []
return $ table emptyCaption (zip aligns widths) th tbs tf
addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
addTableCaption = walkM go

View file

@ -174,20 +174,21 @@ tests = [ testGroup "tokenization"
, Row nullAttr [ simpleCell (plain "Two") ]
]
, "Table with nested multirow/multicolumn item" =:
T.unlines [ "\\begin{tabular}{c c c}"
, "\\multicolumn{2}{c}{\\multirow{2}{5em}{One}}&Two\\\\"
, "& & Three\\\\"
, "Four&Five&Six\\\\"
T.unlines [ "\\begin{tabular}{c c c c}"
, "\\multicolumn{3}{c}{\\multirow{2}{5em}{One}}&Two\\\\"
, "\\multicolumn{2}{c}{} & & Three\\\\"
, "Four&Five&Six&Seven\\\\"
, "\\end{tabular}"
] =?>
table' [AlignCenter, AlignCenter, AlignCenter]
[ Row nullAttr [ cell AlignCenter (RowSpan 2) (ColSpan 2) (plain "One")
table' [AlignCenter, AlignCenter, AlignCenter, AlignCenter]
[ Row nullAttr [ cell AlignCenter (RowSpan 2) (ColSpan 3) (plain "One")
, simpleCell (plain "Two")
]
, Row nullAttr [ simpleCell (plain "Three") ]
, Row nullAttr [ simpleCell (plain "Four")
, simpleCell (plain "Five")
, simpleCell (plain "Six")
, simpleCell (plain "Seven")
]
]
, "Table with multicolumn header" =:
@ -205,6 +206,25 @@ tests = [ testGroup "tokenization"
]
]
(TableFoot nullAttr [])
, "Table with normal empty cells" =:
T.unlines [ "\\begin{tabular}{|r|r|r|}"
, "A & & B \\\\"
, " & C &"
, "\\end{tabular}"
] =?>
table emptyCaption
(replicate 3 (AlignRight, ColWidthDefault))
(TableHead nullAttr [])
[TableBody nullAttr 0 []
[Row nullAttr [ simpleCell (plain "A")
, emptyCell
, simpleCell (plain "B")
]
,Row nullAttr [ emptyCell
, simpleCell (plain "C")
, emptyCell
]]]
(TableFoot nullAttr [])
]
, testGroup "citations"