LaTeX reader: fix improper empty cell filtering (#6689)
This commit is contained in:
parent
6ef38e9ab3
commit
a2d343420f
2 changed files with 108 additions and 14 deletions
|
@ -1,3 +1,4 @@
|
|||
{-# LANGUAGE BangPatterns #-}
|
||||
{-# LANGUAGE CPP #-}
|
||||
{-# LANGUAGE FlexibleInstances #-}
|
||||
{-# LANGUAGE MultiParamTypeClasses #-}
|
||||
|
@ -2144,6 +2145,8 @@ parseAligns = try $ do
|
|||
toColWidth _ = ColWidthDefault
|
||||
toSpec (x, y, z) = (x, toColWidth y, z)
|
||||
|
||||
-- N.B. this parser returns a Row that may have erroneous empty cells
|
||||
-- in it. See the note above fixTableHead for details.
|
||||
parseTableRow :: PandocMonad m
|
||||
=> Text -- ^ table environment name
|
||||
-> [([Tok], [Tok])] -- ^ pref/suffixes
|
||||
|
@ -2168,9 +2171,7 @@ parseTableRow envname prefsufs = do
|
|||
cells <- mapM (\ts -> setInput ts >> parseTableCell) rawcells
|
||||
setInput oldInput
|
||||
spaces
|
||||
-- Because of table normalization performed by Text.Pandoc.Builder.table,
|
||||
-- we need to remove empty cells
|
||||
return $ Row nullAttr $ filter (\c -> c /= emptyCell) cells
|
||||
return $ Row nullAttr cells
|
||||
|
||||
parseTableCell :: PandocMonad m => LP m Cell
|
||||
parseTableCell = do
|
||||
|
@ -2246,6 +2247,80 @@ multicolumnCell = controlSeq "multicolumn" >> do
|
|||
parseSimpleCell :: PandocMonad m => LP m Cell
|
||||
parseSimpleCell = simpleCell <$> (plainify <$> blocks)
|
||||
|
||||
-- LaTeX tables are stored with empty cells underneath multirow cells
|
||||
-- denoting the grid spaces taken up by them. More specifically, if a
|
||||
-- cell spans m rows, then it will overwrite all the cells in the
|
||||
-- columns it spans for (m-1) rows underneath it, requiring padding
|
||||
-- cells in these places. These padding cells need to be removed for
|
||||
-- proper table reading. See #6603.
|
||||
--
|
||||
-- These fixTable functions do not otherwise fix up malformed
|
||||
-- input tables: that is left to the table builder.
|
||||
fixTableHead :: TableHead -> TableHead
|
||||
fixTableHead (TableHead attr rows) = TableHead attr rows'
|
||||
where
|
||||
rows' = fixTableRows rows
|
||||
|
||||
fixTableBody :: TableBody -> TableBody
|
||||
fixTableBody (TableBody attr rhc th tb)
|
||||
= TableBody attr rhc th' tb'
|
||||
where
|
||||
th' = fixTableRows th
|
||||
tb' = fixTableRows tb
|
||||
|
||||
fixTableRows :: [Row] -> [Row]
|
||||
fixTableRows = fixTableRows' $ repeat Nothing
|
||||
where
|
||||
fixTableRows' oldHang (Row attr cells : rs)
|
||||
= let (newHang, cells') = fixTableRow oldHang cells
|
||||
rs' = fixTableRows' newHang rs
|
||||
in Row attr cells' : rs'
|
||||
fixTableRows' _ [] = []
|
||||
|
||||
-- The overhang is represented as Just (relative cell dimensions) or
|
||||
-- Nothing for an empty grid space.
|
||||
fixTableRow :: [Maybe (ColSpan, RowSpan)] -> [Cell] -> ([Maybe (ColSpan, RowSpan)], [Cell])
|
||||
fixTableRow oldHang cells
|
||||
-- If there's overhang, drop cells until their total width meets the
|
||||
-- width of the occupied grid spaces (or we run out)
|
||||
| (n, prefHang, restHang) <- splitHang oldHang
|
||||
, n > 0
|
||||
= let cells' = dropToWidth getCellW n cells
|
||||
(restHang', cells'') = fixTableRow restHang cells'
|
||||
in (prefHang restHang', cells'')
|
||||
-- Otherwise record the overhang of a pending cell and fix the rest
|
||||
-- of the row
|
||||
| c@(Cell _ _ h w _):cells' <- cells
|
||||
= let h' = max 1 h
|
||||
w' = max 1 w
|
||||
oldHang' = dropToWidth getHangW w' oldHang
|
||||
(newHang, cells'') = fixTableRow oldHang' cells'
|
||||
in (toHang w' h' <> newHang, c : cells'')
|
||||
| otherwise
|
||||
= (oldHang, [])
|
||||
where
|
||||
getCellW (Cell _ _ _ w _) = w
|
||||
getHangW = maybe 1 fst
|
||||
getCS (ColSpan n) = n
|
||||
|
||||
toHang c r
|
||||
| r > 1 = [Just (c, r)]
|
||||
| otherwise = replicate (getCS c) Nothing
|
||||
|
||||
-- Take the prefix of the overhang list representing filled grid
|
||||
-- spaces. Also return the remainder and the total column width of this prefix.
|
||||
splitHang = splitHang' 0 id
|
||||
|
||||
splitHang' !n l (Just (c, r):xs)
|
||||
= splitHang' (n + c) (l . (toHang c (r-1) ++)) xs
|
||||
splitHang' n l xs = (n, l, xs)
|
||||
|
||||
-- Drop list items until the total width of the dropped items
|
||||
-- meets or exceeds the passed width (or the list runs out).
|
||||
dropToWidth _ n l | n < 1 = l
|
||||
dropToWidth wproj n (c:cs) = dropToWidth wproj (n - wproj c) cs
|
||||
dropToWidth _ _ [] = []
|
||||
|
||||
simpTable :: PandocMonad m => Text -> Bool -> LP m Blocks
|
||||
simpTable envname hasWidthParameter = try $ do
|
||||
when hasWidthParameter $ () <$ (spaces >> tok)
|
||||
|
@ -2273,11 +2348,10 @@ simpTable envname hasWidthParameter = try $ do
|
|||
optional lbreak
|
||||
spaces
|
||||
lookAhead $ controlSeq "end" -- make sure we're at end
|
||||
return $ table emptyCaption
|
||||
(zip aligns widths)
|
||||
(TableHead nullAttr header')
|
||||
[TableBody nullAttr 0 [] rows]
|
||||
(TableFoot nullAttr [])
|
||||
let th = fixTableHead $ TableHead nullAttr header'
|
||||
let tbs = [fixTableBody $ TableBody nullAttr 0 [] rows]
|
||||
let tf = TableFoot nullAttr []
|
||||
return $ table emptyCaption (zip aligns widths) th tbs tf
|
||||
|
||||
addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
|
||||
addTableCaption = walkM go
|
||||
|
|
|
@ -174,20 +174,21 @@ tests = [ testGroup "tokenization"
|
|||
, Row nullAttr [ simpleCell (plain "Two") ]
|
||||
]
|
||||
, "Table with nested multirow/multicolumn item" =:
|
||||
T.unlines [ "\\begin{tabular}{c c c}"
|
||||
, "\\multicolumn{2}{c}{\\multirow{2}{5em}{One}}&Two\\\\"
|
||||
, "& & Three\\\\"
|
||||
, "Four&Five&Six\\\\"
|
||||
T.unlines [ "\\begin{tabular}{c c c c}"
|
||||
, "\\multicolumn{3}{c}{\\multirow{2}{5em}{One}}&Two\\\\"
|
||||
, "\\multicolumn{2}{c}{} & & Three\\\\"
|
||||
, "Four&Five&Six&Seven\\\\"
|
||||
, "\\end{tabular}"
|
||||
] =?>
|
||||
table' [AlignCenter, AlignCenter, AlignCenter]
|
||||
[ Row nullAttr [ cell AlignCenter (RowSpan 2) (ColSpan 2) (plain "One")
|
||||
table' [AlignCenter, AlignCenter, AlignCenter, AlignCenter]
|
||||
[ Row nullAttr [ cell AlignCenter (RowSpan 2) (ColSpan 3) (plain "One")
|
||||
, simpleCell (plain "Two")
|
||||
]
|
||||
, Row nullAttr [ simpleCell (plain "Three") ]
|
||||
, Row nullAttr [ simpleCell (plain "Four")
|
||||
, simpleCell (plain "Five")
|
||||
, simpleCell (plain "Six")
|
||||
, simpleCell (plain "Seven")
|
||||
]
|
||||
]
|
||||
, "Table with multicolumn header" =:
|
||||
|
@ -205,6 +206,25 @@ tests = [ testGroup "tokenization"
|
|||
]
|
||||
]
|
||||
(TableFoot nullAttr [])
|
||||
, "Table with normal empty cells" =:
|
||||
T.unlines [ "\\begin{tabular}{|r|r|r|}"
|
||||
, "A & & B \\\\"
|
||||
, " & C &"
|
||||
, "\\end{tabular}"
|
||||
] =?>
|
||||
table emptyCaption
|
||||
(replicate 3 (AlignRight, ColWidthDefault))
|
||||
(TableHead nullAttr [])
|
||||
[TableBody nullAttr 0 []
|
||||
[Row nullAttr [ simpleCell (plain "A")
|
||||
, emptyCell
|
||||
, simpleCell (plain "B")
|
||||
]
|
||||
,Row nullAttr [ emptyCell
|
||||
, simpleCell (plain "C")
|
||||
, emptyCell
|
||||
]]]
|
||||
(TableFoot nullAttr [])
|
||||
]
|
||||
|
||||
, testGroup "citations"
|
||||
|
|
Loading…
Reference in a new issue