LaTeX writer: support colspans and rowspans in tables. (#6950)

Note that the multirow package is needed for rowspans.
It is included in the latex template under a variable,
so that it won't be used unless needed for a table.
This commit is contained in:
Albert Krewinkel 2020-12-21 03:04:54 +01:00 committed by GitHub
parent 7e98562c04
commit 8f402beab9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 324 additions and 95 deletions

View file

@ -255,6 +255,9 @@ $highlighting-macros$
$endif$
$if(tables)$
\usepackage{longtable,booktabs,array}
$if(multirow)$
\usepackage{multirow}
$endif$
\usepackage{calc} % for calculating minipage widths
$if(beamer)$
\usepackage{caption}

View file

@ -299,6 +299,7 @@ extra-source-files:
test/tables.xwiki
test/tables/*.html4
test/tables/*.html5
test/tables/*.latex
test/tables/*.native
test/tables/*.jats_archiving
test/testsuite.txt

View file

@ -1,8 +1,12 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE UndecidableInstances #-}
{- |
Module : Text.Pandoc.Writers.AnnotatedTable
@ -45,6 +49,7 @@ import Data.Generics ( Data
import Data.List.NonEmpty ( NonEmpty(..) )
import GHC.Generics ( Generic )
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Walk ( Walkable (..) )
-- | An annotated table type, corresponding to the Pandoc 'B.Table'
-- constructor and the HTML @\<table\>@ element. It records the data
@ -298,3 +303,21 @@ fromBodyRow (BodyRow attr _ rh rb) =
fromCell :: Cell -> B.Cell
fromCell (Cell _ _ c) = c
--
-- Instances
--
instance Walkable a B.Cell => Walkable a Cell where
walkM f (Cell colspecs colnum cell) =
Cell colspecs colnum <$> walkM f cell
query f (Cell _colspecs _colnum cell) = query f cell
instance Walkable a B.Cell => Walkable a HeaderRow where
walkM f (HeaderRow attr rownum cells) =
HeaderRow attr rownum <$> walkM f cells
query f (HeaderRow _attr _rownum cells) = query f cells
instance Walkable a B.Cell => Walkable a TableHead where
walkM f (TableHead attr rows) =
TableHead attr <$> walkM f rows
query f (TableHead _attr rows) = query f rows

View file

@ -48,6 +48,7 @@ import Text.Pandoc.Writers.LaTeX.Types (LW, WriterState (..), startingState)
import Text.Pandoc.Writers.Shared
import Text.Printf (printf)
import qualified Data.Text.Normalize as Normalize
import qualified Text.Pandoc.Writers.AnnotatedTable as Ann
-- | Convert Pandoc to LaTeX.
writeLaTeX :: PandocMonad m => WriterOptions -> Pandoc -> m Text
@ -154,6 +155,7 @@ pandocToLaTeX options (Pandoc meta blocks) = do
defField "documentclass" documentClass $
defField "verbatim-in-note" (stVerbInNote st) $
defField "tables" (stTable st) $
defField "multirow" (stMultiRow st) $
defField "strikeout" (stStrikeout st) $
defField "url" (stUrl st) $
defField "numbersections" (writerNumberSections options) $
@ -716,9 +718,9 @@ blockToLaTeX (Header level (id',classes,_) lst) = do
hdr <- sectionHeader classes id' level lst
modify $ \s -> s{stInHeading = False}
return hdr
blockToLaTeX (Table _ blkCapt specs thead tbodies tfoot) =
blockToLaTeX (Table attr blkCapt specs thead tbodies tfoot) =
tableToLaTeX inlineListToLaTeX blockListToLaTeX
blkCapt specs thead tbodies tfoot
(Ann.toTable attr blkCapt specs thead tbodies tfoot)
blockListToLaTeX :: PandocMonad m => [Block] -> LW m (Doc Text)
blockListToLaTeX lst =

View file

@ -16,6 +16,7 @@ module Text.Pandoc.Writers.LaTeX.Table
) where
import Control.Monad.State.Strict
import Data.List (intersperse)
import Data.List.NonEmpty (NonEmpty ((:|)))
import Data.Text (Text)
import qualified Data.Text as T
import Text.Pandoc.Class.PandocMonad (PandocMonad)
@ -23,102 +24,196 @@ import Text.Pandoc.Definition
import Text.DocLayout
( Doc, braces, cr, empty, hcat, hsep, isEmpty, literal, nest
, text, vcat, ($$) )
import Text.Pandoc.Shared (splitBy)
import Text.Pandoc.Shared (blocksToInlines, splitBy, tshow)
import Text.Pandoc.Walk (walk)
import Text.Pandoc.Writers.Shared (toLegacyTable)
import Text.Pandoc.Writers.LaTeX.Caption (getCaption)
import Text.Pandoc.Writers.LaTeX.Notes (notesToLaTeX)
import Text.Pandoc.Writers.LaTeX.Types
( LW, WriterState (stBeamer, stExternalNotes, stInMinipage, stNotes, stTable) )
( LW, WriterState (stBeamer, stExternalNotes, stInMinipage, stMultiRow
, stNotes, stTable) )
import Text.Printf (printf)
import qualified Text.Pandoc.Builder as B
import qualified Text.Pandoc.Writers.AnnotatedTable as Ann
tableToLaTeX :: PandocMonad m
=> ([Inline] -> LW m (Doc Text))
-> ([Block] -> LW m (Doc Text))
-> Caption -> [ColSpec] -> TableHead -> [TableBody] -> TableFoot
-> Ann.Table
-> LW m (Doc Text)
tableToLaTeX inlnsToLaTeX blksToLaTeX blkCapt specs thead tbody tfoot = do
let (caption, aligns, widths, heads, rows) =
toLegacyTable blkCapt specs thead tbody tfoot
-- simple tables have to have simple cells:
let isSimple = \case
[Plain _] -> True
[Para _] -> True
[] -> True
_ -> False
let widths' = if all (== 0) widths && not (all (all isSimple) rows)
then replicate (length aligns)
(1 / fromIntegral (length aligns))
else widths
(captionText, captForLof, captNotes) <- getCaption inlnsToLaTeX False caption
let toHeaders hs = do contents <- tableRowToLaTeX blksToLaTeX True aligns hs
return ("\\toprule" $$ contents $$ "\\midrule")
tableToLaTeX inlnsToLaTeX blksToLaTeX tbl = do
let (Ann.Table _attr caption _specs thead tbodies tfoot) = tbl
CaptionDocs capt captNotes <- captionToLaTeX inlnsToLaTeX caption
let removeNote (Note _) = Span ("", [], []) []
removeNote x = x
firsthead <- if isEmpty captionText || all null heads
then return empty
else ($$ text "\\endfirsthead") <$> toHeaders heads
head' <- if all null heads
then return "\\toprule"
-- avoid duplicate notes in head and firsthead:
else toHeaders (if isEmpty firsthead
then heads
else walk removeNote heads)
let capt = if isEmpty captionText
then empty
else "\\caption" <> captForLof <> braces captionText
<> "\\tabularnewline"
rows' <- mapM (tableRowToLaTeX blksToLaTeX False aligns) rows
let colDescriptors =
(if all (== 0) widths'
then hcat . map literal
else (\xs -> cr <> nest 2 (vcat $ map literal xs))) $
zipWith (toColDescriptor (length widths')) aligns widths'
firsthead <- if isEmpty capt || isEmptyHead thead
then return empty
else ($$ text "\\endfirsthead") <$>
headToLaTeX blksToLaTeX thead
head' <- if isEmptyHead thead
then return "\\toprule"
-- avoid duplicate notes in head and firsthead:
else headToLaTeX blksToLaTeX
(if isEmpty firsthead
then thead
else walk removeNote thead)
rows' <- mapM (rowToLaTeX blksToLaTeX BodyCell) $
mconcat (map bodyRows tbodies) <> footRows tfoot
modify $ \s -> s{ stTable = True }
notes <- notesToLaTeX <$> gets stNotes
return $ "\\begin{longtable}[]" <>
braces ("@{}" <> colDescriptors <> "@{}")
-- the @{} removes extra space at beginning and end
$$ capt
$$ firsthead
$$ head'
$$ "\\endhead"
$$ vcat rows'
$$ "\\bottomrule"
$$ "\\end{longtable}"
$$ captNotes
$$ notes
return
$ "\\begin{longtable}[]" <>
braces ("@{}" <> colDescriptors tbl <> "@{}")
-- the @{} removes extra space at beginning and end
$$ capt
$$ firsthead
$$ head'
$$ "\\endhead"
$$ vcat rows'
$$ "\\bottomrule"
$$ "\\end{longtable}"
$$ captNotes
$$ notes
toColDescriptor :: Int -> Alignment -> Double -> Text
toColDescriptor _numcols align 0 =
case align of
AlignLeft -> "l"
AlignRight -> "r"
AlignCenter -> "c"
AlignDefault -> "l"
toColDescriptor numcols align width =
T.pack $ printf
">{%s\\arraybackslash}p{(\\columnwidth - %d\\tabcolsep) * \\real{%0.2f}}"
align'
((numcols - 1) * 2)
width
where
align' :: String
align' = case align of
AlignLeft -> "\\raggedright"
AlignRight -> "\\raggedleft"
AlignCenter -> "\\centering"
AlignDefault -> "\\raggedright"
-- | Creates column descriptors for the table.
colDescriptors :: Ann.Table -> Doc Text
colDescriptors (Ann.Table _attr _caption specs thead tbodies tfoot) =
let (aligns, widths) = unzip specs
tableRowToLaTeX :: PandocMonad m
=> ([Block] -> LW m (Doc Text))
-> Bool
-> [Alignment]
-> [[Block]]
-> LW m (Doc Text)
tableRowToLaTeX blockListToLaTeX header aligns cols = do
cells <- mapM (tableCellToLaTeX blockListToLaTeX header) $ zip aligns cols
return $ hsep (intersperse "&" cells) <> " \\\\ \\addlinespace"
defaultWidthsOnly = all (== ColWidthDefault) widths
isSimpleTable = all (all isSimpleCell) $ mconcat
[ headRows thead
, concatMap bodyRows tbodies
, footRows tfoot
]
relativeWidths = if defaultWidthsOnly
then replicate (length specs)
(1 / fromIntegral (length specs))
else map toRelWidth widths
in if defaultWidthsOnly && isSimpleTable
then hcat $ map (literal . colAlign) aligns
else (cr <>) . nest 2 . vcat . map literal $
zipWith (toColDescriptor (length specs))
aligns
relativeWidths
where
toColDescriptor :: Int -> Alignment -> Double -> Text
toColDescriptor numcols align width =
T.pack $ printf
">{%s\\arraybackslash}p{(\\columnwidth - %d\\tabcolsep) * \\real{%0.2f}}"
(T.unpack (alignCommand align))
((numcols - 1) * 2)
width
isSimpleCell (Ann.Cell _ _ (Cell _attr _align _rowspan _colspan blocks)) =
case blocks of
[Para _] -> True
[Plain _] -> True
[] -> True
_ -> False
toRelWidth ColWidthDefault = 0
toRelWidth (ColWidth w) = w
alignCommand :: Alignment -> Text
alignCommand = \case
AlignLeft -> "\\raggedright"
AlignRight -> "\\raggedleft"
AlignCenter -> "\\centering"
AlignDefault -> "\\raggedright"
colAlign :: Alignment -> Text
colAlign = \case
AlignLeft -> "l"
AlignRight -> "r"
AlignCenter -> "c"
AlignDefault -> "l"
data CaptionDocs =
CaptionDocs
{ captionCommand :: Doc Text
, captionNotes :: Doc Text
}
captionToLaTeX :: PandocMonad m
=> ([Inline] -> LW m (Doc Text))
-> Caption
-> LW m CaptionDocs
captionToLaTeX inlnsToLaTeX (Caption _maybeShort longCaption) = do
let caption = blocksToInlines longCaption
(captionText, captForLof, captNotes) <- getCaption inlnsToLaTeX False caption
return $ CaptionDocs
{ captionNotes = captNotes
, captionCommand = if isEmpty captionText
then empty
else "\\caption" <> captForLof <>
braces captionText <> "\\tabularnewline"
}
type BlocksWriter m = [Block] -> LW m (Doc Text)
headToLaTeX :: PandocMonad m
=> BlocksWriter m
-> Ann.TableHead
-> LW m (Doc Text)
headToLaTeX blocksWriter (Ann.TableHead _attr headerRows) = do
rowsContents <- mapM (rowToLaTeX blocksWriter HeaderCell . headerRowCells)
headerRows
return ("\\toprule" $$ vcat rowsContents $$ "\\midrule")
-- | Converts a row of table cells into a LaTeX row.
rowToLaTeX :: PandocMonad m
=> BlocksWriter m
-> CellType
-> [Ann.Cell]
-> LW m (Doc Text)
rowToLaTeX blocksWriter celltype row = do
cellsDocs <- mapM (cellToLaTeX blocksWriter celltype) (fillRow row)
return $ hsep (intersperse "&" cellsDocs) <> " \\\\ \\addlinespace"
-- | Pads row with empty cells to adjust for rowspans above this row.
fillRow :: [Ann.Cell] -> [Ann.Cell]
fillRow = go 0
where
go _ [] = []
go n (acell@(Ann.Cell _spec (Ann.ColNumber colnum) cell):cells) =
let (Cell _ _ _ (ColSpan colspan) _) = cell
in map mkEmptyCell [n .. colnum - 1] ++
acell : go (colnum + colspan) cells
mkEmptyCell :: Int -> Ann.Cell
mkEmptyCell colnum =
Ann.Cell ((AlignDefault, ColWidthDefault):|[])
(Ann.ColNumber colnum)
B.emptyCell
isEmptyHead :: Ann.TableHead -> Bool
isEmptyHead (Ann.TableHead _attr []) = True
isEmptyHead (Ann.TableHead _attr rows) = all (null . headerRowCells) rows
-- | Gets all cells in a header row.
headerRowCells :: Ann.HeaderRow -> [Ann.Cell]
headerRowCells (Ann.HeaderRow _attr _rownum cells) = cells
-- | Gets all cells in a body row.
bodyRowCells :: Ann.BodyRow -> [Ann.Cell]
bodyRowCells (Ann.BodyRow _attr _rownum rowhead cells) = rowhead <> cells
-- | Gets a list of rows of the table body, where a row is a simple
-- list of cells.
bodyRows :: Ann.TableBody -> [[Ann.Cell]]
bodyRows (Ann.TableBody _attr _rowheads headerRows rows) =
map headerRowCells headerRows <> map bodyRowCells rows
-- | Gets a list of rows of the table head, where a row is a simple
-- list of cells.
headRows :: Ann.TableHead -> [[Ann.Cell]]
headRows (Ann.TableHead _attr rows) = map headerRowCells rows
-- | Gets a list of rows from the foot, where a row is a simple list
-- of cells.
footRows :: Ann.TableFoot -> [[Ann.Cell]]
footRows (Ann.TableFoot _attr rows) = map headerRowCells rows
-- For simple latex tables (without minipages or parboxes),
-- we need to go to some lengths to get line breaks working:
@ -144,11 +239,14 @@ displayMathToInline :: Inline -> Inline
displayMathToInline (Math DisplayMath x) = Math InlineMath x
displayMathToInline x = x
tableCellToLaTeX :: PandocMonad m
=> ([Block] -> LW m (Doc Text))
-> Bool -> (Alignment, [Block])
-> LW m (Doc Text)
tableCellToLaTeX blockListToLaTeX header (align, blocks) = do
cellToLaTeX :: PandocMonad m
=> BlocksWriter m
-> CellType
-> Ann.Cell
-> LW m (Doc Text)
cellToLaTeX blockListToLaTeX celltype annotatedCell = do
let (Ann.Cell _specs _colnum cell) = annotatedCell
let (Cell _attr align rowspan colspan blocks) = cell
beamer <- gets stBeamer
externalNotes <- gets stExternalNotes
inMinipage <- gets stInMinipage
@ -167,15 +265,30 @@ tableCellToLaTeX blockListToLaTeX header (align, blocks) = do
modify $ \st -> st{ stInMinipage = True }
cellContents <- blockListToLaTeX blocks
modify $ \st -> st{ stInMinipage = inMinipage }
let valign = text $ if header then "[b]" else "[t]"
let halign = case align of
AlignLeft -> "\\raggedright"
AlignRight -> "\\raggedleft"
AlignCenter -> "\\centering"
AlignDefault -> "\\raggedright"
let valign = text $ case celltype of
HeaderCell -> "[b]"
BodyCell -> "[t]"
let halign = literal $ alignCommand align
return $ "\\begin{minipage}" <> valign <>
braces "\\linewidth" <> halign <> cr <>
cellContents <> cr <>
"\\end{minipage}"
modify $ \st -> st{ stExternalNotes = externalNotes }
return result
when (rowspan /= RowSpan 1) $
modify (\st -> st{ stMultiRow = True })
let inMultiColumn x = case colspan of
(ColSpan 1) -> x
(ColSpan n) -> "\\multicolumn"
<> braces (literal (tshow n))
<> braces (literal $ colAlign align)
<> braces x
let inMultiRow x = case rowspan of
(RowSpan 1) -> x
(RowSpan n) -> let nrows = literal (tshow n)
in "\\multirow" <> braces nrows
<> braces "*" <> braces x
return . inMultiColumn . inMultiRow $ result
data CellType
= HeaderCell
| BodyCell

View file

@ -31,6 +31,7 @@ data WriterState =
-- be parameter
, stVerbInNote :: Bool -- ^ true if document has verbatim text in note
, stTable :: Bool -- ^ true if document has a table
, stMultiRow :: Bool -- ^ true if document has multirow cells
, stStrikeout :: Bool -- ^ true if document has strikeout
, stUrl :: Bool -- ^ true if document has visible URL link
, stGraphics :: Bool -- ^ true if document contains images
@ -61,6 +62,7 @@ startingState options =
, stOptions = options
, stVerbInNote = False
, stTable = False
, stMultiRow = False
, stStrikeout = False
, stUrl = False
, stGraphics = False

View file

@ -58,7 +58,7 @@ tests pandocPath =
]
, testGroup "latex"
[ testGroup "writer"
(writerTests' "latex" ++ lhsWriterTests' "latex")
(extWriterTests' "latex" ++ lhsWriterTests' "latex")
, testGroup "reader"
[ test' "basic" ["-r", "latex+raw_tex", "-w", "native", "-s"]
"latex-reader.latex" "latex-reader.native"

26
test/tables/nordics.latex Normal file
View file

@ -0,0 +1,26 @@
\begin{longtable}[]{@{}
>{\centering\arraybackslash}p{(\columnwidth - 6\tabcolsep) * \real{0.30}}
>{\raggedright\arraybackslash}p{(\columnwidth - 6\tabcolsep) * \real{0.30}}
>{\raggedright\arraybackslash}p{(\columnwidth - 6\tabcolsep) * \real{0.20}}
>{\raggedright\arraybackslash}p{(\columnwidth - 6\tabcolsep) * \real{0.20}}@{}}
\caption{States belonging to the \emph{Nordics.}}\tabularnewline
\toprule
Name & Capital & \vtop{\hbox{\strut Population}\hbox{\strut (in 2018)}} &
\vtop{\hbox{\strut Area}\hbox{\strut (in
km\textsuperscript{2})}} \\ \addlinespace
\midrule
\endfirsthead
\toprule
Name & Capital & \vtop{\hbox{\strut Population}\hbox{\strut (in 2018)}} &
\vtop{\hbox{\strut Area}\hbox{\strut (in
km\textsuperscript{2})}} \\ \addlinespace
\midrule
\endhead
Denmark & Copenhagen & 5,809,502 & 43,094 \\ \addlinespace
Finland & Helsinki & 5,537,364 & 338,145 \\ \addlinespace
Iceland & Reykjavik & 343,518 & 103,000 \\ \addlinespace
Norway & Oslo & 5,372,191 & 323,802 \\ \addlinespace
Sweden & Stockholm & 10,313,447 & 450,295 \\ \addlinespace
Total & & 27,376,022 & 1,258,336 \\ \addlinespace
\bottomrule
\end{longtable}

36
test/tables/planets.latex Normal file
View file

@ -0,0 +1,36 @@
\begin{longtable}[]{@{}cclrrrrrrrrl@{}}
\caption{Data about the planets of our solar system.}\tabularnewline
\toprule
\multicolumn{2}{l}{} & Name & Mass (10\^{}24kg) & Diameter (km) & Density
(kg/m\^{}3) & Gravity (m/s\^{}2) & Length of day (hours) & Distance from Sun
(10\^{}6km) & Mean temperature (C) & Number of moons & Notes \\ \addlinespace
\midrule
\endfirsthead
\toprule
\multicolumn{2}{l}{} & Name & Mass (10\^{}24kg) & Diameter (km) & Density
(kg/m\^{}3) & Gravity (m/s\^{}2) & Length of day (hours) & Distance from Sun
(10\^{}6km) & Mean temperature (C) & Number of moons & Notes \\ \addlinespace
\midrule
\endhead
\multicolumn{2}{l}{\multirow{4}{*}{Terrestrial planets}} & Mercury & 0.330 &
4,879 & 5427 & 3.7 & 4222.6 & 57.9 & 167 & 0 & Closest to the
Sun \\ \addlinespace
& & Venus & 4.87 & 12,104 & 5243 & 8.9 & 2802.0 & 108.2 & 464 & 0
& \\ \addlinespace
& & Earth & 5.97 & 12,756 & 5514 & 9.8 & 24.0 & 149.6 & 15 & 1 & Our
world \\ \addlinespace
& & Mars & 0.642 & 6,792 & 3933 & 3.7 & 24.7 & 227.9 & -65 & 2 & The red
planet \\ \addlinespace
\multirow{4}{*}{Jovian planets} & \multirow{2}{*}{Gas giants} & Jupiter & 1898
& 142,984 & 1326 & 23.1 & 9.9 & 778.6 & -110 & 67 & The largest
planet \\ \addlinespace
& & Saturn & 568 & 120,536 & 687 & 9.0 & 10.7 & 1433.5 & -140 & 62
& \\ \addlinespace
& \multirow{2}{*}{Ice giants} & Uranus & 86.8 & 51,118 & 1271 & 8.7 & 17.2 &
2872.5 & -195 & 27 & \\ \addlinespace
& & Neptune & 102 & 49,528 & 1638 & 11.0 & 16.1 & 4495.1 & -200 & 14
& \\ \addlinespace
\multicolumn{2}{l}{Dwarf planets} & Pluto & 0.0146 & 2,370 & 2095 & 0.7 &
153.3 & 5906.4 & -225 & 5 & Declassified as a planet in 2006. \\ \addlinespace
\bottomrule
\end{longtable}

View file

@ -0,0 +1,23 @@
\begin{longtable}[]{@{}
>{\raggedright\arraybackslash}p{(\columnwidth - 2\tabcolsep) * \real{0.50}}
>{\raggedright\arraybackslash}p{(\columnwidth - 2\tabcolsep) * \real{0.50}}@{}}
\caption{List of Students}\tabularnewline
\toprule
Student ID & Name \\ \addlinespace
\midrule
\endfirsthead
\toprule
Student ID & Name \\ \addlinespace
\midrule
\endhead
\multicolumn{2}{l}{Computer Science} \\ \addlinespace
3741255 & Jones, Martha \\ \addlinespace
4077830 & Pierce, Benjamin \\ \addlinespace
5151701 & Kirk, James \\ \addlinespace
\multicolumn{2}{l}{Russian Literature} \\ \addlinespace
3971244 & Nim, Victor \\ \addlinespace
\multicolumn{2}{l}{Astrophysics} \\ \addlinespace
4100332 & Petrov, Alexandra \\ \addlinespace
4100332 & Toyota, Hiroko \\ \addlinespace
\bottomrule
\end{longtable}