Factor out T.P.Readers.LaTeX.Table.
This commit is contained in:
parent
925815bb33
commit
08231f5cdd
4 changed files with 412 additions and 363 deletions
|
@ -631,6 +631,7 @@ library
|
|||
Text.Pandoc.Readers.LaTeX.Lang,
|
||||
Text.Pandoc.Readers.LaTeX.SIunitx,
|
||||
Text.Pandoc.Readers.LaTeX.Accent,
|
||||
Text.Pandoc.Readers.LaTeX.Table,
|
||||
Text.Pandoc.Readers.Odt.Base,
|
||||
Text.Pandoc.Readers.Odt.Namespaces,
|
||||
Text.Pandoc.Readers.Odt.StyleReader,
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
{-# LANGUAGE BangPatterns #-}
|
||||
{-# LANGUAGE CPP #-}
|
||||
{-# LANGUAGE FlexibleInstances #-}
|
||||
{-# LANGUAGE MultiParamTypeClasses #-}
|
||||
|
@ -32,7 +31,6 @@ import Control.Monad
|
|||
import Control.Monad.Except (throwError)
|
||||
import Data.Char (isDigit, isLetter, toUpper, chr)
|
||||
import Data.Default
|
||||
import Data.Functor (($>))
|
||||
import Data.List (intercalate)
|
||||
import qualified Data.Map as M
|
||||
import Data.Maybe (fromMaybe, maybeToList)
|
||||
|
@ -58,6 +56,7 @@ import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
|
|||
ArgSpec (..), Tok (..), TokType (..))
|
||||
import Text.Pandoc.Readers.LaTeX.Parsing
|
||||
import Text.Pandoc.Readers.LaTeX.Accent (accentCommands)
|
||||
import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments)
|
||||
import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47,
|
||||
babelLangToBCP47)
|
||||
import Text.Pandoc.Readers.LaTeX.SIunitx
|
||||
|
@ -551,12 +550,8 @@ inlineCommand' = try $ do
|
|||
<|> ignore rawcommand
|
||||
lookupListDefault raw names inlineCommands
|
||||
|
||||
|
||||
tok :: PandocMonad m => LP m Inlines
|
||||
tok = try $ spaces >> grouped inline <|> inlineCommand' <|> singleChar'
|
||||
where singleChar' = do
|
||||
Tok _ _ t <- singleChar
|
||||
return $ str t
|
||||
tok = tokWith inline
|
||||
|
||||
opt :: PandocMonad m => LP m Inlines
|
||||
opt = do
|
||||
|
@ -1118,12 +1113,6 @@ treatAsInline = Set.fromList
|
|||
, "pagebreak"
|
||||
]
|
||||
|
||||
label :: PandocMonad m => LP m ()
|
||||
label = do
|
||||
controlSeq "label"
|
||||
t <- braced
|
||||
updateState $ \st -> st{ sLastLabel = Just $ untokenize t }
|
||||
|
||||
dolabel :: PandocMonad m => LP m Inlines
|
||||
dolabel = do
|
||||
v <- braced
|
||||
|
@ -1421,13 +1410,6 @@ bracketedNum = do
|
|||
Just i -> return i
|
||||
_ -> return 0
|
||||
|
||||
setCaption :: PandocMonad m => LP m ()
|
||||
setCaption = try $ do
|
||||
skipopts
|
||||
ils <- tok
|
||||
optional $ try $ spaces *> label
|
||||
updateState $ \st -> st{ sCaption = Just ils }
|
||||
|
||||
looseItem :: PandocMonad m => LP m Blocks
|
||||
looseItem = do
|
||||
inListItem <- sInListItem <$> getState
|
||||
|
@ -1441,10 +1423,6 @@ epigraph = do
|
|||
p2 <- grouped block
|
||||
return $ divWith ("", ["epigraph"], []) (p1 <> p2)
|
||||
|
||||
resetCaption :: PandocMonad m => LP m ()
|
||||
resetCaption = updateState $ \st -> st{ sCaption = Nothing
|
||||
, sLastLabel = Nothing }
|
||||
|
||||
section :: PandocMonad m => Attr -> Int -> LP m Blocks
|
||||
section (ident, classes, kvs) lvl = do
|
||||
skipopts
|
||||
|
@ -1585,7 +1563,7 @@ blockCommands = M.fromList
|
|||
, ("item", looseItem)
|
||||
, ("documentclass", skipopts *> braced *> preamble)
|
||||
, ("centerline", para . trimInlines <$> (skipopts *> tok))
|
||||
, ("caption", mempty <$ setCaption)
|
||||
, ("caption", mempty <$ setCaption inline)
|
||||
, ("bibliography", mempty <$ (skipopts *> braced >>=
|
||||
addMeta "bibliography" . splitBibs . untokenize))
|
||||
, ("addbibresource", mempty <$ (skipopts *> braced >>=
|
||||
|
@ -1623,7 +1601,8 @@ blockCommands = M.fromList
|
|||
|
||||
|
||||
environments :: PandocMonad m => M.Map Text (LP m Blocks)
|
||||
environments = M.fromList
|
||||
environments = M.union (tableEnvironments blocks inline) $
|
||||
M.fromList
|
||||
[ ("document", env "document" blocks <* skipMany anyTok)
|
||||
, ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract"))
|
||||
, ("sloppypar", env "sloppypar" blocks)
|
||||
|
@ -1633,13 +1612,6 @@ environments = M.fromList
|
|||
, ("figure", env "figure" $ skipopts *> figure)
|
||||
, ("subfigure", env "subfigure" $ skipopts *> tok *> figure)
|
||||
, ("center", divWith ("", ["center"], []) <$> env "center" blocks)
|
||||
, ("longtable", env "longtable" $
|
||||
resetCaption *> simpTable "longtable" False >>= addTableCaption)
|
||||
, ("table", env "table" $
|
||||
skipopts *> resetCaption *> blocks >>= addTableCaption)
|
||||
, ("tabular*", env "tabular*" $ simpTable "tabular*" True)
|
||||
, ("tabularx", env "tabularx" $ simpTable "tabularx" True)
|
||||
, ("tabular", env "tabular" $ simpTable "tabular" False)
|
||||
, ("quote", blockQuote <$> env "quote" blocks)
|
||||
, ("quotation", blockQuote <$> env "quotation" blocks)
|
||||
, ("verse", blockQuote <$> env "verse" blocks)
|
||||
|
@ -1805,9 +1777,6 @@ italicize (Para ils) = Para [Emph ils]
|
|||
italicize (Plain ils) = Plain [Emph ils]
|
||||
italicize x = x
|
||||
|
||||
env :: PandocMonad m => Text -> LP m a -> LP m a
|
||||
env name p = p <* end_ name
|
||||
|
||||
rawEnv :: PandocMonad m => Text -> LP m Blocks
|
||||
rawEnv name = do
|
||||
exts <- getOption readerExtensions
|
||||
|
@ -2045,333 +2014,6 @@ orderedList' = try $ do
|
|||
bs <- listenv "enumerate" (many item)
|
||||
return $ orderedListWith (start, style, delim) bs
|
||||
|
||||
-- tables
|
||||
|
||||
hline :: PandocMonad m => LP m ()
|
||||
hline = try $ do
|
||||
spaces
|
||||
controlSeq "hline" <|>
|
||||
-- booktabs rules:
|
||||
controlSeq "toprule" <|>
|
||||
controlSeq "bottomrule" <|>
|
||||
controlSeq "midrule" <|>
|
||||
controlSeq "endhead" <|>
|
||||
controlSeq "endfirsthead"
|
||||
spaces
|
||||
optional opt
|
||||
return ()
|
||||
|
||||
lbreak :: PandocMonad m => LP m Tok
|
||||
lbreak = (controlSeq "\\" <|> controlSeq "tabularnewline")
|
||||
<* skipopts <* spaces
|
||||
|
||||
amp :: PandocMonad m => LP m Tok
|
||||
amp = symbol '&'
|
||||
|
||||
-- Split a Word into individual Symbols (for parseAligns)
|
||||
splitWordTok :: PandocMonad m => LP m ()
|
||||
splitWordTok = do
|
||||
inp <- getInput
|
||||
case inp of
|
||||
(Tok spos Word t : rest) ->
|
||||
setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) <> rest
|
||||
_ -> return ()
|
||||
|
||||
parseAligns :: PandocMonad m => LP m [(Alignment, ColWidth, ([Tok], [Tok]))]
|
||||
parseAligns = try $ do
|
||||
let maybeBar = skipMany
|
||||
(try $ sp *> (() <$ symbol '|' <|> () <$ (symbol '@' >> braced)))
|
||||
let cAlign = AlignCenter <$ symbol 'c'
|
||||
let lAlign = AlignLeft <$ symbol 'l'
|
||||
let rAlign = AlignRight <$ symbol 'r'
|
||||
let parAlign = AlignLeft <$ symbol 'p'
|
||||
-- aligns from tabularx
|
||||
let xAlign = AlignLeft <$ symbol 'X'
|
||||
let mAlign = AlignLeft <$ symbol 'm'
|
||||
let bAlign = AlignLeft <$ symbol 'b'
|
||||
let alignChar = splitWordTok *> ( cAlign <|> lAlign <|> rAlign <|> parAlign
|
||||
<|> xAlign <|> mAlign <|> bAlign )
|
||||
let alignPrefix = symbol '>' >> braced
|
||||
let alignSuffix = symbol '<' >> braced
|
||||
let colWidth = try $ do
|
||||
symbol '{'
|
||||
ds <- trim . untokenize <$> manyTill anyTok (controlSeq "linewidth")
|
||||
spaces
|
||||
symbol '}'
|
||||
return $ safeRead ds
|
||||
let alignSpec = do
|
||||
pref <- option [] alignPrefix
|
||||
spaces
|
||||
al <- alignChar
|
||||
width <- colWidth <|> option Nothing (do s <- untokenize <$> braced
|
||||
pos <- getPosition
|
||||
report $ SkippedContent s pos
|
||||
return Nothing)
|
||||
spaces
|
||||
suff <- option [] alignSuffix
|
||||
return (al, width, (pref, suff))
|
||||
let starAlign = do -- '*{2}{r}' == 'rr', we just expand like a macro
|
||||
symbol '*'
|
||||
spaces
|
||||
ds <- trim . untokenize <$> braced
|
||||
spaces
|
||||
spec <- braced
|
||||
case safeRead ds of
|
||||
Just n ->
|
||||
getInput >>= setInput . (mconcat (replicate n spec) ++)
|
||||
Nothing -> Prelude.fail $ "Could not parse " <> T.unpack ds <> " as number"
|
||||
bgroup
|
||||
spaces
|
||||
maybeBar
|
||||
aligns' <- many $ try $ spaces >> optional starAlign >>
|
||||
(alignSpec <* maybeBar)
|
||||
spaces
|
||||
egroup
|
||||
spaces
|
||||
return $ map toSpec aligns'
|
||||
where
|
||||
toColWidth (Just w) | w > 0 = ColWidth w
|
||||
toColWidth _ = ColWidthDefault
|
||||
toSpec (x, y, z) = (x, toColWidth y, z)
|
||||
|
||||
-- N.B. this parser returns a Row that may have erroneous empty cells
|
||||
-- in it. See the note above fixTableHead for details.
|
||||
parseTableRow :: PandocMonad m
|
||||
=> Text -- ^ table environment name
|
||||
-> [([Tok], [Tok])] -- ^ pref/suffixes
|
||||
-> LP m Row
|
||||
parseTableRow envname prefsufs = do
|
||||
notFollowedBy (spaces *> end_ envname)
|
||||
-- add prefixes and suffixes in token stream:
|
||||
let celltoks (pref, suff) = do
|
||||
prefpos <- getPosition
|
||||
contents <- mconcat <$>
|
||||
many ( snd <$> withRaw (controlSeq "parbox" >> parbox) -- #5711
|
||||
<|>
|
||||
snd <$> withRaw (inlineEnvironment <|> dollarsMath)
|
||||
<|>
|
||||
(do notFollowedBy
|
||||
(() <$ amp <|> () <$ lbreak <|> end_ envname)
|
||||
count 1 anyTok) )
|
||||
|
||||
suffpos <- getPosition
|
||||
option [] (count 1 amp)
|
||||
return $ map (setpos prefpos) pref ++ contents ++ map (setpos suffpos) suff
|
||||
rawcells <- mapM celltoks prefsufs
|
||||
cells <- mapM (parseFromToks parseTableCell) rawcells
|
||||
spaces
|
||||
return $ Row nullAttr cells
|
||||
|
||||
parseTableCell :: PandocMonad m => LP m Cell
|
||||
parseTableCell = do
|
||||
spaces
|
||||
updateState $ \st -> st{ sInTableCell = True }
|
||||
cell' <- multicolumnCell
|
||||
<|> multirowCell
|
||||
<|> parseSimpleCell
|
||||
<|> parseEmptyCell
|
||||
updateState $ \st -> st{ sInTableCell = False }
|
||||
spaces
|
||||
return cell'
|
||||
where
|
||||
-- The parsing of empty cells is important in LaTeX, especially when dealing
|
||||
-- with multirow/multicolumn. See #6603.
|
||||
parseEmptyCell = spaces $> emptyCell
|
||||
|
||||
cellAlignment :: PandocMonad m => LP m Alignment
|
||||
cellAlignment = skipMany (symbol '|') *> alignment <* skipMany (symbol '|')
|
||||
where
|
||||
alignment = do
|
||||
c <- untoken <$> singleChar
|
||||
return $ case c of
|
||||
"l" -> AlignLeft
|
||||
"r" -> AlignRight
|
||||
"c" -> AlignCenter
|
||||
"*" -> AlignDefault
|
||||
_ -> AlignDefault
|
||||
|
||||
plainify :: Blocks -> Blocks
|
||||
plainify bs = case toList bs of
|
||||
[Para ils] -> plain (fromList ils)
|
||||
_ -> bs
|
||||
|
||||
multirowCell :: PandocMonad m => LP m Cell
|
||||
multirowCell = controlSeq "multirow" >> do
|
||||
-- Full prototype for \multirow macro is:
|
||||
-- \multirow[vpos]{nrows}[bigstruts]{width}[vmove]{text}
|
||||
-- However, everything except `nrows` and `text` make
|
||||
-- sense in the context of the Pandoc AST
|
||||
_ <- optional $ symbol '[' *> cellAlignment <* symbol ']' -- vertical position
|
||||
nrows <- fmap (fromMaybe 1 . safeRead . untokenize) braced
|
||||
_ <- optional $ symbol '[' *> manyTill anyTok (symbol ']') -- bigstrut-related
|
||||
_ <- symbol '{' *> manyTill anyTok (symbol '}') -- Cell width
|
||||
_ <- optional $ symbol '[' *> manyTill anyTok (symbol ']') -- Length used for fine-tuning
|
||||
content <- symbol '{' *> (plainify <$> blocks) <* symbol '}'
|
||||
return $ cell AlignDefault (RowSpan nrows) (ColSpan 1) content
|
||||
|
||||
multicolumnCell :: PandocMonad m => LP m Cell
|
||||
multicolumnCell = controlSeq "multicolumn" >> do
|
||||
span' <- fmap (fromMaybe 1 . safeRead . untokenize) braced
|
||||
alignment <- symbol '{' *> cellAlignment <* symbol '}'
|
||||
|
||||
let singleCell = do
|
||||
content <- plainify <$> blocks
|
||||
return $ cell alignment (RowSpan 1) (ColSpan span') content
|
||||
|
||||
-- Two possible contents: either a \multirow cell, or content.
|
||||
-- E.g. \multicol{1}{c}{\multirow{2}{1em}{content}}
|
||||
-- Note that a \multirow cell can be nested in a \multicolumn,
|
||||
-- but not the other way around. See #6603
|
||||
let nestedCell = do
|
||||
(Cell _ _ (RowSpan rs) _ bs) <- multirowCell
|
||||
return $ cell
|
||||
alignment
|
||||
(RowSpan rs)
|
||||
(ColSpan span')
|
||||
(fromList bs)
|
||||
|
||||
symbol '{' *> (nestedCell <|> singleCell) <* symbol '}'
|
||||
|
||||
-- Parse a simple cell, i.e. not multirow/multicol
|
||||
parseSimpleCell :: PandocMonad m => LP m Cell
|
||||
parseSimpleCell = simpleCell <$> (plainify <$> blocks)
|
||||
|
||||
-- LaTeX tables are stored with empty cells underneath multirow cells
|
||||
-- denoting the grid spaces taken up by them. More specifically, if a
|
||||
-- cell spans m rows, then it will overwrite all the cells in the
|
||||
-- columns it spans for (m-1) rows underneath it, requiring padding
|
||||
-- cells in these places. These padding cells need to be removed for
|
||||
-- proper table reading. See #6603.
|
||||
--
|
||||
-- These fixTable functions do not otherwise fix up malformed
|
||||
-- input tables: that is left to the table builder.
|
||||
fixTableHead :: TableHead -> TableHead
|
||||
fixTableHead (TableHead attr rows) = TableHead attr rows'
|
||||
where
|
||||
rows' = fixTableRows rows
|
||||
|
||||
fixTableBody :: TableBody -> TableBody
|
||||
fixTableBody (TableBody attr rhc th tb)
|
||||
= TableBody attr rhc th' tb'
|
||||
where
|
||||
th' = fixTableRows th
|
||||
tb' = fixTableRows tb
|
||||
|
||||
fixTableRows :: [Row] -> [Row]
|
||||
fixTableRows = fixTableRows' $ repeat Nothing
|
||||
where
|
||||
fixTableRows' oldHang (Row attr cells : rs)
|
||||
= let (newHang, cells') = fixTableRow oldHang cells
|
||||
rs' = fixTableRows' newHang rs
|
||||
in Row attr cells' : rs'
|
||||
fixTableRows' _ [] = []
|
||||
|
||||
-- The overhang is represented as Just (relative cell dimensions) or
|
||||
-- Nothing for an empty grid space.
|
||||
fixTableRow :: [Maybe (ColSpan, RowSpan)] -> [Cell] -> ([Maybe (ColSpan, RowSpan)], [Cell])
|
||||
fixTableRow oldHang cells
|
||||
-- If there's overhang, drop cells until their total width meets the
|
||||
-- width of the occupied grid spaces (or we run out)
|
||||
| (n, prefHang, restHang) <- splitHang oldHang
|
||||
, n > 0
|
||||
= let cells' = dropToWidth getCellW n cells
|
||||
(restHang', cells'') = fixTableRow restHang cells'
|
||||
in (prefHang restHang', cells'')
|
||||
-- Otherwise record the overhang of a pending cell and fix the rest
|
||||
-- of the row
|
||||
| c@(Cell _ _ h w _):cells' <- cells
|
||||
= let h' = max 1 h
|
||||
w' = max 1 w
|
||||
oldHang' = dropToWidth getHangW w' oldHang
|
||||
(newHang, cells'') = fixTableRow oldHang' cells'
|
||||
in (toHang w' h' <> newHang, c : cells'')
|
||||
| otherwise
|
||||
= (oldHang, [])
|
||||
where
|
||||
getCellW (Cell _ _ _ w _) = w
|
||||
getHangW = maybe 1 fst
|
||||
getCS (ColSpan n) = n
|
||||
|
||||
toHang c r
|
||||
| r > 1 = [Just (c, r)]
|
||||
| otherwise = replicate (getCS c) Nothing
|
||||
|
||||
-- Take the prefix of the overhang list representing filled grid
|
||||
-- spaces. Also return the remainder and the length of this prefix.
|
||||
splitHang = splitHang' 0 id
|
||||
|
||||
splitHang' !n l (Just (c, r):xs)
|
||||
= splitHang' (n + c) (l . (toHang c (r-1) ++)) xs
|
||||
splitHang' n l xs = (n, l, xs)
|
||||
|
||||
-- Drop list items until the total width of the dropped items
|
||||
-- exceeds the passed width.
|
||||
dropToWidth _ n l | n < 1 = l
|
||||
dropToWidth wproj n (c:cs) = dropToWidth wproj (n - wproj c) cs
|
||||
dropToWidth _ _ [] = []
|
||||
|
||||
simpTable :: PandocMonad m => Text -> Bool -> LP m Blocks
|
||||
simpTable envname hasWidthParameter = try $ do
|
||||
when hasWidthParameter $ () <$ (spaces >> tok)
|
||||
skipopts
|
||||
colspecs <- parseAligns
|
||||
let (aligns, widths, prefsufs) = unzip3 colspecs
|
||||
optional $ controlSeq "caption" *> setCaption
|
||||
spaces
|
||||
optional label
|
||||
spaces
|
||||
optional lbreak
|
||||
spaces
|
||||
skipMany hline
|
||||
spaces
|
||||
header' <- option [] . try . fmap (:[]) $
|
||||
parseTableRow envname prefsufs <* lbreak <* many1 hline
|
||||
spaces
|
||||
rows <- sepEndBy (parseTableRow envname prefsufs)
|
||||
(lbreak <* optional (skipMany hline))
|
||||
spaces
|
||||
optional $ controlSeq "caption" *> setCaption
|
||||
spaces
|
||||
optional label
|
||||
spaces
|
||||
optional lbreak
|
||||
spaces
|
||||
lookAhead $ controlSeq "end" -- make sure we're at end
|
||||
let th = fixTableHead $ TableHead nullAttr header'
|
||||
let tbs = [fixTableBody $ TableBody nullAttr 0 [] rows]
|
||||
let tf = TableFoot nullAttr []
|
||||
return $ table emptyCaption (zip aligns widths) th tbs tf
|
||||
|
||||
addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
|
||||
addTableCaption = walkM go
|
||||
where go (Table attr c spec th tb tf) = do
|
||||
st <- getState
|
||||
let mblabel = sLastLabel st
|
||||
capt <- case (sCaption st, mblabel) of
|
||||
(Just ils, Nothing) -> return $ caption Nothing (plain ils)
|
||||
(Just ils, Just lab) -> do
|
||||
num <- getNextNumber sLastTableNum
|
||||
setState
|
||||
st{ sLastTableNum = num
|
||||
, sLabels = M.insert lab
|
||||
[Str (renderDottedNum num)]
|
||||
(sLabels st) }
|
||||
return $ caption Nothing (plain ils) -- add number??
|
||||
(Nothing, _) -> return c
|
||||
let attr' = case (attr, mblabel) of
|
||||
((_,classes,kvs), Just ident) ->
|
||||
(ident,classes,kvs)
|
||||
_ -> attr
|
||||
return $ addAttrDiv attr' $ Table nullAttr capt spec th tb tf
|
||||
go x = return x
|
||||
|
||||
-- TODO: For now we add a Div to contain table attributes, since
|
||||
-- most writers don't do anything yet with attributes on Table.
|
||||
-- This can be removed when that changes.
|
||||
addAttrDiv :: Attr -> Block -> Block
|
||||
addAttrDiv ("",[],[]) b = b
|
||||
addAttrDiv attr b = Div attr [b]
|
||||
|
||||
block :: PandocMonad m => LP m Blocks
|
||||
block = do
|
||||
res <- (mempty <$ spaces1)
|
||||
|
|
|
@ -54,6 +54,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing
|
|||
, comment
|
||||
, anyTok
|
||||
, singleChar
|
||||
, tokWith
|
||||
, specialChars
|
||||
, endline
|
||||
, blankline
|
||||
|
@ -80,6 +81,10 @@ module Text.Pandoc.Readers.LaTeX.Parsing
|
|||
, rawopt
|
||||
, overlaySpecification
|
||||
, getNextNumber
|
||||
, label
|
||||
, setCaption
|
||||
, resetCaption
|
||||
, env
|
||||
) where
|
||||
|
||||
import Control.Applicative (many, (<|>))
|
||||
|
@ -914,3 +919,31 @@ getNextNumber getCurrentNum = do
|
|||
Just n -> [n, 1]
|
||||
Nothing -> [1]
|
||||
|
||||
label :: PandocMonad m => LP m ()
|
||||
label = do
|
||||
controlSeq "label"
|
||||
t <- braced
|
||||
updateState $ \st -> st{ sLastLabel = Just $ untokenize t }
|
||||
|
||||
setCaption :: PandocMonad m => LP m Inlines -> LP m ()
|
||||
setCaption inline = try $ do
|
||||
skipopts
|
||||
ils <- tokWith inline
|
||||
optional $ try $ spaces *> label
|
||||
updateState $ \st -> st{ sCaption = Just ils }
|
||||
|
||||
resetCaption :: PandocMonad m => LP m ()
|
||||
resetCaption = updateState $ \st -> st{ sCaption = Nothing
|
||||
, sLastLabel = Nothing }
|
||||
|
||||
env :: PandocMonad m => Text -> LP m a -> LP m a
|
||||
env name p = p <* end_ name
|
||||
|
||||
tokWith :: PandocMonad m => LP m Inlines -> LP m Inlines
|
||||
tokWith inlineParser = try $ spaces >>
|
||||
grouped inlineParser
|
||||
<|> (lookAhead anyControlSeq >> inlineParser)
|
||||
<|> singleChar'
|
||||
where singleChar' = do
|
||||
Tok _ _ t <- singleChar
|
||||
return $ str t
|
||||
|
|
373
src/Text/Pandoc/Readers/LaTeX/Table.hs
Normal file
373
src/Text/Pandoc/Readers/LaTeX/Table.hs
Normal file
|
@ -0,0 +1,373 @@
|
|||
{-# LANGUAGE BangPatterns #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
module Text.Pandoc.Readers.LaTeX.Table
|
||||
( tableEnvironments )
|
||||
where
|
||||
|
||||
import Data.Functor (($>))
|
||||
import Text.Pandoc.Class
|
||||
import Text.Pandoc.Readers.LaTeX.Parsing
|
||||
import Text.Pandoc.Readers.LaTeX.Types
|
||||
import Text.Pandoc.Builder as B
|
||||
import qualified Data.Map as M
|
||||
import Data.Text (Text)
|
||||
import Data.Maybe (fromMaybe)
|
||||
import qualified Data.Text as T
|
||||
import Control.Applicative ((<|>), optional, many)
|
||||
import Control.Monad (when, void)
|
||||
import Text.Pandoc.Shared (safeRead, trim)
|
||||
import Text.Pandoc.Logging (LogMessage(SkippedContent))
|
||||
import Text.Pandoc.Walk (walkM)
|
||||
import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
|
||||
optional, space, spaces, withRaw, (<|>))
|
||||
|
||||
tableEnvironments :: PandocMonad m
|
||||
=> LP m Blocks
|
||||
-> LP m Inlines
|
||||
-> M.Map Text (LP m Blocks)
|
||||
tableEnvironments blocks inline =
|
||||
M.fromList
|
||||
[ ("longtable", env "longtable" $
|
||||
resetCaption *>
|
||||
simpTable blocks inline "longtable" False >>= addTableCaption)
|
||||
, ("table", env "table" $
|
||||
skipopts *> resetCaption *> blocks >>= addTableCaption)
|
||||
, ("tabular*", env "tabular*" $ simpTable blocks inline "tabular*" True)
|
||||
, ("tabularx", env "tabularx" $ simpTable blocks inline "tabularx" True)
|
||||
, ("tabular", env "tabular" $ simpTable blocks inline "tabular" False)
|
||||
]
|
||||
|
||||
hline :: PandocMonad m => LP m ()
|
||||
hline = try $ do
|
||||
spaces
|
||||
controlSeq "hline" <|>
|
||||
-- booktabs rules:
|
||||
controlSeq "toprule" <|>
|
||||
controlSeq "bottomrule" <|>
|
||||
controlSeq "midrule" <|>
|
||||
controlSeq "endhead" <|>
|
||||
controlSeq "endfirsthead"
|
||||
spaces
|
||||
optional rawopt
|
||||
return ()
|
||||
|
||||
lbreak :: PandocMonad m => LP m Tok
|
||||
lbreak = (controlSeq "\\" <|> controlSeq "tabularnewline")
|
||||
<* skipopts <* spaces
|
||||
|
||||
amp :: PandocMonad m => LP m Tok
|
||||
amp = symbol '&'
|
||||
|
||||
-- Split a Word into individual Symbols (for parseAligns)
|
||||
splitWordTok :: PandocMonad m => LP m ()
|
||||
splitWordTok = do
|
||||
inp <- getInput
|
||||
case inp of
|
||||
(Tok spos Word t : rest) ->
|
||||
setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) <> rest
|
||||
_ -> return ()
|
||||
|
||||
parseAligns :: PandocMonad m => LP m [(Alignment, ColWidth, ([Tok], [Tok]))]
|
||||
parseAligns = try $ do
|
||||
let maybeBar = skipMany
|
||||
(try $ sp *> (() <$ symbol '|' <|> () <$ (symbol '@' >> braced)))
|
||||
let cAlign = AlignCenter <$ symbol 'c'
|
||||
let lAlign = AlignLeft <$ symbol 'l'
|
||||
let rAlign = AlignRight <$ symbol 'r'
|
||||
let parAlign = AlignLeft <$ symbol 'p'
|
||||
-- aligns from tabularx
|
||||
let xAlign = AlignLeft <$ symbol 'X'
|
||||
let mAlign = AlignLeft <$ symbol 'm'
|
||||
let bAlign = AlignLeft <$ symbol 'b'
|
||||
let alignChar = splitWordTok *> ( cAlign <|> lAlign <|> rAlign <|> parAlign
|
||||
<|> xAlign <|> mAlign <|> bAlign )
|
||||
let alignPrefix = symbol '>' >> braced
|
||||
let alignSuffix = symbol '<' >> braced
|
||||
let colWidth = try $ do
|
||||
symbol '{'
|
||||
ds <- trim . untokenize <$> manyTill anyTok (controlSeq "linewidth")
|
||||
spaces
|
||||
symbol '}'
|
||||
return $ safeRead ds
|
||||
let alignSpec = do
|
||||
pref <- option [] alignPrefix
|
||||
spaces
|
||||
al <- alignChar
|
||||
width <- colWidth <|> option Nothing (do s <- untokenize <$> braced
|
||||
pos <- getPosition
|
||||
report $ SkippedContent s pos
|
||||
return Nothing)
|
||||
spaces
|
||||
suff <- option [] alignSuffix
|
||||
return (al, width, (pref, suff))
|
||||
let starAlign = do -- '*{2}{r}' == 'rr', we just expand like a macro
|
||||
symbol '*'
|
||||
spaces
|
||||
ds <- trim . untokenize <$> braced
|
||||
spaces
|
||||
spec <- braced
|
||||
case safeRead ds of
|
||||
Just n ->
|
||||
getInput >>= setInput . (mconcat (replicate n spec) ++)
|
||||
Nothing -> Prelude.fail $ "Could not parse " <> T.unpack ds <> " as number"
|
||||
bgroup
|
||||
spaces
|
||||
maybeBar
|
||||
aligns' <- many $ try $ spaces >> optional starAlign >>
|
||||
(alignSpec <* maybeBar)
|
||||
spaces
|
||||
egroup
|
||||
spaces
|
||||
return $ map toSpec aligns'
|
||||
where
|
||||
toColWidth (Just w) | w > 0 = ColWidth w
|
||||
toColWidth _ = ColWidthDefault
|
||||
toSpec (x, y, z) = (x, toColWidth y, z)
|
||||
|
||||
-- N.B. this parser returns a Row that may have erroneous empty cells
|
||||
-- in it. See the note above fixTableHead for details.
|
||||
parseTableRow :: PandocMonad m
|
||||
=> LP m Blocks -- ^ block parser
|
||||
-> LP m Inlines -- ^ inline parser
|
||||
-> Text -- ^ table environment name
|
||||
-> [([Tok], [Tok])] -- ^ pref/suffixes
|
||||
-> LP m Row
|
||||
parseTableRow blocks inline envname prefsufs = do
|
||||
notFollowedBy (spaces *> end_ envname)
|
||||
-- add prefixes and suffixes in token stream:
|
||||
let celltoks (pref, suff) = do
|
||||
prefpos <- getPosition
|
||||
contents <- mconcat <$>
|
||||
many ( snd <$> withRaw
|
||||
((lookAhead (controlSeq "parbox") >>
|
||||
void blocks) -- #5711
|
||||
<|>
|
||||
(lookAhead (controlSeq "begin") >> void inline)
|
||||
<|>
|
||||
(lookAhead (symbol '$') >> void inline))
|
||||
<|>
|
||||
(do notFollowedBy
|
||||
(() <$ amp <|> () <$ lbreak <|> end_ envname)
|
||||
count 1 anyTok) )
|
||||
|
||||
suffpos <- getPosition
|
||||
option [] (count 1 amp)
|
||||
return $ map (setpos prefpos) pref ++ contents ++ map (setpos suffpos) suff
|
||||
rawcells <- mapM celltoks prefsufs
|
||||
cells <- mapM (parseFromToks (parseTableCell blocks)) rawcells
|
||||
spaces
|
||||
return $ Row nullAttr cells
|
||||
|
||||
parseTableCell :: PandocMonad m => LP m Blocks -> LP m Cell
|
||||
parseTableCell blocks = do
|
||||
spaces
|
||||
updateState $ \st -> st{ sInTableCell = True }
|
||||
cell' <- multicolumnCell blocks
|
||||
<|> multirowCell blocks
|
||||
<|> parseSimpleCell
|
||||
<|> parseEmptyCell
|
||||
updateState $ \st -> st{ sInTableCell = False }
|
||||
spaces
|
||||
return cell'
|
||||
where
|
||||
-- The parsing of empty cells is important in LaTeX, especially when dealing
|
||||
-- with multirow/multicolumn. See #6603.
|
||||
parseEmptyCell = spaces $> emptyCell
|
||||
parseSimpleCell = simpleCell <$> (plainify <$> blocks)
|
||||
|
||||
|
||||
cellAlignment :: PandocMonad m => LP m Alignment
|
||||
cellAlignment = skipMany (symbol '|') *> alignment <* skipMany (symbol '|')
|
||||
where
|
||||
alignment = do
|
||||
c <- untoken <$> singleChar
|
||||
return $ case c of
|
||||
"l" -> AlignLeft
|
||||
"r" -> AlignRight
|
||||
"c" -> AlignCenter
|
||||
"*" -> AlignDefault
|
||||
_ -> AlignDefault
|
||||
|
||||
plainify :: Blocks -> Blocks
|
||||
plainify bs = case toList bs of
|
||||
[Para ils] -> plain (fromList ils)
|
||||
_ -> bs
|
||||
|
||||
multirowCell :: PandocMonad m => LP m Blocks -> LP m Cell
|
||||
multirowCell blocks = controlSeq "multirow" >> do
|
||||
-- Full prototype for \multirow macro is:
|
||||
-- \multirow[vpos]{nrows}[bigstruts]{width}[vmove]{text}
|
||||
-- However, everything except `nrows` and `text` make
|
||||
-- sense in the context of the Pandoc AST
|
||||
_ <- optional $ symbol '[' *> cellAlignment <* symbol ']' -- vertical position
|
||||
nrows <- fmap (fromMaybe 1 . safeRead . untokenize) braced
|
||||
_ <- optional $ symbol '[' *> manyTill anyTok (symbol ']') -- bigstrut-related
|
||||
_ <- symbol '{' *> manyTill anyTok (symbol '}') -- Cell width
|
||||
_ <- optional $ symbol '[' *> manyTill anyTok (symbol ']') -- Length used for fine-tuning
|
||||
content <- symbol '{' *> (plainify <$> blocks) <* symbol '}'
|
||||
return $ cell AlignDefault (RowSpan nrows) (ColSpan 1) content
|
||||
|
||||
multicolumnCell :: PandocMonad m => LP m Blocks -> LP m Cell
|
||||
multicolumnCell blocks = controlSeq "multicolumn" >> do
|
||||
span' <- fmap (fromMaybe 1 . safeRead . untokenize) braced
|
||||
alignment <- symbol '{' *> cellAlignment <* symbol '}'
|
||||
|
||||
let singleCell = do
|
||||
content <- plainify <$> blocks
|
||||
return $ cell alignment (RowSpan 1) (ColSpan span') content
|
||||
|
||||
-- Two possible contents: either a \multirow cell, or content.
|
||||
-- E.g. \multicol{1}{c}{\multirow{2}{1em}{content}}
|
||||
-- Note that a \multirow cell can be nested in a \multicolumn,
|
||||
-- but not the other way around. See #6603
|
||||
let nestedCell = do
|
||||
(Cell _ _ (RowSpan rs) _ bs) <- multirowCell blocks
|
||||
return $ cell
|
||||
alignment
|
||||
(RowSpan rs)
|
||||
(ColSpan span')
|
||||
(fromList bs)
|
||||
|
||||
symbol '{' *> (nestedCell <|> singleCell) <* symbol '}'
|
||||
|
||||
-- LaTeX tables are stored with empty cells underneath multirow cells
|
||||
-- denoting the grid spaces taken up by them. More specifically, if a
|
||||
-- cell spans m rows, then it will overwrite all the cells in the
|
||||
-- columns it spans for (m-1) rows underneath it, requiring padding
|
||||
-- cells in these places. These padding cells need to be removed for
|
||||
-- proper table reading. See #6603.
|
||||
--
|
||||
-- These fixTable functions do not otherwise fix up malformed
|
||||
-- input tables: that is left to the table builder.
|
||||
fixTableHead :: TableHead -> TableHead
|
||||
fixTableHead (TableHead attr rows) = TableHead attr rows'
|
||||
where
|
||||
rows' = fixTableRows rows
|
||||
|
||||
fixTableBody :: TableBody -> TableBody
|
||||
fixTableBody (TableBody attr rhc th tb)
|
||||
= TableBody attr rhc th' tb'
|
||||
where
|
||||
th' = fixTableRows th
|
||||
tb' = fixTableRows tb
|
||||
|
||||
fixTableRows :: [Row] -> [Row]
|
||||
fixTableRows = fixTableRows' $ repeat Nothing
|
||||
where
|
||||
fixTableRows' oldHang (Row attr cells : rs)
|
||||
= let (newHang, cells') = fixTableRow oldHang cells
|
||||
rs' = fixTableRows' newHang rs
|
||||
in Row attr cells' : rs'
|
||||
fixTableRows' _ [] = []
|
||||
|
||||
-- The overhang is represented as Just (relative cell dimensions) or
|
||||
-- Nothing for an empty grid space.
|
||||
fixTableRow :: [Maybe (ColSpan, RowSpan)] -> [Cell] -> ([Maybe (ColSpan, RowSpan)], [Cell])
|
||||
fixTableRow oldHang cells
|
||||
-- If there's overhang, drop cells until their total width meets the
|
||||
-- width of the occupied grid spaces (or we run out)
|
||||
| (n, prefHang, restHang) <- splitHang oldHang
|
||||
, n > 0
|
||||
= let cells' = dropToWidth getCellW n cells
|
||||
(restHang', cells'') = fixTableRow restHang cells'
|
||||
in (prefHang restHang', cells'')
|
||||
-- Otherwise record the overhang of a pending cell and fix the rest
|
||||
-- of the row
|
||||
| c@(Cell _ _ h w _):cells' <- cells
|
||||
= let h' = max 1 h
|
||||
w' = max 1 w
|
||||
oldHang' = dropToWidth getHangW w' oldHang
|
||||
(newHang, cells'') = fixTableRow oldHang' cells'
|
||||
in (toHang w' h' <> newHang, c : cells'')
|
||||
| otherwise
|
||||
= (oldHang, [])
|
||||
where
|
||||
getCellW (Cell _ _ _ w _) = w
|
||||
getHangW = maybe 1 fst
|
||||
getCS (ColSpan n) = n
|
||||
|
||||
toHang c r
|
||||
| r > 1 = [Just (c, r)]
|
||||
| otherwise = replicate (getCS c) Nothing
|
||||
|
||||
-- Take the prefix of the overhang list representing filled grid
|
||||
-- spaces. Also return the remainder and the length of this prefix.
|
||||
splitHang = splitHang' 0 id
|
||||
|
||||
splitHang' !n l (Just (c, r):xs)
|
||||
= splitHang' (n + c) (l . (toHang c (r-1) ++)) xs
|
||||
splitHang' n l xs = (n, l, xs)
|
||||
|
||||
-- Drop list items until the total width of the dropped items
|
||||
-- exceeds the passed width.
|
||||
dropToWidth _ n l | n < 1 = l
|
||||
dropToWidth wproj n (c:cs) = dropToWidth wproj (n - wproj c) cs
|
||||
dropToWidth _ _ [] = []
|
||||
|
||||
simpTable :: PandocMonad m
|
||||
=> LP m Blocks
|
||||
-> LP m Inlines
|
||||
-> Text
|
||||
-> Bool
|
||||
-> LP m Blocks
|
||||
simpTable blocks inline envname hasWidthParameter = try $ do
|
||||
when hasWidthParameter $ () <$ tokWith inline
|
||||
skipopts
|
||||
colspecs <- parseAligns
|
||||
let (aligns, widths, prefsufs) = unzip3 colspecs
|
||||
optional $ controlSeq "caption" *> setCaption inline
|
||||
spaces
|
||||
optional label
|
||||
spaces
|
||||
optional lbreak
|
||||
spaces
|
||||
skipMany hline
|
||||
spaces
|
||||
header' <- option [] . try . fmap (:[]) $
|
||||
parseTableRow blocks inline envname prefsufs <*
|
||||
lbreak <* many1 hline
|
||||
spaces
|
||||
rows <- sepEndBy (parseTableRow blocks inline envname prefsufs)
|
||||
(lbreak <* optional (skipMany hline))
|
||||
spaces
|
||||
optional $ controlSeq "caption" *> setCaption inline
|
||||
spaces
|
||||
optional label
|
||||
spaces
|
||||
optional lbreak
|
||||
spaces
|
||||
lookAhead $ controlSeq "end" -- make sure we're at end
|
||||
let th = fixTableHead $ TableHead nullAttr header'
|
||||
let tbs = [fixTableBody $ TableBody nullAttr 0 [] rows]
|
||||
let tf = TableFoot nullAttr []
|
||||
return $ table emptyCaption (zip aligns widths) th tbs tf
|
||||
|
||||
addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
|
||||
addTableCaption = walkM go
|
||||
where go (Table attr c spec th tb tf) = do
|
||||
st <- getState
|
||||
let mblabel = sLastLabel st
|
||||
capt <- case (sCaption st, mblabel) of
|
||||
(Just ils, Nothing) -> return $ caption Nothing (plain ils)
|
||||
(Just ils, Just lab) -> do
|
||||
num <- getNextNumber sLastTableNum
|
||||
setState
|
||||
st{ sLastTableNum = num
|
||||
, sLabels = M.insert lab
|
||||
[Str (renderDottedNum num)]
|
||||
(sLabels st) }
|
||||
return $ caption Nothing (plain ils) -- add number??
|
||||
(Nothing, _) -> return c
|
||||
let attr' = case (attr, mblabel) of
|
||||
((_,classes,kvs), Just ident) ->
|
||||
(ident,classes,kvs)
|
||||
_ -> attr
|
||||
return $ addAttrDiv attr' $ Table nullAttr capt spec th tb tf
|
||||
go x = return x
|
||||
|
||||
-- TODO: For now we add a Div to contain table attributes, since
|
||||
-- most writers don't do anything yet with attributes on Table.
|
||||
-- This can be removed when that changes.
|
||||
addAttrDiv :: Attr -> Block -> Block
|
||||
addAttrDiv ("",[],[]) b = b
|
||||
addAttrDiv attr b = Div attr [b]
|
Loading…
Add table
Reference in a new issue