Docx writer: extract Table handling into separate module

This commit is contained in:
Albert Krewinkel 2021-04-20 10:54:46 +02:00
parent 3ab08fe2fb
commit 0b74bbbdaa
No known key found for this signature in database
GPG key ID: 388DC0B21F631124
3 changed files with 121 additions and 221 deletions

View file

@ -657,6 +657,8 @@ library
Text.Pandoc.Readers.Metadata, Text.Pandoc.Readers.Metadata,
Text.Pandoc.Readers.Roff, Text.Pandoc.Readers.Roff,
Text.Pandoc.Writers.Docx.StyleMap, Text.Pandoc.Writers.Docx.StyleMap,
Text.Pandoc.Writers.Docx.Table,
Text.Pandoc.Writers.Docx.Types,
Text.Pandoc.Writers.JATS.References, Text.Pandoc.Writers.JATS.References,
Text.Pandoc.Writers.JATS.Table, Text.Pandoc.Writers.JATS.Table,
Text.Pandoc.Writers.JATS.Types, Text.Pandoc.Writers.JATS.Types,

View file

@ -22,7 +22,6 @@ import Control.Applicative ((<|>))
import Control.Monad.Except (catchError, throwError) import Control.Monad.Except (catchError, throwError)
import Control.Monad.Reader import Control.Monad.Reader
import Control.Monad.State.Strict import Control.Monad.State.Strict
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy as BL
import Data.Char (isSpace, isLetter) import Data.Char (isSpace, isLetter)
import Data.List (intercalate, isPrefixOf, isSuffixOf) import Data.List (intercalate, isPrefixOf, isSuffixOf)
@ -47,123 +46,24 @@ import Text.Pandoc.Highlighting (highlight)
import Text.Pandoc.Error import Text.Pandoc.Error
import Text.Pandoc.ImageSize import Text.Pandoc.ImageSize
import Text.Pandoc.Logging import Text.Pandoc.Logging
import Text.Pandoc.MIME (MimeType, extensionFromMimeType, getMimeType, import Text.Pandoc.MIME (extensionFromMimeType, getMimeType, getMimeTypeDef)
getMimeTypeDef)
import Text.Pandoc.Options import Text.Pandoc.Options
import Text.Pandoc.Writers.Docx.StyleMap import Text.Pandoc.Writers.Docx.StyleMap
import Text.Pandoc.Writers.Docx.Table
import Text.Pandoc.Writers.Docx.Types
import Text.Pandoc.Shared import Text.Pandoc.Shared
import Text.Pandoc.Walk import Text.Pandoc.Walk
import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Math
import Text.Pandoc.Writers.Shared import Text.Pandoc.Writers.Shared
import Text.Printf (printf)
import Text.TeXMath import Text.TeXMath
import Text.Pandoc.Writers.OOXML import Text.Pandoc.Writers.OOXML
import Text.Pandoc.XML.Light as XML import Text.Pandoc.XML.Light as XML
import Data.Generics (mkT, everywhere) import Data.Generics (mkT, everywhere)
data ListMarker = NoMarker
| BulletMarker
| NumberMarker ListNumberStyle ListNumberDelim Int
deriving (Show, Read, Eq, Ord)
listMarkerToId :: ListMarker -> Text
listMarkerToId NoMarker = "990"
listMarkerToId BulletMarker = "991"
listMarkerToId (NumberMarker sty delim n) = T.pack $
'9' : '9' : styNum : delimNum : show n
where styNum = case sty of
DefaultStyle -> '2'
Example -> '3'
Decimal -> '4'
LowerRoman -> '5'
UpperRoman -> '6'
LowerAlpha -> '7'
UpperAlpha -> '8'
delimNum = case delim of
DefaultDelim -> '0'
Period -> '1'
OneParen -> '2'
TwoParens -> '3'
data EnvProps = EnvProps{ styleElement :: Maybe Element
, otherElements :: [Element]
}
instance Semigroup EnvProps where
EnvProps s es <> EnvProps s' es' = EnvProps (s <|> s') (es ++ es')
instance Monoid EnvProps where
mempty = EnvProps Nothing []
mappend = (<>)
squashProps :: EnvProps -> [Element] squashProps :: EnvProps -> [Element]
squashProps (EnvProps Nothing es) = es squashProps (EnvProps Nothing es) = es
squashProps (EnvProps (Just e) es) = e : es squashProps (EnvProps (Just e) es) = e : es
data WriterEnv = WriterEnv{ envTextProperties :: EnvProps
, envParaProperties :: EnvProps
, envRTL :: Bool
, envListLevel :: Int
, envListNumId :: Int
, envInDel :: Bool
, envChangesAuthor :: Text
, envChangesDate :: Text
, envPrintWidth :: Integer
}
defaultWriterEnv :: WriterEnv
defaultWriterEnv = WriterEnv{ envTextProperties = mempty
, envParaProperties = mempty
, envRTL = False
, envListLevel = -1
, envListNumId = 1
, envInDel = False
, envChangesAuthor = "unknown"
, envChangesDate = "1969-12-31T19:00:00Z"
, envPrintWidth = 1
}
data WriterState = WriterState{
stFootnotes :: [Element]
, stComments :: [([(Text, Text)], [Inline])]
, stSectionIds :: Set.Set Text
, stExternalLinks :: M.Map Text Text
, stImages :: M.Map FilePath (String, String, Maybe MimeType, B.ByteString)
, stLists :: [ListMarker]
, stInsId :: Int
, stDelId :: Int
, stStyleMaps :: StyleMaps
, stFirstPara :: Bool
, stInTable :: Bool
, stInList :: Bool
, stTocTitle :: [Inline]
, stDynamicParaProps :: Set.Set ParaStyleName
, stDynamicTextProps :: Set.Set CharStyleName
, stCurId :: Int
}
defaultWriterState :: WriterState
defaultWriterState = WriterState{
stFootnotes = defaultFootnotes
, stComments = []
, stSectionIds = Set.empty
, stExternalLinks = M.empty
, stImages = M.empty
, stLists = [NoMarker]
, stInsId = 1
, stDelId = 1
, stStyleMaps = StyleMaps M.empty M.empty
, stFirstPara = False
, stInTable = False
, stInList = False
, stTocTitle = [Str "Table of Contents"]
, stDynamicParaProps = Set.empty
, stDynamicTextProps = Set.empty
, stCurId = 20
}
type WS m = ReaderT WriterEnv (StateT WriterState m)
renumIdMap :: Int -> [Element] -> M.Map Text Text renumIdMap :: Int -> [Element] -> M.Map Text Text
renumIdMap _ [] = M.empty renumIdMap _ [] = M.empty
renumIdMap n (e:es) renumIdMap n (e:es)
@ -858,12 +758,6 @@ separateTables (x@Table{}:xs@(Table{}:_)) =
x : RawBlock (Format "openxml") "<w:p />" : separateTables xs x : RawBlock (Format "openxml") "<w:p />" : separateTables xs
separateTables (x:xs) = x : separateTables xs separateTables (x:xs) = x : separateTables xs
pStyleM :: (PandocMonad m) => ParaStyleName -> WS m XML.Element
pStyleM styleName = do
pStyleMap <- gets (smParaStyle . stStyleMaps)
let sty' = getStyleIdFromName styleName pStyleMap
return $ mknode "w:pStyle" [("w:val", fromStyleId sty')] ()
rStyleM :: (PandocMonad m) => CharStyleName -> WS m XML.Element rStyleM :: (PandocMonad m) => CharStyleName -> WS m XML.Element
rStyleM styleName = do rStyleM styleName = do
cStyleMap <- gets (smCharStyle . stStyleMaps) cStyleMap <- gets (smCharStyle . stStyleMaps)
@ -995,78 +889,8 @@ blockToOpenXML' _ HorizontalRule = do
$ mknode "v:rect" [("style","width:0;height:1.5pt"), $ mknode "v:rect" [("style","width:0;height:1.5pt"),
("o:hralign","center"), ("o:hralign","center"),
("o:hrstd","t"),("o:hr","t")] () ] ("o:hrstd","t"),("o:hr","t")] () ]
blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) =
let (caption, aligns, widths, headers, rows) = toLegacyTable blkCapt specs thead tbody tfoot tableToOpenXML (blocksToOpenXML opts) blkCapt specs thead tbody tfoot
setFirstPara
modify $ \s -> s { stInTable = True }
let captionStr = stringify caption
caption' <- if null caption
then return []
else withParaPropM (pStyleM "Table Caption")
$ blockToOpenXML opts (Para caption)
let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] ()
-- Table cells require a <w:p> element, even an empty one!
-- Not in the spec but in Word 2007, 2010. See #4953. And
-- apparently the last element must be a <w:p>, see #6983.
let cellToOpenXML (al, cell) = do
es <- withParaProp (alignmentFor al) $ blocksToOpenXML opts cell
return $
case reverse (onlyElems es) of
b:e:_ | qName (elName b) == "bookmarkEnd"
, qName (elName e) == "p" -> es
e:_ | qName (elName e) == "p" -> es
_ -> es ++ [Elem $ mknode "w:p" [] ()]
headers' <- mapM cellToOpenXML $ zip aligns headers
rows' <- mapM (mapM cellToOpenXML . zip aligns) rows
compactStyle <- pStyleM "Compact"
let emptyCell' = [Elem $ mknode "w:p" [] [mknode "w:pPr" [] [compactStyle]]]
let mkcell contents = mknode "w:tc" []
$ if null contents
then emptyCell'
else contents
let mkrow cells =
mknode "w:tr" [] $
map mkcell cells
let textwidth = 7920 -- 5.5 in in twips, 1/20 pt
let fullrow = 5000 -- 100% specified in pct
let (rowwidth :: Int) = round $ fullrow * sum widths
let mkgridcol w = mknode "w:gridCol"
[("w:w", tshow (floor (textwidth * w) :: Integer))] ()
let hasHeader = not $ all null headers
modify $ \s -> s { stInTable = False }
-- for compatibility with Word <= 2007, we include a val with a bitmask
-- 0×0020 Apply first row conditional formatting
-- 0×0040 Apply last row conditional formatting
-- 0×0080 Apply first column conditional formatting
-- 0×0100 Apply last column conditional formatting
-- 0×0200 Do not apply row banding conditional formatting
-- 0×0400 Do not apply column banding conditional formattin
let tblLookVal :: Int
tblLookVal = if hasHeader then 0x20 else 0
return $
caption' ++
[Elem $
mknode "w:tbl" []
( mknode "w:tblPr" []
( mknode "w:tblStyle" [("w:val","Table")] () :
mknode "w:tblW" [("w:type", "pct"), ("w:w", tshow rowwidth)] () :
mknode "w:tblLook" [("w:firstRow",if hasHeader then "1" else "0")
,("w:lastRow","0")
,("w:firstColumn","0")
,("w:lastColumn","0")
,("w:noHBand","0")
,("w:noVBand","0")
,("w:val", T.pack $ printf "%04x" tblLookVal)
] () :
[ mknode "w:tblCaption" [("w:val", captionStr)] ()
| not (null caption) ] )
: mknode "w:tblGrid" []
(if all (==0) widths
then []
else map mkgridcol widths)
: [ mkrow headers' | hasHeader ] ++
map mkrow rows'
)]
blockToOpenXML' opts el blockToOpenXML' opts el
| BulletList lst <- el = addOpenXMLList BulletMarker lst | BulletList lst <- el = addOpenXMLList BulletMarker lst
| OrderedList (start, numstyle, numdelim) lst <- el | OrderedList (start, numstyle, numdelim) lst <- el
@ -1121,13 +945,6 @@ listItemToOpenXML opts numid (first:rest) = do
modify $ \st -> st{ stInList = oldInList } modify $ \st -> st{ stInList = oldInList }
return $ first'' ++ rest'' return $ first'' ++ rest''
alignmentToString :: Alignment -> Text
alignmentToString alignment = case alignment of
AlignLeft -> "left"
AlignRight -> "right"
AlignCenter -> "center"
AlignDefault -> "left"
-- | Convert a list of inline elements to OpenXML. -- | Convert a list of inline elements to OpenXML.
inlinesToOpenXML :: PandocMonad m => WriterOptions -> [Inline] -> WS m [Content] inlinesToOpenXML :: PandocMonad m => WriterOptions -> [Inline] -> WS m [Content]
inlinesToOpenXML opts lst = concat `fmap` mapM (inlineToOpenXML opts) lst inlinesToOpenXML opts lst = concat `fmap` mapM (inlineToOpenXML opts) lst
@ -1138,10 +955,6 @@ withNumId numid = local $ \env -> env{ envListNumId = numid }
asList :: (PandocMonad m) => WS m a -> WS m a asList :: (PandocMonad m) => WS m a -> WS m a
asList = local $ \env -> env{ envListLevel = envListLevel env + 1 } asList = local $ \env -> env{ envListLevel = envListLevel env + 1 }
isStyle :: Element -> Bool
isStyle e = isElem [] "w" "rStyle" e ||
isElem [] "w" "pStyle" e
getTextProps :: (PandocMonad m) => WS m [Element] getTextProps :: (PandocMonad m) => WS m [Element]
getTextProps = do getTextProps = do
props <- asks envTextProperties props <- asks envTextProperties
@ -1170,16 +983,6 @@ getParaProps displayMathPara = do
[] -> [] [] -> []
ps -> [mknode "w:pPr" [] ps] ps -> [mknode "w:pPr" [] ps]
withParaProp :: PandocMonad m => Element -> WS m a -> WS m a
withParaProp d p =
local (\env -> env {envParaProperties = ep <> envParaProperties env}) p
where ep = if isStyle d then EnvProps (Just d) [] else EnvProps Nothing [d]
withParaPropM :: PandocMonad m => WS m Element -> WS m a -> WS m a
withParaPropM md p = do
d <- md
withParaProp d p
formattedString :: PandocMonad m => Text -> WS m [Element] formattedString :: PandocMonad m => Text -> WS m [Element]
formattedString str = formattedString str =
-- properly handle soft hyphens -- properly handle soft hyphens
@ -1200,9 +1003,6 @@ formattedRun els = do
props <- getTextProps props <- getTextProps
return [ mknode "w:r" [] $ props ++ els ] return [ mknode "w:r" [] $ props ++ els ]
setFirstPara :: PandocMonad m => WS m ()
setFirstPara = modify $ \s -> s { stFirstPara = True }
-- | Convert an inline element to OpenXML. -- | Convert an inline element to OpenXML.
inlineToOpenXML :: PandocMonad m => WriterOptions -> Inline -> WS m [Content] inlineToOpenXML :: PandocMonad m => WriterOptions -> Inline -> WS m [Content]
inlineToOpenXML opts il = withDirection $ inlineToOpenXML' opts il inlineToOpenXML opts il = withDirection $ inlineToOpenXML' opts il
@ -1494,22 +1294,6 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do
br :: Element br :: Element
br = mknode "w:r" [] [mknode "w:br" [] ()] br = mknode "w:r" [] [mknode "w:br" [] ()]
-- Word will insert these footnotes into the settings.xml file
-- (whether or not they're visible in the document). If they're in the
-- file, but not in the footnotes.xml file, it will produce
-- problems. So we want to make sure we insert them into our document.
defaultFootnotes :: [Element]
defaultFootnotes = [ mknode "w:footnote"
[("w:type", "separator"), ("w:id", "-1")]
[ mknode "w:p" []
[mknode "w:r" []
[ mknode "w:separator" [] ()]]]
, mknode "w:footnote"
[("w:type", "continuationSeparator"), ("w:id", "0")]
[ mknode "w:p" []
[ mknode "w:r" []
[ mknode "w:continuationSeparator" [] ()]]]]
withDirection :: PandocMonad m => WS m a -> WS m a withDirection :: PandocMonad m => WS m a -> WS m a
withDirection x = do withDirection x = do

View file

@ -0,0 +1,114 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{- |
Module : Text.Pandoc.Writers.Docx
Copyright : Copyright (C) 2012-2021 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Conversion of table blocks to docx.
-}
module Text.Pandoc.Writers.Docx.Table
( tableToOpenXML
) where
import Control.Monad.State.Strict
import Data.Text (Text)
import Text.Pandoc.Definition
import Text.Pandoc.Class.PandocMonad (PandocMonad)
import Text.Pandoc.Writers.Docx.Types
import Text.Pandoc.Shared
import Text.Pandoc.Writers.Shared
import Text.Printf (printf)
import Text.Pandoc.Writers.OOXML
import Text.Pandoc.XML.Light as XML
import qualified Data.Text as T
tableToOpenXML :: PandocMonad m
=> ([Block] -> WS m [Content])
-> Caption
-> [ColSpec]
-> TableHead
-> [TableBody]
-> TableFoot
-> WS m [Content]
tableToOpenXML blocksToOpenXML blkCapt specs thead tbody tfoot = do
let (caption, aligns, widths, headers, rows) =
toLegacyTable blkCapt specs thead tbody tfoot
setFirstPara
modify $ \s -> s { stInTable = True }
let captionStr = stringify caption
caption' <- if null caption
then return []
else withParaPropM (pStyleM "Table Caption")
$ blocksToOpenXML [Para caption]
let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] ()
-- Table cells require a <w:p> element, even an empty one!
-- Not in the spec but in Word 2007, 2010. See #4953. And
-- apparently the last element must be a <w:p>, see #6983.
let cellToOpenXML (al, cell) = do
es <- withParaProp (alignmentFor al) $ blocksToOpenXML cell
return $
case reverse (onlyElems es) of
b:e:_ | qName (elName b) == "bookmarkEnd"
, qName (elName e) == "p" -> es
e:_ | qName (elName e) == "p" -> es
_ -> es ++ [Elem $ mknode "w:p" [] ()]
headers' <- mapM cellToOpenXML $ zip aligns headers
rows' <- mapM (mapM cellToOpenXML . zip aligns) rows
compactStyle <- pStyleM "Compact"
let emptyCell' = [Elem $ mknode "w:p" [] [mknode "w:pPr" [] [compactStyle]]]
let mkcell contents = mknode "w:tc" []
$ if null contents
then emptyCell'
else contents
let mkrow cells =
mknode "w:tr" [] $
map mkcell cells
let textwidth = 7920 -- 5.5 in in twips, 1/20 pt
let fullrow = 5000 -- 100% specified in pct
let (rowwidth :: Int) = round $ fullrow * sum widths
let mkgridcol w = mknode "w:gridCol"
[("w:w", tshow (floor (textwidth * w) :: Integer))] ()
let hasHeader = not $ all null headers
modify $ \s -> s { stInTable = False }
-- for compatibility with Word <= 2007, we include a val with a bitmask
-- 0×0020 Apply first row conditional formatting
-- 0×0040 Apply last row conditional formatting
-- 0×0080 Apply first column conditional formatting
-- 0×0100 Apply last column conditional formatting
-- 0×0200 Do not apply row banding conditional formatting
-- 0×0400 Do not apply column banding conditional formattin
let tblLookVal :: Int
tblLookVal = if hasHeader then 0x20 else 0
return $
caption' ++
[Elem $
mknode "w:tbl" []
( mknode "w:tblPr" []
( mknode "w:tblStyle" [("w:val","Table")] () :
mknode "w:tblW" [("w:type", "pct"), ("w:w", tshow rowwidth)] () :
mknode "w:tblLook" [("w:firstRow",if hasHeader then "1" else "0")
,("w:lastRow","0")
,("w:firstColumn","0")
,("w:lastColumn","0")
,("w:noHBand","0")
,("w:noVBand","0")
,("w:val", T.pack $ printf "%04x" tblLookVal)
] () :
[ mknode "w:tblCaption" [("w:val", captionStr)] ()
| not (null caption) ] )
: mknode "w:tblGrid" []
(if all (==0) widths
then []
else map mkgridcol widths)
: [ mkrow headers' | hasHeader ] ++
map mkrow rows'
)]
alignmentToString :: Alignment -> Text
alignmentToString alignment = case alignment of
AlignLeft -> "left"
AlignRight -> "right"
AlignCenter -> "center"
AlignDefault -> "left"