836 lines
38 KiB
Haskell
836 lines
38 KiB
Haskell
{-# LANGUAGE ScopedTypeVariables #-}
|
|
{-
|
|
Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu>
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
-}
|
|
|
|
{- |
|
|
Module : Text.Pandoc.Writers.Docx
|
|
Copyright : Copyright (C) 2012 John MacFarlane
|
|
License : GNU GPL, version 2 or above
|
|
|
|
Maintainer : John MacFarlane <jgm@berkeley.edu>
|
|
Stability : alpha
|
|
Portability : portable
|
|
|
|
Conversion of 'Pandoc' documents to docx.
|
|
-}
|
|
module Text.Pandoc.Writers.Docx ( writeDocx ) where
|
|
import Data.List ( intercalate, groupBy )
|
|
import qualified Data.ByteString as B
|
|
import qualified Data.ByteString.Lazy as BL
|
|
import qualified Data.ByteString.Lazy.Char8 as BL8
|
|
import qualified Data.Map as M
|
|
import qualified Text.Pandoc.UTF8 as UTF8
|
|
import Text.Pandoc.Compat.Monoid ((<>))
|
|
import Codec.Archive.Zip
|
|
import Data.Time.Clock.POSIX
|
|
import Text.Pandoc.Definition
|
|
import Text.Pandoc.Generic
|
|
import Text.Pandoc.ImageSize
|
|
import Text.Pandoc.Shared hiding (Element)
|
|
import Text.Pandoc.Options
|
|
import Text.Pandoc.Readers.TeXMath
|
|
import Text.Pandoc.Highlighting ( highlight )
|
|
import Text.Pandoc.Walk
|
|
import Text.Highlighting.Kate.Types ()
|
|
import Text.XML.Light
|
|
import Text.TeXMath
|
|
import Control.Monad.State
|
|
import Text.Highlighting.Kate
|
|
import Data.Unique (hashUnique, newUnique)
|
|
import System.Random (randomRIO)
|
|
import Text.Printf (printf)
|
|
import qualified Control.Exception as E
|
|
import Text.Pandoc.MIME (getMimeType, extensionFromMimeType)
|
|
import Control.Applicative ((<|>))
|
|
|
|
data WriterState = WriterState{
|
|
stTextProperties :: [Element]
|
|
, stParaProperties :: [Element]
|
|
, stFootnotes :: [Element]
|
|
, stSectionIds :: [String]
|
|
, stExternalLinks :: M.Map String String
|
|
, stImages :: M.Map FilePath (String, String, Maybe String, Element, B.ByteString)
|
|
, stListLevel :: Int
|
|
, stListNumId :: Int
|
|
, stNumStyles :: M.Map ListMarker Int
|
|
, stLists :: [ListMarker]
|
|
}
|
|
|
|
data ListMarker = NoMarker
|
|
| BulletMarker
|
|
| NumberMarker ListNumberStyle ListNumberDelim Int
|
|
deriving (Show, Read, Eq, Ord)
|
|
|
|
defaultWriterState :: WriterState
|
|
defaultWriterState = WriterState{
|
|
stTextProperties = []
|
|
, stParaProperties = []
|
|
, stFootnotes = []
|
|
, stSectionIds = []
|
|
, stExternalLinks = M.empty
|
|
, stImages = M.empty
|
|
, stListLevel = -1
|
|
, stListNumId = 1
|
|
, stNumStyles = M.fromList [(NoMarker, 0)]
|
|
, stLists = [NoMarker]
|
|
}
|
|
|
|
type WS a = StateT WriterState IO a
|
|
|
|
mknode :: Node t => String -> [(String,String)] -> t -> Element
|
|
mknode s attrs =
|
|
add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) . node (unqual s)
|
|
|
|
toLazy :: B.ByteString -> BL.ByteString
|
|
toLazy = BL.fromChunks . (:[])
|
|
|
|
renderXml :: Element -> BL.ByteString
|
|
renderXml elt = BL8.pack "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" <>
|
|
UTF8.fromStringLazy (showElement elt)
|
|
|
|
-- | Produce an Docx file from a Pandoc document.
|
|
writeDocx :: WriterOptions -- ^ Writer options
|
|
-> Pandoc -- ^ Document to convert
|
|
-> IO BL.ByteString
|
|
writeDocx opts doc@(Pandoc meta _) = do
|
|
let datadir = writerUserDataDir opts
|
|
let doc' = walk fixDisplayMath doc
|
|
refArchive <- liftM (toArchive . toLazy) $
|
|
case writerReferenceDocx opts of
|
|
Just f -> B.readFile f
|
|
Nothing -> readDataFile datadir "reference.docx"
|
|
|
|
((contents, footnotes), st) <- runStateT (writeOpenXML opts{writerWrapText = False} doc')
|
|
defaultWriterState
|
|
epochtime <- floor `fmap` getPOSIXTime
|
|
let imgs = M.elems $ stImages st
|
|
|
|
-- we create [Content_Types].xml and word/_rels/document.xml.rels
|
|
-- from scratch rather than reading from reference.docx,
|
|
-- because Word sometimes changes these files when a reference.docx is modified,
|
|
-- e.g. deleting the reference to footnotes.xml or removing default entries
|
|
-- for image content types.
|
|
|
|
-- [Content_Types].xml
|
|
let mkOverrideNode (part', contentType') = mknode "Override"
|
|
[("PartName",part'),("ContentType",contentType')] ()
|
|
let mkImageOverride (_, imgpath, mbMimeType, _, _) =
|
|
mkOverrideNode ("/word/" ++ imgpath, maybe "application/octet-stream" id mbMimeType)
|
|
let overrides = map mkOverrideNode
|
|
[("/word/webSettings.xml",
|
|
"application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml")
|
|
,("/word/numbering.xml",
|
|
"application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml")
|
|
,("/word/settings.xml",
|
|
"application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml")
|
|
,("/word/theme/theme1.xml",
|
|
"application/vnd.openxmlformats-officedocument.theme+xml")
|
|
,("/word/fontTable.xml",
|
|
"application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml")
|
|
,("/docProps/app.xml",
|
|
"application/vnd.openxmlformats-officedocument.extended-properties+xml")
|
|
,("/docProps/core.xml",
|
|
"application/vnd.openxmlformats-package.core-properties+xml")
|
|
,("/word/styles.xml",
|
|
"application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml")
|
|
,("/word/document.xml",
|
|
"application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml")
|
|
,("/word/footnotes.xml",
|
|
"application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml")
|
|
] ++ map mkImageOverride imgs
|
|
let defaultnodes = [mknode "Default"
|
|
[("Extension","xml"),("ContentType","application/xml")] (),
|
|
mknode "Default"
|
|
[("Extension","rels"),("ContentType","application/vnd.openxmlformats-package.relationships+xml")] ()]
|
|
let contentTypesDoc = mknode "Types" [("xmlns","http://schemas.openxmlformats.org/package/2006/content-types")] $ defaultnodes ++ overrides
|
|
let contentTypesEntry = toEntry "[Content_Types].xml" epochtime
|
|
$ renderXml contentTypesDoc
|
|
|
|
-- word/_rels/document.xml.rels
|
|
let toBaseRel (url', id', target') = mknode "Relationship"
|
|
[("Type",url')
|
|
,("Id",id')
|
|
,("Target",target')] ()
|
|
let baserels = map toBaseRel
|
|
[("http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering",
|
|
"rId1",
|
|
"numbering.xml")
|
|
,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles",
|
|
"rId2",
|
|
"styles.xml")
|
|
,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/settings",
|
|
"rId3",
|
|
"settings.xml")
|
|
,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/webSettings",
|
|
"rId4",
|
|
"webSettings.xml")
|
|
,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable",
|
|
"rId5",
|
|
"fontTable.xml")
|
|
,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme",
|
|
"rId6",
|
|
"theme/theme1.xml")
|
|
,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes",
|
|
"rId7",
|
|
"footnotes.xml")]
|
|
let toImgRel (ident,path,_,_,_) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",ident),("Target",path)] ()
|
|
let imgrels = map toImgRel imgs
|
|
let toLinkRel (src,ident) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"),("Id",ident),("Target",src),("TargetMode","External") ] ()
|
|
let linkrels = map toLinkRel $ M.toList $ stExternalLinks st
|
|
let reldoc = mknode "Relationships" [("xmlns","http://schemas.openxmlformats.org/package/2006/relationships")] $ baserels ++ imgrels ++ linkrels
|
|
let relEntry = toEntry "word/_rels/document.xml.rels" epochtime
|
|
$ renderXml reldoc
|
|
|
|
-- create entries for images in word/media/...
|
|
let toImageEntry (_,path,_,_,img) = toEntry ("word/" ++ path) epochtime $ toLazy img
|
|
let imageEntries = map toImageEntry imgs
|
|
|
|
-- word/document.xml
|
|
let contentEntry = toEntry "word/document.xml" epochtime $ renderXml contents
|
|
|
|
-- footnotes
|
|
let footnotesEntry = toEntry "word/footnotes.xml" epochtime $ renderXml footnotes
|
|
|
|
-- footnote rels
|
|
let footnoteRelEntry = toEntry "word/_rels/footnotes.xml.rels" epochtime
|
|
$ renderXml $ mknode "Relationships" [("xmlns","http://schemas.openxmlformats.org/package/2006/relationships")]
|
|
$ linkrels
|
|
|
|
-- styles
|
|
let newstyles = styleToOpenXml $ writerHighlightStyle opts
|
|
let stylepath = "word/styles.xml"
|
|
styledoc <- parseXml refArchive stylepath
|
|
let styledoc' = styledoc{ elContent = elContent styledoc ++
|
|
[Elem x | x <- newstyles, writerHighlight opts] }
|
|
let styleEntry = toEntry stylepath epochtime $ renderXml styledoc'
|
|
|
|
-- construct word/numbering.xml
|
|
let numpath = "word/numbering.xml"
|
|
numEntry <- (toEntry numpath epochtime . renderXml)
|
|
`fmap` mkNumbering (stNumStyles st) (stLists st)
|
|
let docPropsPath = "docProps/core.xml"
|
|
let docProps = mknode "cp:coreProperties"
|
|
[("xmlns:cp","http://schemas.openxmlformats.org/package/2006/metadata/core-properties")
|
|
,("xmlns:dc","http://purl.org/dc/elements/1.1/")
|
|
,("xmlns:dcterms","http://purl.org/dc/terms/")
|
|
,("xmlns:dcmitype","http://purl.org/dc/dcmitype/")
|
|
,("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance")]
|
|
$ mknode "dc:title" [] (stringify $ docTitle meta)
|
|
: mknode "dc:creator" [] (intercalate "; " (map stringify $ docAuthors meta))
|
|
: maybe []
|
|
(\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] $ x
|
|
, mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] $ x
|
|
]) (normalizeDate $ stringify $ docDate meta)
|
|
let docPropsEntry = toEntry docPropsPath epochtime $ renderXml docProps
|
|
|
|
let relsPath = "_rels/.rels"
|
|
let rels = mknode "Relationships" [("xmlns", "http://schemas.openxmlformats.org/package/2006/relationships")]
|
|
$ map (\attrs -> mknode "Relationship" attrs ())
|
|
[ [("Id","rId1")
|
|
,("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument")
|
|
,("Target","word/document.xml")]
|
|
, [("Id","rId4")
|
|
,("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties")
|
|
,("Target","docProps/app.xml")]
|
|
, [("Id","rId3")
|
|
,("Type","http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties")
|
|
,("Target","docProps/core.xml")]
|
|
]
|
|
let relsEntry = toEntry relsPath epochtime $ renderXml rels
|
|
|
|
let entryFromArchive path = (toEntry path epochtime . renderXml) `fmap`
|
|
parseXml refArchive path
|
|
docPropsAppEntry <- entryFromArchive "docProps/app.xml"
|
|
themeEntry <- entryFromArchive "word/theme/theme1.xml"
|
|
fontTableEntry <- entryFromArchive "word/fontTable.xml"
|
|
settingsEntry <- entryFromArchive "word/settings.xml"
|
|
webSettingsEntry <- entryFromArchive "word/webSettings.xml"
|
|
|
|
-- Create archive
|
|
let archive = foldr addEntryToArchive emptyArchive $
|
|
contentTypesEntry : relsEntry : contentEntry : relEntry :
|
|
footnoteRelEntry : numEntry : styleEntry : footnotesEntry :
|
|
docPropsEntry : docPropsAppEntry : themeEntry :
|
|
fontTableEntry : settingsEntry : webSettingsEntry :
|
|
imageEntries
|
|
return $ fromArchive archive
|
|
|
|
styleToOpenXml :: Style -> [Element]
|
|
styleToOpenXml style = parStyle : map toStyle alltoktypes
|
|
where alltoktypes = enumFromTo KeywordTok NormalTok
|
|
toStyle toktype = mknode "w:style" [("w:type","character"),
|
|
("w:customStyle","1"),("w:styleId",show toktype)]
|
|
[ mknode "w:name" [("w:val",show toktype)] ()
|
|
, mknode "w:basedOn" [("w:val","VerbatimChar")] ()
|
|
, mknode "w:rPr" [] $
|
|
[ mknode "w:color" [("w:val",tokCol toktype)] ()
|
|
| tokCol toktype /= "auto" ] ++
|
|
[ mknode "w:shd" [("w:val","clear"),("w:fill",tokBg toktype)] ()
|
|
| tokBg toktype /= "auto" ] ++
|
|
[ mknode "w:b" [] () | tokFeature tokenBold toktype ] ++
|
|
[ mknode "w:i" [] () | tokFeature tokenItalic toktype ] ++
|
|
[ mknode "w:u" [] () | tokFeature tokenUnderline toktype ]
|
|
]
|
|
tokStyles = tokenStyles style
|
|
tokFeature f toktype = maybe False f $ lookup toktype tokStyles
|
|
tokCol toktype = maybe "auto" (drop 1 . fromColor)
|
|
$ (tokenColor =<< lookup toktype tokStyles)
|
|
`mplus` defaultColor style
|
|
tokBg toktype = maybe "auto" (drop 1 . fromColor)
|
|
$ (tokenBackground =<< lookup toktype tokStyles)
|
|
`mplus` backgroundColor style
|
|
parStyle = mknode "w:style" [("w:type","paragraph"),
|
|
("w:customStyle","1"),("w:styleId","SourceCode")]
|
|
[ mknode "w:name" [("w:val","Source Code")] ()
|
|
, mknode "w:basedOn" [("w:val","Normal")] ()
|
|
, mknode "w:link" [("w:val","VerbatimChar")] ()
|
|
, mknode "w:pPr" []
|
|
$ mknode "w:wordWrap" [("w:val","off")] ()
|
|
: ( maybe [] (\col -> [mknode "w:shd" [("w:val","clear"),("w:fill",drop 1 $ fromColor col)] ()])
|
|
$ backgroundColor style )
|
|
]
|
|
|
|
mkNumbering :: M.Map ListMarker Int -> [ListMarker] -> IO Element
|
|
mkNumbering markers lists = do
|
|
elts <- mapM mkAbstractNum (M.toList markers)
|
|
return $ mknode "w:numbering"
|
|
[("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main")]
|
|
$ elts ++ zipWith (mkNum markers) lists [1..(length lists)]
|
|
|
|
mkNum :: M.Map ListMarker Int -> ListMarker -> Int -> Element
|
|
mkNum markers marker numid =
|
|
mknode "w:num" [("w:numId",show numid)]
|
|
$ mknode "w:abstractNumId" [("w:val",show absnumid)] ()
|
|
: case marker of
|
|
NoMarker -> []
|
|
BulletMarker -> []
|
|
NumberMarker _ _ start ->
|
|
map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",show (lvl :: Int))]
|
|
$ mknode "w:startOverride" [("w:val",show start)] ()) [0..6]
|
|
where absnumid = maybe 0 id $ M.lookup marker markers
|
|
|
|
mkAbstractNum :: (ListMarker,Int) -> IO Element
|
|
mkAbstractNum (marker,numid) = do
|
|
nsid <- randomRIO (0x10000000 :: Integer, 0xFFFFFFFF :: Integer)
|
|
return $ mknode "w:abstractNum" [("w:abstractNumId",show numid)]
|
|
$ mknode "w:nsid" [("w:val", printf "%8x" nsid)] ()
|
|
: mknode "w:multiLevelType" [("w:val","multilevel")] ()
|
|
: map (mkLvl marker) [0..6]
|
|
|
|
mkLvl :: ListMarker -> Int -> Element
|
|
mkLvl marker lvl =
|
|
mknode "w:lvl" [("w:ilvl",show lvl)] $
|
|
[ mknode "w:start" [("w:val",start)] ()
|
|
| marker /= NoMarker && marker /= BulletMarker ] ++
|
|
[ mknode "w:numFmt" [("w:val",fmt)] ()
|
|
, mknode "w:lvlText" [("w:val",lvltxt)] ()
|
|
, mknode "w:lvlJc" [("w:val","left")] ()
|
|
, mknode "w:pPr" []
|
|
[ mknode "w:tabs" []
|
|
$ mknode "w:tab" [("w:val","num"),("w:pos",show $ lvl * step)] ()
|
|
, mknode "w:ind" [("w:left",show $ lvl * step + hang),("w:hanging",show hang)] ()
|
|
]
|
|
]
|
|
where (fmt, lvltxt, start) =
|
|
case marker of
|
|
NoMarker -> ("bullet"," ","1")
|
|
BulletMarker -> ("bullet",bulletFor lvl,"1")
|
|
NumberMarker st de n -> (styleFor st lvl
|
|
,patternFor de ("%" ++ show (lvl + 1))
|
|
,show n)
|
|
step = 720
|
|
hang = 480
|
|
bulletFor 0 = "\x2022" -- filled circle
|
|
bulletFor 1 = "\x2013" -- en dash
|
|
bulletFor 2 = "\x2022" -- hyphen bullet
|
|
bulletFor 3 = "\x2013"
|
|
bulletFor 4 = "\x2022"
|
|
bulletFor 5 = "\x2013"
|
|
bulletFor _ = "\x2022"
|
|
styleFor UpperAlpha _ = "upperLetter"
|
|
styleFor LowerAlpha _ = "lowerLetter"
|
|
styleFor UpperRoman _ = "upperRoman"
|
|
styleFor LowerRoman _ = "lowerRoman"
|
|
styleFor Decimal _ = "decimal"
|
|
styleFor DefaultStyle 1 = "decimal"
|
|
styleFor DefaultStyle 2 = "lowerLetter"
|
|
styleFor DefaultStyle 3 = "lowerRoman"
|
|
styleFor DefaultStyle 4 = "decimal"
|
|
styleFor DefaultStyle 5 = "lowerLetter"
|
|
styleFor DefaultStyle 6 = "lowerRoman"
|
|
styleFor _ _ = "decimal"
|
|
patternFor OneParen s = s ++ ")"
|
|
patternFor TwoParens s = "(" ++ s ++ ")"
|
|
patternFor _ s = s ++ "."
|
|
|
|
getNumId :: WS Int
|
|
getNumId = length `fmap` gets stLists
|
|
|
|
-- | Convert Pandoc document to two OpenXML elements (the main document and footnotes).
|
|
writeOpenXML :: WriterOptions -> Pandoc -> WS (Element, Element)
|
|
writeOpenXML opts (Pandoc meta blocks) = do
|
|
let tit = docTitle meta ++ case lookupMeta "subtitle" meta of
|
|
Just (MetaBlocks [Plain xs]) -> LineBreak : xs
|
|
_ -> []
|
|
let auths = docAuthors meta
|
|
let dat = docDate meta
|
|
title <- withParaProp (pStyle "Title") $ blocksToOpenXML opts [Para tit | not (null tit)]
|
|
authors <- withParaProp (pStyle "Authors") $ blocksToOpenXML opts
|
|
[Para (intercalate [LineBreak] auths) | not (null auths)]
|
|
date <- withParaProp (pStyle "Date") $ blocksToOpenXML opts [Para dat | not (null dat)]
|
|
let convertSpace (Str x : Space : Str y : xs) = Str (x ++ " " ++ y) : xs
|
|
convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs
|
|
convertSpace xs = xs
|
|
let blocks' = bottomUp convertSpace $ blocks
|
|
doc' <- blocksToOpenXML opts blocks'
|
|
notes' <- reverse `fmap` gets stFootnotes
|
|
let meta' = title ++ authors ++ date
|
|
let stdAttributes =
|
|
[("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main")
|
|
,("xmlns:m","http://schemas.openxmlformats.org/officeDocument/2006/math")
|
|
,("xmlns:r","http://schemas.openxmlformats.org/officeDocument/2006/relationships")
|
|
,("xmlns:o","urn:schemas-microsoft-com:office:office")
|
|
,("xmlns:v","urn:schemas-microsoft-com:vml")
|
|
,("xmlns:w10","urn:schemas-microsoft-com:office:word")
|
|
,("xmlns:a","http://schemas.openxmlformats.org/drawingml/2006/main")
|
|
,("xmlns:pic","http://schemas.openxmlformats.org/drawingml/2006/picture")
|
|
,("xmlns:wp","http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing")]
|
|
let doc = mknode "w:document" stdAttributes $ mknode "w:body" [] (meta' ++ doc')
|
|
let notes = mknode "w:footnotes" stdAttributes notes'
|
|
return (doc, notes)
|
|
|
|
-- | Convert a list of Pandoc blocks to OpenXML.
|
|
blocksToOpenXML :: WriterOptions -> [Block] -> WS [Element]
|
|
blocksToOpenXML opts bls = concat `fmap` mapM (blockToOpenXML opts) bls
|
|
|
|
pStyle :: String -> Element
|
|
pStyle sty = mknode "w:pStyle" [("w:val",sty)] ()
|
|
|
|
rStyle :: String -> Element
|
|
rStyle sty = mknode "w:rStyle" [("w:val",sty)] ()
|
|
|
|
getUniqueId :: MonadIO m => m String
|
|
-- the + 20 is to ensure that there are no clashes with the rIds
|
|
-- already in word/document.xml.rel
|
|
getUniqueId = liftIO $ (show . (+ 20) . hashUnique) `fmap` newUnique
|
|
|
|
-- | Convert a Pandoc block element to OpenXML.
|
|
blockToOpenXML :: WriterOptions -> Block -> WS [Element]
|
|
blockToOpenXML _ Null = return []
|
|
blockToOpenXML opts (Div _ bs) = blocksToOpenXML opts bs
|
|
blockToOpenXML opts (Header lev (ident,_,_) lst) = do
|
|
contents <- withParaProp (pStyle $ "Heading" ++ show lev) $
|
|
blockToOpenXML opts (Para lst)
|
|
usedIdents <- gets stSectionIds
|
|
let bookmarkName = if null ident
|
|
then uniqueIdent lst usedIdents
|
|
else ident
|
|
modify $ \s -> s{ stSectionIds = bookmarkName : stSectionIds s }
|
|
id' <- getUniqueId
|
|
let bookmarkStart = mknode "w:bookmarkStart" [("w:id", id')
|
|
,("w:name",bookmarkName)] ()
|
|
let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] ()
|
|
return $ [bookmarkStart] ++ contents ++ [bookmarkEnd]
|
|
blockToOpenXML opts (Plain lst) = withParaProp (pStyle "Compact")
|
|
$ blockToOpenXML opts (Para lst)
|
|
-- title beginning with fig: indicates that the image is a figure
|
|
blockToOpenXML opts (Para [Image alt (src,'f':'i':'g':':':tit)]) = do
|
|
paraProps <- getParaProps False
|
|
contents <- inlinesToOpenXML opts [Image alt (src,tit)]
|
|
captionNode <- withParaProp (pStyle "ImageCaption")
|
|
$ blockToOpenXML opts (Para alt)
|
|
return $ mknode "w:p" [] (paraProps ++ contents) : captionNode
|
|
-- fixDisplayMath sometimes produces a Para [] as artifact
|
|
blockToOpenXML _ (Para []) = return []
|
|
blockToOpenXML opts (Para lst) = do
|
|
paraProps <- getParaProps $ case lst of
|
|
[Math DisplayMath _] -> True
|
|
_ -> False
|
|
contents <- inlinesToOpenXML opts lst
|
|
return [mknode "w:p" [] (paraProps ++ contents)]
|
|
blockToOpenXML _ (RawBlock format str)
|
|
| format == Format "openxml" = return [ x | Elem x <- parseXML str ]
|
|
| otherwise = return []
|
|
blockToOpenXML opts (BlockQuote blocks) =
|
|
withParaProp (pStyle "BlockQuote") $ blocksToOpenXML opts blocks
|
|
blockToOpenXML opts (CodeBlock attrs str) =
|
|
withParaProp (pStyle "SourceCode") $ blockToOpenXML opts $ Para [Code attrs str]
|
|
blockToOpenXML _ HorizontalRule = return [
|
|
mknode "w:p" [] $ mknode "w:r" [] $ mknode "w:pict" []
|
|
$ mknode "v:rect" [("style","width:0;height:1.5pt"),
|
|
("o:hralign","center"),
|
|
("o:hrstd","t"),("o:hr","t")] () ]
|
|
blockToOpenXML opts (Table caption aligns widths headers rows) = do
|
|
let captionStr = stringify caption
|
|
caption' <- if null caption
|
|
then return []
|
|
else withParaProp (pStyle "TableCaption")
|
|
$ blockToOpenXML opts (Para caption)
|
|
let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] ()
|
|
let cellToOpenXML (al, cell) = withParaProp (alignmentFor al)
|
|
$ blocksToOpenXML opts cell
|
|
headers' <- mapM cellToOpenXML $ zip aligns headers
|
|
rows' <- mapM (\cells -> mapM cellToOpenXML $ zip aligns cells)
|
|
$ rows
|
|
let borderProps = mknode "w:tcPr" []
|
|
[ mknode "w:tcBorders" []
|
|
$ mknode "w:bottom" [("w:val","single")] ()
|
|
, mknode "w:vAlign" [("w:val","bottom")] () ]
|
|
let mkcell border contents = mknode "w:tc" []
|
|
$ [ borderProps | border ] ++
|
|
if null contents
|
|
then [mknode "w:p" [] ()]
|
|
else contents
|
|
let mkrow border cells = mknode "w:tr" [] $ map (mkcell border) cells
|
|
let textwidth = 7920 -- 5.5 in in twips, 1/20 pt
|
|
let mkgridcol w = mknode "w:gridCol"
|
|
[("w:w", show $ (floor (textwidth * w) :: Integer))] ()
|
|
return $
|
|
[ mknode "w:tbl" []
|
|
( mknode "w:tblPr" []
|
|
( [ mknode "w:tblStyle" [("w:val","TableNormal")] () ] ++
|
|
[ mknode "w:tblCaption" [("w:val", captionStr)] ()
|
|
| not (null caption) ] )
|
|
: mknode "w:tblGrid" []
|
|
(if all (==0) widths
|
|
then []
|
|
else map mkgridcol widths)
|
|
: [ mkrow True headers' | not (all null headers) ] ++
|
|
map (mkrow False) rows'
|
|
)
|
|
] ++ caption'
|
|
blockToOpenXML opts (BulletList lst) = do
|
|
let marker = BulletMarker
|
|
addList marker
|
|
numid <- getNumId
|
|
asList $ concat `fmap` mapM (listItemToOpenXML opts numid) lst
|
|
blockToOpenXML opts (OrderedList (start, numstyle, numdelim) lst) = do
|
|
let marker = NumberMarker numstyle numdelim start
|
|
addList marker
|
|
numid <- getNumId
|
|
asList $ concat `fmap` mapM (listItemToOpenXML opts numid) lst
|
|
blockToOpenXML opts (DefinitionList items) =
|
|
concat `fmap` mapM (definitionListItemToOpenXML opts) items
|
|
|
|
definitionListItemToOpenXML :: WriterOptions -> ([Inline],[[Block]]) -> WS [Element]
|
|
definitionListItemToOpenXML opts (term,defs) = do
|
|
term' <- withParaProp (pStyle "DefinitionTerm")
|
|
$ blockToOpenXML opts (Para term)
|
|
defs' <- withParaProp (pStyle "Definition")
|
|
$ concat `fmap` mapM (blocksToOpenXML opts) defs
|
|
return $ term' ++ defs'
|
|
|
|
addList :: ListMarker -> WS ()
|
|
addList marker = do
|
|
lists <- gets stLists
|
|
modify $ \st -> st{ stLists = lists ++ [marker] }
|
|
numStyles <- gets stNumStyles
|
|
case M.lookup marker numStyles of
|
|
Just _ -> return ()
|
|
Nothing -> modify $ \st ->
|
|
st{ stNumStyles = M.insert marker (M.size numStyles + 1) numStyles }
|
|
|
|
listItemToOpenXML :: WriterOptions -> Int -> [Block] -> WS [Element]
|
|
listItemToOpenXML _ _ [] = return []
|
|
listItemToOpenXML opts numid (first:rest) = do
|
|
first' <- withNumId numid $ blockToOpenXML opts first
|
|
rest' <- withNumId 1 $ blocksToOpenXML opts rest
|
|
return $ first' ++ rest'
|
|
|
|
alignmentToString :: Alignment -> [Char]
|
|
alignmentToString alignment = case alignment of
|
|
AlignLeft -> "left"
|
|
AlignRight -> "right"
|
|
AlignCenter -> "center"
|
|
AlignDefault -> "left"
|
|
|
|
-- | Convert a list of inline elements to OpenXML.
|
|
inlinesToOpenXML :: WriterOptions -> [Inline] -> WS [Element]
|
|
inlinesToOpenXML opts lst = concat `fmap` mapM (inlineToOpenXML opts) lst
|
|
|
|
withNumId :: Int -> WS a -> WS a
|
|
withNumId numid p = do
|
|
origNumId <- gets stListNumId
|
|
modify $ \st -> st{ stListNumId = numid }
|
|
result <- p
|
|
modify $ \st -> st{ stListNumId = origNumId }
|
|
return result
|
|
|
|
asList :: WS a -> WS a
|
|
asList p = do
|
|
origListLevel <- gets stListLevel
|
|
modify $ \st -> st{ stListLevel = stListLevel st + 1 }
|
|
result <- p
|
|
modify $ \st -> st{ stListLevel = origListLevel }
|
|
return result
|
|
|
|
getTextProps :: WS [Element]
|
|
getTextProps = do
|
|
props <- gets stTextProperties
|
|
return $ if null props
|
|
then []
|
|
else [mknode "w:rPr" [] $ props]
|
|
|
|
pushTextProp :: Element -> WS ()
|
|
pushTextProp d = modify $ \s -> s{ stTextProperties = d : stTextProperties s }
|
|
|
|
popTextProp :: WS ()
|
|
popTextProp = modify $ \s -> s{ stTextProperties = drop 1 $ stTextProperties s }
|
|
|
|
withTextProp :: Element -> WS a -> WS a
|
|
withTextProp d p = do
|
|
pushTextProp d
|
|
res <- p
|
|
popTextProp
|
|
return res
|
|
|
|
getParaProps :: Bool -> WS [Element]
|
|
getParaProps displayMathPara = do
|
|
props <- gets stParaProperties
|
|
listLevel <- gets stListLevel
|
|
numid <- gets stListNumId
|
|
let listPr = if listLevel >= 0 && not displayMathPara
|
|
then [ mknode "w:numPr" []
|
|
[ mknode "w:numId" [("w:val",show numid)] ()
|
|
, mknode "w:ilvl" [("w:val",show listLevel)] () ]
|
|
]
|
|
else []
|
|
return $ case props ++ listPr of
|
|
[] -> []
|
|
ps -> [mknode "w:pPr" [] ps]
|
|
|
|
pushParaProp :: Element -> WS ()
|
|
pushParaProp d = modify $ \s -> s{ stParaProperties = d : stParaProperties s }
|
|
|
|
popParaProp :: WS ()
|
|
popParaProp = modify $ \s -> s{ stParaProperties = drop 1 $ stParaProperties s }
|
|
|
|
withParaProp :: Element -> WS a -> WS a
|
|
withParaProp d p = do
|
|
pushParaProp d
|
|
res <- p
|
|
popParaProp
|
|
return res
|
|
|
|
formattedString :: String -> WS [Element]
|
|
formattedString str = do
|
|
props <- getTextProps
|
|
return [ mknode "w:r" [] $
|
|
props ++
|
|
[ mknode "w:t" [("xml:space","preserve")] str ] ]
|
|
|
|
-- | Convert an inline element to OpenXML.
|
|
inlineToOpenXML :: WriterOptions -> Inline -> WS [Element]
|
|
inlineToOpenXML _ (Str str) = formattedString str
|
|
inlineToOpenXML opts Space = inlineToOpenXML opts (Str " ")
|
|
inlineToOpenXML opts (Span (_,classes,_) ils) = do
|
|
let off x = withTextProp (mknode x [("w:val","0")] ())
|
|
((if "csl-no-emph" `elem` classes then off "w:i" else id) .
|
|
(if "csl-no-strong" `elem` classes then off "w:b" else id) .
|
|
(if "csl-no-smallcaps" `elem` classes then off "w:smallCaps" else id))
|
|
$ inlinesToOpenXML opts ils
|
|
inlineToOpenXML opts (Strong lst) =
|
|
withTextProp (mknode "w:b" [] ()) $ inlinesToOpenXML opts lst
|
|
inlineToOpenXML opts (Emph lst) =
|
|
withTextProp (mknode "w:i" [] ()) $ inlinesToOpenXML opts lst
|
|
inlineToOpenXML opts (Subscript lst) =
|
|
withTextProp (mknode "w:vertAlign" [("w:val","subscript")] ())
|
|
$ inlinesToOpenXML opts lst
|
|
inlineToOpenXML opts (Superscript lst) =
|
|
withTextProp (mknode "w:vertAlign" [("w:val","superscript")] ())
|
|
$ inlinesToOpenXML opts lst
|
|
inlineToOpenXML opts (SmallCaps lst) =
|
|
withTextProp (mknode "w:smallCaps" [] ())
|
|
$ inlinesToOpenXML opts lst
|
|
inlineToOpenXML opts (Strikeout lst) =
|
|
withTextProp (mknode "w:strike" [] ())
|
|
$ inlinesToOpenXML opts lst
|
|
inlineToOpenXML _ LineBreak = return [br]
|
|
inlineToOpenXML _ (RawInline f str)
|
|
| f == Format "openxml" = return [ x | Elem x <- parseXML str ]
|
|
| otherwise = return []
|
|
inlineToOpenXML opts (Quoted quoteType lst) =
|
|
inlinesToOpenXML opts $ [Str open] ++ lst ++ [Str close]
|
|
where (open, close) = case quoteType of
|
|
SingleQuote -> ("\x2018", "\x2019")
|
|
DoubleQuote -> ("\x201C", "\x201D")
|
|
inlineToOpenXML opts (Math mathType str) = do
|
|
let displayType = if mathType == DisplayMath
|
|
then DisplayBlock
|
|
else DisplayInline
|
|
case texMathToOMML displayType str of
|
|
Right r -> return [r]
|
|
Left _ -> inlinesToOpenXML opts (readTeXMath' mathType str)
|
|
inlineToOpenXML opts (Cite _ lst) = inlinesToOpenXML opts lst
|
|
inlineToOpenXML opts (Code attrs str) =
|
|
withTextProp (rStyle "VerbatimChar")
|
|
$ if writerHighlight opts
|
|
then case highlight formatOpenXML attrs str of
|
|
Nothing -> unhighlighted
|
|
Just h -> return h
|
|
else unhighlighted
|
|
where unhighlighted = intercalate [br] `fmap`
|
|
(mapM formattedString $ lines str)
|
|
formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok)
|
|
toHlTok (toktype,tok) = mknode "w:r" []
|
|
[ mknode "w:rPr" []
|
|
[ rStyle $ show toktype ]
|
|
, mknode "w:t" [("xml:space","preserve")] tok ]
|
|
inlineToOpenXML opts (Note bs) = do
|
|
notes <- gets stFootnotes
|
|
notenum <- getUniqueId
|
|
let notemarker = mknode "w:r" []
|
|
[ mknode "w:rPr" [] (rStyle "FootnoteRef")
|
|
, mknode "w:footnoteRef" [] () ]
|
|
let notemarkerXml = RawInline (Format "openxml") $ ppElement notemarker
|
|
let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : ils) : xs
|
|
insertNoteRef (Para ils : xs) = Para (notemarkerXml : ils) : xs
|
|
insertNoteRef xs = Para [notemarkerXml] : xs
|
|
oldListLevel <- gets stListLevel
|
|
oldParaProperties <- gets stParaProperties
|
|
oldTextProperties <- gets stTextProperties
|
|
modify $ \st -> st{ stListLevel = -1, stParaProperties = [], stTextProperties = [] }
|
|
contents <- withParaProp (pStyle "FootnoteText") $ blocksToOpenXML opts
|
|
$ insertNoteRef bs
|
|
modify $ \st -> st{ stListLevel = oldListLevel, stParaProperties = oldParaProperties,
|
|
stTextProperties = oldTextProperties }
|
|
let newnote = mknode "w:footnote" [("w:id", notenum)] $ contents
|
|
modify $ \s -> s{ stFootnotes = newnote : notes }
|
|
return [ mknode "w:r" []
|
|
[ mknode "w:rPr" [] (rStyle "FootnoteRef")
|
|
, mknode "w:footnoteReference" [("w:id", notenum)] () ] ]
|
|
-- internal link:
|
|
inlineToOpenXML opts (Link txt ('#':xs,_)) = do
|
|
contents <- withTextProp (rStyle "Link") $ inlinesToOpenXML opts txt
|
|
return [ mknode "w:hyperlink" [("w:anchor",xs)] contents ]
|
|
-- external link:
|
|
inlineToOpenXML opts (Link txt (src,_)) = do
|
|
contents <- withTextProp (rStyle "Link") $ inlinesToOpenXML opts txt
|
|
extlinks <- gets stExternalLinks
|
|
id' <- case M.lookup src extlinks of
|
|
Just i -> return i
|
|
Nothing -> do
|
|
i <- ("rId"++) `fmap` getUniqueId
|
|
modify $ \st -> st{ stExternalLinks =
|
|
M.insert src i extlinks }
|
|
return i
|
|
return [ mknode "w:hyperlink" [("r:id",id')] contents ]
|
|
inlineToOpenXML opts (Image alt (src, tit)) = do
|
|
-- first, check to see if we've already done this image
|
|
imgs <- gets stImages
|
|
case M.lookup src imgs of
|
|
Just (_,_,_,elt,_) -> return [elt]
|
|
Nothing -> do
|
|
res <- liftIO $ fetchItem (writerSourceURL opts) src
|
|
case res of
|
|
Left (_ :: E.SomeException) -> do
|
|
liftIO $ warn $ "Could not find image `" ++ src ++ "', skipping..."
|
|
-- emit alt text
|
|
inlinesToOpenXML opts alt
|
|
Right (img, mt) -> do
|
|
ident <- ("rId"++) `fmap` getUniqueId
|
|
let size = imageSize img
|
|
let (xpt,ypt) = maybe (120,120) sizeInPoints size
|
|
-- 12700 emu = 1 pt
|
|
let (xemu,yemu) = (xpt * 12700, ypt * 12700)
|
|
let cNvPicPr = mknode "pic:cNvPicPr" [] $
|
|
mknode "a:picLocks" [("noChangeArrowheads","1"),("noChangeAspect","1")] ()
|
|
let nvPicPr = mknode "pic:nvPicPr" []
|
|
[ mknode "pic:cNvPr"
|
|
[("descr",src),("id","0"),("name","Picture")] ()
|
|
, cNvPicPr ]
|
|
let blipFill = mknode "pic:blipFill" []
|
|
[ mknode "a:blip" [("r:embed",ident)] ()
|
|
, mknode "a:stretch" [] $ mknode "a:fillRect" [] () ]
|
|
let xfrm = mknode "a:xfrm" []
|
|
[ mknode "a:off" [("x","0"),("y","0")] ()
|
|
, mknode "a:ext" [("cx",show xemu),("cy",show yemu)] () ]
|
|
let prstGeom = mknode "a:prstGeom" [("prst","rect")] $
|
|
mknode "a:avLst" [] ()
|
|
let ln = mknode "a:ln" [("w","9525")]
|
|
[ mknode "a:noFill" [] ()
|
|
, mknode "a:headEnd" [] ()
|
|
, mknode "a:tailEnd" [] () ]
|
|
let spPr = mknode "pic:spPr" [("bwMode","auto")]
|
|
[xfrm, prstGeom, mknode "a:noFill" [] (), ln]
|
|
let graphic = mknode "a:graphic" [] $
|
|
mknode "a:graphicData" [("uri","http://schemas.openxmlformats.org/drawingml/2006/picture")]
|
|
[ mknode "pic:pic" []
|
|
[ nvPicPr
|
|
, blipFill
|
|
, spPr ] ]
|
|
let imgElt = mknode "w:r" [] $
|
|
mknode "w:drawing" [] $
|
|
mknode "wp:inline" []
|
|
[ mknode "wp:extent" [("cx",show xemu),("cy",show yemu)] ()
|
|
, mknode "wp:effectExtent" [("b","0"),("l","0"),("r","0"),("t","0")] ()
|
|
, mknode "wp:docPr" [("descr",tit),("id","1"),("name","Picture")] ()
|
|
, graphic ]
|
|
let imgext = case mt >>= extensionFromMimeType of
|
|
Just x -> '.':x
|
|
Nothing -> case imageType img of
|
|
Just Png -> ".png"
|
|
Just Jpeg -> ".jpeg"
|
|
Just Gif -> ".gif"
|
|
Just Pdf -> ".pdf"
|
|
Just Eps -> ".eps"
|
|
Nothing -> ""
|
|
if null imgext
|
|
then -- without an extension there is no rule for content type
|
|
inlinesToOpenXML opts alt -- return alt to avoid corrupted docx
|
|
else do
|
|
let imgpath = "media/" ++ ident ++ imgext
|
|
let mbMimeType = mt <|> getMimeType imgpath
|
|
-- insert mime type to use in constructing [Content_Types].xml
|
|
modify $ \st -> st{ stImages =
|
|
M.insert src (ident, imgpath, mbMimeType, imgElt, img)
|
|
$ stImages st }
|
|
return [imgElt]
|
|
|
|
br :: Element
|
|
br = mknode "w:r" [] [mknode "w:br" [("w:type","textWrapping")] () ]
|
|
|
|
parseXml :: Archive -> String -> IO Element
|
|
parseXml refArchive relpath =
|
|
case (findEntryByPath relpath refArchive >>= parseXMLDoc . UTF8.toStringLazy . fromEntry) of
|
|
Just d -> return d
|
|
Nothing -> fail $ relpath ++ " missing in reference docx"
|
|
|
|
isDisplayMath :: Inline -> Bool
|
|
isDisplayMath (Math DisplayMath _) = True
|
|
isDisplayMath _ = False
|
|
|
|
stripLeadingTrailingSpace :: [Inline] -> [Inline]
|
|
stripLeadingTrailingSpace = go . reverse . go . reverse
|
|
where go (Space:xs) = xs
|
|
go xs = xs
|
|
|
|
fixDisplayMath :: Block -> Block
|
|
fixDisplayMath (Plain lst)
|
|
| any isDisplayMath lst && not (all isDisplayMath lst) =
|
|
-- chop into several paragraphs so each displaymath is its own
|
|
Div ("",["math"],[]) $ map (Plain . stripLeadingTrailingSpace) $
|
|
groupBy (\x y -> (isDisplayMath x && isDisplayMath y) ||
|
|
not (isDisplayMath x || isDisplayMath y)) lst
|
|
fixDisplayMath (Para lst)
|
|
| any isDisplayMath lst && not (all isDisplayMath lst) =
|
|
-- chop into several paragraphs so each displaymath is its own
|
|
Div ("",["math"],[]) $ map (Para . stripLeadingTrailingSpace) $
|
|
groupBy (\x y -> (isDisplayMath x && isDisplayMath y) ||
|
|
not (isDisplayMath x || isDisplayMath y)) lst
|
|
fixDisplayMath x = x
|