Added Docx writer.

* New module `Text.Pandoc.Docx`.
* New output format `docx`.
* Added reference.docx.
* New option `--reference-docx`.

The writer includes support for highlighted code blocks
and math (which is converted from TeX to OMML using
texmath's new OMML module).
This commit is contained in:
John MacFarlane 2012-01-03 12:10:10 -08:00
parent 9ce3e2bf85
commit ba81cda7f1
8 changed files with 669 additions and 21 deletions

View file

@ -18,8 +18,8 @@ Description: Pandoc is a Haskell library for converting from one markup
this library. It can read markdown and (subsets of)
reStructuredText, HTML, LaTeX and Textile, and it can write
markdown, reStructuredText, HTML, LaTeX, ConTeXt, Docbook,
OpenDocument, ODT, RTF, MediaWiki, Textile, groff man pages,
plain text, Emacs Org-Mode, AsciiDoc, EPUB,
OpenDocument, ODT, Word docx, RTF, MediaWiki, Textile,
groff man pages, plain text, Emacs Org-Mode, AsciiDoc, EPUB,
and S5 and Slidy HTML slide shows.
.
Pandoc extends standard markdown syntax with footnotes,
@ -51,6 +51,8 @@ Data-Files:
templates/epub-coverimage.html,
-- data for ODT writer
reference.odt,
-- data for docx writer
reference.docx,
-- stylesheet for EPUB writer
epub.css,
-- data for LaTeXMathML writer
@ -215,7 +217,7 @@ Library
utf8-string >= 0.3 && < 0.4,
old-time >= 1 && < 1.2,
HTTP >= 4000.0.5 && < 4000.3,
texmath >= 0.5 && < 0.6,
texmath >= 0.6 && < 0.7,
xml >= 1.3.5 && < 1.4,
random >= 1 && < 1.1,
extensible-exceptions >= 0.1 && < 0.2,
@ -269,6 +271,7 @@ Library
Text.Pandoc.Writers.MediaWiki,
Text.Pandoc.Writers.RTF,
Text.Pandoc.Writers.ODT,
Text.Pandoc.Writers.Docx,
Text.Pandoc.Writers.EPUB,
Text.Pandoc.Templates,
Text.Pandoc.Biblio,
@ -302,7 +305,7 @@ Executable pandoc
utf8-string >= 0.3 && < 0.4,
old-time >= 1 && < 1.2,
HTTP >= 4000.0.5 && < 4000.3,
texmath >= 0.5 && < 0.6,
texmath >= 0.6 && < 0.7,
xml >= 1.3.5 && < 1.4,
random >= 1 && < 1.1,
extensible-exceptions >= 0.1 && < 0.2,

BIN
reference.docx Normal file

Binary file not shown.

View file

@ -94,6 +94,7 @@ module Text.Pandoc
, writeTextile
, writeRTF
, writeODT
, writeDocx
, writeEPUB
, writeOrg
, writeAsciiDoc
@ -128,6 +129,7 @@ import Text.Pandoc.Writers.ConTeXt
import Text.Pandoc.Writers.Texinfo
import Text.Pandoc.Writers.HTML
import Text.Pandoc.Writers.ODT
import Text.Pandoc.Writers.Docx
import Text.Pandoc.Writers.EPUB
import Text.Pandoc.Writers.Docbook
import Text.Pandoc.Writers.OpenDocument
@ -166,7 +168,7 @@ readers = [("native" , \_ -> readNative)
]
-- | Association list of formats and writers (omitting the
-- binary writers, odt and epub).
-- binary writers, odt, docx, and epub).
writers :: [ ( String, WriterOptions -> Pandoc -> String ) ]
writers = [("native" , writeNative)
,("json" , \_ -> encodeJSON)

View file

@ -69,6 +69,9 @@ expToInlines (ESymbol t s) = Just $ addSpace t (Str s)
medspace = Str "\x2005"
widespace = Str "\x2004"
expToInlines (EStretchy x) = expToInlines x
expToInlines (EDelimited start end xs) = do
xs' <- mapM expToInlines xs
return $ [Str start] ++ concat xs' ++ [Str end]
expToInlines (EGrouped xs) = expsToInlines xs
expToInlines (ESpace "0.167em") = Just [Str "\x2009"]
expToInlines (ESpace "0.222em") = Just [Str "\x2005"]
@ -94,10 +97,10 @@ expToInlines (ESubsup x y z) = do
expToInlines (EDown x y) = expToInlines (ESub x y)
expToInlines (EUp x y) = expToInlines (ESuper x y)
expToInlines (EDownup x y z) = expToInlines (ESubsup x y z)
expToInlines (EText "normal" x) = Just [Str x]
expToInlines (EText "bold" x) = Just [Strong [Str x]]
expToInlines (EText "monospace" x) = Just [Code nullAttr x]
expToInlines (EText "italic" x) = Just [Emph [Str x]]
expToInlines (EText TextNormal x) = Just [Str x]
expToInlines (EText TextBold x) = Just [Strong [Str x]]
expToInlines (EText TextMonospace x) = Just [Code nullAttr x]
expToInlines (EText TextItalic x) = Just [Emph [Str x]]
expToInlines (EText _ x) = Just [Str x]
expToInlines (EOver (EGrouped [EIdentifier [c]]) (ESymbol Accent [accent])) =
case accent of

View file

@ -84,6 +84,7 @@ getDefaultTemplate :: (Maybe FilePath) -- ^ User data directory to search first
getDefaultTemplate _ "native" = return $ Right ""
getDefaultTemplate _ "json" = return $ Right ""
getDefaultTemplate user "odt" = getDefaultTemplate user "opendocument"
getDefaultTemplate user "docx" = getDefaultTemplate user "openxml"
getDefaultTemplate user "epub" = getDefaultTemplate user "html"
getDefaultTemplate user "beamer" = getDefaultTemplate user "latex"
getDefaultTemplate user writer = do

View file

@ -0,0 +1,626 @@
{-
Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
Module : Text.Pandoc.Writers.Docx
Copyright : Copyright (C) 2012 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Portability : portable
Conversion of 'Pandoc' documents to docx.
-}
module Text.Pandoc.Writers.Docx ( writeDocx ) where
import Data.List ( intercalate, elemIndex )
import System.FilePath ( (</>) )
import qualified Data.ByteString.Lazy as B
import qualified Data.Map as M
import Data.ByteString.Lazy.UTF8 ( fromString, toString )
import Codec.Archive.Zip
import System.Time
import Paths_pandoc ( getDataFileName )
import Text.Pandoc.Definition
import Text.Pandoc.Generic
import System.Directory
import Text.Pandoc.ImageSize
import Text.Pandoc.Shared hiding (Element)
import Text.Pandoc.Readers.TeXMath
import Text.Pandoc.Highlighting ( highlight )
import Text.Highlighting.Kate.Types ()
import Text.XML.Light
import Text.TeXMath
import Control.Monad.State
import Text.Highlighting.Kate
data WriterState = WriterState{
stTextProperties :: [Element]
, stParaProperties :: [Element]
, stFootnotes :: [Element]
, stSectionIds :: [String]
, stExternalLinks :: M.Map String String
, stImages :: M.Map FilePath (String, B.ByteString)
, stListLevel :: Int
, stListMarker :: ListMarker
, stMarkersUsed :: [ListMarker]
}
data ListMarker = NoMarker
| BulletMarker
| NumberMarker ListNumberStyle ListNumberDelim Int
deriving (Show, Read, Eq, Ord)
defaultWriterState :: WriterState
defaultWriterState = WriterState{
stTextProperties = []
, stParaProperties = []
, stFootnotes = []
, stSectionIds = []
, stExternalLinks = M.empty
, stImages = M.empty
, stListLevel = 0 -- not in a list
, stListMarker = NoMarker
, stMarkersUsed = [NoMarker]
}
type WS a = StateT WriterState IO a
showTopElement' :: Element -> String
showTopElement' x = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++ showElement x
mknode :: Node t => String -> [(String,String)] -> t -> Element
mknode s attrs =
add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) . node (unqual s)
-- | Produce an Docx file from a Pandoc document.
writeDocx :: Maybe FilePath -- ^ Path specified by --reference-docx
-> WriterOptions -- ^ Writer options
-> Pandoc -- ^ Document to convert
-> IO B.ByteString
writeDocx mbRefDocx opts doc@(Pandoc (Meta tit auths _) _) = do
let datadir = writerUserDataDir opts
refArchive <- liftM toArchive $
case mbRefDocx of
Just f -> B.readFile f
Nothing -> do
let defaultDocx = getDataFileName "reference.docx" >>= B.readFile
case datadir of
Nothing -> defaultDocx
Just d -> do
exists <- doesFileExist (d </> "reference.docx")
if exists
then B.readFile (d </> "reference.docx")
else defaultDocx
(newContents, st) <- runStateT (writeOpenXML opts{writerWrapText = False} doc)
defaultWriterState
(TOD epochtime _) <- getClockTime
let imgs = M.elems $ stImages st
let imgPath ident img = "media/" ++ ident ++
case imageType img of
Just Png -> ".png"
Just Jpeg -> ".jpeg"
Just Gif -> ".gif"
Nothing -> ""
let toImgRel (ident,img) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",ident),("Target",imgPath ident img)] ()
let newrels = map toImgRel imgs
let relpath = "word/_rels/document.xml.rels"
let reldoc = case findEntryByPath relpath refArchive >>=
parseXMLDoc . toString . fromEntry of
Just d -> d
Nothing -> error $ relpath ++ "missing in reference docx"
let reldoc' = reldoc{ elContent = elContent reldoc ++ map Elem newrels }
-- create entries for images
let toImageEntry (ident,img) = toEntry ("word/" ++ imgPath ident img)
epochtime img
let imageEntries = map toImageEntry imgs
-- NOW get list of external links and images from this, and do what's needed
let toLinkRel (src,ident) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"),("Id",ident),("Target",src),("TargetMode","External") ] ()
let newrels' = map toLinkRel $ M.toList $ stExternalLinks st
let reldoc'' = reldoc' { elContent = elContent reldoc' ++ map Elem newrels' }
let relEntry = toEntry relpath epochtime $ fromString $ showTopElement' reldoc''
let contentEntry = toEntry "word/document.xml" epochtime $ fromString $ showTopElement' newContents
-- styles
let newstyles = styleToOpenXml $ writerHighlightStyle opts
let stylepath = "word/styles.xml"
let styledoc = case findEntryByPath stylepath refArchive >>=
parseXMLDoc . toString . fromEntry of
Just d -> d
Nothing -> error $ stylepath ++ "missing in reference docx"
let styledoc' = styledoc{ elContent = elContent styledoc ++ map Elem newstyles }
let styleEntry = toEntry stylepath epochtime $ fromString $ showTopElement' styledoc'
-- construct word/numbering.xml
let markersUsed = stMarkersUsed st
let numpath = "word/numbering.xml"
let numEntry = toEntry numpath epochtime $ fromString $ showTopElement' $ mkNumbering markersUsed
-- TODO add metadata, etc.
let docPropsPath = "docProps/core.xml"
let docProps = mknode "cp:coreProperties"
[("xmlns:cp","http://schemas.openxmlformats.org/package/2006/metadata/core-properties")
,("xmlns:dc","http://purl.org/dc/elements/1.1/")
,("xmlns:dcterms","http://purl.org/dc/terms/")
,("xmlns:dcmitype","http://purl.org/dc/dcmitype/")
,("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance")]
$ mknode "dc:title" [] (stringify tit)
: mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] () -- put doc date here
: mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] () -- put current time here
: map (mknode "dc:creator" [] . stringify) auths
let docPropsEntry = toEntry docPropsPath epochtime $ fromString $ showTopElement' docProps
let archive = foldr addEntryToArchive refArchive $
contentEntry : relEntry : numEntry : styleEntry : docPropsEntry : imageEntries
return $ fromArchive archive
styleToOpenXml :: Style -> [Element]
styleToOpenXml style = parStyle : map toStyle alltoktypes
where alltoktypes = enumFromTo KeywordTok NormalTok
toStyle toktype = mknode "w:style" [("w:type","character"),
("w:customStyle","1"),("w:styleId",show toktype)]
[ mknode "w:name" [("w:val",show toktype)] ()
, mknode "w:basedOn" [("w:val","VerbatimChar")] ()
, mknode "w:rPr" [] $
[ mknode "w:color" [("w:val",tokCol toktype)] ()
| tokCol toktype /= "auto" ] ++
[ mknode "w:shd" [("w:val","clear"),("w:fill",tokBg toktype)] ()
| tokBg toktype /= "auto" ] ++
[ mknode "w:b" [] () | tokFeature tokenBold toktype ] ++
[ mknode "w:i" [] () | tokFeature tokenItalic toktype ] ++
[ mknode "w:u" [] () | tokFeature tokenUnderline toktype ]
]
tokStyles = tokenStyles style
tokFeature f toktype = maybe False f $ lookup toktype tokStyles
tokCol toktype = maybe "auto" (drop 1 . fromColor)
$ (tokenColor =<< lookup toktype tokStyles)
`mplus` defaultColor style
tokBg toktype = maybe "auto" (drop 1 . fromColor)
$ (tokenBackground =<< lookup toktype tokStyles)
`mplus` backgroundColor style
parStyle = mknode "w:style" [("w:type","paragraph"),
("w:customStyle","1"),("w:styleId","SourceCode")]
[ mknode "w:name" [("w:val","Source Code")] ()
, mknode "w:basedOn" [("w:val","Normal")] ()
, mknode "w:link" [("w:val","VerbatimChar")] ()
, mknode "w:pPr" []
$ mknode "w:wordWrap" [("w:val","off")] ()
: ( maybe [] (\col -> [mknode "w:shd" [("w:val","clear"),("w:fill",drop 1 $ fromColor col)] ()])
$ backgroundColor style )
]
mkNumbering :: [ListMarker] -> Element
mkNumbering markers =
mknode "w:numbering" [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main")]
$ zipWith mkAbstractNum nums markers
++ map mkNum nums
where nums = [1..(length markers)]
mkNum :: Int -> Element
mkNum numid =
mknode "w:num" [("w:numId",show numid)]
$ mknode "w:abstractNumId" [("w:val",show numid)] ()
mkAbstractNum :: Int -> ListMarker -> Element
mkAbstractNum numid marker =
mknode "w:abstractNum" [("w:abstractNumId",show numid)]
$ mknode "w:multiLevelType" [("w:val","multilevel")] ()
: map (mkLvl marker) [0..6]
mkLvl :: ListMarker -> Int -> Element
mkLvl marker lvl =
mknode "w:lvl" [("w:ilvl",show lvl)] $
[ mknode "w:start" [("w:val",start)] ()
| marker /= NoMarker && marker /= BulletMarker ] ++
[ mknode "w:numFmt" [("w:val",fmt)] ()
, mknode "w:lvlText" [("w:val",lvltxt)] ()
, mknode "w:lvlJc" [("w:val","left")] ()
, mknode "w:pPr" []
[ mknode "w:tabs" []
$ mknode "w:tab" [("w:val","num"),("w:pos",show $ lvl * step)] ()
, mknode "w:ind" [("w:left",show $ lvl * step + hang),("w:hanging",show hang)] ()
]
]
where (fmt, lvltxt, start) =
case marker of
NoMarker -> ("bullet"," ","1")
BulletMarker -> ("bullet",bulletFor lvl,"1")
NumberMarker st de n -> (styleFor st lvl
,patternFor de ("%" ++ show (lvl + 1))
,show n)
step = 720
hang = step `div` 2
bulletFor 1 = "\8226"
bulletFor 2 = "\9702"
bulletFor 3 = "\8227"
bulletFor 4 = "\8259"
bulletFor 5 = "\8226"
bulletFor _ = "\9702"
styleFor UpperAlpha _ = "upperLetter"
styleFor LowerAlpha _ = "lowerLetter"
styleFor UpperRoman _ = "upperRoman"
styleFor LowerRoman _ = "lowerRoman"
styleFor Decimal _ = "decimal"
styleFor DefaultStyle 1 = "decimal"
styleFor DefaultStyle 2 = "lowerLetter"
styleFor DefaultStyle 3 = "lowerRoman"
styleFor DefaultStyle 4 = "decimal"
styleFor DefaultStyle 5 = "lowerLetter"
styleFor DefaultStyle 6 = "lowerRoman"
styleFor _ _ = "decimal"
patternFor OneParen s = s ++ ")"
patternFor TwoParens s = "(" ++ s ++ ")"
patternFor _ s = s ++ "."
-- | Convert Pandoc document to string in OpenXML format.
writeOpenXML :: WriterOptions -> Pandoc -> WS Element
writeOpenXML opts (Pandoc (Meta tit auths dat) blocks) = do
title <- withParaProp (pStyle "Title") $ blocksToOpenXML opts [Para tit | not (null tit)]
authors <- withParaProp (pStyle "Authors") $ blocksToOpenXML opts
[Para (intercalate [LineBreak] auths) | not (null auths)]
date <- withParaProp (pStyle "Date") $ blocksToOpenXML opts [Para dat | not (null dat)]
let convertSpace (Str x : Space : Str y : xs) = Str (x ++ " " ++ y) : xs
convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs
convertSpace xs = xs
let blocks' = bottomUp convertSpace $ blocks
doc <- blocksToOpenXML opts blocks'
notes' <- reverse `fmap` gets stFootnotes
let notes = case notes' of
[] -> []
ns -> [mknode "w:footnotes" [] ns]
let meta = title ++ authors ++ date
return $ mknode "w:document"
[("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main")
,("xmlns:m","http://schemas.openxmlformats.org/officeDocument/2006/math")
,("xmlns:r","http://schemas.openxmlformats.org/officeDocument/2006/relationships")
,("xmlns:o","urn:schemas-microsoft-com:office:office")
,("xmlns:v","urn:schemas-microsoft-com:vml")
,("xmlns:w10","urn:schemas-microsoft-com:office:word")
,("xmlns:a","http://schemas.openxmlformats.org/drawingml/2006/main")
,("xmlns:pic","http://schemas.openxmlformats.org/drawingml/2006/picture")
,("xmlns:wp","http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing")]
$ mknode "w:body" [] (meta ++ doc ++ notes)
-- | Convert a list of Pandoc blocks to OpenXML.
blocksToOpenXML :: WriterOptions -> [Block] -> WS [Element]
blocksToOpenXML opts bls = concat `fmap` mapM (blockToOpenXML opts) bls
pStyle :: String -> Element
pStyle sty = mknode "w:pStyle" [("w:val",sty)] ()
rStyle :: String -> Element
rStyle sty = mknode "w:rStyle" [("w:val",sty)] ()
-- | Convert a Pandoc block element to OpenXML.
blockToOpenXML :: WriterOptions -> Block -> WS [Element]
blockToOpenXML _ Null = return []
blockToOpenXML opts (Header lev lst) = do
contents <- withParaProp (pStyle $ "Heading" ++ show lev) $
blockToOpenXML opts (Para lst)
usedIdents <- gets stSectionIds
let ident = uniqueIdent lst usedIdents
modify $ \s -> s{ stSectionIds = ident : stSectionIds s }
let bookmarkStart = mknode "w:bookmarkStart" [("w:id",ident)
,("w:name",ident)] ()
let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id",ident)] ()
return $ [bookmarkStart] ++ contents ++ [bookmarkEnd]
blockToOpenXML opts (Plain lst) = blockToOpenXML opts (Para lst)
blockToOpenXML opts (Para x@[Image alt _]) = do
paraProps <- getParaProps
contents <- inlinesToOpenXML opts x
captionNode <- withParaProp (pStyle "ImageCaption")
$ blockToOpenXML opts (Para alt)
return $ mknode "w:p" [] (paraProps ++ contents) : captionNode
blockToOpenXML opts (Para lst) = do
paraProps <- getParaProps
contents <- inlinesToOpenXML opts lst
return [mknode "w:p" [] (paraProps ++ contents)]
blockToOpenXML _ (RawBlock format str)
| format == "openxml" = return [ x | Elem x <- parseXML str ]
| otherwise = return []
blockToOpenXML opts (BlockQuote blocks) =
withParaProp (pStyle "BlockQuote") $ blocksToOpenXML opts blocks
blockToOpenXML opts (CodeBlock attrs str) =
withParaProp (pStyle "SourceCode") $ blockToOpenXML opts $ Para [Code attrs str]
blockToOpenXML _ HorizontalRule = return [
mknode "w:p" [] $ mknode "w:r" [] $ mknode "w:pict" []
$ mknode "v:rect" [("style","width:0;height:1.5pt"),
("o:hralign","center"),
("o:hrstd","t"),("o:hr","t")] () ]
blockToOpenXML opts (Table caption aligns widths headers rows) = do
let captionStr = stringify caption
caption' <- if null caption
then return []
else withParaProp (pStyle "TableCaption")
$ blockToOpenXML opts (Para caption)
let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] ()
let cellToOpenXML (al, cell) = withParaProp (alignmentFor al)
$ blocksToOpenXML opts cell
headers' <- mapM cellToOpenXML $ zip aligns headers
rows' <- mapM (\cells -> mapM cellToOpenXML $ zip aligns cells)
$ rows
let borderProps = mknode "w:tcPr" []
[ mknode "w:tcBorders" []
$ mknode "w:bottom" [("w:val","single")] ()
, mknode "w:vAlign" [("w:val","bottom")] () ]
let mkcell border contents = mknode "w:tc" []
$ [ borderProps | border ] ++
if null contents
then [mknode "w:p" [] ()]
else contents
let mkrow border cells = mknode "w:tr" [] $ map (mkcell border) cells
let textwidth = 7920 -- 5.5 in in twips, 1/20 pt
let mkgridcol w = mknode "w:gridCol"
[("w:w", show $ (floor (textwidth * w) :: Integer))] ()
return $
[ mknode "w:tbl" []
( mknode "w:tblPr" []
[ mknode "w:tblCaption" [("w:val", captionStr)] ()
| not (null caption) ]
: mknode "w:tblGrid" []
(if all (==0) widths
then []
else map mkgridcol widths)
: [ mkrow True headers' | not (all null headers) ] ++
map (mkrow False) rows'
)
] ++ caption'
blockToOpenXML opts (BulletList lst) = do
let marker = BulletMarker
asList $ concat `fmap` mapM (listItemToOpenXML opts marker) lst
blockToOpenXML opts (OrderedList (start, numstyle, numdelim) lst) = do
let marker = NumberMarker numstyle numdelim start
asList $ concat `fmap` mapM (listItemToOpenXML opts marker) lst
blockToOpenXML opts (DefinitionList items) =
concat `fmap` mapM (definitionListItemToOpenXML opts) items
definitionListItemToOpenXML :: WriterOptions -> ([Inline],[[Block]]) -> WS [Element]
definitionListItemToOpenXML opts (term,defs) = do
term' <- withParaProp (pStyle "DefinitionTerm")
$ blockToOpenXML opts (Para term)
defs' <- withParaProp (pStyle "Definition")
$ concat `fmap` mapM (blocksToOpenXML opts) defs
return $ term' ++ defs'
getNumId :: WS Int
getNumId = do
marker <- gets stListMarker
markersUsed <- gets stMarkersUsed
case elemIndex marker markersUsed of
Just x -> return $ x + 1
Nothing -> do
modify $ \st -> st{ stMarkersUsed = markersUsed ++ [marker] }
return $ length markersUsed + 1
listItemToOpenXML :: WriterOptions -> ListMarker -> [Block] -> WS [Element]
listItemToOpenXML _ _ [] = return []
listItemToOpenXML opts marker (first:rest) = do
first' <- withMarker marker $ blockToOpenXML opts first
rest' <- withMarker NoMarker $ blocksToOpenXML opts rest
return $ first' ++ rest'
alignmentToString :: Alignment -> [Char]
alignmentToString alignment = case alignment of
AlignLeft -> "left"
AlignRight -> "right"
AlignCenter -> "center"
AlignDefault -> "left"
-- | Convert a list of inline elements to OpenXML.
inlinesToOpenXML :: WriterOptions -> [Inline] -> WS [Element]
inlinesToOpenXML opts lst = concat `fmap` mapM (inlineToOpenXML opts) lst
withMarker :: ListMarker -> WS a -> WS a
withMarker m p = do
origMarker <- gets stListMarker
modify $ \st -> st{ stListMarker = m }
result <- p
modify $ \st -> st{ stListMarker = origMarker }
return result
asList :: WS a -> WS a
asList p = do
origListLevel <- gets stListLevel
modify $ \st -> st{ stListLevel = stListLevel st + 1 }
result <- p
modify $ \st -> st{ stListLevel = origListLevel }
return result
getTextProps :: WS [Element]
getTextProps = do
props <- gets stTextProperties
return $ if null props
then []
else [mknode "w:rPr" [] $ props]
pushTextProp :: Element -> WS ()
pushTextProp d = modify $ \s -> s{ stTextProperties = d : stTextProperties s }
popTextProp :: WS ()
popTextProp = modify $ \s -> s{ stTextProperties = drop 1 $ stTextProperties s }
withTextProp :: Element -> WS a -> WS a
withTextProp d p = do
pushTextProp d
res <- p
popTextProp
return res
getParaProps :: WS [Element]
getParaProps = do
props <- gets stParaProperties
listLevel <- gets stListLevel
numid <- getNumId
let listPr = if listLevel >= 1
then [ mknode "w:numPr" []
[ mknode "w:numId" [("w:val",show numid)] ()
, mknode "w:ilvl" [("w:val",show listLevel)] () ]
]
else []
return $ case props ++ listPr of
[] -> []
ps -> [mknode "w:pPr" [] ps]
pushParaProp :: Element -> WS ()
pushParaProp d = modify $ \s -> s{ stParaProperties = d : stParaProperties s }
popParaProp :: WS ()
popParaProp = modify $ \s -> s{ stParaProperties = drop 1 $ stParaProperties s }
withParaProp :: Element -> WS a -> WS a
withParaProp d p = do
pushParaProp d
res <- p
popParaProp
return res
formattedString :: String -> WS [Element]
formattedString str = do
props <- getTextProps
return [ mknode "w:r" [] $
props ++
[ mknode "w:t" [("xml:space","preserve")] str ] ]
-- | Convert an inline element to OpenXML.
inlineToOpenXML :: WriterOptions -> Inline -> WS [Element]
inlineToOpenXML _ (Str str) = formattedString str
inlineToOpenXML opts Space = inlineToOpenXML opts (Str " ")
inlineToOpenXML opts (Strong lst) =
withTextProp (mknode "w:b" [] ()) $ inlinesToOpenXML opts lst
inlineToOpenXML opts (Emph lst) =
withTextProp (mknode "w:i" [] ()) $ inlinesToOpenXML opts lst
inlineToOpenXML opts (Subscript lst) =
withTextProp (mknode "w:vertAlign" [("w:val","subscript")] ())
$ inlinesToOpenXML opts lst
inlineToOpenXML opts (Superscript lst) =
withTextProp (mknode "w:vertAlign" [("w:val","superscript")] ())
$ inlinesToOpenXML opts lst
inlineToOpenXML opts (SmallCaps lst) =
withTextProp (mknode "w:smallCaps" [] ())
$ inlinesToOpenXML opts lst
inlineToOpenXML opts (Strikeout lst) =
withTextProp (mknode "w:strike" [] ())
$ inlinesToOpenXML opts lst
inlineToOpenXML _ LineBreak = return [ mknode "w:br" [] () ]
inlineToOpenXML _ (RawInline f str)
| f == "openxml" = return [ x | Elem x <- parseXML str ]
| otherwise = return []
inlineToOpenXML opts (Quoted quoteType lst) =
inlinesToOpenXML opts $ [Str open] ++ lst ++ [Str close]
where (open, close) = case quoteType of
SingleQuote -> ("\x2018", "\x2019")
DoubleQuote -> ("\x201C", "\x201D")
inlineToOpenXML opts (Math t str) =
case texMathToOMML dt str of
Right r -> return [r]
Left _ -> inlinesToOpenXML opts (readTeXMath str)
where dt = if t == InlineMath
then DisplayInline
else DisplayBlock
inlineToOpenXML opts (Cite _ lst) = inlinesToOpenXML opts lst
inlineToOpenXML _ (Code attrs str) =
withTextProp (rStyle "VerbatimChar")
$ case highlight formatOpenXML attrs str of
Nothing -> intercalate [mknode "w:br" [] ()]
`fmap` (mapM formattedString $ lines str)
Just h -> return h
where formatOpenXML _fmtOpts = intercalate [mknode "w:br" [] ()] .
map (map toHlTok)
toHlTok (toktype,tok) = mknode "w:r" []
[ mknode "w:rPr" []
[ rStyle $ show toktype ]
, mknode "w:t" [("xml:space","preserve")] tok ]
inlineToOpenXML opts (Note bs) = do
notes <- gets stFootnotes
let notenum = length notes + 1
let notemarker = mknode "w:r" []
[ mknode "w:rPr" [] (rStyle "FootnoteReference")
, mknode "w:footnoteRef" [] () ]
let notemarkerXml = RawInline "openxml" $ ppElement notemarker
let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : ils) : xs
insertNoteRef (Para ils : xs) = Para (notemarkerXml : ils) : xs
insertNoteRef xs = Para [notemarkerXml] : xs
contents <- withParaProp (pStyle "FootnoteText") $ blocksToOpenXML opts
$ insertNoteRef bs
let newnote = mknode "w:footnote" [("w:id",show notenum)] $ contents
modify $ \s -> s{ stFootnotes = newnote : notes }
return [ mknode "w:r" []
[ mknode "w:rPr" [] (rStyle "FootnoteReference")
, mknode "w:footnoteReference" [("w:id", show notenum)] () ] ]
-- internal link:
inlineToOpenXML opts (Link txt ('#':xs,_)) = do
contents <- withTextProp (rStyle "Hyperlink") $ inlinesToOpenXML opts txt
return [ mknode "w:hyperlink" [("w:anchor",xs)] contents ]
-- external link:
inlineToOpenXML opts (Link txt (src,_)) = do
contents <- withTextProp (rStyle "Hyperlink") $ inlinesToOpenXML opts txt
extlinks <- gets stExternalLinks
ind <- case M.lookup src extlinks of
Just i -> return i
Nothing -> do
let i = "link" ++ show (M.size extlinks)
modify $ \st -> st{ stExternalLinks =
M.insert src i extlinks }
return i
return [ mknode "w:hyperlink" [("r:id",ind)] contents ]
inlineToOpenXML _ (Image _ (src, tit)) = do
imgs <- gets stImages
(ident,size) <- case M.lookup src imgs of
Just (i,img) -> return (i, imageSize img)
Nothing -> do
-- TODO check existence download etc.
img <- liftIO $ B.readFile src
let ident' = "image" ++ show (M.size imgs + 1)
let size' = imageSize img
modify $ \st -> st{
stImages = M.insert src (ident',img) $ stImages st }
return (ident',size')
let (xpt,ypt) = maybe (120,120) sizeInPoints size
-- 12700 emu = 1 pt
let (xemu,yemu) = (xpt * 12700, ypt * 12700)
let cNvPicPr = mknode "pic:cNvPicPr" [] $
mknode "a:picLocks" [("noChangeArrowheads","1"),("noChangeAspect","1")] ()
let nvPicPr = mknode "pic:nvPicPr" []
[ mknode "pic:cNvPr"
[("descr",src),("id","0"),("name","Picture")] ()
, cNvPicPr ]
let blipFill = mknode "pic:blipFill" []
[ mknode "a:blip" [("r:embed",ident)] ()
, mknode "a:stretch" [] $ mknode "a:fillRect" [] () ]
let xfrm = mknode "a:xfrm" []
[ mknode "a:off" [("x","0"),("y","0")] ()
, mknode "a:ext" [("cx",show xemu),("cy",show yemu)] () ]
let prstGeom = mknode "a:prstGeom" [("prst","rect")] $
mknode "a:avLst" [] ()
let ln = mknode "a:ln" [("w","9525")]
[ mknode "a:noFill" [] ()
, mknode "a:headEnd" [] ()
, mknode "a:tailEnd" [] () ]
let spPr = mknode "pic:spPr" [("bwMode","auto")]
[xfrm, prstGeom, mknode "a:noFill" [] (), ln]
let graphic = mknode "a:graphic" [] $
mknode "a:graphicData" [("uri","http://schemas.openxmlformats.org/drawingml/2006/picture")]
[ mknode "pic:pic" []
[ nvPicPr
, blipFill
, spPr ] ]
return [ mknode "w:r" [] $
mknode "w:drawing" [] $
mknode "wp:inline" []
[ mknode "wp:extent" [("cx",show xemu),("cy",show yemu)] ()
, mknode "wp:effectExtent" [("b","0"),("l","0"),("r","0"),("t","0")] ()
, mknode "wp:docPr" [("descr",tit),("id","1"),("name","Picture")] ()
, graphic ] ]

View file

@ -81,8 +81,8 @@ wrapWords indent c = wrap' (c - indent) (c - indent)
then ",\n" ++ replicate indent ' ' ++ x ++ wrap' cols (cols - length x) xs
else ", " ++ x ++ wrap' cols (remaining - (length x + 2)) xs
isNonTextOutput :: String -> Bool
isNonTextOutput = (`elem` ["odt","epub"])
nonTextFormats :: [String]
nonTextFormats = ["odt","docx","epub"]
-- | Data structure for command line options.
data Opt = Opt
@ -110,6 +110,7 @@ data Opt = Opt
, optChapters :: Bool -- ^ Use chapter for top-level sects
, optHTMLMathMethod :: HTMLMathMethod -- ^ Method to print HTML math
, optReferenceODT :: Maybe FilePath -- ^ Path of reference.odt
, optReferenceDocx :: Maybe FilePath -- ^ Path of reference.docx
, optEPUBStylesheet :: Maybe String -- ^ EPUB stylesheet
, optEPUBMetadata :: String -- ^ EPUB metadata
, optDumpArgs :: Bool -- ^ Output command-line arguments
@ -157,6 +158,7 @@ defaultOpts = Opt
, optChapters = False
, optHTMLMathMethod = PlainMath
, optReferenceODT = Nothing
, optReferenceDocx = Nothing
, optEPUBStylesheet = Nothing
, optEPUBMetadata = ""
, optDumpArgs = False
@ -530,6 +532,13 @@ options =
"FILENAME")
"" -- "Path of custom reference.odt"
, Option "" ["reference-docx"]
(ReqArg
(\arg opt -> do
return opt { optReferenceDocx = Just arg })
"FILENAME")
"" -- "Path of custom reference.docx"
, Option "" ["epub-stylesheet"]
(ReqArg
(\arg opt -> do
@ -644,7 +653,7 @@ usageMessage :: String -> [OptDescr (Opt -> IO Opt)] -> String
usageMessage programName = usageInfo
(programName ++ " [OPTIONS] [FILES]" ++ "\nInput formats: " ++
(wrapWords 16 78 $ map fst readers) ++ "\nOutput formats: " ++
(wrapWords 16 78 $ map fst writers ++ ["odt","epub"]) ++ "\nOptions:")
(wrapWords 16 78 $ map fst writers ++ nonTextFormats) ++ "\nOptions:")
-- Determine default reader based on source file extensions
defaultReaderName :: String -> [FilePath] -> String
@ -695,6 +704,7 @@ defaultWriterName x =
".texinfo" -> "texinfo"
".db" -> "docbook"
".odt" -> "odt"
".docx" -> "docx"
".epub" -> "epub"
".org" -> "org"
".asciidoc" -> "asciidoc"
@ -750,6 +760,7 @@ main = do
, optChapters = chapters
, optHTMLMathMethod = mathMethod
, optReferenceODT = referenceODT
, optReferenceDocx = referenceDocx
, optEPUBStylesheet = epubStylesheet
, optEPUBMetadata = epubMetadata
, optDumpArgs = dumpArgs
@ -798,7 +809,7 @@ main = do
Just r -> return r
Nothing -> error ("Unknown reader: " ++ readerName')
let standalone' = standalone || isNonTextOutput writerName'
let standalone' = standalone || (`elem` nonTextFormats) writerName'
templ <- case templatePath of
_ | not standalone' -> return ""
@ -909,7 +920,7 @@ main = do
writerHighlight = highlight,
writerHighlightStyle = highlightStyle }
when (isNonTextOutput writerName' && outputFile == "-") $
when (writerName' `elem` nonTextFormats&& outputFile == "-") $
do UTF8.hPutStrLn stderr ("Error: Cannot write " ++ writerName' ++ " output to stdout.\n" ++
"Specify an output file using the -o option.")
exitWith $ ExitFailure 5
@ -955,9 +966,13 @@ main = do
Nothing | writerName' == "epub" ->
writeEPUB epubStylesheet writerOptions doc2
>>= B.writeFile (encodeString outputFile)
Nothing | writerName' == "odt" ->
| writerName' == "odt" ->
writeODT referenceODT writerOptions doc2
>>= B.writeFile (encodeString outputFile)
| writerName' == "docx" ->
writeDocx referenceDocx writerOptions doc2
>>= B.writeFile (encodeString outputFile)
| otherwise -> error $ "Unknown writer: " ++ writerName'
Just r -> writerFn outputFile =<< postProcess result
where writerFn "-" = UTF8.putStr
writerFn f = UTF8.writeFile f
@ -966,5 +981,3 @@ main = do
postProcess = if selfContained && writerName' `elem` htmlFormats
then makeSelfContained datadir
else return
Nothing -> error $ "Unknown writer: " ++ writerName'

View file

@ -15,13 +15,13 @@ Pandoc with citeproc-hs
- In a note.[^1]
- A citation group (see Doe 2005, chap. 3; also Doe and Roe 2007, 3435).
- A citation group (see Doe 2005, chap. 3; also Doe and Roe 2007, 34-35).
- Another one (see Doe 2005, 3435).
- Another one (see Doe 2005, 34-35).
- And another one in a note.[^2]
- Citation with a suffix and locator (Doe 2005, 33, 3537, and nowhere else).
- Citation with a suffix and locator (Doe 2005, 33, 35-37, and nowhere else).
- Citation with suffix only (Doe 2005 and nowhere else).
@ -34,7 +34,7 @@ References
Doe, John. 2005. *First Book*. Cambridge: Cambridge University Press.
———. 2006. “Article.” *Journal of Generic Studies* 6: 3334.
———. 2006. “Article.” *Journal of Generic Studies* 6: 33-34.
Doe, John, and Jenny Roe. 2007. “Why Water Is Wet.” In *Third Book*, ed. Sam Smith. Oxford: Oxford University Press.