Made a proper ODT writer.

+ Transformed the old Text.Pandoc.ODT module into a proper
  writer module, Text.Pandoc.Writers.ODT.
+ Instead of saveOpenDocumentAsODT, we now have writeODT, which
  takes a Pandoc document and produces a bytestring.
  saveOpenDocumentAsODT has been removed.
+ To extract the images and insert them into the ODT, we now use
  processPandocM on the Pandoc document rather than a custom XML parser.
+ Handle the case where the image is remote (or not found) by
  converting the Image element into an Emph with the label.
+ Plumbing in pandoc.hs changed slightly to accomodate this, and to
  allow other writers that live in the IO monad.
This commit is contained in:
John MacFarlane 2010-07-02 20:12:14 -07:00
parent ee51124412
commit ff4d94e054
5 changed files with 102 additions and 117 deletions

View file

@ -164,7 +164,6 @@ Library
Text.Pandoc.Definition,
Text.Pandoc.CharacterReferences,
Text.Pandoc.Shared,
Text.Pandoc.ODT,
Text.Pandoc.Highlighting,
Text.Pandoc.Readers.HTML,
Text.Pandoc.Readers.LaTeX,
@ -183,6 +182,7 @@ Library
Text.Pandoc.Writers.MediaWiki,
Text.Pandoc.Writers.RTF,
Text.Pandoc.Writers.S5,
Text.Pandoc.Writers.ODT,
Text.Pandoc.Templates
Other-Modules: Text.Pandoc.XML,
Text.Pandoc.UTF8,

View file

@ -86,6 +86,7 @@ module Text.Pandoc
, writeMan
, writeMediaWiki
, writeRTF
, writeODT
, prettyPandoc
-- * Writer options used in writers
, WriterOptions (..)
@ -109,6 +110,7 @@ import Text.Pandoc.Writers.ConTeXt
import Text.Pandoc.Writers.Texinfo
import Text.Pandoc.Writers.HTML
import Text.Pandoc.Writers.S5
import Text.Pandoc.Writers.ODT
import Text.Pandoc.Writers.Docbook
import Text.Pandoc.Writers.OpenDocument
import Text.Pandoc.Writers.Man

View file

@ -1,101 +0,0 @@
{-
Copyright (C) 2008-2010 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
Module : Text.Pandoc.ODT
Copyright : Copyright (C) 2008-2010 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Portability : portable
Functions for producing an ODT file from OpenDocument XML.
-}
module Text.Pandoc.ODT ( saveOpenDocumentAsODT ) where
import Data.List ( find )
import System.FilePath ( (</>), takeFileName )
import qualified Data.ByteString.Lazy as B
import Data.ByteString.Lazy.UTF8 ( fromString )
import Codec.Archive.Zip
import Control.Applicative ( (<$>) )
import Text.ParserCombinators.Parsec
import System.Time
import Paths_pandoc ( getDataFileName )
import System.Directory
import Control.Monad (liftM)
-- | Produce an ODT file from OpenDocument XML.
saveOpenDocumentAsODT :: Maybe FilePath -- ^ Path of user data directory
-> FilePath -- ^ Pathname of ODT file to be produced
-> FilePath -- ^ Relative directory of source file
-> Maybe FilePath -- ^ Path specified by --reference-odt
-> String -- ^ OpenDocument XML contents
-> IO ()
saveOpenDocumentAsODT datadir destinationODTPath sourceDirRelative mbRefOdt xml = do
refArchive <- liftM toArchive $
case mbRefOdt of
Just f -> B.readFile f
Nothing -> do
let defaultODT = getDataFileName "reference.odt" >>= B.readFile
case datadir of
Nothing -> defaultODT
Just d -> do
exists <- doesFileExist (d </> "reference.odt")
if exists
then B.readFile (d </> "reference.odt")
else defaultODT
-- handle pictures
let (newContents, pics) =
case runParser pPictures [] "OpenDocument XML contents" xml of
Left err -> error $ show err
Right x -> x
picEntries <- mapM (makePictureEntry sourceDirRelative) pics
(TOD epochTime _) <- getClockTime
let contentEntry = toEntry "content.xml" epochTime $ fromString newContents
let archive = foldr addEntryToArchive refArchive (contentEntry : picEntries)
B.writeFile destinationODTPath $ fromArchive archive
makePictureEntry :: FilePath -- ^ Relative directory of source file
-> (FilePath, String) -- ^ Path and new path of picture
-> IO Entry
makePictureEntry sourceDirRelative (path, newPath) = do
entry <- readEntry [] $ sourceDirRelative </> path
return (entry { eRelativePath = newPath })
pPictures :: GenParser Char [(FilePath, String)] ([Char], [(FilePath, String)])
pPictures = do
contents <- concat <$> many (pPicture <|> many1 (noneOf "<") <|> string "<")
pics <- getState
return (contents, pics)
pPicture :: GenParser Char [(FilePath, String)] [Char]
pPicture = try $ do
string "<draw:image xlink:href=\""
path <- manyTill anyChar (char '"')
let filename = takeFileName path
pics <- getState
newPath <- case find (\(o, _) -> o == path) pics of
Just (_, new) -> return new
Nothing -> do
-- get a unique name
let dups = length $ (filter (\(o, _) -> takeFileName o == filename)) pics
let new = "Pictures/" ++ replicate dups '0' ++ filename
updateState ((path, new) :)
return new
return $ "<draw:image xlink:href=\"" ++ newPath ++ "\""

View file

@ -0,0 +1,83 @@
{-
Copyright (C) 2008-2010 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
Module : Text.Pandoc.Writers.ODT
Copyright : Copyright (C) 2008-2010 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Portability : portable
Conversion of 'Pandoc' documents to ODT.
-}
module Text.Pandoc.Writers.ODT ( writeODT ) where
import Data.IORef
import System.FilePath ( (</>), takeExtension )
import qualified Data.ByteString.Lazy as B
import Data.ByteString.Lazy.UTF8 ( fromString )
import Codec.Archive.Zip
import System.Time
import Paths_pandoc ( getDataFileName )
import Text.Pandoc.Shared ( WriterOptions )
import Text.Pandoc.Definition
import Text.Pandoc.Writers.OpenDocument ( writeOpenDocument )
import System.Directory
import Control.Monad (liftM)
-- | Produce an ODT file from a Pandoc document.
writeODT :: Maybe FilePath -- ^ Path of user data directory
-> FilePath -- ^ Relative directory of source file
-> Maybe FilePath -- ^ Path specified by --reference-odt
-> WriterOptions -- ^ Writer options
-> Pandoc -- ^ Document to convert
-> IO B.ByteString
writeODT datadir sourceDirRelative mbRefOdt opts doc = do
refArchive <- liftM toArchive $
case mbRefOdt of
Just f -> B.readFile f
Nothing -> do
let defaultODT = getDataFileName "reference.odt" >>= B.readFile
case datadir of
Nothing -> defaultODT
Just d -> do
exists <- doesFileExist (d </> "reference.odt")
if exists
then B.readFile (d </> "reference.odt")
else defaultODT
-- handle pictures
picEntriesRef <- newIORef ([] :: [Entry])
doc' <- processWithM (transformPic sourceDirRelative picEntriesRef) doc
let newContents = writeOpenDocument opts doc'
(TOD epochtime _) <- getClockTime
let contentEntry = toEntry "content.xml" epochtime $ fromString newContents
picEntries <- readIORef picEntriesRef
let archive = foldr addEntryToArchive refArchive (contentEntry : picEntries)
return $ fromArchive archive
transformPic :: FilePath -> IORef [Entry] -> Inline -> IO Inline
transformPic sourceDirRelative entriesRef (Image lab (src,tit)) = do
entries <- readIORef entriesRef
let newsrc = "Pictures/" ++ show (length entries) ++ takeExtension src
catch (readEntry [] (sourceDirRelative </> src) >>= \entry ->
modifyIORef entriesRef (entry{ eRelativePath = newsrc } :) >>
return (Image lab (newsrc, tit)))
(\_ -> return (Emph lab))
transformPic _ _ x = return x

View file

@ -30,7 +30,6 @@ writers.
-}
module Main where
import Text.Pandoc
import Text.Pandoc.ODT
import Text.Pandoc.Writers.S5 (s5HeaderIncludes)
import Text.Pandoc.Shared ( tabFilter, ObfuscationMethod (..), readDataFile )
#ifdef _HIGHLIGHTING
@ -53,7 +52,8 @@ import Text.Pandoc.Biblio
import Control.Monad (when, unless, liftM)
import Network.HTTP (simpleHTTP, mkRequest, getResponseBody, RequestMethod(..))
import Network.URI (parseURI, isURI)
import Data.ByteString.Lazy.UTF8 (toString)
import qualified Data.ByteString.Lazy as B
import Data.ByteString.Lazy.UTF8 (toString, fromString)
import Codec.Binary.UTF8.String (decodeString)
copyrightMessage :: String
@ -108,7 +108,7 @@ writers = [("native" , writeDoc)
,("s5" , writeS5String)
,("docbook" , writeDocbook)
,("opendocument" , writeOpenDocument)
,("odt" , writeOpenDocument)
,("odt" , \_ _ -> "")
,("latex" , writeLaTeX)
,("latex+lhs" , writeLaTeX)
,("context" , writeConTeXt)
@ -658,6 +658,10 @@ main = do
Nothing -> return ()
let sources = if ignoreArgs then [] else args
let sourceDirRelative = if null sources
then ""
else takeDirectory (head sources)
datadir <- case mbDataDir of
Nothing -> catch
@ -682,8 +686,11 @@ main = do
Nothing -> error ("Unknown reader: " ++ readerName')
writer <- case (lookup writerName' writers) of
Just r -> return r
Nothing -> error ("Unknown writer: " ++ writerName')
Just _ | writerName' == "odt" -> return
(writeODT datadir sourceDirRelative referenceODT)
Just r -> return $ \o d ->
return $ fromString (r o d)
Nothing -> error ("Unknown writer: " ++ writerName')
templ <- getDefaultTemplate datadir writerName'
let defaultTemplate = case templ of
@ -762,10 +769,6 @@ main = do
"Specify an output file using the -o option.")
exitWith $ ExitFailure 5
let sourceDirRelative = if null sources
then ""
else takeDirectory (head sources)
let readSources [] = mapM readSource ["-"]
readSources srcs = mapM readSource srcs
readSource "-" = UTF8.getContents
@ -788,10 +791,8 @@ main = do
return doc'
#endif
let writerOutput = writer writerOptions doc'' ++ "\n"
writerOutput <- writer writerOptions doc''
case writerName' of
"odt" -> saveOpenDocumentAsODT datadir outputFile sourceDirRelative referenceODT writerOutput
_ -> if outputFile == "-"
then UTF8.putStr writerOutput
else UTF8.writeFile outputFile writerOutput
if outputFile == "-"
then B.putStrLn writerOutput
else B.writeFile outputFile writerOutput