From ff4d94e054ae4ff0fbe80920193b99eb325fd8df Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 2 Jul 2010 20:12:14 -0700
Subject: [PATCH] Made a proper ODT writer.

+ Transformed the old Text.Pandoc.ODT module into a proper
  writer module, Text.Pandoc.Writers.ODT.
+ Instead of saveOpenDocumentAsODT, we now have writeODT, which
  takes a Pandoc document and produces a bytestring.
  saveOpenDocumentAsODT has been removed.
+ To extract the images and insert them into the ODT, we now use
  processPandocM on the Pandoc document rather than a custom XML parser.
+ Handle the case where the image is remote (or not found) by
  converting the Image element into an Emph with the label.
+ Plumbing in pandoc.hs changed slightly to accomodate this, and to
  allow other writers that live in the IO monad.
---
 pandoc.cabal                   |   2 +-
 src/Text/Pandoc.hs             |   2 +
 src/Text/Pandoc/ODT.hs         | 101 ---------------------------------
 src/Text/Pandoc/Writers/ODT.hs |  83 +++++++++++++++++++++++++++
 src/pandoc.hs                  |  31 +++++-----
 5 files changed, 102 insertions(+), 117 deletions(-)
 delete mode 100644 src/Text/Pandoc/ODT.hs
 create mode 100644 src/Text/Pandoc/Writers/ODT.hs

diff --git a/pandoc.cabal b/pandoc.cabal
index 1c7b15333..93283b346 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -164,7 +164,6 @@ Library
                    Text.Pandoc.Definition,
                    Text.Pandoc.CharacterReferences,
                    Text.Pandoc.Shared,
-                   Text.Pandoc.ODT,
                    Text.Pandoc.Highlighting,
                    Text.Pandoc.Readers.HTML,
                    Text.Pandoc.Readers.LaTeX,
@@ -183,6 +182,7 @@ Library
                    Text.Pandoc.Writers.MediaWiki,
                    Text.Pandoc.Writers.RTF,
                    Text.Pandoc.Writers.S5,
+                   Text.Pandoc.Writers.ODT,
                    Text.Pandoc.Templates
   Other-Modules:   Text.Pandoc.XML,
                    Text.Pandoc.UTF8,
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index 9cad5fb34..8cbaaa109 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -86,6 +86,7 @@ module Text.Pandoc
                , writeMan
                , writeMediaWiki
                , writeRTF
+               , writeODT
                , prettyPandoc
                -- * Writer options used in writers 
                , WriterOptions (..)
@@ -109,6 +110,7 @@ import Text.Pandoc.Writers.ConTeXt
 import Text.Pandoc.Writers.Texinfo
 import Text.Pandoc.Writers.HTML
 import Text.Pandoc.Writers.S5
+import Text.Pandoc.Writers.ODT
 import Text.Pandoc.Writers.Docbook
 import Text.Pandoc.Writers.OpenDocument
 import Text.Pandoc.Writers.Man
diff --git a/src/Text/Pandoc/ODT.hs b/src/Text/Pandoc/ODT.hs
deleted file mode 100644
index a69d9d4e4..000000000
--- a/src/Text/Pandoc/ODT.hs
+++ /dev/null
@@ -1,101 +0,0 @@
-{-
-Copyright (C) 2008-2010 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
--}
-
-{- |
-   Module      : Text.Pandoc.ODT
-   Copyright   : Copyright (C) 2008-2010 John MacFarlane
-   License     : GNU GPL, version 2 or above
-
-   Maintainer  : John MacFarlane <jgm@berkeley.edu>
-   Stability   : alpha
-   Portability : portable
-
-Functions for producing an ODT file from OpenDocument XML.
--}
-module Text.Pandoc.ODT ( saveOpenDocumentAsODT ) where
-import Data.List ( find )
-import System.FilePath ( (</>), takeFileName )
-import qualified Data.ByteString.Lazy as B
-import Data.ByteString.Lazy.UTF8 ( fromString )
-import Codec.Archive.Zip
-import Control.Applicative ( (<$>) )
-import Text.ParserCombinators.Parsec
-import System.Time
-import Paths_pandoc ( getDataFileName )
-import System.Directory
-import Control.Monad (liftM)
-
--- | Produce an ODT file from OpenDocument XML.
-saveOpenDocumentAsODT :: Maybe FilePath -- ^ Path of user data directory
-                      -> FilePath       -- ^ Pathname of ODT file to be produced
-                      -> FilePath       -- ^ Relative directory of source file
-                      -> Maybe FilePath -- ^ Path specified by --reference-odt
-                      -> String         -- ^ OpenDocument XML contents
-                      -> IO ()
-saveOpenDocumentAsODT datadir destinationODTPath sourceDirRelative mbRefOdt xml = do
-  refArchive <- liftM toArchive $
-       case mbRefOdt of
-             Just f -> B.readFile f
-             Nothing -> do
-               let defaultODT = getDataFileName "reference.odt" >>= B.readFile
-               case datadir of
-                     Nothing  -> defaultODT
-                     Just d   -> do
-                        exists <- doesFileExist (d </> "reference.odt")
-                        if exists
-                           then B.readFile (d </> "reference.odt")
-                           else defaultODT
-  -- handle pictures
-  let (newContents, pics) = 
-        case runParser pPictures [] "OpenDocument XML contents" xml of
-          Left err          -> error $ show err
-          Right x           -> x
-  picEntries <- mapM (makePictureEntry sourceDirRelative) pics 
-  (TOD epochTime _) <- getClockTime
-  let contentEntry = toEntry "content.xml" epochTime $ fromString newContents
-  let archive = foldr addEntryToArchive refArchive (contentEntry : picEntries)
-  B.writeFile destinationODTPath $ fromArchive archive
-
-makePictureEntry :: FilePath            -- ^ Relative directory of source file
-                 -> (FilePath, String)  -- ^ Path and new path of picture
-                 -> IO Entry
-makePictureEntry sourceDirRelative (path, newPath) = do
-  entry <- readEntry [] $ sourceDirRelative </> path
-  return (entry { eRelativePath = newPath })
-
-pPictures :: GenParser Char [(FilePath, String)] ([Char], [(FilePath, String)])
-pPictures = do
-  contents <- concat <$> many (pPicture <|> many1 (noneOf "<") <|> string "<")
-  pics <- getState
-  return (contents, pics)
-
-pPicture :: GenParser Char [(FilePath, String)] [Char]
-pPicture = try $ do
-  string "<draw:image xlink:href=\""
-  path <- manyTill anyChar (char '"')
-  let filename =  takeFileName path
-  pics <- getState
-  newPath <- case find (\(o, _) -> o == path) pics of
-             Just (_, new) -> return new
-             Nothing -> do 
-                        -- get a unique name
-                        let dups = length $ (filter (\(o, _) -> takeFileName o == filename)) pics 
-                        let new = "Pictures/" ++ replicate dups '0' ++ filename
-                        updateState ((path, new) :)
-                        return new
-  return $ "<draw:image xlink:href=\"" ++ newPath ++ "\"" 
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
new file mode 100644
index 000000000..667e55c4d
--- /dev/null
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -0,0 +1,83 @@
+{-
+Copyright (C) 2008-2010 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Writers.ODT
+   Copyright   : Copyright (C) 2008-2010 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+Conversion of 'Pandoc' documents to ODT.
+-}
+module Text.Pandoc.Writers.ODT ( writeODT ) where
+import Data.IORef
+import System.FilePath ( (</>), takeExtension )
+import qualified Data.ByteString.Lazy as B
+import Data.ByteString.Lazy.UTF8 ( fromString )
+import Codec.Archive.Zip
+import System.Time
+import Paths_pandoc ( getDataFileName )
+import Text.Pandoc.Shared ( WriterOptions )
+import Text.Pandoc.Definition
+import Text.Pandoc.Writers.OpenDocument ( writeOpenDocument )
+import System.Directory
+import Control.Monad (liftM)
+
+-- | Produce an ODT file from a Pandoc document.
+writeODT :: Maybe FilePath -- ^ Path of user data directory
+         -> FilePath       -- ^ Relative directory of source file
+         -> Maybe FilePath -- ^ Path specified by --reference-odt
+         -> WriterOptions  -- ^ Writer options
+         -> Pandoc         -- ^ Document to convert
+         -> IO B.ByteString
+writeODT datadir sourceDirRelative mbRefOdt opts doc = do
+  refArchive <- liftM toArchive $
+       case mbRefOdt of
+             Just f -> B.readFile f
+             Nothing -> do
+               let defaultODT = getDataFileName "reference.odt" >>= B.readFile
+               case datadir of
+                     Nothing  -> defaultODT
+                     Just d   -> do
+                        exists <- doesFileExist (d </> "reference.odt")
+                        if exists
+                           then B.readFile (d </> "reference.odt")
+                           else defaultODT
+  -- handle pictures
+  picEntriesRef <- newIORef ([] :: [Entry])
+  doc' <- processWithM (transformPic sourceDirRelative picEntriesRef) doc
+  let newContents = writeOpenDocument opts doc'
+  (TOD epochtime _) <- getClockTime
+  let contentEntry = toEntry "content.xml" epochtime $ fromString newContents
+  picEntries <- readIORef picEntriesRef
+  let archive = foldr addEntryToArchive refArchive (contentEntry : picEntries)
+  return $ fromArchive archive
+
+transformPic :: FilePath -> IORef [Entry] -> Inline -> IO Inline
+transformPic sourceDirRelative entriesRef (Image lab (src,tit)) = do
+  entries <- readIORef entriesRef
+  let newsrc = "Pictures/" ++ show (length entries) ++ takeExtension src
+  catch (readEntry [] (sourceDirRelative </> src) >>= \entry ->
+           modifyIORef entriesRef (entry{ eRelativePath = newsrc } :) >>
+           return (Image lab (newsrc, tit)))
+        (\_ -> return (Emph lab))
+transformPic _ _ x = return x
+
diff --git a/src/pandoc.hs b/src/pandoc.hs
index 84e2b2a52..3356a6d58 100644
--- a/src/pandoc.hs
+++ b/src/pandoc.hs
@@ -30,7 +30,6 @@ writers.
 -}
 module Main where
 import Text.Pandoc
-import Text.Pandoc.ODT
 import Text.Pandoc.Writers.S5 (s5HeaderIncludes)
 import Text.Pandoc.Shared ( tabFilter, ObfuscationMethod (..), readDataFile )
 #ifdef _HIGHLIGHTING
@@ -53,7 +52,8 @@ import Text.Pandoc.Biblio
 import Control.Monad (when, unless, liftM)
 import Network.HTTP (simpleHTTP, mkRequest, getResponseBody, RequestMethod(..))
 import Network.URI (parseURI, isURI)
-import Data.ByteString.Lazy.UTF8 (toString)
+import qualified Data.ByteString.Lazy as B
+import Data.ByteString.Lazy.UTF8 (toString, fromString)
 import Codec.Binary.UTF8.String (decodeString)
 
 copyrightMessage :: String
@@ -108,7 +108,7 @@ writers = [("native"       , writeDoc)
           ,("s5"           , writeS5String)
           ,("docbook"      , writeDocbook)
           ,("opendocument" , writeOpenDocument)
-          ,("odt"          , writeOpenDocument)
+          ,("odt"          , \_ _ -> "")
           ,("latex"        , writeLaTeX)
           ,("latex+lhs"    , writeLaTeX)
           ,("context"      , writeConTeXt)
@@ -658,6 +658,10 @@ main = do
      Nothing -> return ()
 
   let sources = if ignoreArgs then [] else args
+  
+  let sourceDirRelative = if null sources
+                             then ""
+                             else takeDirectory (head sources)
 
   datadir <- case mbDataDir of
                   Nothing   -> catch
@@ -682,8 +686,11 @@ main = do
      Nothing -> error ("Unknown reader: " ++ readerName')
 
   writer <- case (lookup writerName' writers) of
-     Just r  -> return r
-     Nothing -> error ("Unknown writer: " ++ writerName')
+     Just _ | writerName' == "odt"  -> return
+          (writeODT datadir sourceDirRelative referenceODT)
+     Just r                         -> return $ \o d ->
+                                          return $ fromString (r o d)
+     Nothing                        -> error ("Unknown writer: " ++ writerName')
 
   templ <- getDefaultTemplate datadir writerName'
   let defaultTemplate = case templ of
@@ -762,10 +769,6 @@ main = do
                                "Specify an output file using the -o option.")
        exitWith $ ExitFailure 5
 
-  let sourceDirRelative = if null sources
-                             then ""
-                             else takeDirectory (head sources)
-
   let readSources [] = mapM readSource ["-"]
       readSources srcs = mapM readSource srcs
       readSource "-" = UTF8.getContents
@@ -788,10 +791,8 @@ main = do
           return doc'
 #endif
 
-  let writerOutput = writer writerOptions doc'' ++ "\n"
+  writerOutput <- writer writerOptions doc''
 
-  case writerName' of
-       "odt"   -> saveOpenDocumentAsODT datadir outputFile sourceDirRelative referenceODT writerOutput
-       _       -> if outputFile == "-"
-                     then UTF8.putStr writerOutput
-                     else UTF8.writeFile outputFile writerOutput
+  if outputFile == "-"
+     then B.putStrLn writerOutput
+     else B.writeFile outputFile writerOutput