Reworked Text.Pandoc.ODT to use zip-archive instead of calling external 'zip'.

+ Removed utf8-string and xml-light modules, and unneeded content.xml. + Removed code for building reference.odt from Setup.hs. The ODT is now built using template haskell in Text.Pandoc.ODT. + Removed copyright statements for utf8-string and xml modules, since they are no longer included in the source. + README: Removed claim that 'zip' is needed for ODT production. + Removed dependency on 'zip' from debian/control. + Text.Pandoc.Shared: Removed withTempDir, added inDirectory. + Added makeZip to Text.Pandoc.TH. + pandoc.cabal: Added dependencies on old-time, zip-archive, and utf8-string. Added markdown2pdf files to extra-sources list. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1417 788f1e2b-df1e-0410-8736-df70ead52e1b
2008-09-04 02:51:28 +00:00 · 2008-09-04 02:51:28 +00:00 · 4dca8f6e75
commit 4dca8f6e75
parent 9b7ec2d366
17 changed files with 75 additions and 1537 deletions
--- a/Codec/Binary/UTF8/String.hs
+++ b/Codec/Binary/UTF8/String.hs
@ -1,97 +0,0 @@
--
-- |
-- Module      :  Codec.Binary.UTF8.String
-- Copyright   :  (c) Eric Mertens 2007
-- License     :  BSD3-style (see LICENSE)
-- 
-- Maintainer:    emertens@galois.com
-- Stability   :  experimental
-- Portability :  portable
--
-- Support for encoding UTF8 Strings to and from @[Word8]@
--
-
-module Codec.Binary.UTF8.String (
-      encode
-    , decode
-    , encodeString
-    , decodeString
-  ) where
-
-import Data.Word        (Word8)
-import Data.Bits        ((.|.),(.&.),shiftL,shiftR)
-import Data.Char        (chr,ord)
-
-default(Int)
-
-- | Encode a string using 'encode' and store the result in a 'String'.
-encodeString :: String -> String
-encodeString xs = map (toEnum . fromEnum) (encode xs)
-
-- | Decode a string using 'decode' using a 'String' as input.
-- | This is not safe but it is necessary if UTF-8 encoded text
-- | has been loaded into a 'String' prior to being decoded.
-decodeString :: String -> String
-decodeString xs = decode (map (toEnum . fromEnum) xs)
-
-replacement_character :: Char
-replacement_character = '\xfffd'
-
-- | Encode a Haskell String to a list of Word8 values, in UTF8 format.
-encode :: String -> [Word8]
-encode = concatMap (map fromIntegral . go . ord)
- where
-  go oc
-   | oc <= 0x7f       = [oc]
-
-   | oc <= 0x7ff      = [ 0xc0 + (oc `shiftR` 6)
-                        , 0x80 + oc .&. 0x3f
-                        ]
-
-   | oc <= 0xffff     = [ 0xe0 + (oc `shiftR` 12)
-                        , 0x80 + ((oc `shiftR` 6) .&. 0x3f)
-                        , 0x80 + oc .&. 0x3f
-                        ]
-   | otherwise        = [ 0xf0 + (oc `shiftR` 18)
-                        , 0x80 + ((oc `shiftR` 12) .&. 0x3f)
-                        , 0x80 + ((oc `shiftR` 6) .&. 0x3f)
-                        , 0x80 + oc .&. 0x3f
-                        ]
-
--
-- | Decode a UTF8 string packed into a list of Word8 values, directly to String
--
-decode :: [Word8] -> String
-decode [    ] = ""
-decode (c:cs)
-  | c < 0x80  = chr (fromEnum c) : decode cs
-  | c < 0xc0  = replacement_character : decode cs
-  | c < 0xe0  = multi1
-  | c < 0xf0  = multi_byte 2 0xf  0x800
-  | c < 0xf8  = multi_byte 3 0x7  0x10000
-  | c < 0xfc  = multi_byte 4 0x3  0x200000
-  | c < 0xfe  = multi_byte 5 0x1  0x4000000
-  | otherwise = replacement_character : decode cs
-  where
-    multi1 = case cs of
-      c1 : ds | c1 .&. 0xc0 == 0x80 ->
-        let d = ((fromEnum c .&. 0x1f) `shiftL` 6) .|.  fromEnum (c1 .&. 0x3f)
-        in if d >= 0x000080 then toEnum d : decode ds
-                            else replacement_character : decode ds
-      _ -> replacement_character : decode cs
-
-    multi_byte :: Int -> Word8 -> Int -> [Char]
-    multi_byte i mask overlong = aux i cs (fromEnum (c .&. mask))
-      where
-        aux 0 rs acc
-          | overlong <= acc && acc <= 0x10ffff &&
-            (acc < 0xd800 || 0xdfff < acc)     &&
-            (acc < 0xfffe || 0xffff < acc)      = chr acc : decode rs
-          | otherwise = replacement_character : decode rs
-
-        aux n (r:rs) acc
-          | r .&. 0xc0 == 0x80 = aux (n-1) rs
-                               $ shiftL acc 6 .|. fromEnum (r .&. 0x3f)
-
-        aux _ rs     _ = replacement_character : decode rs
-
--- a/5
+++ b/5
@ -87,11 +87,6 @@ Supported output formats include `markdown`, `latex`, `context`
 Texinfo), `mediawiki` (MediaWiki markup), `man` (groff man), and `s5`
 (which produces an HTML file that acts like powerpoint).

-For `odt` output, you must have `zip` in the path. If you
-don't have it installed, you can get the free [Info-ZIP].
-  
-[Info-ZIP]: http://www.info-zip.org/Zip.html
-
 Supported input formats include `markdown`, `html`, `latex`, and `rst`.
 Note that the `rst` reader only parses a subset of reStructuredText
 syntax. For example, it doesn't handle tables, option lists, or
--- a/Setup.hs
+++ b/Setup.hs
@ -12,7 +12,6 @@ import Data.Maybe ( fromJust, isNothing, catMaybes )

 main = defaultMainWithHooks $
       simpleUserHooks { runTests  = runTestSuite
-                       , preConf   = checkReferenceODT
                       , postBuild = makeManPages }

 -- | Run test suite.
@ -20,34 +19,6 @@ runTestSuite _ _ _ _ = do
  inDirectory "tests" $ runCommand "runhaskell -i.. RunTests.hs" >>= waitForProcess
  return ()

-- | If reference.odt needs rebuilding, build it.
-checkReferenceODT _ _ = inDirectory "odt-styles" $ do
-  let refodt = "reference.odt"
-  let deps   = [ "meta.xml", "content.xml", "settings.xml", "META-INF/manifest.xml",
-                 "Thumbnails/thumbnail.png", "styles.xml", "mimetype" ]
-  modifiedDeps <- modifiedDependencies refodt deps
-  if null modifiedDeps
-     then return ()
-     else makeReferenceODT modifiedDeps
-  return emptyHookedBuildInfo
-
-- | Create reference.odt by zipping up sources in odt-styles directory.
-makeReferenceODT :: [FilePath] -> IO ()
-makeReferenceODT sources = do
-  zipPathMaybe <- findExecutable "zip"
-  if isNothing zipPathMaybe
-     then error $ "The 'zip' command, which is needed to build reference.odt\n" ++
-                  "from sources in the odt-styles directory, was not found.\n" ++
-                  "Try again after installing zip (http://www.info-zip.org/Zip.html).\n" ++
-                  "Or use the pandoc source tarball, which contains a prebuilt reference.odt."
-     else do
-       putStrLn "Creating reference.odt:"
-       ec <- runProcess (fromJust zipPathMaybe) (["-9", "-r", "reference.odt"] ++ sources)
-                 Nothing Nothing Nothing Nothing (Just stderr) >>= waitForProcess
-       case ec of
-            ExitSuccess -> return ()
-            _           -> error "Error creating ODT."
-
 -- | Build man pages from markdown sources in man/man1/.
 makeManPages _ _ _ _ = do
  mapM makeManPage ["pandoc.1", "hsmarkdown.1", "html2markdown.1", "markdown2pdf.1"]
--- a/System/IO/UTF8.hs
+++ b/System/IO/UTF8.hs
@ -1,118 +0,0 @@
-----------------------------------------------------------------------------
-- |
-- Module      :  System.IO.UTF8
-- Copyright   :  (c) Eric Mertens 2007
-- License     :  BSD3-style (see LICENSE)
-- 
-- Maintainer:    emertens@galois.com
-- Stability   :  experimental
-- Portability :  portable
--
-- String IO preserving UTF8 encoding.
--
-
-module System.IO.UTF8 (
-      print
-    , putStr
-    , putStrLn
-    , getLine
-    , readLn
-    , readFile
-    , writeFile
-    , appendFile
-    , getContents
-    , hGetLine
-    , hGetContents
-    , hPutStr
-    , hPutStrLn
-  ) where
-
-import Control.Monad (liftM)
-import Data.Char (ord, chr)
-import Data.Word (Word8)
-import Prelude (String, ($), (=<<), (>>=), (.), map, toEnum, fromEnum, Read,
-                Show(..))
-import System.IO (Handle, IO, FilePath)
-import qualified System.IO as IO
-
-import Codec.Binary.UTF8.String (encode, decode)
-
-
-- | Encode a string in UTF8 form.
-encodeString :: String -> String
-encodeString xs = bytesToString (encode xs)
-
-- | Decode a string from UTF8
-decodeString :: String -> String
-decodeString xs = decode (stringToBytes xs)
-
-- | Convert a list of bytes to a String
-bytesToString :: [Word8] -> String
-bytesToString xs = map (chr . fromEnum) xs
-
-- | String to list of bytes.
-stringToBytes :: String -> [Word8]
-stringToBytes xs = map (toEnum . ord) xs
-
-- | The 'print' function outputs a value of any printable type to the
-- standard output device. This function differs from the
-- System.IO.print in that it preserves any UTF8 encoding of the shown value.
--
-print :: Show a => a -> IO ()
-print x = putStrLn (show x)
-
-- | Write a UTF8 string to the standard output device
-putStr :: String -> IO ()
-putStr x = IO.putStr (encodeString x)
-
-- | The same as 'putStr', but adds a newline character.
-putStrLn :: String -> IO ()
-putStrLn x = IO.putStrLn (encodeString x)
-
-- | Read a UTF8 line from the standard input device
-getLine :: IO String
-getLine = liftM decodeString IO.getLine
-
-- | The 'readLn' function combines 'getLine' and 'readIO', preserving UTF8
-readLn :: Read a => IO a
-readLn = IO.readIO =<< getLine
-
-- | The 'readFile' function reads a file and
-- returns the contents of the file as a UTF8 string.
-- The file is read lazily, on demand, as with 'getContents'.
-readFile :: FilePath -> IO String
-readFile n = liftM decodeString (IO.openBinaryFile n IO.ReadMode >>=
-                                 IO.hGetContents)
-
-- | The computation 'writeFile' @file str@ function writes the UTF8 string @str@,
-- to the file @file@.
-writeFile :: FilePath -> String -> IO ()
-writeFile n c = IO.withBinaryFile n IO.WriteMode $ \ h ->
-                    IO.hPutStr h $ encodeString c
-
-- | The computation 'appendFile' @file str@ function appends the UTF8 string @str@,
-- to the file @file@.
-appendFile :: FilePath -> String -> IO ()
-appendFile n c = IO.withBinaryFile n IO.AppendMode $ \h ->
-                    IO.hPutStr h $ encodeString c
-
-- | Read a UTF8 line from a Handle
-hGetLine :: Handle -> IO String
-hGetLine h = liftM decodeString $ IO.hGetLine h
-
-- | Lazily read a UTF8 string from a Handle
-hGetContents :: Handle -> IO String
-hGetContents h = liftM decodeString (IO.hGetContents h)
-
-- | Write a UTF8 string to a Handle.
-hPutStr :: Handle -> String -> IO ()
-hPutStr h s = IO.hPutStr h (encodeString s)
-
-- | Write a UTF8 string to a Handle, appending a newline.
-hPutStrLn :: Handle -> String -> IO ()
-hPutStrLn h s = IO.hPutStrLn h (encodeString s)
-
-- | Lazily read stdin as a UTF8 string.
-getContents :: IO String
-getContents = liftM decodeString IO.getContents
-
--- a/Text/Pandoc/ODT.hs
+++ b/Text/Pandoc/ODT.hs
@ -29,22 +29,16 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 Functions for producing an ODT file from OpenDocument XML.
 -}
 module Text.Pandoc.ODT ( saveOpenDocumentAsODT ) where
-import Text.Pandoc.TH ( binaryContentsOf )
-import Data.Maybe ( fromJust )
-import Data.List ( partition, intersperse )
-import System.Directory
-import System.FilePath ( (</>), takeDirectory, takeFileName, splitDirectories )
-import System.Process ( runProcess, waitForProcess )
-import System.Exit
-import Text.XML.Light
-import Text.XML.Light.Cursor
-import Text.Pandoc.Shared ( withTempDir )
-import Network.URI ( isURI )
-import qualified Data.ByteString as B ( writeFile, pack )
-import Data.ByteString.Internal ( c2w )
+import Text.Pandoc.TH ( makeZip )
+import Data.List ( find )
+import System.FilePath ( (</>), takeFileName )
+import qualified Data.ByteString.Lazy as B
+import Data.ByteString.Lazy.UTF8 ( fromString )
 import Prelude hiding ( writeFile, readFile )
-import System.IO ( stderr )
-import System.IO.UTF8
+import Codec.Archive.Zip
+import Control.Applicative ( (<$>) )
+import Text.ParserCombinators.Parsec
+import System.Time

 -- | Produce an ODT file from OpenDocument XML.
 saveOpenDocumentAsODT :: FilePath    -- ^ Pathname of ODT file to be produced.
@ -52,89 +46,43 @@ saveOpenDocumentAsODT :: FilePath    -- ^ Pathname of ODT file to be produced.
                      -> String      -- ^ OpenDocument XML contents.
                      -> IO ()
 saveOpenDocumentAsODT destinationODTPath sourceDirRelative xml = do
-  let zipCmd = "zip"
-  -- check for zip in path:
-  zipPathMaybe <- findExecutable zipCmd
-  let zipPath = case zipPathMaybe of
-                  Nothing -> error $ "The '" ++ zipCmd ++
-                                     "' command, which is needed to build an ODT file, was not found.\n" ++
-                                     "It can be obtained from http://www.info-zip.org/Zip.html\n" ++
-                                     "Debian (and Debian-based) linux: apt-get install zip\n" ++
-                                     "Windows: See http://gnuwin32.sourceforge.net/packages/zip.htm"
-                  Just x  -> x
-  withTempDir "pandoc-odt" $ \tempDir -> do
-    let tempODT = tempDir </> "reference.odt"
-    B.writeFile tempODT $ B.pack $ map c2w $(binaryContentsOf $ "odt-styles" </> "reference.odt")
-    xml' <- handlePictures tempODT sourceDirRelative xml
-    writeFile (tempDir </> "content.xml") xml'
-    ph <- runProcess zipPath ["-9", "-q", "-r", tempODT, "content.xml", "Pictures"]
-            (Just tempDir) Nothing Nothing Nothing (Just stderr)
-    ec <- waitForProcess ph   -- requires compilation with -threaded
-    case ec of
-         ExitSuccess -> copyFile tempODT destinationODTPath
-         _           -> error "Error creating ODT." >> exitWith ec
+  let refArchive = read $(makeZip "odt-styles")
+  -- handle pictures
+  let (newContents, pics) = 
+        case runParser pPictures [] "OpenDocument XML contents" xml of
+          Left err          -> error $ show err
+          Right x           -> x
+  picEntries <- mapM (makePictureEntry sourceDirRelative) pics 
+  (TOD epochTime _) <- getClockTime
+  let contentEntry = toEntry "content.xml" epochTime $ fromString newContents
+  let archive = foldr addEntryToArchive refArchive (contentEntry : picEntries)
+  B.writeFile destinationODTPath $ fromArchive archive

-- | Find <draw:image ... /> elements and copy the file (xlink:href attribute) into Pictures/ in
-- the zip file.  If filename is a URL, attempt to download it.  Modify xlink:href attributes
-- to point to the new locations in Pictures/.  Return modified XML.
-handlePictures :: FilePath     -- ^ Path of ODT file in temp directory
-               -> FilePath     -- ^ Directory (relative) containing source file
-               -> String       -- ^ OpenDocument XML string
-               -> IO String    -- ^ Modified XML
-handlePictures tempODT sourceDirRelative xml = do
-  let parsed = case parseXMLDoc xml of
-                Nothing -> error "Could not parse OpenDocument XML."
-                Just x  -> x
-  let cursor = case (fromForest $ elContent parsed) of
-                   Nothing -> error "ODT appears empty"
-                   Just x  -> x
-  cursor' <- scanPictures tempODT sourceDirRelative cursor
-  let modified = parsed { elContent = toForest $ root cursor' }
-  return $ showTopElement modified
+makePictureEntry :: FilePath            -- ^ Relative directory of source file
+                 -> (FilePath, String)  -- ^ Path and new path of picture
+                 -> IO Entry
+makePictureEntry sourceDirRelative (path, newPath) = do
+  entry <- readEntry [] $ sourceDirRelative </> path
+  return (entry { eRelativePath = newPath })

-scanPictures :: FilePath -> FilePath -> Cursor -> IO Cursor
-scanPictures tempODT sourceDirRelative cursor = do
-  cursor' <- handleTree tempODT sourceDirRelative cursor
-  case right cursor' of
-       Just n  -> scanPictures tempODT sourceDirRelative n
-       Nothing -> return cursor'
-
-handleTree :: FilePath -> FilePath -> Cursor -> IO Cursor
-handleTree tempODT sourceDirRelative cursor = do
-  case firstChild cursor of
-       Nothing -> modifyContentM (handleContent tempODT sourceDirRelative) cursor
-       Just n  -> scanPictures tempODT sourceDirRelative n >>= return . fromJust . parent
-
-- | If content is an image link, handle it appropriately.
-- Otherwise, handle children if any.
-handleContent :: FilePath -> FilePath -> Content -> IO Content
-handleContent tempODT sourceDirRelative content@(Elem el) = do
-  if qName (elName el) == "image"
-     then do
-        let (hrefs, rest) = partition (\a -> qName (attrKey a) == "href") $ elAttribs el
-        let href = case hrefs of
-                          []     -> error $ "No href found in " ++ show el
-                          [x]    -> x
-                          _      -> error $ "Multiple hrefs found in " ++ show el
-        if isURI $ attrVal href
-           then return content
-           else do  -- treat as filename
-             let oldLoc = sourceDirRelative </> attrVal href
-             fileExists <- doesFileExist oldLoc
-             if fileExists
-                then do
-                   let pref = take 230 $ concat $ intersperse "_" $
-                                splitDirectories $ takeDirectory $ attrVal href
-                   let picName = pref ++ "_" ++ (takeFileName $ attrVal href)
-                   let tempDir = takeDirectory tempODT
-                   createDirectoryIfMissing False $ tempDir </> "Pictures"
-                   copyFile oldLoc $ tempDir </> "Pictures" </> picName
-                   let newAttrs = (href { attrVal = "Pictures/" ++ picName }) : rest
-                   return $ Elem (el { elAttribs = newAttrs })
-                else do
-                   hPutStrLn stderr $ "Warning:  Unable to find image at " ++ oldLoc ++ " - ignoring."
-                   return content
-     else return content
- 
-handleContent _ _ c = return c  -- not Element
+pPictures :: GenParser Char [(FilePath, String)] ([Char], [(FilePath, String)])
+pPictures = do
+  contents <- concat <$> many (pPicture <|> many1 (noneOf "<") <|> string "<")
+  pics <- getState
+  return (contents, pics)

+pPicture :: GenParser Char [(FilePath, String)] [Char]
+pPicture = try $ do
+  string "<draw:image xlink:href=\""
+  path <- manyTill anyChar (char '"')
+  let filename =  takeFileName path
+  pics <- getState
+  newPath <- case find (\(o, _) -> o == path) pics of
+             Just (_, new) -> return new
+             Nothing -> do 
+                        -- get a unique name
+                        let dups = length $ (filter (\(o, _) -> takeFileName o == filename)) pics 
+                        let new = "Pictures/" ++ replicate dups '0' ++ filename
+                        updateState ((path, new) :)
+                        return new
+  return $ "<draw:image xlink:href=\"" ++ newPath ++ "\"" 
--- a/Text/Pandoc/Shared.hs
+++ b/Text/Pandoc/Shared.hs
@ -101,7 +101,7 @@ module Text.Pandoc.Shared (
                     WriterOptions (..),
                     defaultWriterOptions,
                     -- * File handling
-                     withTempDir
+                     inDirectory
                    ) where

 import Text.Pandoc.Definition
@ -112,10 +112,7 @@ import Text.Pandoc.CharacterReferences ( characterReference )
 import Data.Char ( toLower, toUpper, ord, isLower, isUpper )
 import Data.List ( find, isPrefixOf )
 import Control.Monad ( join )
-import Control.Exception ( bracket )
 import Network.URI ( parseURI, URI (..), isAllowedInURI )
-import System.FilePath ( (</>), (<.>) )
-import System.IO.Error ( catch, ioError, isAlreadyExistsError )
 import System.Directory
 import Prelude hiding ( putStrLn, writeFile, readFile, getContents )
 import System.IO.UTF8
@ -920,16 +917,11 @@ defaultWriterOptions =
 -- File handling
 --

-- | Perform a function in a temporary directory and clean up.
-withTempDir :: FilePath -> (FilePath -> IO a) -> IO a
-withTempDir baseName = bracket (createTempDir 0 baseName) (removeDirectoryRecursive)
-
-- | Create a temporary directory with a unique name.
-createTempDir :: Integer -> FilePath -> IO FilePath
-createTempDir num baseName = do
-  sysTempDir <- getTemporaryDirectory
-  let dirName = sysTempDir </> baseName <.> show num
-  catch (createDirectory dirName >> return dirName) $
-      \e -> if isAlreadyExistsError e
-               then createTempDir (num + 1) baseName
-               else ioError e
+-- | Perform an IO action in a directory, returning to starting directory.
+inDirectory :: FilePath -> IO a -> IO a
+inDirectory path action = do
+  oldDir <- getCurrentDirectory
+  setCurrentDirectory path
+  result <- action
+  setCurrentDirectory oldDir
+  return result
--- a/Text/Pandoc/TH.hs
+++ b/Text/Pandoc/TH.hs
@ -30,7 +30,8 @@ Template haskell functions used by Pandoc modules.
 -}
 module Text.Pandoc.TH (
                        contentsOf,
-                        binaryContentsOf
+                        binaryContentsOf,
+                        makeZip
                      ) where

 import Language.Haskell.TH
@ -39,6 +40,8 @@ import qualified Data.ByteString as B
 import Data.ByteString.Internal ( w2c )
 import Prelude hiding ( readFile )
 import System.IO.UTF8
+import Codec.Archive.Zip
+import Text.Pandoc.Shared ( inDirectory )

 -- | Insert contents of text file into a template.
 contentsOf :: FilePath -> ExpQ
@ -51,3 +54,12 @@ binaryContentsOf p = lift =<< (runIO $ B.readFile p)

 instance Lift B.ByteString where
  lift x = return (LitE (StringL $ map w2c $ B.unpack x))
+
+instance Lift Archive where
+  lift x = return (LitE (StringL $ show x ))
+
+-- | Construct zip file from files in a directory, and
+-- insert into a template.
+makeZip :: FilePath -> ExpQ
+makeZip path = lift =<< (runIO $ inDirectory path $ addFilesToArchive [OptRecursive] emptyArchive  ["."])
+
--- a/Text/XML/Light.hs
+++ b/Text/XML/Light.hs
@ -1,96 +0,0 @@
-{-# LANGUAGE FlexibleInstances #-}
--------------------------------------------------------------------
-- |
-- Module    : Text.XML.Light
-- Copyright : (c) Galois, Inc. 2007
-- License   : BSD3
--
-- Maintainer: Iavor S. Diatchki <diatchki@galois.com>
-- Stability : provisional
-- Portability: portability
--
-- A lightweight XML parsing, filtering and generating library.
--
-- This module reexports functions from:
--
-- * "Text.XML.Light.Types"
--
-- * "Text.XML.Light.Proc"
--
-- * "Text.XML.Light.Input"
--
-- * "Text.XML.Light.Output"
--
-
-module Text.XML.Light (
-
-    module Text.XML.Light,
-    module Text.XML.Light.Types,
-    module Text.XML.Light.Proc,
-    module Text.XML.Light.Input,
-    module Text.XML.Light.Output
-
-  ) where
-
-import Text.XML.Light.Types
-import Text.XML.Light.Proc
-import Text.XML.Light.Input
-import Text.XML.Light.Output
-
-- | Add an attribute to an element.
-add_attr :: Attr -> Element -> Element
-add_attr a e = add_attrs [a] e
-
-- | Add some attributes to an element.
-add_attrs :: [Attr] -> Element -> Element
-add_attrs as e = e { elAttribs = as ++ elAttribs e }
-
-- | Create an unqualified name.
-unqual :: String -> QName
-unqual x = blank_name { qName = x }
-
-- | A smart element constructor which uses the type of its argument
-- to determine what sort of element to make.
-class Node t where
-  node :: QName -> t -> Element
-
-instance Node ([Attr],[Content]) where
-  node n (attrs,cont) = blank_element { elName     = n
-                                      , elAttribs  = attrs
-                                      , elContent  = cont
-                                      }
-
-instance Node [Attr]             where node n as   = node n (as,[]::[Content])
-instance Node Attr               where node n a    = node n [a]
-instance Node ()                 where node n ()   = node n ([]::[Attr])
-
-instance Node [Content]          where node n cs     = node n ([]::[Attr],cs)
-instance Node Content            where node n c      = node n [c]
-instance Node ([Attr],Content)   where node n (as,c) = node n (as,[c])
-instance Node (Attr,Content)     where node n (a,c)  = node n ([a],[c])
-
-instance Node ([Attr],[Element]) where
-  node n (as,cs) = node n (as,map Elem cs)
-
-instance Node ([Attr],Element)   where node n (as,c) = node n (as,[c])
-instance Node (Attr,Element)     where node n (a,c)  = node n ([a],c)
-instance Node ([Element])        where node n es     = node n ([]::[Attr],es)
-instance Node (Element)          where node n e      = node n [e]
-
-instance Node ([Attr],[CData])   where
-  node n (as,cs) = node n (as,map Text cs)
-
-instance Node ([Attr],CData)     where node n (as,c) = node n (as,[c])
-instance Node (Attr,CData)       where node n (a,c)  = node n ([a],c)
-instance Node [CData]            where node n es     = node n ([]::[Attr],es)
-instance Node CData              where node n e      = node n [e]
-
-instance Node ([Attr],String)    where
-  node n (as,t) = node n (as,blank_cdata { cdData = t })
-
-instance Node (Attr,String)      where node n (a,t)  = node n ([a],t)
-instance Node [Char]             where node n t      = node n ([]::[Attr],t)
-
-- | Create node with unqualified name
-unode :: Node t => String -> t -> Element
-unode = node . unqual
--- a/Text/XML/Light/Cursor.hs
+++ b/Text/XML/Light/Cursor.hs
@ -1,327 +0,0 @@
--------------------------------------------------------------------
-- |
-- Module    : Text.XML.Light.Cursor
-- Copyright : (c) Galois, Inc. 2008
-- License   : BSD3
--
-- Maintainer: Iavor S. Diatchki <diatchki@galois.com>
-- Stability : provisional
-- Portability:
--
-- XML cursors for working XML content withing the context of
-- an XML document.  This implemntation is based on the general
-- tree zipper written by Krasimir Angelov and Iavor S. Diatchki.
--
-
-module Text.XML.Light.Cursor
-  ( Tag(..), getTag, setTag, fromTag
-  , Cursor(..), Path
-
-  -- * Conversions
-  , fromContent
-  , fromElement
-  , fromForest
-  , toForest
-  , toTree
-
-  -- * Moving around
-  , parent
-  , root
-  , getChild
-  , firstChild
-  , lastChild
-  , left
-  , right
-
-  -- ** Searching
-  , findChild
-  , findLeft
-  , findRight
-
-  -- * Node classification
-  , isRoot
-  , isFirst
-  , isLast
-  , isLeaf
-  , isChild
-  , hasChildren
-  , getNodeIndex
-
-  -- * Updates
-  , setContent
-  , modifyContent
-  , modifyContentM
-
-  -- ** Inserting content
-  , insertLeft
-  , insertRight
-  , insertGoLeft
-  , insertGoRight
-
-  -- ** Removing content
-  , removeLeft
-  , removeRight
-  , removeGoLeft
-  , removeGoRight
-  , removeGoUp
-
-  ) where
-
-import Text.XML.Light.Types
-import Data.Maybe(isNothing)
-
-data Tag = Tag { tagName    :: QName
-               , tagAttribs :: [Attr]
-               , tagLine    :: Maybe Line
-               } deriving (Show)
-
-getTag :: Element -> Tag
-getTag e = Tag { tagName = elName e
-               , tagAttribs = elAttribs e
-               , tagLine = elLine e
-               }
-
-setTag :: Tag -> Element -> Element
-setTag t e = fromTag t (elContent e)
-
-fromTag :: Tag -> [Content] -> Element
-fromTag t cs = Element { elName    = tagName t
-                       , elAttribs = tagAttribs t
-                       , elLine    = tagLine t
-                       , elContent = cs
-                       }
-
-type Path = [([Content],Tag,[Content])]
-
-- | The position of a piece of content in an XML document.
-data Cursor = Cur
-  { current :: Content      -- ^ The currently selected content.
-  , lefts   :: [Content]    -- ^ Siblings on the left, closest first.
-  , rights  :: [Content]    -- ^ Siblings on the right, closest first.
-  , parents :: Path -- ^ The contexts of the parent elements of this location.
-  } deriving (Show)
-
-- Moving around ---------------------------------------------------------------
-
-- | The parent of the given location.
-parent :: Cursor -> Maybe Cursor
-parent loc =
-  case parents loc of
-    (pls,v,prs) : ps -> Just
-      Cur { current = Elem
-                    (fromTag v
-                    (combChildren (lefts loc) (current loc) (rights loc)))
-          , lefts = pls, rights = prs, parents = ps
-          }
-    [] -> Nothing
-
-
-- | The top-most parent of the given location.
-root :: Cursor -> Cursor
-root loc = maybe loc root (parent loc)
-
-- | The left sibling of the given location.
-left :: Cursor -> Maybe Cursor
-left loc =
-  case lefts loc of
-    t : ts -> Just loc { current = t, lefts = ts
-                                    , rights = current loc : rights loc }
-    []     -> Nothing
-
-- | The right sibling of the given location.
-right :: Cursor -> Maybe Cursor
-right loc =
-  case rights loc of
-    t : ts -> Just loc { current = t, lefts = current loc : lefts loc
-                                    , rights = ts }
-    []     -> Nothing
-
-- | The first child of the given location.
-firstChild :: Cursor -> Maybe Cursor
-firstChild loc =
-  do (t : ts, ps) <- downParents loc
-     return Cur { current = t, lefts = [], rights = ts , parents = ps }
-
-- | The last child of the given location.
-lastChild :: Cursor -> Maybe Cursor
-lastChild loc =
-  do (ts, ps) <- downParents loc
-     case reverse ts of
-       l : ls -> return Cur { current = l, lefts = ls, rights = []
-                                                     , parents = ps }
-       [] -> Nothing
-
-- | Find the next left sibling that satisfies a predicate.
-findLeft :: (Cursor -> Bool) -> Cursor -> Maybe Cursor
-findLeft p loc = do loc1 <- left loc
-                    if p loc1 then return loc1 else findLeft p loc1
-
-- | Find the next right sibling that satisfies a predicate.
-findRight :: (Cursor -> Bool) -> Cursor -> Maybe Cursor
-findRight p loc = do loc1 <- right loc
-                     if p loc1 then return loc1 else findRight p loc1
-
-- | The first child that satisfies a predicate.
-findChild :: (Cursor -> Bool) -> Cursor -> Maybe Cursor
-findChild p loc =
-  do loc1 <- firstChild loc
-     if p loc1 then return loc1 else findRight p loc1
-
-- | The child with the given index (starting from 0).
-getChild :: Int -> Cursor -> Maybe Cursor
-getChild n loc =
-  do (ts,ps) <- downParents loc
-     (ls,t,rs) <- splitChildren ts n
-     return Cur { current = t, lefts = ls, rights = rs, parents = ps }
-
-
-- | private: computes the parent for "down" operations.
-downParents :: Cursor -> Maybe ([Content], Path)
-downParents loc =
-  case current loc of
-    Elem e -> Just ( elContent e
-                   , (lefts loc, getTag e, rights loc) : parents loc
-                   )
-    _      -> Nothing
-
-- Conversions -----------------------------------------------------------------
-
-- | A cursor for the guven content.
-fromContent :: Content -> Cursor
-fromContent t = Cur { current = t, lefts = [], rights = [], parents = [] }
-
-- | A cursor for the guven element.
-fromElement :: Element -> Cursor
-fromElement e = fromContent (Elem e)
-
-- | The location of the first tree in a forest.
-fromForest :: [Content] -> Maybe Cursor
-fromForest (t:ts) = Just Cur { current = t, lefts = [], rights = ts
-                                                      , parents = [] }
-fromForest []     = Nothing
-
-- | Computes the tree containing this location.
-toTree :: Cursor -> Content
-toTree loc = current (root loc)
-
-- | Computes the forest containing this location.
-toForest :: Cursor -> [Content]
-toForest loc = let r = root loc in combChildren (lefts r) (current r) (rights r)
-
-
-- Queries ---------------------------------------------------------------------
-
-- | Are we at the top of the document?
-isRoot :: Cursor -> Bool
-isRoot loc = null (parents loc)
-
-- | Are we at the left end of the the document?
-isFirst :: Cursor -> Bool
-isFirst loc = null (lefts loc)
-
-- | Are we at the right end of the document?
-isLast :: Cursor -> Bool
-isLast loc = null (rights loc)
-
-- | Are we at the bottom of the document?
-isLeaf :: Cursor -> Bool
-isLeaf loc = isNothing (downParents loc)
-
-- | Do we have a parent?
-isChild :: Cursor -> Bool
-isChild loc = not (isRoot loc)
-
-- | Get the node index inside the sequence of children
-getNodeIndex :: Cursor -> Int
-getNodeIndex loc = length (lefts loc)
-
-- | Do we have children?
-hasChildren :: Cursor -> Bool
-hasChildren loc = not (isLeaf loc)
-
-
-
-- Updates ---------------------------------------------------------------------
-
-- | Change the current content.
-setContent :: Content -> Cursor -> Cursor
-setContent t loc = loc { current = t }
-
-- | Modify the current content.
-modifyContent :: (Content -> Content) -> Cursor -> Cursor
-modifyContent f loc = setContent (f (current loc)) loc
-
-- | Modify the current content, allowing for an effect.
-modifyContentM :: Monad m => (Content -> m Content) -> Cursor -> m Cursor
-modifyContentM f loc = do x <- f (current loc)
-                          return (setContent x loc)
-
-- | Insert content to the left of the current position.
-insertLeft :: Content -> Cursor -> Cursor
-insertLeft t loc = loc { lefts = t : lefts loc }
-
-- | Insert content to the right of the current position.
-insertRight :: Content -> Cursor -> Cursor
-insertRight t loc = loc { rights = t : rights loc }
-
-- | Remove the conent on the left of the current position, if any.
-removeLeft :: Cursor -> Maybe (Content,Cursor)
-removeLeft loc = case lefts loc of
-                   l : ls -> return (l,loc { lefts = ls })
-                   [] -> Nothing
-
-- | Remove the conent on the right of the current position, if any.
-removeRight :: Cursor -> Maybe (Content,Cursor)
-removeRight loc = case rights loc of
-                    l : ls -> return (l,loc { rights = ls })
-                    [] -> Nothing
-
-
-- | Insert content to the left of the current position.
-- The new content becomes the current position.
-insertGoLeft :: Content -> Cursor -> Cursor
-insertGoLeft t loc = loc { current = t, rights = current loc : rights loc }
-
-- | Insert content to the right of the current position.
-- The new content becomes the current position.
-insertGoRight :: Content -> Cursor -> Cursor
-insertGoRight t loc = loc { current = t, lefts = current loc : lefts loc }
-
-- | Remove the current element.
-- The new position is the one on the left.
-removeGoLeft :: Cursor -> Maybe Cursor
-removeGoLeft loc = case lefts loc of
-                     l : ls -> Just loc { current = l, lefts = ls }
-                     []     -> Nothing
-
-- | Remove the current element.
-- The new position is the one on the right.
-removeGoRight :: Cursor -> Maybe Cursor
-removeGoRight loc = case rights loc of
-                     l : ls -> Just loc { current = l, rights = ls }
-                     []     -> Nothing
-
-- | Remove the current element.
-- The new position is the parent of the old position.
-removeGoUp :: Cursor -> Maybe Cursor
-removeGoUp loc =
-  case parents loc of
-    (pls,v,prs) : ps -> Just
-      Cur { current = Elem (fromTag v (reverse (lefts loc) ++ rights loc))
-          , lefts = pls, rights = prs, parents = ps
-          }
-    [] -> Nothing
-
-
-- | private: Gets the given element of a list.
-- Also returns the preceeding elements (reversed) and the folloing elements.
-splitChildren :: [a] -> Int -> Maybe ([a],a,[a])
-splitChildren _ n | n < 0 = Nothing
-splitChildren cs pos = loop [] cs pos
-  where loop acc (x:xs) 0 = Just (acc,x,xs)
-        loop acc (x:xs) n = loop (x:acc) xs $! n-1
-        loop _ _ _        = Nothing
-
-- | private: combChildren ls x ys = reverse ls ++ [x] ++ ys
-combChildren :: [a] -> a -> [a] -> [a]
-combChildren ls t rs = foldl (flip (:)) (t:rs) ls
--- a/Text/XML/Light/Input.hs
+++ b/Text/XML/Light/Input.hs
@ -1,307 +0,0 @@
--------------------------------------------------------------------
-- |
-- Module    : Text.XML.Light.Input
-- Copyright : (c) Galois, Inc. 2007
-- License   : BSD3
--
-- Maintainer: Iavor S. Diatchki <diatchki@galois.com>
-- Stability : provisional
-- Portability: portable
--
-- Lightweight XML parsing
--
-
-module Text.XML.Light.Input (parseXML,parseXMLDoc) where
-
-import Text.XML.Light.Types
-import Text.XML.Light.Proc
-import Text.XML.Light.Output(tagEnd)
-
-import Data.Char(isSpace)
-import Data.List(isPrefixOf)
-import Numeric(readHex)
-
-- | parseXMLDoc, parse a XMLl document to maybe an element
-parseXMLDoc  :: String -> Maybe Element
-parseXMLDoc xs  = strip (parseXML xs)
-  where strip cs = case onlyElems cs of
-                    e : es
-                      | "?xml" `isPrefixOf` qName (elName e)
-                          -> strip (map Elem es)
-                      | otherwise -> Just e
-                    _ -> Nothing
-
-- | parseXML to a list of content chunks
-parseXML   :: String -> [Content]
-parseXML xs = parse $ tokens $ preprocess xs
-
------------------------------------------------------------------------
-
-parse      :: [Token] -> [Content]
-parse []    = []
-parse ts    = let (es,_,ts1) = nodes ([],Nothing) [] ts
-              in es ++ parse ts1
-
-- Information about namespaces.
-- The first component is a map that associates prefixes to URIs,
-- the second is the URI for the default namespace, if one was provided.
-type NSInfo = ([(String,String)],Maybe String)
-
-nodes :: NSInfo -> [QName] -> [Token] -> ([Content], [QName], [Token])
-
-nodes ns ps (TokCRef ref : ts) =
-  let (es,qs,ts1) = nodes ns ps ts
-  in (CRef ref : es, qs, ts1)
-
-nodes ns ps (TokText txt : ts) =
-  let (es,qs,ts1) = nodes ns ps ts
-      (more,es1)  = case es of
-                      Text cd : es1'
-                        | cdVerbatim cd == cdVerbatim txt -> (cdData cd,es1')
-                      _                                   -> ([],es)
-
-  in (Text txt { cdData = cdData txt ++ more } : es1, qs, ts1)
-
-nodes cur_info ps (TokStart p t as empty : ts) = (node : siblings, open, toks)
-  where
-  new_name  = annotName new_info t
-  new_info  = foldr addNS cur_info as
-  node      = Elem Element { elLine    = Just p
-                           , elName    = new_name
-                           , elAttribs = map (annotAttr new_info) as
-                           , elContent = children
-                           }
-
-  (children,(siblings,open,toks))
-    | empty     = ([], nodes cur_info ps ts)
-    | otherwise = let (es1,qs1,ts1) = nodes new_info (new_name:ps) ts
-                  in (es1,
-                      case qs1 of
-                        [] -> nodes cur_info ps ts1
-                        _ : qs3 -> ([],qs3,ts1))
-
-nodes ns ps (TokEnd p t : ts)   = let t1 = annotName ns t
-                                in case break (t1 ==) ps of
-                                  (as,_:_) -> ([],as,ts)
-                                  -- Unknown closing tag. Insert as text.
-                                  (_,[]) ->
-                                    let (es,qs,ts1) = nodes ns ps ts
-                                    in (Text CData {
-                                               cdLine = Just p,
-                                               cdVerbatim = CDataText,
-                                               cdData = tagEnd t ""
-                                              } : es,qs, ts1)
-
-nodes _ ps []                 = ([],ps,[])
-
-
-annotName :: NSInfo -> QName -> QName
-annotName (namespaces,def_ns) n =
-  n { qURI = maybe def_ns (`lookup` namespaces) (qPrefix n) }
-
-annotAttr :: NSInfo -> Attr -> Attr
-annotAttr ns a@(Attr { attrKey = k}) =
-  case (qPrefix k, qName k) of
-    (Nothing,"xmlns") -> a
-    _                 -> a { attrKey = annotName ns k }
-
-addNS :: Attr -> NSInfo -> NSInfo
-addNS (Attr { attrKey = key, attrVal = val }) (ns,def) =
-  case (qPrefix key, qName key) of
-    (Nothing,"xmlns") -> (ns, if null val then Nothing else Just val)
-    (Just "xmlns", k) -> ((k, val) : ns, def)
-    _                 -> (ns,def)
-
-
-- Lexer -----------------------------------------------------------------------
-
-type LChar              = (Line,Char)
-type LString            = [LChar]
-data Token              = TokStart Line QName [Attr] Bool  -- is empty?
-                        | TokEnd Line QName
-                        | TokCRef String
-                        | TokText CData
-                          deriving Show
-
-tokens             :: String -> [Token]
-tokens = tokens' . linenumber 1
-
-tokens' :: LString -> [Token]
-tokens' ((_,'<') : c@(_,'!') : cs) = special c cs
-
-tokens' ((_,'<') : cs)   = tag (dropSpace cs) -- we are being nice here
-tokens' [] = []
-tokens' cs@((l,_):_) = let (as,bs) = breakn ('<' ==) cs
-                       in map cvt (decode_text as) ++ tokens' bs
-
-  -- XXX: Note, some of the lines might be a bit inacuarate
-  where cvt (TxtBit x)  = TokText CData { cdLine = Just l
-                                        , cdVerbatim = CDataText
-                                        , cdData = x
-                                        }
-        cvt (CRefBit x) = case cref_to_char x of
-                            Just c -> TokText CData { cdLine = Just l
-                                                    , cdVerbatim = CDataText
-                                                    , cdData = [c]
-                                                    }
-                            Nothing -> TokCRef x
-
-
-special :: LChar -> LString -> [Token]
-special _ ((_,'-') : (_,'-') : cs) = skip cs
-  where skip ((_,'-') : (_,'-') : (_,'>') : ds) = tokens' ds
-        skip (_ : ds) = skip ds
-        skip [] = [] -- unterminated comment
-
-special c ((_,'[') : (_,'C') : (_,'D') : (_,'A') : (_,'T') : (_,'A') : (_,'[')
-         : cs) =
-  let (xs,ts) = cdata cs
-  in TokText CData { cdLine = Just (fst c), cdVerbatim = CDataVerbatim, cdData = xs }
-                                                                  : tokens' ts
-  where cdata ((_,']') : (_,']') : (_,'>') : ds) = ([],ds)
-        cdata ((_,d) : ds)  = let (xs,ys) = cdata ds in (d:xs,ys)
-        cdata []        = ([],[])
-
-special c cs = 
-  let (xs,ts) = munch "" 0 cs
-  in TokText CData { cdLine = Just (fst c), cdVerbatim = CDataRaw, cdData = '<':'!':(reverse xs) } : tokens' ts
-  where munch acc nesting ((_,'>') : ds) 
-         | nesting == (0::Int) = ('>':acc,ds)
-	 | otherwise           = munch ('>':acc) (nesting-1) ds
-        munch acc nesting ((_,'<') : ds)
-	 = munch ('<':acc) (nesting+1) ds
-        munch acc n ((_,x) : ds) = munch (x:acc) n ds
-        munch acc _ [] = (acc,[]) -- unterminated DTD markup
-
--special c cs = tag (c : cs) -- invalid specials are processed as tags
-
-
-qualName           :: LString -> (QName,LString)
-qualName xs         = let (as,bs) = breakn endName xs
-                          (q,n)   = case break (':'==) as of
-                                      (q1,_:n1) -> (Just q1, n1)
-                                      _         -> (Nothing, as)
-                      in (QName { qURI = Nothing, qPrefix = q, qName = n }, bs)
-  where endName x = isSpace x || x == '=' || x == '>' || x == '/'
-
-
-
-
-
-tag              :: LString -> [Token]
-tag ((p,'/') : cs)    = let (n,ds) = qualName (dropSpace cs)
-                        in TokEnd p n : case ds of
-                                          (_,'>') : es -> tokens' es
-                                          -- tag was not properly closed...
-                                          _        -> tokens' ds
-tag []            = []
-tag cs            = let (n,ds)  = qualName cs
-                        (as,b,ts) = attribs (dropSpace ds)
-                    in TokStart (fst (head cs)) n as b : ts
-
-attribs          :: LString -> ([Attr], Bool, [Token])
-attribs cs        = case cs of
-                      (_,'>') : ds -> ([], False, tokens' ds)
-
-                      (_,'/') : ds -> ([], True, case ds of
-                                              (_,'>') : es -> tokens' es
-                                              -- insert missing >  ...
-                                              _ -> tokens' ds)
-
-                      (_,'?') : (_,'>') : ds -> ([], True, tokens' ds)
-
-                      -- doc ended within a tag..
-                      []       -> ([],False,[])
-
-                      _        -> let (a,cs1) = attrib cs
-                                      (as,b,ts) = attribs cs1
-                                  in (a:as,b,ts)
-
-attrib             :: LString -> (Attr,LString)
-attrib cs           = let (ks,cs1)  = qualName cs
-                          (vs,cs2)  = attr_val (dropSpace cs1)
-                      in ((Attr ks (decode_attr vs)),dropSpace cs2)
-
-attr_val           :: LString -> (String,LString)
-attr_val ((_,'=') : cs) = string (dropSpace cs)
-attr_val cs         = ("",cs)
-
-
-dropSpace :: LString -> LString
-dropSpace = dropWhile (isSpace . snd)
-
-- | Match the value for an attribute.  For malformed XML we do
-- our best to guess the programmer's intention.
-string             :: LString -> (String,LString)
-string ((_,'"') : cs)   = break' ('"' ==) cs
-
-- Allow attributes to be enclosed between ' '.
-string ((_,'\'') : cs)  = break' ('\'' ==) cs
-
-- Allow attributes that are not enclosed by anything.
-string cs           = breakn eos cs
-  where eos x = isSpace x || x == '>' || x == '/'
-
-
-break' :: (a -> Bool) -> [(b,a)] -> ([a],[(b,a)])
-break' p xs         = let (as,bs) = breakn p xs
-                      in (as, case bs of
-                                [] -> []
-                                _ : cs -> cs)
-
-breakn :: (a -> Bool) -> [(b,a)] -> ([a],[(b,a)])
-breakn p l = (map snd as,bs) where (as,bs) = break (p . snd) l
-
-
-
-decode_attr :: String -> String
-decode_attr cs = concatMap cvt (decode_text cs)
-  where cvt (TxtBit x) = x
-        cvt (CRefBit x) = case cref_to_char x of
-                            Just c -> [c]
-                            Nothing -> '&' : x ++ ";"
-
-data Txt = TxtBit String | CRefBit String deriving Show
-
-decode_text :: [Char] -> [Txt]
-decode_text xs@('&' : cs) = case break (';' ==) cs of
-                              (as,_:bs) -> CRefBit as : decode_text bs
-                              _ -> [TxtBit xs]
-decode_text []  = []
-decode_text cs  = let (as,bs) = break ('&' ==) cs
-                  in TxtBit as : decode_text bs
-
-cref_to_char :: [Char] -> Maybe Char
-cref_to_char cs = case cs of
-  '#' : ds  -> num_esc ds
-  "lt"      -> Just '<'
-  "gt"      -> Just '>'
-  "amp"     -> Just '&'
-  "apos"    -> Just '\''
-  "quot"    -> Just '"'
-  _         -> Nothing
-
-num_esc :: String -> Maybe Char
-num_esc cs = case cs of
-               'x' : ds -> check (readHex ds)
-               _        -> check (reads cs)
-
-  where check [(n,"")]  = cvt_char n
-        check _         = Nothing
-
-cvt_char :: Int -> Maybe Char
-cvt_char x
-  | fromEnum (minBound :: Char) <= x && x <= fromEnum (maxBound::Char)
-                = Just (toEnum x)
-  | otherwise = Nothing
-
-preprocess :: String -> String
-preprocess ('\r' : '\n' : cs) = '\n' : preprocess cs
-preprocess ('\r' : cs)        = '\n' : preprocess cs
-preprocess (c : cs)           = c : preprocess cs
-preprocess []                 = []
-
-linenumber :: Line -> String -> LString
-linenumber _ [] = []
-linenumber n ('\n':s) = n' `seq` ((n,'\n'):linenumber n' s) where n' = n + 1
-linenumber n (c:s)    = (n,c) : linenumber n s
--- a/Text/XML/Light/Output.hs
+++ b/Text/XML/Light/Output.hs
@ -1,150 +0,0 @@
--------------------------------------------------------------------
-- |
-- Module    : Text.XML.Light.Output
-- Copyright : (c) Galois, Inc. 2007
-- License   : BSD3
--
-- Maintainer: Iavor S. Diatchki <diatchki@galois.com>
-- Stability : provisional
-- Portability:
--
-- Output handling for the lightweight XML lib.
--
-
-module Text.XML.Light.Output
-  ( showTopElement, showContent, showElement, showCData, showQName, showAttr
-  , ppTopElement, ppContent, ppElement
-  , tagEnd, xml_header
-  ) where
-
-import Text.XML.Light.Types
-import Data.Char
-import Data.List ( isPrefixOf )
-
-- | The XML 1.0 header
-xml_header :: String
-xml_header = "<?xml version='1.0' ?>"
-
-- | Pretty printing renders XML documents faithfully,
-- with the exception that whitespace may be added\/removed
-- in non-verbatim character data.
-ppTopElement       :: Element -> String
-ppTopElement e      = unlines [xml_header,ppElement e]
-
-- | Pretty printing elements
-ppElement          :: Element -> String
-ppElement e         = ppElementS "" e ""
-
-- | Pretty printing content
-ppContent          :: Content -> String
-ppContent x         = ppContentS "" x ""
-
-- | Pretty printing content using ShowS
-ppContentS         :: String -> Content -> ShowS
-ppContentS i x xs   = case x of
-                        Elem e -> ppElementS i e xs
-                        Text c -> ppCData i c xs
-                        CRef r -> showCRefS r xs
-
-ppElementS         :: String -> Element -> ShowS
-ppElementS i e xs   = i ++ (tagStart (elName e) (elAttribs e) $
-  case elContent e of
-    [] 
-     | not ("?xml" `isPrefixOf` (qName $ elName e)) -> " />" ++ xs
-     | otherwise -> " ?>" ++ xs
-    [Text t] -> ">" ++ ppCData "" t (tagEnd (elName e) xs)
-    cs -> ">\n" ++ foldr ppSub (i ++ tagEnd (elName e) xs) cs
-      where ppSub e1 = ppContentS ("  " ++ i) e1 . showChar '\n'
-  )
-
-ppCData            :: String -> CData -> ShowS
-ppCData i c xs      = i ++ if (cdVerbatim c /= CDataText )
-                              then showCDataS c xs
-                              else foldr cons xs (showCData c)
-
-  where cons         :: Char -> String -> String
-        cons '\n' ys  = "\n" ++ i ++ ys
-        cons y ys     = y : ys
-
-
-
--------------------------------------------------------------------------------
-- | Adds the <?xml?> header.
-showTopElement     :: Element -> String
-showTopElement c    = xml_header ++ showElement c
-
-showContent        :: Content -> String
-showContent c       = showContentS c ""
-
-showElement        :: Element -> String
-showElement c       = showElementS c ""
-
-showCData          :: CData -> String
-showCData c         = showCDataS c ""
-
-- Note: crefs should not contain '&', ';', etc.
-showCRefS          :: String -> ShowS
-showCRefS r xs      = '&' : r ++ ';' : xs
-
-- | Good for transmition (no extra white space etc.) but less readable.
-showContentS           :: Content -> ShowS
-showContentS (Elem e)   = showElementS e
-showContentS (Text cs)  = showCDataS cs
-showContentS (CRef cs)  = showCRefS cs
-
-- | Good for transmition (no extra white space etc.) but less readable.
-showElementS       :: Element -> ShowS
-showElementS e xs =
-  tagStart (elName e) (elAttribs e)
-    $ case elContent e of
-        [] -> " />" ++ xs
-        ch -> '>' : foldr showContentS (tagEnd (elName e) xs) ch
-
-- | Convert a text element to characters.
-showCDataS         :: CData -> ShowS
-showCDataS cd = 
- case cdVerbatim cd of
-   CDataText     -> escStr (cdData cd)
-   CDataVerbatim -> showString "<![CDATA[" . escCData (cdData cd) . showString "]]>"
-   CDataRaw      -> \ xs -> cdData cd ++ xs
-
--------------------------------------------------------------------------------
-escCData           :: String -> ShowS
-escCData (']' : ']' : '>' : cs) = showString "]]]]><![CDATA[>" . escCData cs
-escCData (c : cs)               = showChar c . escCData cs
-escCData []                     = id
-
-escChar            :: Char -> ShowS
-escChar c = case c of
-  '<'   -> showString "&lt;"
-  '>'   -> showString "&gt;"
-  '&'   -> showString "&amp;"
-  '"'   -> showString "&quot;"
-  '\''  -> showString "&apos;"
-  -- XXX: Is this really wortherd?
-  -- We could deal with these issues when we convert characters to bytes.
-  _ | (oc <= 0x7f && isPrint c) || c == '\n' || c == '\r' -> showChar c
-    | otherwise -> showString "&#" . shows oc . showChar ';'
-      where oc = ord c
-
-escStr             :: String -> ShowS
-escStr cs rs        = foldr escChar rs cs
-
-tagEnd             :: QName -> ShowS
-tagEnd qn rs        = '<':'/':showQName qn ++ '>':rs
-
-tagStart           :: QName -> [Attr] -> ShowS
-tagStart qn as rs   = '<':showQName qn ++ as_str ++ rs
- where as_str       = if null as then "" else ' ' : unwords (map showAttr as)
-
-showAttr           :: Attr -> String
-showAttr (Attr qn v) = showQName qn ++ '=' : '"' : escStr v "\""
-
-showQName          :: QName -> String
-showQName q         = pre ++ qName q
-  where pre = case qPrefix q of
-                Nothing -> ""
-                Just p  -> p ++ ":"
-
-
-
--- a/Text/XML/Light/Proc.hs
+++ b/Text/XML/Light/Proc.hs
@ -1,103 +0,0 @@
--------------------------------------------------------------------
-- |
-- Module    : Text.XML.Light.Proc
-- Copyright : (c) Galois, Inc. 2007
-- License   : BSD3
--
-- Maintainer: Iavor S. Diatchki <diatchki@galois.com>
-- Stability : provisional
-- Portability:
--
--------------------------------------------------------------------
-
-
-module Text.XML.Light.Proc where
-
-import Text.XML.Light.Types
-
-import Data.Maybe(listToMaybe)
-import Data.List(find)
-
-- | Get the text value of an XML element.  This function
-- ignores non-text elements, and concatenates all text elements.
-strContent         :: Element -> String
-strContent e        = concatMap cdData $ onlyText $ elContent e
-
-- | Select only the elements from a list of XML content.
-onlyElems          :: [Content] -> [Element]
-onlyElems xs        = [ x | Elem x <- xs ]
-
-- | Select only the elements from a parent.
-elChildren         :: Element -> [Element]
-elChildren e        = [ x | Elem x <- elContent e ]
-
-- | Select only the text from a list of XML content.
-onlyText           :: [Content] -> [CData]
-onlyText xs         = [ x | Text x <- xs ]
-
-- | Find all immediate children with the given name.
-findChildren       :: QName -> Element -> [Element]
-findChildren q e    = filterChildren ((q ==) . elName) e
-
-- | Filter all immediate children wrt a given predicate.
-filterChildren       :: (Element -> Bool) -> Element -> [Element]
-filterChildren p e    = filter p (onlyElems (elContent e))
-
-
-- | Filter all immediate children wrt a given predicate over their names.
-filterChildrenName      :: (QName -> Bool) -> Element -> [Element]
-filterChildrenName p e   = filter (p.elName) (onlyElems (elContent e))
-
-
-- | Find an immediate child with the given name.
-findChild          :: QName -> Element -> Maybe Element
-findChild q e       = listToMaybe (findChildren q e)
-
-- | Find an immediate child with the given name.
-filterChild          :: (Element -> Bool) -> Element -> Maybe Element
-filterChild p e       = listToMaybe (filterChildren p e)
-
-- | Find an immediate child with name matching a predicate.
-filterChildName      :: (QName -> Bool) -> Element -> Maybe Element
-filterChildName p e   = listToMaybe (filterChildrenName p e)
-
-- | Find the left-most occurrence of an element matching given name.
-findElement        :: QName -> Element -> Maybe Element
-findElement q e     = listToMaybe (findElements q e)
-
-- | Filter the left-most occurrence of an element wrt. given predicate.
-filterElement        :: (Element -> Bool) -> Element -> Maybe Element
-filterElement p e     = listToMaybe (filterElements p e)
-
-- | Filter the left-most occurrence of an element wrt. given predicate.
-filterElementName     :: (QName -> Bool) -> Element -> Maybe Element
-filterElementName p e  = listToMaybe (filterElementsName p e)
-
-- | Find all non-nested occurances of an element.
-- (i.e., once we have found an element, we do not search
-- for more occurances among the element's children).
-findElements       :: QName -> Element -> [Element]
-findElements qn e = filterElementsName (qn==) e
-
-- | Find all non-nested occurrences of an element wrt. given predicate.
-- (i.e., once we have found an element, we do not search
-- for more occurances among the element's children).
-filterElements       :: (Element -> Bool) -> Element -> [Element]
-filterElements p e
- | p e        = [e]
- | otherwise  = concatMap (filterElements p) $ onlyElems $ elContent e
-
-- | Find all non-nested occurences of an element wrt a predicate over element names.
-- (i.e., once we have found an element, we do not search
-- for more occurances among the element's children).
-filterElementsName       :: (QName -> Bool) -> Element -> [Element]
-filterElementsName p e = filterElements (p.elName) e
-
-- | Lookup the value of an attribute.
-findAttr           :: QName -> Element -> Maybe String
-findAttr x e        = attrVal `fmap` find ((x ==) . attrKey) (elAttribs e)
-
-- | Lookup attribute name from list.
-lookupAttr           :: QName -> [Attr] -> Maybe String
-lookupAttr x as        = attrVal `fmap` find ((x ==) . attrKey) as
-
--- a/Text/XML/Light/Types.hs
+++ b/Text/XML/Light/Types.hs
@ -1,91 +0,0 @@
--------------------------------------------------------------------
-- |
-- Module    : Text.XML.Light.Types
-- Copyright : (c) Galois, Inc. 2007
-- License   : BSD3
--
-- Maintainer: Iavor S. Diatchki <diatchki@galois.com>
-- Stability : provisional
-- Portability:
--
-- Basic XML types.
--
-
-module Text.XML.Light.Types where
-
-- | A line is an Integer
-type Line     = Integer
-
-- | XML content
-data Content  = Elem Element
-              | Text CData
-              | CRef String
-                deriving Show
-
-- | XML elements
-data Element  = Element {
-                  elName      :: QName,
-                  elAttribs   :: [Attr],
-                  elContent   :: [Content],
-                  elLine      :: Maybe Line
-                } deriving Show
-
-- | XML attributes
-data Attr     = Attr {
-                  attrKey :: QName,
-                  attrVal :: String
-                } deriving (Eq,Ord,Show)
-
-- | XML CData
-data CData    = CData {
-                  cdVerbatim  :: CDataKind,
-                  cdData      :: String,
-                  cdLine      :: Maybe Line
-                } deriving Show
-
-data CDataKind
- = CDataText      -- ^ Ordinary character data; pretty printer escapes &, < etc.
- | CDataVerbatim  -- ^ Unescaped character data; pretty printer embeds it in <![CDATA[..
- | CDataRaw       -- ^ As-is character data; pretty printer passes it along without any escaping or CDATA wrap-up.
-   deriving ( Eq, Show )
-
-- | XML qualified names
-data QName    = QName {
-                  qName   :: String,
-                  qURI    :: Maybe String,
-                  qPrefix :: Maybe String
-                } deriving Show
-
-
-instance Eq QName where
-  q1 == q2  = compare q1 q2 == EQ
-
-instance Ord QName where
-  compare q1 q2 =
-    case compare (qName q1) (qName q2) of
-      EQ  -> case (qURI q1, qURI q2) of
-               (Nothing,Nothing) -> compare (qPrefix q1) (qPrefix q2)
-               (u1,u2)           -> compare u1 u2
-      x   -> x
-
-
-- blank elements --------------------------------------------------------------
-
-- | Blank names
-blank_name :: QName
-blank_name = QName { qName = "", qURI = Nothing, qPrefix = Nothing }
-
-- | Blank cdata
-blank_cdata :: CData
-blank_cdata = CData { cdVerbatim = CDataText, cdData = "", cdLine = Nothing }
-
-- | Blank elements
-blank_element :: Element
-blank_element = Element
-                  { elName    = blank_name
-                  , elAttribs = []
-                  , elContent = []
-                  , elLine    = Nothing
-                  }
-
-
--- a/debian/control
+++ b/debian/control
@ -2,7 +2,7 @@ Source: pandoc
 Section: text
 Priority: optional
 Maintainer: Recai Oktaş <roktas@debian.org>
-Build-Depends: debhelper (>= 4.0.0), haskell-devscripts (>=0.5.12), ghc6 (>= 6.8.2-1), libghc6-xhtml-dev, libghc6-mtl-dev, libghc6-network-dev, zip
+Build-Depends: debhelper (>= 4.0.0), haskell-devscripts (>=0.5.12), ghc6 (>= 6.8.2-1), libghc6-xhtml-dev, libghc6-mtl-dev, libghc6-network-dev
 Build-Depends-Indep: haddock
 Standards-Version: 3.7.3
 Homepage: http://johnmacfarlane.net/pandoc/
@ -12,7 +12,7 @@ XS-Vcs-Browser: http://pandoc.googlecode.com/svn/trunk
 Package: pandoc
 Section: text
 Architecture: any
-Depends: ${shlibs:Depends}, zip, texlive-latex-recommended | tetex-extra
+Depends: ${shlibs:Depends}, texlive-latex-recommended | tetex-extra
 Suggests: tidy, wget | w3m
 Description: general markup converter
 Pandoc is a Haskell library for converting from one markup format to
@ -38,7 +38,7 @@ Description: general markup converter
 Package: libghc6-pandoc-dev
 Section: libdevel
 Architecture: any
-Depends: ${haskell:Depends}, libghc6-xhtml-dev, libghc6-mtl-dev, libghc6-network-dev, zip, texlive-latex-recommended | tetex-extra
+Depends: ${haskell:Depends}, libghc6-xhtml-dev, libghc6-mtl-dev, libghc6-network-dev, texlive-latex-recommended | tetex-extra
 Suggests: pandoc-doc
 Description: general markup converter
 Pandoc is a Haskell library for converting from one markup format to
--- a/debian/copyright
+++ b/debian/copyright
@ -57,68 +57,6 @@ by Eric A. Meyer

 Released under an explicit Public Domain License

----------------------------------------------------------------------
-System/IO/UTF8.hs and Codec/Binary/UTF8/String.hs
-from the utf8-string package on HackageDB
-Copyright (c) 2007, Galois Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of Galois Inc. nor the
-      names of its contributors may be used to endorse or promote products
-      derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY Galois Inc. ``AS IS'' AND ANY
-EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL Galois Inc. BE LIABLE FOR ANY
-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
----------------------------------------------------------------------
-Text/XML/Light/*
-from the xml package on HackageDB
-(c) 2007 Galois Inc.
-
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
-
-1. Redistributions of source code must retain the above copyright
-notice, this list of conditions and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the above copyright
-notice, this list of conditions and the following disclaimer in the
-documentation and/or other materials provided with the distribution.
-
-3. Neither the name of the author nor the names of his contributors
-may be used to endorse or promote products derived from this software
-without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS ``AS IS'' AND ANY EXPRESS
-OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
-STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
 ------------------------------------------------------------------------
 Diff.hs in tests/
 from the Diff package v 0.1.2 (Data.Algorithm.Diff)
--- a/odt-styles/content.xml
+++ b/odt-styles/content.xml
@ -1,17 +0,0 @@
-<?xml version="1.0" encoding="utf-8" ?>
-
-<office:document-content xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" xmlns:style="urn:oasis:names:tc:opendocument:xmlns:style:1.0" xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0" xmlns:table="urn:oasis:names:tc:opendocument:xmlns:table:1.0" xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" xmlns:fo="urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0" xmlns:number="urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" xmlns:svg="urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0" xmlns:chart="urn:oasis:names:tc:opendocument:xmlns:chart:1.0" xmlns:dr3d="urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" xmlns:math="http://www.w3.org/1998/Math/MathML" xmlns:form="urn:oasis:names:tc:opendocument:xmlns:form:1.0" xmlns:script="urn:oasis:names:tc:opendocument:xmlns:script:1.0" xmlns:ooo="http://openoffice.org/2004/office" xmlns:ooow="http://openoffice.org/2004/writer" xmlns:oooc="http://openoffice.org/2004/calc" xmlns:dom="http://www.w3.org/2001/xml-events" xmlns:xforms="http://www.w3.org/2002/xforms" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" office:version="1.0">
-  <office:scripts />
-  <office:font-face-decls>
-    <style:font-face style:name="&amp;apos;Lucida Sans Unicode&amp;apos;" svg:font-family="Lucida Sans Unicode" />
-    <style:font-face style:name="&amp;apos;Tahoma&amp;apos;" svg:font-family="Tahoma" />
-    <style:font-face style:name="&amp;apos;Times New Roman&amp;apos;" svg:font-family="Times New Roman" />
-  </office:font-face-decls>
-  <office:automatic-styles>
-  </office:automatic-styles>
-  <office:body>
-    <office:text>
-    </office:text>
-  </office:body>
-  
-</office:document-content>
--- a/pandoc.cabal
+++ b/pandoc.cabal
@ -38,12 +38,12 @@ Extra-Source-Files:
                 -- documentation
                 README, INSTALL, COPYRIGHT, COPYING, changelog,
                 -- sources for man pages
-                 man/man1/pandoc.1.md,
+                 man/man1/pandoc.1.md, man/man1/markdown2pdf.1.md,
                 man/man1/html2markdown.1.md, man/man1/hsmarkdown.1.md,
                 -- Makefile
                 Makefile,
                 -- wrappers
-                 html2markdown, hsmarkdown,
+                 markdown2pdf, html2markdown, hsmarkdown,
                 -- data for DefaultHeaders.hs
                 data/headers/ConTeXt.header,
                 data/headers/Docbook.header,
@ -65,13 +65,11 @@ Extra-Source-Files:
                 data/ui/default/print.css,
                 -- data for ODT writer
                 odt-styles/meta.xml,
-                 odt-styles/content.xml,
                 odt-styles/settings.xml,
                 odt-styles/META-INF/manifest.xml,
                 odt-styles/Thumbnails/thumbnail.png,
                 odt-styles/styles.xml,
                 odt-styles/mimetype,
-                 odt-styles/reference.odt,
                 -- tests
                 tests/bodybg.gif,
                 tests/writer.latex,
@ -151,7 +149,7 @@ Library
    Exposed-Modules: Text.Pandoc.Biblio
    cpp-options:   -D_CITEPROC
  Build-Depends: parsec < 3, xhtml, mtl, network, filepath, process, directory,
-                 template-haskell, bytestring
+                 template-haskell, bytestring, zip-archive, utf8-string, old-time
  Hs-Source-Dirs:  .
  Exposed-Modules: Text.Pandoc,
                   Text.Pandoc.Blocks,
@ -180,17 +178,7 @@ Library
                   Text.Pandoc.Writers.RTF,
                   Text.Pandoc.Writers.S5
  Other-Modules:   Text.Pandoc.XML,
-                   Text.Pandoc.TH,
-                   -- from xml package
-                   Text.XML.Light,
-                   Text.XML.Light.Types,
-                   Text.XML.Light.Output,
-                   Text.XML.Light.Input,
-                   Text.XML.Light.Proc,
-                   Text.XML.Light.Cursor,
-                   -- from utf8-string package
-                   System.IO.UTF8,
-                   Codec.Binary.UTF8.String
+                   Text.Pandoc.TH
  Extensions:      CPP, TemplateHaskell, FlexibleInstances
  Ghc-Options:     -O2 -Wall -threaded
  Ghc-Prof-Options: -auto-all