Reworked Text.Pandoc.ODT to use zip-archive instead of calling external 'zip'.

+ Removed utf8-string and xml-light modules, and unneeded content.xml.
+ Removed code for building reference.odt from Setup.hs.
  The ODT is now built using template haskell in Text.Pandoc.ODT.
+ Removed copyright statements for utf8-string and xml modules,
  since they are no longer included in the source.
+ README: Removed claim that 'zip' is needed for ODT production.
+ Removed dependency on 'zip' from debian/control.
+ Text.Pandoc.Shared: Removed withTempDir, added inDirectory.
+ Added makeZip to Text.Pandoc.TH.
+ pandoc.cabal: Added dependencies on old-time, zip-archive, and utf8-string.
  Added markdown2pdf files to extra-sources list.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1417 788f1e2b-df1e-0410-8736-df70ead52e1b
This commit is contained in:
fiddlosopher 2008-09-04 02:51:28 +00:00
parent 9b7ec2d366
commit 4dca8f6e75
17 changed files with 75 additions and 1537 deletions

View file

@ -1,97 +0,0 @@
--
-- |
-- Module : Codec.Binary.UTF8.String
-- Copyright : (c) Eric Mertens 2007
-- License : BSD3-style (see LICENSE)
--
-- Maintainer: emertens@galois.com
-- Stability : experimental
-- Portability : portable
--
-- Support for encoding UTF8 Strings to and from @[Word8]@
--
module Codec.Binary.UTF8.String (
encode
, decode
, encodeString
, decodeString
) where
import Data.Word (Word8)
import Data.Bits ((.|.),(.&.),shiftL,shiftR)
import Data.Char (chr,ord)
default(Int)
-- | Encode a string using 'encode' and store the result in a 'String'.
encodeString :: String -> String
encodeString xs = map (toEnum . fromEnum) (encode xs)
-- | Decode a string using 'decode' using a 'String' as input.
-- | This is not safe but it is necessary if UTF-8 encoded text
-- | has been loaded into a 'String' prior to being decoded.
decodeString :: String -> String
decodeString xs = decode (map (toEnum . fromEnum) xs)
replacement_character :: Char
replacement_character = '\xfffd'
-- | Encode a Haskell String to a list of Word8 values, in UTF8 format.
encode :: String -> [Word8]
encode = concatMap (map fromIntegral . go . ord)
where
go oc
| oc <= 0x7f = [oc]
| oc <= 0x7ff = [ 0xc0 + (oc `shiftR` 6)
, 0x80 + oc .&. 0x3f
]
| oc <= 0xffff = [ 0xe0 + (oc `shiftR` 12)
, 0x80 + ((oc `shiftR` 6) .&. 0x3f)
, 0x80 + oc .&. 0x3f
]
| otherwise = [ 0xf0 + (oc `shiftR` 18)
, 0x80 + ((oc `shiftR` 12) .&. 0x3f)
, 0x80 + ((oc `shiftR` 6) .&. 0x3f)
, 0x80 + oc .&. 0x3f
]
--
-- | Decode a UTF8 string packed into a list of Word8 values, directly to String
--
decode :: [Word8] -> String
decode [ ] = ""
decode (c:cs)
| c < 0x80 = chr (fromEnum c) : decode cs
| c < 0xc0 = replacement_character : decode cs
| c < 0xe0 = multi1
| c < 0xf0 = multi_byte 2 0xf 0x800
| c < 0xf8 = multi_byte 3 0x7 0x10000
| c < 0xfc = multi_byte 4 0x3 0x200000
| c < 0xfe = multi_byte 5 0x1 0x4000000
| otherwise = replacement_character : decode cs
where
multi1 = case cs of
c1 : ds | c1 .&. 0xc0 == 0x80 ->
let d = ((fromEnum c .&. 0x1f) `shiftL` 6) .|. fromEnum (c1 .&. 0x3f)
in if d >= 0x000080 then toEnum d : decode ds
else replacement_character : decode ds
_ -> replacement_character : decode cs
multi_byte :: Int -> Word8 -> Int -> [Char]
multi_byte i mask overlong = aux i cs (fromEnum (c .&. mask))
where
aux 0 rs acc
| overlong <= acc && acc <= 0x10ffff &&
(acc < 0xd800 || 0xdfff < acc) &&
(acc < 0xfffe || 0xffff < acc) = chr acc : decode rs
| otherwise = replacement_character : decode rs
aux n (r:rs) acc
| r .&. 0xc0 == 0x80 = aux (n-1) rs
$ shiftL acc 6 .|. fromEnum (r .&. 0x3f)
aux _ rs _ = replacement_character : decode rs

5
README
View file

@ -87,11 +87,6 @@ Supported output formats include `markdown`, `latex`, `context`
Texinfo), `mediawiki` (MediaWiki markup), `man` (groff man), and `s5`
(which produces an HTML file that acts like powerpoint).
For `odt` output, you must have `zip` in the path. If you
don't have it installed, you can get the free [Info-ZIP].
[Info-ZIP]: http://www.info-zip.org/Zip.html
Supported input formats include `markdown`, `html`, `latex`, and `rst`.
Note that the `rst` reader only parses a subset of reStructuredText
syntax. For example, it doesn't handle tables, option lists, or

View file

@ -12,7 +12,6 @@ import Data.Maybe ( fromJust, isNothing, catMaybes )
main = defaultMainWithHooks $
simpleUserHooks { runTests = runTestSuite
, preConf = checkReferenceODT
, postBuild = makeManPages }
-- | Run test suite.
@ -20,34 +19,6 @@ runTestSuite _ _ _ _ = do
inDirectory "tests" $ runCommand "runhaskell -i.. RunTests.hs" >>= waitForProcess
return ()
-- | If reference.odt needs rebuilding, build it.
checkReferenceODT _ _ = inDirectory "odt-styles" $ do
let refodt = "reference.odt"
let deps = [ "meta.xml", "content.xml", "settings.xml", "META-INF/manifest.xml",
"Thumbnails/thumbnail.png", "styles.xml", "mimetype" ]
modifiedDeps <- modifiedDependencies refodt deps
if null modifiedDeps
then return ()
else makeReferenceODT modifiedDeps
return emptyHookedBuildInfo
-- | Create reference.odt by zipping up sources in odt-styles directory.
makeReferenceODT :: [FilePath] -> IO ()
makeReferenceODT sources = do
zipPathMaybe <- findExecutable "zip"
if isNothing zipPathMaybe
then error $ "The 'zip' command, which is needed to build reference.odt\n" ++
"from sources in the odt-styles directory, was not found.\n" ++
"Try again after installing zip (http://www.info-zip.org/Zip.html).\n" ++
"Or use the pandoc source tarball, which contains a prebuilt reference.odt."
else do
putStrLn "Creating reference.odt:"
ec <- runProcess (fromJust zipPathMaybe) (["-9", "-r", "reference.odt"] ++ sources)
Nothing Nothing Nothing Nothing (Just stderr) >>= waitForProcess
case ec of
ExitSuccess -> return ()
_ -> error "Error creating ODT."
-- | Build man pages from markdown sources in man/man1/.
makeManPages _ _ _ _ = do
mapM makeManPage ["pandoc.1", "hsmarkdown.1", "html2markdown.1", "markdown2pdf.1"]

View file

@ -1,118 +0,0 @@
-----------------------------------------------------------------------------
-- |
-- Module : System.IO.UTF8
-- Copyright : (c) Eric Mertens 2007
-- License : BSD3-style (see LICENSE)
--
-- Maintainer: emertens@galois.com
-- Stability : experimental
-- Portability : portable
--
-- String IO preserving UTF8 encoding.
--
module System.IO.UTF8 (
print
, putStr
, putStrLn
, getLine
, readLn
, readFile
, writeFile
, appendFile
, getContents
, hGetLine
, hGetContents
, hPutStr
, hPutStrLn
) where
import Control.Monad (liftM)
import Data.Char (ord, chr)
import Data.Word (Word8)
import Prelude (String, ($), (=<<), (>>=), (.), map, toEnum, fromEnum, Read,
Show(..))
import System.IO (Handle, IO, FilePath)
import qualified System.IO as IO
import Codec.Binary.UTF8.String (encode, decode)
-- | Encode a string in UTF8 form.
encodeString :: String -> String
encodeString xs = bytesToString (encode xs)
-- | Decode a string from UTF8
decodeString :: String -> String
decodeString xs = decode (stringToBytes xs)
-- | Convert a list of bytes to a String
bytesToString :: [Word8] -> String
bytesToString xs = map (chr . fromEnum) xs
-- | String to list of bytes.
stringToBytes :: String -> [Word8]
stringToBytes xs = map (toEnum . ord) xs
-- | The 'print' function outputs a value of any printable type to the
-- standard output device. This function differs from the
-- System.IO.print in that it preserves any UTF8 encoding of the shown value.
--
print :: Show a => a -> IO ()
print x = putStrLn (show x)
-- | Write a UTF8 string to the standard output device
putStr :: String -> IO ()
putStr x = IO.putStr (encodeString x)
-- | The same as 'putStr', but adds a newline character.
putStrLn :: String -> IO ()
putStrLn x = IO.putStrLn (encodeString x)
-- | Read a UTF8 line from the standard input device
getLine :: IO String
getLine = liftM decodeString IO.getLine
-- | The 'readLn' function combines 'getLine' and 'readIO', preserving UTF8
readLn :: Read a => IO a
readLn = IO.readIO =<< getLine
-- | The 'readFile' function reads a file and
-- returns the contents of the file as a UTF8 string.
-- The file is read lazily, on demand, as with 'getContents'.
readFile :: FilePath -> IO String
readFile n = liftM decodeString (IO.openBinaryFile n IO.ReadMode >>=
IO.hGetContents)
-- | The computation 'writeFile' @file str@ function writes the UTF8 string @str@,
-- to the file @file@.
writeFile :: FilePath -> String -> IO ()
writeFile n c = IO.withBinaryFile n IO.WriteMode $ \ h ->
IO.hPutStr h $ encodeString c
-- | The computation 'appendFile' @file str@ function appends the UTF8 string @str@,
-- to the file @file@.
appendFile :: FilePath -> String -> IO ()
appendFile n c = IO.withBinaryFile n IO.AppendMode $ \h ->
IO.hPutStr h $ encodeString c
-- | Read a UTF8 line from a Handle
hGetLine :: Handle -> IO String
hGetLine h = liftM decodeString $ IO.hGetLine h
-- | Lazily read a UTF8 string from a Handle
hGetContents :: Handle -> IO String
hGetContents h = liftM decodeString (IO.hGetContents h)
-- | Write a UTF8 string to a Handle.
hPutStr :: Handle -> String -> IO ()
hPutStr h s = IO.hPutStr h (encodeString s)
-- | Write a UTF8 string to a Handle, appending a newline.
hPutStrLn :: Handle -> String -> IO ()
hPutStrLn h s = IO.hPutStrLn h (encodeString s)
-- | Lazily read stdin as a UTF8 string.
getContents :: IO String
getContents = liftM decodeString IO.getContents

View file

@ -29,22 +29,16 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Functions for producing an ODT file from OpenDocument XML.
-}
module Text.Pandoc.ODT ( saveOpenDocumentAsODT ) where
import Text.Pandoc.TH ( binaryContentsOf )
import Data.Maybe ( fromJust )
import Data.List ( partition, intersperse )
import System.Directory
import System.FilePath ( (</>), takeDirectory, takeFileName, splitDirectories )
import System.Process ( runProcess, waitForProcess )
import System.Exit
import Text.XML.Light
import Text.XML.Light.Cursor
import Text.Pandoc.Shared ( withTempDir )
import Network.URI ( isURI )
import qualified Data.ByteString as B ( writeFile, pack )
import Data.ByteString.Internal ( c2w )
import Text.Pandoc.TH ( makeZip )
import Data.List ( find )
import System.FilePath ( (</>), takeFileName )
import qualified Data.ByteString.Lazy as B
import Data.ByteString.Lazy.UTF8 ( fromString )
import Prelude hiding ( writeFile, readFile )
import System.IO ( stderr )
import System.IO.UTF8
import Codec.Archive.Zip
import Control.Applicative ( (<$>) )
import Text.ParserCombinators.Parsec
import System.Time
-- | Produce an ODT file from OpenDocument XML.
saveOpenDocumentAsODT :: FilePath -- ^ Pathname of ODT file to be produced.
@ -52,89 +46,43 @@ saveOpenDocumentAsODT :: FilePath -- ^ Pathname of ODT file to be produced.
-> String -- ^ OpenDocument XML contents.
-> IO ()
saveOpenDocumentAsODT destinationODTPath sourceDirRelative xml = do
let zipCmd = "zip"
-- check for zip in path:
zipPathMaybe <- findExecutable zipCmd
let zipPath = case zipPathMaybe of
Nothing -> error $ "The '" ++ zipCmd ++
"' command, which is needed to build an ODT file, was not found.\n" ++
"It can be obtained from http://www.info-zip.org/Zip.html\n" ++
"Debian (and Debian-based) linux: apt-get install zip\n" ++
"Windows: See http://gnuwin32.sourceforge.net/packages/zip.htm"
Just x -> x
withTempDir "pandoc-odt" $ \tempDir -> do
let tempODT = tempDir </> "reference.odt"
B.writeFile tempODT $ B.pack $ map c2w $(binaryContentsOf $ "odt-styles" </> "reference.odt")
xml' <- handlePictures tempODT sourceDirRelative xml
writeFile (tempDir </> "content.xml") xml'
ph <- runProcess zipPath ["-9", "-q", "-r", tempODT, "content.xml", "Pictures"]
(Just tempDir) Nothing Nothing Nothing (Just stderr)
ec <- waitForProcess ph -- requires compilation with -threaded
case ec of
ExitSuccess -> copyFile tempODT destinationODTPath
_ -> error "Error creating ODT." >> exitWith ec
let refArchive = read $(makeZip "odt-styles")
-- handle pictures
let (newContents, pics) =
case runParser pPictures [] "OpenDocument XML contents" xml of
Left err -> error $ show err
Right x -> x
picEntries <- mapM (makePictureEntry sourceDirRelative) pics
(TOD epochTime _) <- getClockTime
let contentEntry = toEntry "content.xml" epochTime $ fromString newContents
let archive = foldr addEntryToArchive refArchive (contentEntry : picEntries)
B.writeFile destinationODTPath $ fromArchive archive
-- | Find <draw:image ... /> elements and copy the file (xlink:href attribute) into Pictures/ in
-- the zip file. If filename is a URL, attempt to download it. Modify xlink:href attributes
-- to point to the new locations in Pictures/. Return modified XML.
handlePictures :: FilePath -- ^ Path of ODT file in temp directory
-> FilePath -- ^ Directory (relative) containing source file
-> String -- ^ OpenDocument XML string
-> IO String -- ^ Modified XML
handlePictures tempODT sourceDirRelative xml = do
let parsed = case parseXMLDoc xml of
Nothing -> error "Could not parse OpenDocument XML."
Just x -> x
let cursor = case (fromForest $ elContent parsed) of
Nothing -> error "ODT appears empty"
Just x -> x
cursor' <- scanPictures tempODT sourceDirRelative cursor
let modified = parsed { elContent = toForest $ root cursor' }
return $ showTopElement modified
makePictureEntry :: FilePath -- ^ Relative directory of source file
-> (FilePath, String) -- ^ Path and new path of picture
-> IO Entry
makePictureEntry sourceDirRelative (path, newPath) = do
entry <- readEntry [] $ sourceDirRelative </> path
return (entry { eRelativePath = newPath })
scanPictures :: FilePath -> FilePath -> Cursor -> IO Cursor
scanPictures tempODT sourceDirRelative cursor = do
cursor' <- handleTree tempODT sourceDirRelative cursor
case right cursor' of
Just n -> scanPictures tempODT sourceDirRelative n
Nothing -> return cursor'
handleTree :: FilePath -> FilePath -> Cursor -> IO Cursor
handleTree tempODT sourceDirRelative cursor = do
case firstChild cursor of
Nothing -> modifyContentM (handleContent tempODT sourceDirRelative) cursor
Just n -> scanPictures tempODT sourceDirRelative n >>= return . fromJust . parent
-- | If content is an image link, handle it appropriately.
-- Otherwise, handle children if any.
handleContent :: FilePath -> FilePath -> Content -> IO Content
handleContent tempODT sourceDirRelative content@(Elem el) = do
if qName (elName el) == "image"
then do
let (hrefs, rest) = partition (\a -> qName (attrKey a) == "href") $ elAttribs el
let href = case hrefs of
[] -> error $ "No href found in " ++ show el
[x] -> x
_ -> error $ "Multiple hrefs found in " ++ show el
if isURI $ attrVal href
then return content
else do -- treat as filename
let oldLoc = sourceDirRelative </> attrVal href
fileExists <- doesFileExist oldLoc
if fileExists
then do
let pref = take 230 $ concat $ intersperse "_" $
splitDirectories $ takeDirectory $ attrVal href
let picName = pref ++ "_" ++ (takeFileName $ attrVal href)
let tempDir = takeDirectory tempODT
createDirectoryIfMissing False $ tempDir </> "Pictures"
copyFile oldLoc $ tempDir </> "Pictures" </> picName
let newAttrs = (href { attrVal = "Pictures/" ++ picName }) : rest
return $ Elem (el { elAttribs = newAttrs })
else do
hPutStrLn stderr $ "Warning: Unable to find image at " ++ oldLoc ++ " - ignoring."
return content
else return content
handleContent _ _ c = return c -- not Element
pPictures :: GenParser Char [(FilePath, String)] ([Char], [(FilePath, String)])
pPictures = do
contents <- concat <$> many (pPicture <|> many1 (noneOf "<") <|> string "<")
pics <- getState
return (contents, pics)
pPicture :: GenParser Char [(FilePath, String)] [Char]
pPicture = try $ do
string "<draw:image xlink:href=\""
path <- manyTill anyChar (char '"')
let filename = takeFileName path
pics <- getState
newPath <- case find (\(o, _) -> o == path) pics of
Just (_, new) -> return new
Nothing -> do
-- get a unique name
let dups = length $ (filter (\(o, _) -> takeFileName o == filename)) pics
let new = "Pictures/" ++ replicate dups '0' ++ filename
updateState ((path, new) :)
return new
return $ "<draw:image xlink:href=\"" ++ newPath ++ "\""

View file

@ -101,7 +101,7 @@ module Text.Pandoc.Shared (
WriterOptions (..),
defaultWriterOptions,
-- * File handling
withTempDir
inDirectory
) where
import Text.Pandoc.Definition
@ -112,10 +112,7 @@ import Text.Pandoc.CharacterReferences ( characterReference )
import Data.Char ( toLower, toUpper, ord, isLower, isUpper )
import Data.List ( find, isPrefixOf )
import Control.Monad ( join )
import Control.Exception ( bracket )
import Network.URI ( parseURI, URI (..), isAllowedInURI )
import System.FilePath ( (</>), (<.>) )
import System.IO.Error ( catch, ioError, isAlreadyExistsError )
import System.Directory
import Prelude hiding ( putStrLn, writeFile, readFile, getContents )
import System.IO.UTF8
@ -920,16 +917,11 @@ defaultWriterOptions =
-- File handling
--
-- | Perform a function in a temporary directory and clean up.
withTempDir :: FilePath -> (FilePath -> IO a) -> IO a
withTempDir baseName = bracket (createTempDir 0 baseName) (removeDirectoryRecursive)
-- | Create a temporary directory with a unique name.
createTempDir :: Integer -> FilePath -> IO FilePath
createTempDir num baseName = do
sysTempDir <- getTemporaryDirectory
let dirName = sysTempDir </> baseName <.> show num
catch (createDirectory dirName >> return dirName) $
\e -> if isAlreadyExistsError e
then createTempDir (num + 1) baseName
else ioError e
-- | Perform an IO action in a directory, returning to starting directory.
inDirectory :: FilePath -> IO a -> IO a
inDirectory path action = do
oldDir <- getCurrentDirectory
setCurrentDirectory path
result <- action
setCurrentDirectory oldDir
return result

View file

@ -30,7 +30,8 @@ Template haskell functions used by Pandoc modules.
-}
module Text.Pandoc.TH (
contentsOf,
binaryContentsOf
binaryContentsOf,
makeZip
) where
import Language.Haskell.TH
@ -39,6 +40,8 @@ import qualified Data.ByteString as B
import Data.ByteString.Internal ( w2c )
import Prelude hiding ( readFile )
import System.IO.UTF8
import Codec.Archive.Zip
import Text.Pandoc.Shared ( inDirectory )
-- | Insert contents of text file into a template.
contentsOf :: FilePath -> ExpQ
@ -51,3 +54,12 @@ binaryContentsOf p = lift =<< (runIO $ B.readFile p)
instance Lift B.ByteString where
lift x = return (LitE (StringL $ map w2c $ B.unpack x))
instance Lift Archive where
lift x = return (LitE (StringL $ show x ))
-- | Construct zip file from files in a directory, and
-- insert into a template.
makeZip :: FilePath -> ExpQ
makeZip path = lift =<< (runIO $ inDirectory path $ addFilesToArchive [OptRecursive] emptyArchive ["."])

View file

@ -1,96 +0,0 @@
{-# LANGUAGE FlexibleInstances #-}
--------------------------------------------------------------------
-- |
-- Module : Text.XML.Light
-- Copyright : (c) Galois, Inc. 2007
-- License : BSD3
--
-- Maintainer: Iavor S. Diatchki <diatchki@galois.com>
-- Stability : provisional
-- Portability: portability
--
-- A lightweight XML parsing, filtering and generating library.
--
-- This module reexports functions from:
--
-- * "Text.XML.Light.Types"
--
-- * "Text.XML.Light.Proc"
--
-- * "Text.XML.Light.Input"
--
-- * "Text.XML.Light.Output"
--
module Text.XML.Light (
module Text.XML.Light,
module Text.XML.Light.Types,
module Text.XML.Light.Proc,
module Text.XML.Light.Input,
module Text.XML.Light.Output
) where
import Text.XML.Light.Types
import Text.XML.Light.Proc
import Text.XML.Light.Input
import Text.XML.Light.Output
-- | Add an attribute to an element.
add_attr :: Attr -> Element -> Element
add_attr a e = add_attrs [a] e
-- | Add some attributes to an element.
add_attrs :: [Attr] -> Element -> Element
add_attrs as e = e { elAttribs = as ++ elAttribs e }
-- | Create an unqualified name.
unqual :: String -> QName
unqual x = blank_name { qName = x }
-- | A smart element constructor which uses the type of its argument
-- to determine what sort of element to make.
class Node t where
node :: QName -> t -> Element
instance Node ([Attr],[Content]) where
node n (attrs,cont) = blank_element { elName = n
, elAttribs = attrs
, elContent = cont
}
instance Node [Attr] where node n as = node n (as,[]::[Content])
instance Node Attr where node n a = node n [a]
instance Node () where node n () = node n ([]::[Attr])
instance Node [Content] where node n cs = node n ([]::[Attr],cs)
instance Node Content where node n c = node n [c]
instance Node ([Attr],Content) where node n (as,c) = node n (as,[c])
instance Node (Attr,Content) where node n (a,c) = node n ([a],[c])
instance Node ([Attr],[Element]) where
node n (as,cs) = node n (as,map Elem cs)
instance Node ([Attr],Element) where node n (as,c) = node n (as,[c])
instance Node (Attr,Element) where node n (a,c) = node n ([a],c)
instance Node ([Element]) where node n es = node n ([]::[Attr],es)
instance Node (Element) where node n e = node n [e]
instance Node ([Attr],[CData]) where
node n (as,cs) = node n (as,map Text cs)
instance Node ([Attr],CData) where node n (as,c) = node n (as,[c])
instance Node (Attr,CData) where node n (a,c) = node n ([a],c)
instance Node [CData] where node n es = node n ([]::[Attr],es)
instance Node CData where node n e = node n [e]
instance Node ([Attr],String) where
node n (as,t) = node n (as,blank_cdata { cdData = t })
instance Node (Attr,String) where node n (a,t) = node n ([a],t)
instance Node [Char] where node n t = node n ([]::[Attr],t)
-- | Create node with unqualified name
unode :: Node t => String -> t -> Element
unode = node . unqual

View file

@ -1,327 +0,0 @@
--------------------------------------------------------------------
-- |
-- Module : Text.XML.Light.Cursor
-- Copyright : (c) Galois, Inc. 2008
-- License : BSD3
--
-- Maintainer: Iavor S. Diatchki <diatchki@galois.com>
-- Stability : provisional
-- Portability:
--
-- XML cursors for working XML content withing the context of
-- an XML document. This implemntation is based on the general
-- tree zipper written by Krasimir Angelov and Iavor S. Diatchki.
--
module Text.XML.Light.Cursor
( Tag(..), getTag, setTag, fromTag
, Cursor(..), Path
-- * Conversions
, fromContent
, fromElement
, fromForest
, toForest
, toTree
-- * Moving around
, parent
, root
, getChild
, firstChild
, lastChild
, left
, right
-- ** Searching
, findChild
, findLeft
, findRight
-- * Node classification
, isRoot
, isFirst
, isLast
, isLeaf
, isChild
, hasChildren
, getNodeIndex
-- * Updates
, setContent
, modifyContent
, modifyContentM
-- ** Inserting content
, insertLeft
, insertRight
, insertGoLeft
, insertGoRight
-- ** Removing content
, removeLeft
, removeRight
, removeGoLeft
, removeGoRight
, removeGoUp
) where
import Text.XML.Light.Types
import Data.Maybe(isNothing)
data Tag = Tag { tagName :: QName
, tagAttribs :: [Attr]
, tagLine :: Maybe Line
} deriving (Show)
getTag :: Element -> Tag
getTag e = Tag { tagName = elName e
, tagAttribs = elAttribs e
, tagLine = elLine e
}
setTag :: Tag -> Element -> Element
setTag t e = fromTag t (elContent e)
fromTag :: Tag -> [Content] -> Element
fromTag t cs = Element { elName = tagName t
, elAttribs = tagAttribs t
, elLine = tagLine t
, elContent = cs
}
type Path = [([Content],Tag,[Content])]
-- | The position of a piece of content in an XML document.
data Cursor = Cur
{ current :: Content -- ^ The currently selected content.
, lefts :: [Content] -- ^ Siblings on the left, closest first.
, rights :: [Content] -- ^ Siblings on the right, closest first.
, parents :: Path -- ^ The contexts of the parent elements of this location.
} deriving (Show)
-- Moving around ---------------------------------------------------------------
-- | The parent of the given location.
parent :: Cursor -> Maybe Cursor
parent loc =
case parents loc of
(pls,v,prs) : ps -> Just
Cur { current = Elem
(fromTag v
(combChildren (lefts loc) (current loc) (rights loc)))
, lefts = pls, rights = prs, parents = ps
}
[] -> Nothing
-- | The top-most parent of the given location.
root :: Cursor -> Cursor
root loc = maybe loc root (parent loc)
-- | The left sibling of the given location.
left :: Cursor -> Maybe Cursor
left loc =
case lefts loc of
t : ts -> Just loc { current = t, lefts = ts
, rights = current loc : rights loc }
[] -> Nothing
-- | The right sibling of the given location.
right :: Cursor -> Maybe Cursor
right loc =
case rights loc of
t : ts -> Just loc { current = t, lefts = current loc : lefts loc
, rights = ts }
[] -> Nothing
-- | The first child of the given location.
firstChild :: Cursor -> Maybe Cursor
firstChild loc =
do (t : ts, ps) <- downParents loc
return Cur { current = t, lefts = [], rights = ts , parents = ps }
-- | The last child of the given location.
lastChild :: Cursor -> Maybe Cursor
lastChild loc =
do (ts, ps) <- downParents loc
case reverse ts of
l : ls -> return Cur { current = l, lefts = ls, rights = []
, parents = ps }
[] -> Nothing
-- | Find the next left sibling that satisfies a predicate.
findLeft :: (Cursor -> Bool) -> Cursor -> Maybe Cursor
findLeft p loc = do loc1 <- left loc
if p loc1 then return loc1 else findLeft p loc1
-- | Find the next right sibling that satisfies a predicate.
findRight :: (Cursor -> Bool) -> Cursor -> Maybe Cursor
findRight p loc = do loc1 <- right loc
if p loc1 then return loc1 else findRight p loc1
-- | The first child that satisfies a predicate.
findChild :: (Cursor -> Bool) -> Cursor -> Maybe Cursor
findChild p loc =
do loc1 <- firstChild loc
if p loc1 then return loc1 else findRight p loc1
-- | The child with the given index (starting from 0).
getChild :: Int -> Cursor -> Maybe Cursor
getChild n loc =
do (ts,ps) <- downParents loc
(ls,t,rs) <- splitChildren ts n
return Cur { current = t, lefts = ls, rights = rs, parents = ps }
-- | private: computes the parent for "down" operations.
downParents :: Cursor -> Maybe ([Content], Path)
downParents loc =
case current loc of
Elem e -> Just ( elContent e
, (lefts loc, getTag e, rights loc) : parents loc
)
_ -> Nothing
-- Conversions -----------------------------------------------------------------
-- | A cursor for the guven content.
fromContent :: Content -> Cursor
fromContent t = Cur { current = t, lefts = [], rights = [], parents = [] }
-- | A cursor for the guven element.
fromElement :: Element -> Cursor
fromElement e = fromContent (Elem e)
-- | The location of the first tree in a forest.
fromForest :: [Content] -> Maybe Cursor
fromForest (t:ts) = Just Cur { current = t, lefts = [], rights = ts
, parents = [] }
fromForest [] = Nothing
-- | Computes the tree containing this location.
toTree :: Cursor -> Content
toTree loc = current (root loc)
-- | Computes the forest containing this location.
toForest :: Cursor -> [Content]
toForest loc = let r = root loc in combChildren (lefts r) (current r) (rights r)
-- Queries ---------------------------------------------------------------------
-- | Are we at the top of the document?
isRoot :: Cursor -> Bool
isRoot loc = null (parents loc)
-- | Are we at the left end of the the document?
isFirst :: Cursor -> Bool
isFirst loc = null (lefts loc)
-- | Are we at the right end of the document?
isLast :: Cursor -> Bool
isLast loc = null (rights loc)
-- | Are we at the bottom of the document?
isLeaf :: Cursor -> Bool
isLeaf loc = isNothing (downParents loc)
-- | Do we have a parent?
isChild :: Cursor -> Bool
isChild loc = not (isRoot loc)
-- | Get the node index inside the sequence of children
getNodeIndex :: Cursor -> Int
getNodeIndex loc = length (lefts loc)
-- | Do we have children?
hasChildren :: Cursor -> Bool
hasChildren loc = not (isLeaf loc)
-- Updates ---------------------------------------------------------------------
-- | Change the current content.
setContent :: Content -> Cursor -> Cursor
setContent t loc = loc { current = t }
-- | Modify the current content.
modifyContent :: (Content -> Content) -> Cursor -> Cursor
modifyContent f loc = setContent (f (current loc)) loc
-- | Modify the current content, allowing for an effect.
modifyContentM :: Monad m => (Content -> m Content) -> Cursor -> m Cursor
modifyContentM f loc = do x <- f (current loc)
return (setContent x loc)
-- | Insert content to the left of the current position.
insertLeft :: Content -> Cursor -> Cursor
insertLeft t loc = loc { lefts = t : lefts loc }
-- | Insert content to the right of the current position.
insertRight :: Content -> Cursor -> Cursor
insertRight t loc = loc { rights = t : rights loc }
-- | Remove the conent on the left of the current position, if any.
removeLeft :: Cursor -> Maybe (Content,Cursor)
removeLeft loc = case lefts loc of
l : ls -> return (l,loc { lefts = ls })
[] -> Nothing
-- | Remove the conent on the right of the current position, if any.
removeRight :: Cursor -> Maybe (Content,Cursor)
removeRight loc = case rights loc of
l : ls -> return (l,loc { rights = ls })
[] -> Nothing
-- | Insert content to the left of the current position.
-- The new content becomes the current position.
insertGoLeft :: Content -> Cursor -> Cursor
insertGoLeft t loc = loc { current = t, rights = current loc : rights loc }
-- | Insert content to the right of the current position.
-- The new content becomes the current position.
insertGoRight :: Content -> Cursor -> Cursor
insertGoRight t loc = loc { current = t, lefts = current loc : lefts loc }
-- | Remove the current element.
-- The new position is the one on the left.
removeGoLeft :: Cursor -> Maybe Cursor
removeGoLeft loc = case lefts loc of
l : ls -> Just loc { current = l, lefts = ls }
[] -> Nothing
-- | Remove the current element.
-- The new position is the one on the right.
removeGoRight :: Cursor -> Maybe Cursor
removeGoRight loc = case rights loc of
l : ls -> Just loc { current = l, rights = ls }
[] -> Nothing
-- | Remove the current element.
-- The new position is the parent of the old position.
removeGoUp :: Cursor -> Maybe Cursor
removeGoUp loc =
case parents loc of
(pls,v,prs) : ps -> Just
Cur { current = Elem (fromTag v (reverse (lefts loc) ++ rights loc))
, lefts = pls, rights = prs, parents = ps
}
[] -> Nothing
-- | private: Gets the given element of a list.
-- Also returns the preceeding elements (reversed) and the folloing elements.
splitChildren :: [a] -> Int -> Maybe ([a],a,[a])
splitChildren _ n | n < 0 = Nothing
splitChildren cs pos = loop [] cs pos
where loop acc (x:xs) 0 = Just (acc,x,xs)
loop acc (x:xs) n = loop (x:acc) xs $! n-1
loop _ _ _ = Nothing
-- | private: combChildren ls x ys = reverse ls ++ [x] ++ ys
combChildren :: [a] -> a -> [a] -> [a]
combChildren ls t rs = foldl (flip (:)) (t:rs) ls

View file

@ -1,307 +0,0 @@
--------------------------------------------------------------------
-- |
-- Module : Text.XML.Light.Input
-- Copyright : (c) Galois, Inc. 2007
-- License : BSD3
--
-- Maintainer: Iavor S. Diatchki <diatchki@galois.com>
-- Stability : provisional
-- Portability: portable
--
-- Lightweight XML parsing
--
module Text.XML.Light.Input (parseXML,parseXMLDoc) where
import Text.XML.Light.Types
import Text.XML.Light.Proc
import Text.XML.Light.Output(tagEnd)
import Data.Char(isSpace)
import Data.List(isPrefixOf)
import Numeric(readHex)
-- | parseXMLDoc, parse a XMLl document to maybe an element
parseXMLDoc :: String -> Maybe Element
parseXMLDoc xs = strip (parseXML xs)
where strip cs = case onlyElems cs of
e : es
| "?xml" `isPrefixOf` qName (elName e)
-> strip (map Elem es)
| otherwise -> Just e
_ -> Nothing
-- | parseXML to a list of content chunks
parseXML :: String -> [Content]
parseXML xs = parse $ tokens $ preprocess xs
------------------------------------------------------------------------
parse :: [Token] -> [Content]
parse [] = []
parse ts = let (es,_,ts1) = nodes ([],Nothing) [] ts
in es ++ parse ts1
-- Information about namespaces.
-- The first component is a map that associates prefixes to URIs,
-- the second is the URI for the default namespace, if one was provided.
type NSInfo = ([(String,String)],Maybe String)
nodes :: NSInfo -> [QName] -> [Token] -> ([Content], [QName], [Token])
nodes ns ps (TokCRef ref : ts) =
let (es,qs,ts1) = nodes ns ps ts
in (CRef ref : es, qs, ts1)
nodes ns ps (TokText txt : ts) =
let (es,qs,ts1) = nodes ns ps ts
(more,es1) = case es of
Text cd : es1'
| cdVerbatim cd == cdVerbatim txt -> (cdData cd,es1')
_ -> ([],es)
in (Text txt { cdData = cdData txt ++ more } : es1, qs, ts1)
nodes cur_info ps (TokStart p t as empty : ts) = (node : siblings, open, toks)
where
new_name = annotName new_info t
new_info = foldr addNS cur_info as
node = Elem Element { elLine = Just p
, elName = new_name
, elAttribs = map (annotAttr new_info) as
, elContent = children
}
(children,(siblings,open,toks))
| empty = ([], nodes cur_info ps ts)
| otherwise = let (es1,qs1,ts1) = nodes new_info (new_name:ps) ts
in (es1,
case qs1 of
[] -> nodes cur_info ps ts1
_ : qs3 -> ([],qs3,ts1))
nodes ns ps (TokEnd p t : ts) = let t1 = annotName ns t
in case break (t1 ==) ps of
(as,_:_) -> ([],as,ts)
-- Unknown closing tag. Insert as text.
(_,[]) ->
let (es,qs,ts1) = nodes ns ps ts
in (Text CData {
cdLine = Just p,
cdVerbatim = CDataText,
cdData = tagEnd t ""
} : es,qs, ts1)
nodes _ ps [] = ([],ps,[])
annotName :: NSInfo -> QName -> QName
annotName (namespaces,def_ns) n =
n { qURI = maybe def_ns (`lookup` namespaces) (qPrefix n) }
annotAttr :: NSInfo -> Attr -> Attr
annotAttr ns a@(Attr { attrKey = k}) =
case (qPrefix k, qName k) of
(Nothing,"xmlns") -> a
_ -> a { attrKey = annotName ns k }
addNS :: Attr -> NSInfo -> NSInfo
addNS (Attr { attrKey = key, attrVal = val }) (ns,def) =
case (qPrefix key, qName key) of
(Nothing,"xmlns") -> (ns, if null val then Nothing else Just val)
(Just "xmlns", k) -> ((k, val) : ns, def)
_ -> (ns,def)
-- Lexer -----------------------------------------------------------------------
type LChar = (Line,Char)
type LString = [LChar]
data Token = TokStart Line QName [Attr] Bool -- is empty?
| TokEnd Line QName
| TokCRef String
| TokText CData
deriving Show
tokens :: String -> [Token]
tokens = tokens' . linenumber 1
tokens' :: LString -> [Token]
tokens' ((_,'<') : c@(_,'!') : cs) = special c cs
tokens' ((_,'<') : cs) = tag (dropSpace cs) -- we are being nice here
tokens' [] = []
tokens' cs@((l,_):_) = let (as,bs) = breakn ('<' ==) cs
in map cvt (decode_text as) ++ tokens' bs
-- XXX: Note, some of the lines might be a bit inacuarate
where cvt (TxtBit x) = TokText CData { cdLine = Just l
, cdVerbatim = CDataText
, cdData = x
}
cvt (CRefBit x) = case cref_to_char x of
Just c -> TokText CData { cdLine = Just l
, cdVerbatim = CDataText
, cdData = [c]
}
Nothing -> TokCRef x
special :: LChar -> LString -> [Token]
special _ ((_,'-') : (_,'-') : cs) = skip cs
where skip ((_,'-') : (_,'-') : (_,'>') : ds) = tokens' ds
skip (_ : ds) = skip ds
skip [] = [] -- unterminated comment
special c ((_,'[') : (_,'C') : (_,'D') : (_,'A') : (_,'T') : (_,'A') : (_,'[')
: cs) =
let (xs,ts) = cdata cs
in TokText CData { cdLine = Just (fst c), cdVerbatim = CDataVerbatim, cdData = xs }
: tokens' ts
where cdata ((_,']') : (_,']') : (_,'>') : ds) = ([],ds)
cdata ((_,d) : ds) = let (xs,ys) = cdata ds in (d:xs,ys)
cdata [] = ([],[])
special c cs =
let (xs,ts) = munch "" 0 cs
in TokText CData { cdLine = Just (fst c), cdVerbatim = CDataRaw, cdData = '<':'!':(reverse xs) } : tokens' ts
where munch acc nesting ((_,'>') : ds)
| nesting == (0::Int) = ('>':acc,ds)
| otherwise = munch ('>':acc) (nesting-1) ds
munch acc nesting ((_,'<') : ds)
= munch ('<':acc) (nesting+1) ds
munch acc n ((_,x) : ds) = munch (x:acc) n ds
munch acc _ [] = (acc,[]) -- unterminated DTD markup
--special c cs = tag (c : cs) -- invalid specials are processed as tags
qualName :: LString -> (QName,LString)
qualName xs = let (as,bs) = breakn endName xs
(q,n) = case break (':'==) as of
(q1,_:n1) -> (Just q1, n1)
_ -> (Nothing, as)
in (QName { qURI = Nothing, qPrefix = q, qName = n }, bs)
where endName x = isSpace x || x == '=' || x == '>' || x == '/'
tag :: LString -> [Token]
tag ((p,'/') : cs) = let (n,ds) = qualName (dropSpace cs)
in TokEnd p n : case ds of
(_,'>') : es -> tokens' es
-- tag was not properly closed...
_ -> tokens' ds
tag [] = []
tag cs = let (n,ds) = qualName cs
(as,b,ts) = attribs (dropSpace ds)
in TokStart (fst (head cs)) n as b : ts
attribs :: LString -> ([Attr], Bool, [Token])
attribs cs = case cs of
(_,'>') : ds -> ([], False, tokens' ds)
(_,'/') : ds -> ([], True, case ds of
(_,'>') : es -> tokens' es
-- insert missing > ...
_ -> tokens' ds)
(_,'?') : (_,'>') : ds -> ([], True, tokens' ds)
-- doc ended within a tag..
[] -> ([],False,[])
_ -> let (a,cs1) = attrib cs
(as,b,ts) = attribs cs1
in (a:as,b,ts)
attrib :: LString -> (Attr,LString)
attrib cs = let (ks,cs1) = qualName cs
(vs,cs2) = attr_val (dropSpace cs1)
in ((Attr ks (decode_attr vs)),dropSpace cs2)
attr_val :: LString -> (String,LString)
attr_val ((_,'=') : cs) = string (dropSpace cs)
attr_val cs = ("",cs)
dropSpace :: LString -> LString
dropSpace = dropWhile (isSpace . snd)
-- | Match the value for an attribute. For malformed XML we do
-- our best to guess the programmer's intention.
string :: LString -> (String,LString)
string ((_,'"') : cs) = break' ('"' ==) cs
-- Allow attributes to be enclosed between ' '.
string ((_,'\'') : cs) = break' ('\'' ==) cs
-- Allow attributes that are not enclosed by anything.
string cs = breakn eos cs
where eos x = isSpace x || x == '>' || x == '/'
break' :: (a -> Bool) -> [(b,a)] -> ([a],[(b,a)])
break' p xs = let (as,bs) = breakn p xs
in (as, case bs of
[] -> []
_ : cs -> cs)
breakn :: (a -> Bool) -> [(b,a)] -> ([a],[(b,a)])
breakn p l = (map snd as,bs) where (as,bs) = break (p . snd) l
decode_attr :: String -> String
decode_attr cs = concatMap cvt (decode_text cs)
where cvt (TxtBit x) = x
cvt (CRefBit x) = case cref_to_char x of
Just c -> [c]
Nothing -> '&' : x ++ ";"
data Txt = TxtBit String | CRefBit String deriving Show
decode_text :: [Char] -> [Txt]
decode_text xs@('&' : cs) = case break (';' ==) cs of
(as,_:bs) -> CRefBit as : decode_text bs
_ -> [TxtBit xs]
decode_text [] = []
decode_text cs = let (as,bs) = break ('&' ==) cs
in TxtBit as : decode_text bs
cref_to_char :: [Char] -> Maybe Char
cref_to_char cs = case cs of
'#' : ds -> num_esc ds
"lt" -> Just '<'
"gt" -> Just '>'
"amp" -> Just '&'
"apos" -> Just '\''
"quot" -> Just '"'
_ -> Nothing
num_esc :: String -> Maybe Char
num_esc cs = case cs of
'x' : ds -> check (readHex ds)
_ -> check (reads cs)
where check [(n,"")] = cvt_char n
check _ = Nothing
cvt_char :: Int -> Maybe Char
cvt_char x
| fromEnum (minBound :: Char) <= x && x <= fromEnum (maxBound::Char)
= Just (toEnum x)
| otherwise = Nothing
preprocess :: String -> String
preprocess ('\r' : '\n' : cs) = '\n' : preprocess cs
preprocess ('\r' : cs) = '\n' : preprocess cs
preprocess (c : cs) = c : preprocess cs
preprocess [] = []
linenumber :: Line -> String -> LString
linenumber _ [] = []
linenumber n ('\n':s) = n' `seq` ((n,'\n'):linenumber n' s) where n' = n + 1
linenumber n (c:s) = (n,c) : linenumber n s

View file

@ -1,150 +0,0 @@
--------------------------------------------------------------------
-- |
-- Module : Text.XML.Light.Output
-- Copyright : (c) Galois, Inc. 2007
-- License : BSD3
--
-- Maintainer: Iavor S. Diatchki <diatchki@galois.com>
-- Stability : provisional
-- Portability:
--
-- Output handling for the lightweight XML lib.
--
module Text.XML.Light.Output
( showTopElement, showContent, showElement, showCData, showQName, showAttr
, ppTopElement, ppContent, ppElement
, tagEnd, xml_header
) where
import Text.XML.Light.Types
import Data.Char
import Data.List ( isPrefixOf )
-- | The XML 1.0 header
xml_header :: String
xml_header = "<?xml version='1.0' ?>"
-- | Pretty printing renders XML documents faithfully,
-- with the exception that whitespace may be added\/removed
-- in non-verbatim character data.
ppTopElement :: Element -> String
ppTopElement e = unlines [xml_header,ppElement e]
-- | Pretty printing elements
ppElement :: Element -> String
ppElement e = ppElementS "" e ""
-- | Pretty printing content
ppContent :: Content -> String
ppContent x = ppContentS "" x ""
-- | Pretty printing content using ShowS
ppContentS :: String -> Content -> ShowS
ppContentS i x xs = case x of
Elem e -> ppElementS i e xs
Text c -> ppCData i c xs
CRef r -> showCRefS r xs
ppElementS :: String -> Element -> ShowS
ppElementS i e xs = i ++ (tagStart (elName e) (elAttribs e) $
case elContent e of
[]
| not ("?xml" `isPrefixOf` (qName $ elName e)) -> " />" ++ xs
| otherwise -> " ?>" ++ xs
[Text t] -> ">" ++ ppCData "" t (tagEnd (elName e) xs)
cs -> ">\n" ++ foldr ppSub (i ++ tagEnd (elName e) xs) cs
where ppSub e1 = ppContentS (" " ++ i) e1 . showChar '\n'
)
ppCData :: String -> CData -> ShowS
ppCData i c xs = i ++ if (cdVerbatim c /= CDataText )
then showCDataS c xs
else foldr cons xs (showCData c)
where cons :: Char -> String -> String
cons '\n' ys = "\n" ++ i ++ ys
cons y ys = y : ys
--------------------------------------------------------------------------------
-- | Adds the <?xml?> header.
showTopElement :: Element -> String
showTopElement c = xml_header ++ showElement c
showContent :: Content -> String
showContent c = showContentS c ""
showElement :: Element -> String
showElement c = showElementS c ""
showCData :: CData -> String
showCData c = showCDataS c ""
-- Note: crefs should not contain '&', ';', etc.
showCRefS :: String -> ShowS
showCRefS r xs = '&' : r ++ ';' : xs
-- | Good for transmition (no extra white space etc.) but less readable.
showContentS :: Content -> ShowS
showContentS (Elem e) = showElementS e
showContentS (Text cs) = showCDataS cs
showContentS (CRef cs) = showCRefS cs
-- | Good for transmition (no extra white space etc.) but less readable.
showElementS :: Element -> ShowS
showElementS e xs =
tagStart (elName e) (elAttribs e)
$ case elContent e of
[] -> " />" ++ xs
ch -> '>' : foldr showContentS (tagEnd (elName e) xs) ch
-- | Convert a text element to characters.
showCDataS :: CData -> ShowS
showCDataS cd =
case cdVerbatim cd of
CDataText -> escStr (cdData cd)
CDataVerbatim -> showString "<![CDATA[" . escCData (cdData cd) . showString "]]>"
CDataRaw -> \ xs -> cdData cd ++ xs
--------------------------------------------------------------------------------
escCData :: String -> ShowS
escCData (']' : ']' : '>' : cs) = showString "]]]]><![CDATA[>" . escCData cs
escCData (c : cs) = showChar c . escCData cs
escCData [] = id
escChar :: Char -> ShowS
escChar c = case c of
'<' -> showString "&lt;"
'>' -> showString "&gt;"
'&' -> showString "&amp;"
'"' -> showString "&quot;"
'\'' -> showString "&apos;"
-- XXX: Is this really wortherd?
-- We could deal with these issues when we convert characters to bytes.
_ | (oc <= 0x7f && isPrint c) || c == '\n' || c == '\r' -> showChar c
| otherwise -> showString "&#" . shows oc . showChar ';'
where oc = ord c
escStr :: String -> ShowS
escStr cs rs = foldr escChar rs cs
tagEnd :: QName -> ShowS
tagEnd qn rs = '<':'/':showQName qn ++ '>':rs
tagStart :: QName -> [Attr] -> ShowS
tagStart qn as rs = '<':showQName qn ++ as_str ++ rs
where as_str = if null as then "" else ' ' : unwords (map showAttr as)
showAttr :: Attr -> String
showAttr (Attr qn v) = showQName qn ++ '=' : '"' : escStr v "\""
showQName :: QName -> String
showQName q = pre ++ qName q
where pre = case qPrefix q of
Nothing -> ""
Just p -> p ++ ":"

View file

@ -1,103 +0,0 @@
--------------------------------------------------------------------
-- |
-- Module : Text.XML.Light.Proc
-- Copyright : (c) Galois, Inc. 2007
-- License : BSD3
--
-- Maintainer: Iavor S. Diatchki <diatchki@galois.com>
-- Stability : provisional
-- Portability:
--
--------------------------------------------------------------------
module Text.XML.Light.Proc where
import Text.XML.Light.Types
import Data.Maybe(listToMaybe)
import Data.List(find)
-- | Get the text value of an XML element. This function
-- ignores non-text elements, and concatenates all text elements.
strContent :: Element -> String
strContent e = concatMap cdData $ onlyText $ elContent e
-- | Select only the elements from a list of XML content.
onlyElems :: [Content] -> [Element]
onlyElems xs = [ x | Elem x <- xs ]
-- | Select only the elements from a parent.
elChildren :: Element -> [Element]
elChildren e = [ x | Elem x <- elContent e ]
-- | Select only the text from a list of XML content.
onlyText :: [Content] -> [CData]
onlyText xs = [ x | Text x <- xs ]
-- | Find all immediate children with the given name.
findChildren :: QName -> Element -> [Element]
findChildren q e = filterChildren ((q ==) . elName) e
-- | Filter all immediate children wrt a given predicate.
filterChildren :: (Element -> Bool) -> Element -> [Element]
filterChildren p e = filter p (onlyElems (elContent e))
-- | Filter all immediate children wrt a given predicate over their names.
filterChildrenName :: (QName -> Bool) -> Element -> [Element]
filterChildrenName p e = filter (p.elName) (onlyElems (elContent e))
-- | Find an immediate child with the given name.
findChild :: QName -> Element -> Maybe Element
findChild q e = listToMaybe (findChildren q e)
-- | Find an immediate child with the given name.
filterChild :: (Element -> Bool) -> Element -> Maybe Element
filterChild p e = listToMaybe (filterChildren p e)
-- | Find an immediate child with name matching a predicate.
filterChildName :: (QName -> Bool) -> Element -> Maybe Element
filterChildName p e = listToMaybe (filterChildrenName p e)
-- | Find the left-most occurrence of an element matching given name.
findElement :: QName -> Element -> Maybe Element
findElement q e = listToMaybe (findElements q e)
-- | Filter the left-most occurrence of an element wrt. given predicate.
filterElement :: (Element -> Bool) -> Element -> Maybe Element
filterElement p e = listToMaybe (filterElements p e)
-- | Filter the left-most occurrence of an element wrt. given predicate.
filterElementName :: (QName -> Bool) -> Element -> Maybe Element
filterElementName p e = listToMaybe (filterElementsName p e)
-- | Find all non-nested occurances of an element.
-- (i.e., once we have found an element, we do not search
-- for more occurances among the element's children).
findElements :: QName -> Element -> [Element]
findElements qn e = filterElementsName (qn==) e
-- | Find all non-nested occurrences of an element wrt. given predicate.
-- (i.e., once we have found an element, we do not search
-- for more occurances among the element's children).
filterElements :: (Element -> Bool) -> Element -> [Element]
filterElements p e
| p e = [e]
| otherwise = concatMap (filterElements p) $ onlyElems $ elContent e
-- | Find all non-nested occurences of an element wrt a predicate over element names.
-- (i.e., once we have found an element, we do not search
-- for more occurances among the element's children).
filterElementsName :: (QName -> Bool) -> Element -> [Element]
filterElementsName p e = filterElements (p.elName) e
-- | Lookup the value of an attribute.
findAttr :: QName -> Element -> Maybe String
findAttr x e = attrVal `fmap` find ((x ==) . attrKey) (elAttribs e)
-- | Lookup attribute name from list.
lookupAttr :: QName -> [Attr] -> Maybe String
lookupAttr x as = attrVal `fmap` find ((x ==) . attrKey) as

View file

@ -1,91 +0,0 @@
--------------------------------------------------------------------
-- |
-- Module : Text.XML.Light.Types
-- Copyright : (c) Galois, Inc. 2007
-- License : BSD3
--
-- Maintainer: Iavor S. Diatchki <diatchki@galois.com>
-- Stability : provisional
-- Portability:
--
-- Basic XML types.
--
module Text.XML.Light.Types where
-- | A line is an Integer
type Line = Integer
-- | XML content
data Content = Elem Element
| Text CData
| CRef String
deriving Show
-- | XML elements
data Element = Element {
elName :: QName,
elAttribs :: [Attr],
elContent :: [Content],
elLine :: Maybe Line
} deriving Show
-- | XML attributes
data Attr = Attr {
attrKey :: QName,
attrVal :: String
} deriving (Eq,Ord,Show)
-- | XML CData
data CData = CData {
cdVerbatim :: CDataKind,
cdData :: String,
cdLine :: Maybe Line
} deriving Show
data CDataKind
= CDataText -- ^ Ordinary character data; pretty printer escapes &, < etc.
| CDataVerbatim -- ^ Unescaped character data; pretty printer embeds it in <![CDATA[..
| CDataRaw -- ^ As-is character data; pretty printer passes it along without any escaping or CDATA wrap-up.
deriving ( Eq, Show )
-- | XML qualified names
data QName = QName {
qName :: String,
qURI :: Maybe String,
qPrefix :: Maybe String
} deriving Show
instance Eq QName where
q1 == q2 = compare q1 q2 == EQ
instance Ord QName where
compare q1 q2 =
case compare (qName q1) (qName q2) of
EQ -> case (qURI q1, qURI q2) of
(Nothing,Nothing) -> compare (qPrefix q1) (qPrefix q2)
(u1,u2) -> compare u1 u2
x -> x
-- blank elements --------------------------------------------------------------
-- | Blank names
blank_name :: QName
blank_name = QName { qName = "", qURI = Nothing, qPrefix = Nothing }
-- | Blank cdata
blank_cdata :: CData
blank_cdata = CData { cdVerbatim = CDataText, cdData = "", cdLine = Nothing }
-- | Blank elements
blank_element :: Element
blank_element = Element
{ elName = blank_name
, elAttribs = []
, elContent = []
, elLine = Nothing
}

6
debian/control vendored
View file

@ -2,7 +2,7 @@ Source: pandoc
Section: text
Priority: optional
Maintainer: Recai Oktaş <roktas@debian.org>
Build-Depends: debhelper (>= 4.0.0), haskell-devscripts (>=0.5.12), ghc6 (>= 6.8.2-1), libghc6-xhtml-dev, libghc6-mtl-dev, libghc6-network-dev, zip
Build-Depends: debhelper (>= 4.0.0), haskell-devscripts (>=0.5.12), ghc6 (>= 6.8.2-1), libghc6-xhtml-dev, libghc6-mtl-dev, libghc6-network-dev
Build-Depends-Indep: haddock
Standards-Version: 3.7.3
Homepage: http://johnmacfarlane.net/pandoc/
@ -12,7 +12,7 @@ XS-Vcs-Browser: http://pandoc.googlecode.com/svn/trunk
Package: pandoc
Section: text
Architecture: any
Depends: ${shlibs:Depends}, zip, texlive-latex-recommended | tetex-extra
Depends: ${shlibs:Depends}, texlive-latex-recommended | tetex-extra
Suggests: tidy, wget | w3m
Description: general markup converter
Pandoc is a Haskell library for converting from one markup format to
@ -38,7 +38,7 @@ Description: general markup converter
Package: libghc6-pandoc-dev
Section: libdevel
Architecture: any
Depends: ${haskell:Depends}, libghc6-xhtml-dev, libghc6-mtl-dev, libghc6-network-dev, zip, texlive-latex-recommended | tetex-extra
Depends: ${haskell:Depends}, libghc6-xhtml-dev, libghc6-mtl-dev, libghc6-network-dev, texlive-latex-recommended | tetex-extra
Suggests: pandoc-doc
Description: general markup converter
Pandoc is a Haskell library for converting from one markup format to

62
debian/copyright vendored
View file

@ -57,68 +57,6 @@ by Eric A. Meyer
Released under an explicit Public Domain License
----------------------------------------------------------------------
System/IO/UTF8.hs and Codec/Binary/UTF8/String.hs
from the utf8-string package on HackageDB
Copyright (c) 2007, Galois Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Galois Inc. nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY Galois Inc. ``AS IS'' AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL Galois Inc. BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
----------------------------------------------------------------------
Text/XML/Light/*
from the xml package on HackageDB
(c) 2007 Galois Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the author nor the names of his contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS ``AS IS'' AND ANY EXPRESS
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
------------------------------------------------------------------------
Diff.hs in tests/
from the Diff package v 0.1.2 (Data.Algorithm.Diff)

View file

@ -1,17 +0,0 @@
<?xml version="1.0" encoding="utf-8" ?>
<office:document-content xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" xmlns:style="urn:oasis:names:tc:opendocument:xmlns:style:1.0" xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0" xmlns:table="urn:oasis:names:tc:opendocument:xmlns:table:1.0" xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" xmlns:fo="urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0" xmlns:number="urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" xmlns:svg="urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0" xmlns:chart="urn:oasis:names:tc:opendocument:xmlns:chart:1.0" xmlns:dr3d="urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" xmlns:math="http://www.w3.org/1998/Math/MathML" xmlns:form="urn:oasis:names:tc:opendocument:xmlns:form:1.0" xmlns:script="urn:oasis:names:tc:opendocument:xmlns:script:1.0" xmlns:ooo="http://openoffice.org/2004/office" xmlns:ooow="http://openoffice.org/2004/writer" xmlns:oooc="http://openoffice.org/2004/calc" xmlns:dom="http://www.w3.org/2001/xml-events" xmlns:xforms="http://www.w3.org/2002/xforms" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" office:version="1.0">
<office:scripts />
<office:font-face-decls>
<style:font-face style:name="&amp;apos;Lucida Sans Unicode&amp;apos;" svg:font-family="Lucida Sans Unicode" />
<style:font-face style:name="&amp;apos;Tahoma&amp;apos;" svg:font-family="Tahoma" />
<style:font-face style:name="&amp;apos;Times New Roman&amp;apos;" svg:font-family="Times New Roman" />
</office:font-face-decls>
<office:automatic-styles>
</office:automatic-styles>
<office:body>
<office:text>
</office:text>
</office:body>
</office:document-content>

View file

@ -38,12 +38,12 @@ Extra-Source-Files:
-- documentation
README, INSTALL, COPYRIGHT, COPYING, changelog,
-- sources for man pages
man/man1/pandoc.1.md,
man/man1/pandoc.1.md, man/man1/markdown2pdf.1.md,
man/man1/html2markdown.1.md, man/man1/hsmarkdown.1.md,
-- Makefile
Makefile,
-- wrappers
html2markdown, hsmarkdown,
markdown2pdf, html2markdown, hsmarkdown,
-- data for DefaultHeaders.hs
data/headers/ConTeXt.header,
data/headers/Docbook.header,
@ -65,13 +65,11 @@ Extra-Source-Files:
data/ui/default/print.css,
-- data for ODT writer
odt-styles/meta.xml,
odt-styles/content.xml,
odt-styles/settings.xml,
odt-styles/META-INF/manifest.xml,
odt-styles/Thumbnails/thumbnail.png,
odt-styles/styles.xml,
odt-styles/mimetype,
odt-styles/reference.odt,
-- tests
tests/bodybg.gif,
tests/writer.latex,
@ -151,7 +149,7 @@ Library
Exposed-Modules: Text.Pandoc.Biblio
cpp-options: -D_CITEPROC
Build-Depends: parsec < 3, xhtml, mtl, network, filepath, process, directory,
template-haskell, bytestring
template-haskell, bytestring, zip-archive, utf8-string, old-time
Hs-Source-Dirs: .
Exposed-Modules: Text.Pandoc,
Text.Pandoc.Blocks,
@ -180,17 +178,7 @@ Library
Text.Pandoc.Writers.RTF,
Text.Pandoc.Writers.S5
Other-Modules: Text.Pandoc.XML,
Text.Pandoc.TH,
-- from xml package
Text.XML.Light,
Text.XML.Light.Types,
Text.XML.Light.Output,
Text.XML.Light.Input,
Text.XML.Light.Proc,
Text.XML.Light.Cursor,
-- from utf8-string package
System.IO.UTF8,
Codec.Binary.UTF8.String
Text.Pandoc.TH
Extensions: CPP, TemplateHaskell, FlexibleInstances
Ghc-Options: -O2 -Wall -threaded
Ghc-Prof-Options: -auto-all