diff --git a/Hufflepdf.cabal b/Hufflepdf.cabal index d1e0dfa..2b7e561 100644 --- a/Hufflepdf.cabal +++ b/Hufflepdf.cabal @@ -20,6 +20,7 @@ library , PDF.EOL , PDF.Object , PDF.Output + , PDF.Update other-modules: Data.ByteString.Char8.Util , PDF.Body , PDF.Parser diff --git a/examples/getObj.hs b/examples/getObj.hs index d824910..a758179 100644 --- a/examples/getObj.hs +++ b/examples/getObj.hs @@ -3,7 +3,7 @@ import Codec.Compression.Zlib (decompress) import Data.ByteString.Lazy.Char8 (ByteString) import qualified Data.ByteString.Char8 as BS (readFile) -import qualified Data.ByteString.Lazy.Char8 as Lazy (concat, fromStrict, putStr, toStrict) +import qualified Data.ByteString.Lazy.Char8 as Lazy (fromStrict, putStr, toStrict) import Data.Map ((!)) import qualified Data.Map as Map (lookup) import PDF (Document(..), parseDocument) @@ -11,6 +11,7 @@ import qualified PDF.EOL as EOL (Style) import PDF.Object (Content(..), DirectObject(..), Object(..), Name(..)) import PDF.Output (ObjectId(..)) import qualified PDF.Output as Output (render) +import PDF.Update (unify) import System.Environment (getArgs) import System.IO (hPutStrLn, stderr) @@ -26,12 +27,14 @@ display eolStyle s@(Stream {header, streamContent}) = Output.render eolStyle $ extractObject :: ObjectId -> Document -> ByteString extractObject objectId (Document {eolStyle, updates}) = - Lazy.concat $ display eolStyle . (!objectId) . objects <$> updates + display eolStyle . (!objectId) $ objects content + where + content = unify updates main :: IO () main = do [inputFile, objectId] <- getArgs result <- parseDocument <$> BS.readFile inputFile case result of - Left parseError -> hPutStrLn stderr $ show parseError + Left parseError -> hPutStrLn stderr parseError Right doc -> Lazy.putStr $ extractObject (ObjectId (read objectId)) doc diff --git a/src/PDF/Update.hs b/src/PDF/Update.hs new file mode 100644 index 0000000..57ced3c --- /dev/null +++ b/src/PDF/Update.hs @@ -0,0 +1,47 @@ +{-# LANGUAGE NamedFieldPuns #-} +module PDF.Update ( + unify + ) where + +import Data.Map (member) +import qualified Data.Map as Map (empty, union) +import PDF.Object ( + Content(..), IndexedObjects, IndirectObjCoordinates(..), Occurrence(..) + , Structure(..) + ) + +emptyContent :: Content +emptyContent = Content { + docStructure = Structure {xRef = Map.empty, trailer = Map.empty} + , objects = Map.empty + , occurrences = [] + } + +unify :: [Content] -> Content +unify = foldl complete emptyContent + where + complete tmpContent older = + let mergedObjects = Map.union (objects tmpContent) (objects older) in + Content { + docStructure = + unifyDocStructure (docStructure tmpContent) (docStructure older) + , objects = mergedObjects + , occurrences = + unifyOccurrences mergedObjects (occurrences tmpContent) (occurrences older) + } + +unifyDocStructure :: Structure -> Structure -> Structure +unifyDocStructure update original = Structure { + xRef = Map.union (xRef update) (xRef original) + , trailer = Map.union (trailer update) (trailer original) + } + +unifyOccurrences :: IndexedObjects -> [Occurrence] -> [Occurrence] -> [Occurrence] +unifyOccurrences objects update = foldr addOlder update + where + addOlder occurrence@(Comment _) existing = occurrence : existing + addOlder occurrence@(Indirect indirect) existing = + if objectId indirect `member` objects + then occurrence : existing + else existing +