Implement PDF's multilayer updates and use it in getObj to display only the current version of the object taken into account instead of the concatenation of all its versions
This commit is contained in:
parent
3a39c75e6a
commit
68f90d20e2
|
@ -20,6 +20,7 @@ library
|
|||
, PDF.EOL
|
||||
, PDF.Object
|
||||
, PDF.Output
|
||||
, PDF.Update
|
||||
other-modules: Data.ByteString.Char8.Util
|
||||
, PDF.Body
|
||||
, PDF.Parser
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
import Codec.Compression.Zlib (decompress)
|
||||
import Data.ByteString.Lazy.Char8 (ByteString)
|
||||
import qualified Data.ByteString.Char8 as BS (readFile)
|
||||
import qualified Data.ByteString.Lazy.Char8 as Lazy (concat, fromStrict, putStr, toStrict)
|
||||
import qualified Data.ByteString.Lazy.Char8 as Lazy (fromStrict, putStr, toStrict)
|
||||
import Data.Map ((!))
|
||||
import qualified Data.Map as Map (lookup)
|
||||
import PDF (Document(..), parseDocument)
|
||||
|
@ -11,6 +11,7 @@ import qualified PDF.EOL as EOL (Style)
|
|||
import PDF.Object (Content(..), DirectObject(..), Object(..), Name(..))
|
||||
import PDF.Output (ObjectId(..))
|
||||
import qualified PDF.Output as Output (render)
|
||||
import PDF.Update (unify)
|
||||
import System.Environment (getArgs)
|
||||
import System.IO (hPutStrLn, stderr)
|
||||
|
||||
|
@ -26,12 +27,14 @@ display eolStyle s@(Stream {header, streamContent}) = Output.render eolStyle $
|
|||
|
||||
extractObject :: ObjectId -> Document -> ByteString
|
||||
extractObject objectId (Document {eolStyle, updates}) =
|
||||
Lazy.concat $ display eolStyle . (!objectId) . objects <$> updates
|
||||
display eolStyle . (!objectId) $ objects content
|
||||
where
|
||||
content = unify updates
|
||||
|
||||
main :: IO ()
|
||||
main = do
|
||||
[inputFile, objectId] <- getArgs
|
||||
result <- parseDocument <$> BS.readFile inputFile
|
||||
case result of
|
||||
Left parseError -> hPutStrLn stderr $ show parseError
|
||||
Left parseError -> hPutStrLn stderr parseError
|
||||
Right doc -> Lazy.putStr $ extractObject (ObjectId (read objectId)) doc
|
||||
|
|
|
@ -0,0 +1,47 @@
|
|||
{-# LANGUAGE NamedFieldPuns #-}
|
||||
module PDF.Update (
|
||||
unify
|
||||
) where
|
||||
|
||||
import Data.Map (member)
|
||||
import qualified Data.Map as Map (empty, union)
|
||||
import PDF.Object (
|
||||
Content(..), IndexedObjects, IndirectObjCoordinates(..), Occurrence(..)
|
||||
, Structure(..)
|
||||
)
|
||||
|
||||
emptyContent :: Content
|
||||
emptyContent = Content {
|
||||
docStructure = Structure {xRef = Map.empty, trailer = Map.empty}
|
||||
, objects = Map.empty
|
||||
, occurrences = []
|
||||
}
|
||||
|
||||
unify :: [Content] -> Content
|
||||
unify = foldl complete emptyContent
|
||||
where
|
||||
complete tmpContent older =
|
||||
let mergedObjects = Map.union (objects tmpContent) (objects older) in
|
||||
Content {
|
||||
docStructure =
|
||||
unifyDocStructure (docStructure tmpContent) (docStructure older)
|
||||
, objects = mergedObjects
|
||||
, occurrences =
|
||||
unifyOccurrences mergedObjects (occurrences tmpContent) (occurrences older)
|
||||
}
|
||||
|
||||
unifyDocStructure :: Structure -> Structure -> Structure
|
||||
unifyDocStructure update original = Structure {
|
||||
xRef = Map.union (xRef update) (xRef original)
|
||||
, trailer = Map.union (trailer update) (trailer original)
|
||||
}
|
||||
|
||||
unifyOccurrences :: IndexedObjects -> [Occurrence] -> [Occurrence] -> [Occurrence]
|
||||
unifyOccurrences objects update = foldr addOlder update
|
||||
where
|
||||
addOlder occurrence@(Comment _) existing = occurrence : existing
|
||||
addOlder occurrence@(Indirect indirect) existing =
|
||||
if objectId indirect `member` objects
|
||||
then occurrence : existing
|
||||
else existing
|
||||
|
Loading…
Reference in New Issue