diff --git a/src/Data/ByteString/Lazy/Char8/Util.hs b/src/Data/ByteString/Lazy/Char8/Util.hs index 5686bf1..831be25 100644 --- a/src/Data/ByteString/Lazy/Char8/Util.hs +++ b/src/Data/ByteString/Lazy/Char8/Util.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NamedFieldPuns #-} module Data.ByteString.Lazy.Char8.Util ( previous , subBS diff --git a/src/PDF.hs b/src/PDF.hs index 9cfb05b..1f6cba1 100644 --- a/src/PDF.hs +++ b/src/PDF.hs @@ -1,6 +1,4 @@ -{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE NamedFieldPuns #-} -{-# LANGUAGE MultiParamTypeClasses #-} module PDF ( Document(..) , DirectObject(..) @@ -18,7 +16,8 @@ import qualified Data.Map as Map (lookup) import PDF.Body (populate) import qualified PDF.EOL as EOL (Style(..), charset, parser) import PDF.Object ( - Content(..), DirectObject(..), Name(..), Number(..), Structure(..) + Content(..), DirectObject(..), InputStructure(..), Name(..), Number(..) + , Structure(..) , eofMarker, magicNumber, structure ) import qualified PDF.Output as Output (render, string) @@ -89,17 +88,17 @@ findNextSection offset input = then newOffset + findNextLine newInput else findNextSection (newOffset + 1) (BS.drop 1 newInput) -readStructures :: Int64 -> ByteString -> Either ParseError [Structure] +readStructures :: Int64 -> ByteString -> Either ParseError [InputStructure] readStructures startXref input = parse structure ('@':show startXref) (BS.drop startXref input) >>= stopOrFollow where - stopOrFollow c@(Structure {inputTrailer}) = - case Map.lookup (Name "Prev") inputTrailer of - Nothing -> Right [c {startOffset = findNextLine input}] + stopOrFollow s@(Structure {trailer}) = + case Map.lookup (Name "Prev") trailer of + Nothing -> Right [InputStructure (findNextLine input) s] Just (NumberObject (Number newStartXref)) -> let offset = truncate newStartXref in let startOffset = findNextSection offset (BS.drop offset input) in - (c {startOffset}:) <$> (readStructures offset input) + (InputStructure startOffset s:) <$> (readStructures offset input) Just v -> parseError $ "Bad value for Prev entry in trailer: " ++ show v parseDocument :: ByteString -> Either ParseError Document diff --git a/src/PDF/Body.hs b/src/PDF/Body.hs index d898cdc..23d4a9c 100644 --- a/src/PDF/Body.hs +++ b/src/PDF/Body.hs @@ -10,16 +10,16 @@ import Data.Map ((!)) import qualified Data.Map as Map (empty, insert, lookup) import qualified PDF.EOL as EOL (charset, parser) import PDF.Object ( - Content(..), DirectObject(..), Flow(..), IndirectObjCoordinates(..), Name(..) - , Number(..), Object(..), Occurrence(..), Parser, Structure(..) - , XRefEntry(..), XRefSection, XRefSubSection(..) + Content(..), DirectObject(..), Flow(..), IndirectObjCoordinates(..) + , InputStructure(..), Name(..), Number(..), Object(..), Occurrence(..) + , Parser, Structure(..), XRefEntry(..), XRefSection, XRefSubSection(..) , blank, dictionary, directObject, integer, line ) import Text.Parsec data UserState = UserState { input :: ByteString - , structure :: Structure + , xreferences :: XRefSection , flow :: Flow } @@ -55,8 +55,8 @@ lookupOffset objectId (xrefSubSection:others) = getOffset :: Int -> SParser Int64 getOffset objectId = do - Structure {xrefSection} <- structure <$> getState - case lookupOffset objectId xrefSection of + table <- xreferences <$> getState + case lookupOffset objectId table of Nothing -> fail $ "obj " ++ show objectId ++ " is referenced but missing in XRef table" Just offset -> return offset @@ -114,19 +114,23 @@ indirectObjCoordinates = do occurrence :: SParser Occurrence occurrence = Comment <$> comment <|> Indirect <$> indirectObjCoordinates -populate :: ByteString -> Structure -> Content +populate :: ByteString -> InputStructure -> Content populate input structure = let bodyInput = BS.drop (startOffset structure) input in case runParser recurseOnOccurrences initialState "" bodyInput of - Left _ -> emptyContent + Left _ -> Content {occurrences = [], objects = Map.empty, docStructure} Right finalState -> let Flow {occurrencesStack, tmpObjects} = flow finalState in - Content {occurrences = reverse occurrencesStack, objects = tmpObjects, trailer} + Content { + occurrences = reverse occurrencesStack, objects = tmpObjects, docStructure + } where - trailer = inputTrailer structure - emptyContent = Content {occurrences = [], objects = Map.empty, trailer} + docStructure = inputStructure structure + xreferences = xrefSection docStructure initialState = UserState { - input, structure, flow = Flow {occurrencesStack = [], tmpObjects = Map.empty} + input, xreferences, flow = Flow { + occurrencesStack = [], tmpObjects = Map.empty + } } recurseOnOccurrences :: SParser UserState diff --git a/src/PDF/Object.hs b/src/PDF/Object.hs index 3c8a179..f67d130 100644 --- a/src/PDF/Object.hs +++ b/src/PDF/Object.hs @@ -1,12 +1,12 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE MultiParamTypeClasses #-} module PDF.Object ( Content(..) , DirectObject(..) , Flow(..) , IndirectObjCoordinates(..) + , InputStructure(..) , Name(..) , Number(..) , Object(..) @@ -28,7 +28,7 @@ module PDF.Object ( import Data.ByteString.Lazy.Char8 (ByteString) import Data.Int (Int64) -import Data.Map (Map, (!)) +import Data.Map (Map, (!), mapWithKey) import qualified Data.Map as Map (elems, fromList, toList) import qualified PDF.EOL as EOL (charset, parser) import qualified PDF.Output as Output (string) @@ -298,25 +298,33 @@ type XRefSection = [XRefSubSection] -- -- Structure -- -data Structure = Structure { +data InputStructure = InputStructure { startOffset :: Int64 - , xrefSection :: XRefSection - , inputTrailer :: Dictionary - --, startXRef :: Int64 + , inputStructure :: Structure + } + +data Structure = Structure { + xrefSection :: XRefSection + , trailer :: Dictionary } deriving Show structure :: Parser u Structure structure = - Structure 0 + Structure <$> (line "xref" *> xrefSubSection `sepBy` EOL.parser) <*> (line "trailer" *> dictionary <* EOL.parser) - -- <*> (line "startxref" *> integer) -xrefFromOffsets :: Map Offset Int64 -> (XRefSection, Int64) -xrefFromOffsets offsets = ( - [] +updateXrefs :: XRefSection -> Map Offset Int64 -> (XRefSection, Int64) +updateXrefs xrefSection offsets = ( + updateSubSection <$> xrefSection , offsets ! StartXRef ) + where + updateSubSection subSection@(XRefSubSection {firstObjectId, entries}) = + subSection {entries = mapWithKey (updateEntry firstObjectId) entries} + updateEntry firstObjectId index e@(InUse {}) = + e {offset = offsets ! (ObjectId $ firstObjectId + index)} + updateEntry _ _ e = e -- -- Flow @@ -332,7 +340,7 @@ data Flow = Flow { data Content = Content { occurrences :: [Occurrence] , objects :: Map Int Object - , trailer :: Dictionary + , docStructure :: Structure } deriving Show outputBody :: ([Occurrence], Map Int Object) -> OBuilder @@ -340,15 +348,16 @@ outputBody (occurrences, objects) = output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef instance Output Content where - output (Content {occurrences, objects, trailer}) = - fmap xrefFromOffsets <$> getOffsets (outputBody (occurrences, objects)) + output (Content {occurrences, objects, docStructure}) = + fmap (updateXrefs xrefSection) <$> getOffsets (outputBody (occurrences, objects)) >>= \(body, (xref, startXRef)) -> body `mappend` "xref" - `nextLine` "trailer" - --`nextLine` output xrefSection - --`mappend` "trailer" + `nextLine` output xref + `mappend` "trailer" `nextLine` output trailer `nextLine` "startxref" `nextLine` (Output.string (printf "%d" startXRef)) `nextLine` byteString eofMarker + where + Structure {xrefSection, trailer} = docStructure