diff --git a/Hufflepdf.cabal b/Hufflepdf.cabal index 3b91000..6921711 100644 --- a/Hufflepdf.cabal +++ b/Hufflepdf.cabal @@ -17,6 +17,7 @@ cabal-version: >=1.10 library exposed-modules: Data.OrderedMap + , Data.Id , PDF , PDF.Box , PDF.CMap @@ -88,6 +89,7 @@ Test-Suite unitTests main-is: Main.hs other-modules: Object , Data.ByteString.Char8.Util + , Data.Id , PDF.EOL , PDF.Parser , PDF.Object diff --git a/examples/getObj.hs b/examples/getObj.hs index 01ff2e9..48bc7f5 100644 --- a/examples/getObj.hs +++ b/examples/getObj.hs @@ -7,6 +7,7 @@ import Control.Monad.Reader (ReaderT, runReaderT) import Data.ByteString.Lazy.Char8 (ByteString) import qualified Data.ByteString.Char8 as BS (readFile) import qualified Data.ByteString.Lazy.Char8 as Lazy (putStrLn) +import Data.Id (Id(..)) import PDF (Document(..), parseDocument) import PDF.Box (Box(..)) import PDF.Layer (Layer(..), unify) @@ -14,7 +15,7 @@ import PDF.Object (Object(..)) import PDF.Object.Navigation ( Clear(..), Raw(..), (//), objectById, origin ) -import PDF.Output (ObjectId(..), Output) +import PDF.Output (Output) import qualified PDF.Output as Output (render) import Prelude hiding (fail) import System.Environment (getArgs, getProgName) @@ -41,7 +42,7 @@ parse [inputFile] = return (inputFile, display origin) parse [inputFile, key] = return (inputFile, clear . maybe (byPath key) byId $ readMaybe key) where - byId = objectById . ObjectId + byId = objectById . Id byPath path = origin // (explode path) explode "" = [] explode path = diff --git a/examples/getText.hs b/examples/getText.hs index c70598e..7ec1eca 100644 --- a/examples/getText.hs +++ b/examples/getText.hs @@ -3,6 +3,7 @@ import Control.Monad ((>=>)) import Control.Monad.IO.Class (liftIO) import Control.Monad.Reader (runReaderT) import qualified Data.ByteString.Char8 as BS (readFile) +import Data.Id (Id(..)) import Data.Map (mapWithKey) import qualified Data.Map as Map (toList) import Data.OrderedMap (mapi) @@ -11,7 +12,6 @@ import PDF (UnifiedLayers(..), parseDocument) import PDF.Box (Box(..)) import PDF.Content.Text (Chunks(..)) import PDF.Layer (Layer) -import PDF.Output (ObjectId(..)) import PDF.Pages ( Contents(..), FontCache, Page(..), PageNumber(..), Pages(..), cacheFonts, withResources ) @@ -30,7 +30,7 @@ displayPage = withResources ( where display a b v = liftIO . putStrLn $ - printf "%d@%s: %s" (getObjectId a) (show b) (Text.unpack v) + printf "%d@%d: %s" (getId a) (getId b) (Text.unpack v) getAll :: Layer -> IO () getAll layer = diff --git a/src/Data/Id.hs b/src/Data/Id.hs new file mode 100644 index 0000000..f7e4683 --- /dev/null +++ b/src/Data/Id.hs @@ -0,0 +1,12 @@ +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +module Data.Id ( + Id(..) + , Indexed + ) where + +import Data.Map (Map) + +newtype Id a = Id { + getId :: Int + } deriving (Eq, Enum, Ord, Show) +type Indexed a = Map (Id a) a diff --git a/src/PDF/Body.hs b/src/PDF/Body.hs index f5d81ed..5741a24 100644 --- a/src/PDF/Body.hs +++ b/src/PDF/Body.hs @@ -9,6 +9,7 @@ import Control.Monad.State (get, gets, modify) import Data.Attoparsec.ByteString.Char8 (option) import Data.ByteString.Char8 (ByteString) import qualified Data.ByteString.Char8 as BS (cons, drop, unpack) +import Data.Id (Id(..)) import Data.Map ((!)) import qualified Data.Map as Map (empty, insert, lookup) import qualified PDF.EOL as EOL (charset, parser) @@ -19,7 +20,7 @@ import PDF.Object ( , Structure(..), XRefEntry(..), XRefSection , blank, dictionary, directObject, integer, line ) -import PDF.Output (ObjectId(..), Offset(..)) +import PDF.Output (Offset(..)) import PDF.Parser (MonadParser(..), Parser, (), evalParser, on) data UserState = UserState { @@ -33,7 +34,7 @@ type SParser = Parser UserState modifyFlow :: (Flow -> Flow) -> SParser () modifyFlow f = modify $ \state -> state {flow = f $ flow state} -addObject :: ObjectId -> Object -> SParser () +addObject :: (Id Object) -> Object -> SParser () addObject objectId newObject = modifyFlow $ \flow -> flow { tmpObjects = Map.insert objectId newObject $ tmpObjects flow } @@ -48,7 +49,7 @@ comment = BS.unpack <$> (option "" afterPercent <* EOL.parser) where afterPercent = BS.cons <$> char '%' <*> takeAll (not . (`elem` EOL.charset)) -lookupOffset :: ObjectId -> SParser Offset +lookupOffset :: (Id Object) -> SParser Offset lookupOffset objectId = do table <- gets xreferences case Map.lookup objectId table >>= entryOffset of @@ -59,7 +60,7 @@ lookupOffset objectId = do entryOffset (InUse {offset}) = Just offset entryOffset _ = Nothing -loadNumber :: ObjectId -> SParser Double +loadNumber :: (Id Object) -> SParser Double loadNumber objectId = do offset <- getOffset <$> lookupOffset objectId objectStart <- BS.drop offset <$> gets input @@ -98,7 +99,7 @@ object = streamObject <|> Direct <$> directObject indirectObjCoordinates :: SParser IndirectObjCoordinates indirectObjCoordinates = do - objectId <- ObjectId <$> integer + objectId <- Id <$> integer coordinates <- IndirectObjCoordinates objectId <$> integer objectValue <- line "obj" *> object <* blank <* line "endobj" addObject objectId objectValue diff --git a/src/PDF/Content.hs b/src/PDF/Content.hs index e40173b..ab425d2 100644 --- a/src/PDF/Content.hs +++ b/src/PDF/Content.hs @@ -9,8 +9,6 @@ module PDF.Content ( Content(..) , ContentUnit(..) , GraphicContextUnit(..) - , Id(..) - , Indexed , TextContext , parse ) where @@ -21,7 +19,8 @@ import Control.Monad.Reader (asks, runReader) import Control.Monad.State (evalStateT, gets, modify) import Data.Attoparsec.ByteString.Char8 (sepBy) import Data.ByteString (ByteString) -import Data.Map (Map, (!)) +import Data.Id (Id(..), Indexed) +import Data.Map ((!)) import qualified Data.Map as Map (empty, insert, size) import PDF.Box (Box(..)) import PDF.Content.Operator (Instruction, operator) @@ -29,9 +28,7 @@ import PDF.Object (blank, directObject) import PDF.Output (Output(..), line) import PDF.Parser (Parser, runParser, string) -newtype Id a = Id Int deriving (Eq, Ord, Show) data Instructions = Instructions -type Indexed a = Map (Id a) a data GraphicContextUnit = GraphicInstruction (Id Instruction) diff --git a/src/PDF/Content/Text.hs b/src/PDF/Content/Text.hs index 16f0493..ff4f0b9 100644 --- a/src/PDF/Content/Text.hs +++ b/src/PDF/Content/Text.hs @@ -15,14 +15,14 @@ import Control.Monad (foldM) import Control.Monad.Fail (MonadFail(..)) import Control.Monad.Reader (MonadReader(..), ReaderT, asks, runReaderT) import Control.Monad.State (MonadState(..), StateT, evalStateT) +import Data.Id (Id(..), Indexed) import Data.Map ((!)) import qualified Data.Map as Map (empty, singleton) import Data.Text (Text, breakOn) import qualified Data.Text as Text (drop) import PDF.Box (Box(..)) import PDF.Content ( - Content, ContentUnit(..), Id(..), Indexed, GraphicContextUnit(..) - , contentUnits + Content, ContentUnit(..), GraphicContextUnit(..), contentUnits ) import qualified PDF.Content as Content (Content(..)) import PDF.Content.Operator (Instruction, Operator(..)) diff --git a/src/PDF/Layer.hs b/src/PDF/Layer.hs index cbdf137..f178511 100644 --- a/src/PDF/Layer.hs +++ b/src/PDF/Layer.hs @@ -7,31 +7,33 @@ module PDF.Layer ( , unify ) where +import Data.Id (Id(..), Indexed) import Data.Map (Map, (!), mapWithKey, member) import qualified Data.Map as Map (empty, keysSet, member, union) import qualified Data.Set as Set (delete, toList) import PDF.Box (Box(..)) import PDF.Object ( - IndexedObjects, IndirectObjCoordinates(..), Object, Occurrence(..) + IndirectObjCoordinates(..), Object, Occurrence(..) , Structure(..), XRefEntry(..), XRefSection, eofMarker, outputBody ) import qualified PDF.Output as Output (line) import PDF.Output ( - ObjectId, Offset(..), Output(..), Resource(..), byteString, getOffset + Offset(..), Output(..), Resource(..), byteString, getOffset , getOffsets, newLine ) import Text.Printf (printf) data Layer = Layer { occurrences :: [Occurrence] - , objects :: IndexedObjects + , objects :: (Indexed Object) , docStructure :: Structure } deriving Show updateXrefs :: XRefSection -> Map Resource Offset -> (XRefSection, Offset) updateXrefs xRef offsets = (mapWithKey updateEntry xRef, offsets ! StartXRef) where - updateEntry objectId e@(InUse {}) = e {offset = offsets ! (Object objectId)} + updateEntry objectId e@(InUse {}) = + e {offset = offsets ! (ObjectId $ getId objectId)} updateEntry _ e = e instance Output Layer where @@ -52,7 +54,7 @@ instance Output Layer where data Objects = Objects -instance Monad m => Box m Objects Layer (Map ObjectId Object) where +instance Monad m => Box m Objects Layer (Indexed Object) where r Objects = return . objects w Objects newObjects layer@(Layer {occurrences, docStructure}) = return $ layer { @@ -99,7 +101,7 @@ unifyDocStructure new old = Structure { , trailer = Map.union (trailer new) (trailer old) } -unifyOccurrences :: IndexedObjects -> [Occurrence] -> [Occurrence] -> [Occurrence] +unifyOccurrences :: (Indexed Object) -> [Occurrence] -> [Occurrence] -> [Occurrence] unifyOccurrences objects new = foldr addOlder new where addOlder occurrence@(Comment _) existing = occurrence : existing diff --git a/src/PDF/Object.hs b/src/PDF/Object.hs index ce0bcdc..6c3efdc 100644 --- a/src/PDF/Object.hs +++ b/src/PDF/Object.hs @@ -5,7 +5,6 @@ module PDF.Object ( Dictionary , DirectObject(..) , Flow(..) - , IndexedObjects , IndirectObjCoordinates(..) , InputStructure(..) , Name(..) @@ -41,6 +40,7 @@ import qualified Data.ByteString.Char8 as Char8 ( cons, length, pack, singleton, snoc, unpack ) import Data.ByteString.Char8.Util (B16Int(..), b16ToBytes, unescape) +import Data.Id (Id(..), Indexed) import Data.Map (Map, (!)) import qualified Data.Map as Map ( delete, empty, fromList, lookup, minViewWithKey, toList, union @@ -49,8 +49,8 @@ import qualified Data.Set as Set (fromList, member) import qualified PDF.EOL as EOL (charset, parser) import qualified PDF.Output as Output (line, string) import PDF.Output ( - OBuilder, ObjectId(..), Offset(..), Output(..), Resource(..) - , byteString, getObjectId, getOffset, join, newLine, saveOffset + OBuilder, Offset(..), Output(..), Resource(..) + , byteString, getOffset, join, newLine, saveOffset ) import PDF.Parser (MonadParser(..), Parser, (), octDigit, oneOf) import Text.Printf (printf) @@ -83,8 +83,6 @@ integer = decNumber <* blank "decimal integer" -- OBJECTS ------------------------------------- -type IndexedObjects = Map ObjectId Object - -- -- Boolean -- @@ -190,13 +188,13 @@ nullObject = string "null" *> return () "null object" -- Reference -- data IndirectObjCoordinates = IndirectObjCoordinates { - objectId :: ObjectId + objectId :: (Id Object) , versionNumber :: Int } deriving Show reference :: MonadParser m => m IndirectObjCoordinates reference = IndirectObjCoordinates - <$> (fmap ObjectId integer) <*> integer <* char 'R' "reference to an object" + <$> (fmap Id integer) <*> integer <* char 'R' "reference to an object" -- -- DirectObject @@ -221,7 +219,7 @@ instance Output DirectObject where output (Dictionary d) = output d output (Null) = "null" output (Reference (IndirectObjCoordinates {objectId, versionNumber})) = - Output.string (printf "%d %d R" (getObjectId objectId) versionNumber) + Output.string (printf "%d %d R" (getId objectId) versionNumber) directObject :: MonadParser m => m DirectObject directObject = (peek >>= dispatch) "direct object" @@ -262,16 +260,16 @@ instance Output Object where -- data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show -outputOccurrence :: IndexedObjects -> Occurrence -> OBuilder +outputOccurrence :: (Indexed Object) -> Occurrence -> OBuilder outputOccurrence _ (Comment c) = Output.line c outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) = - saveOffset (Object objectId) >> mconcat [ - Output.line (printf "%d %d obj" (getObjectId objectId) versionNumber) + saveOffset (ObjectId $ getId objectId) >> mconcat [ + Output.line (printf "%d %d obj" (getId objectId) versionNumber) , output (objects ! objectId), newLine , Output.line "endobj" ] -outputBody :: ([Occurrence], IndexedObjects) -> OBuilder +outputBody :: ([Occurrence], (Indexed Object)) -> OBuilder outputBody (occurrences, objects) = output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef @@ -286,7 +284,7 @@ data XRefEntry = InUse { offset :: Offset , generation :: Int } | Free { - nextFree :: ObjectId + nextFree :: (Id Object) , generation :: Int } deriving Show @@ -294,7 +292,7 @@ instance Output XRefEntry where output (InUse {offset, generation}) = Output.line (printf "%010d %05d n " (getOffset offset) generation) output (Free {nextFree, generation}) = - Output.line (printf "%010d %05d f " (getObjectId nextFree) generation) + Output.line (printf "%010d %05d f " (getId nextFree) generation) entry :: Parser u XRefEntry entry = do @@ -306,28 +304,28 @@ entry = do char 'n' *> return (InUse {offset = Offset big, generation}) free :: Int -> Int -> Parser u XRefEntry free big generation = - char 'f' *> return (Free {nextFree = ObjectId big, generation}) + char 'f' *> return (Free {nextFree = Id big, generation}) -- -- XRefSubSection -- data XRefSubSection = XRefSubSection { - firstObjectId :: ObjectId + firstObjectId :: (Id Object) , entries :: [XRefEntry] } deriving Show instance Output XRefSubSection where output (XRefSubSection {firstObjectId, entries}) = - Output.line (printf "%d %d" (getObjectId firstObjectId) (length entries)) + Output.line (printf "%d %d" (getId firstObjectId) (length entries)) `mappend` output entries xRefSubSection :: Parser u XRefSubSection xRefSubSection = do (firstId, entriesNumber) <- (,) <$> integer <*> integer "XRef subsection" entries <- count entriesNumber entry - return $ XRefSubSection {firstObjectId = ObjectId firstId, entries} + return $ XRefSubSection {firstObjectId = Id firstId, entries} -type XRefSection = Map ObjectId XRefEntry +type XRefSection = Map (Id Object) XRefEntry instance Output XRefSection where output = output . sections @@ -335,11 +333,11 @@ instance Output XRefSection where sections tmp = case Map.minViewWithKey tmp of Nothing -> [] - Just ((objectId@(ObjectId value), firstEntry), rest) -> + Just ((objectId@(Id value), firstEntry), rest) -> let (subSection, sndRest) = section objectId [firstEntry] (value + 1) rest in subSection : sections sndRest section firstObjectId stack nextValue tmp = - let nextId = ObjectId nextValue in + let nextId = Id nextValue in case Map.lookup nextId tmp of Nothing -> (XRefSubSection {firstObjectId, entries = reverse stack}, tmp) Just nextEntry -> @@ -377,5 +375,5 @@ structure = -- data Flow = Flow { occurrencesStack :: [Occurrence] - , tmpObjects :: IndexedObjects + , tmpObjects :: (Indexed Object) } deriving Show diff --git a/src/PDF/Object/Navigation.hs b/src/PDF/Object/Navigation.hs index df9274f..9bff17b 100644 --- a/src/PDF/Object/Navigation.hs +++ b/src/PDF/Object/Navigation.hs @@ -23,6 +23,7 @@ import Control.Monad.Reader (MonadReader(..)) import Control.Monad.Fail (MonadFail(..)) import Data.ByteString (ByteString) import qualified Data.ByteString.Lazy as Lazy (ByteString, fromStrict, toStrict) +import Data.Id (Id) import Data.Map ((!)) import qualified Data.Map as Map (lookup) import PDF.Box (Box(..)) @@ -31,7 +32,6 @@ import PDF.Object ( Dictionary, DirectObject(..), IndirectObjCoordinates(..) , Name(..), Object(..), Structure(..) ) -import PDF.Output (ObjectId) import Prelude hiding (fail) import Text.Printf (printf) @@ -55,7 +55,7 @@ getKey key object = getDictionary object >>= catchMaybe . Map.lookup (Name key) printf "Key %s not found in object %s" key (show object) catchMaybe = maybe (fail errorMessage) return -objectById :: PDFContent m => ObjectId -> m Object +objectById :: PDFContent m => (Id Object) -> m Object objectById objectId = do layer <- ask return (objects layer ! objectId) diff --git a/src/PDF/Object/Navigation.hs.bak b/src/PDF/Object/Navigation.hs.bak new file mode 100644 index 0000000..2d38475 --- /dev/null +++ b/src/PDF/Object/Navigation.hs.bak @@ -0,0 +1,130 @@ +{-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE ConstraintKinds #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE FlexibleInstances #-} +module PDF.Object.Navigation ( + Error(..) + , Except + , PDFContent + , Clear(..) + , Raw(..) + , (./) + , (//) + , (>./) + , (>//) + , castObject + , getDictionary + , getKey + , objectById + , origin + ) where + +import Codec.Compression.Zlib (compress, decompress) +import Control.Applicative (Alternative(..)) +import Control.Monad (MonadPlus(..)) +import Control.Monad.Reader (MonadReader(..), ReaderT) +import Control.Monad.Fail (MonadFail(..)) +import Data.ByteString (ByteString) +import qualified Data.ByteString.Lazy as Lazy (ByteString, fromStrict, toStrict) +import Data.Map ((!)) +import qualified Data.Map as Map (lookup) +import PDF.Box (Box(..)) +import PDF.Layer (Layer(..)) +import PDF.Object ( + Dictionary, DirectObject(..), IndirectObjCoordinates(..) + , Name(..), Object(..), Structure(..) + ) +import PDF.Output (ObjectId) +import Prelude hiding (fail) +import Text.Printf (printf) + +type Except m = (Alternative m, MonadFail m) +type PDFContent m = ReaderT Layer m +newtype Error a = Error { + runError :: Either String a + } deriving (Alternative, Functor, Applicative, Monad, MonadPlus) +instance MonadFail Error where + fail = Error . Left +type Component = String + +getDictionary :: Except m => Object -> PDFContent m Dictionary +getDictionary (Direct (Dictionary aDict)) = return aDict +getDictionary (Direct (Reference (IndirectObjCoordinates {objectId}))) = + objectById objectId >>= getDictionary +getDictionary (Stream {header}) = return header +getDictionary obj = expected "dictionary : " obj + +expected :: (MonadFail m, Show a) => String -> a -> m b +expected name = fail . printf "Not a %s: %s" name . show + +getKey :: Except m => String -> Object -> PDFContent m DirectObject +getKey key object = getDictionary object >>= catchMaybe . Map.lookup (Name key) + where + errorMessage = + printf "Key %s not found in object %s" key (show object) + catchMaybe = maybe (fail errorMessage) return + +objectById :: Except m => ObjectId -> PDFContent m Object +objectById objectId = do + layer <- ask + return (objects layer ! objectId) + +(./) :: Except m => PDFContent m Object -> Component -> PDFContent m Object +(./) object key = (object >>= getKey key >>= castObject) + +castObject :: Except m => DirectObject -> PDFContent m Object +castObject (Reference (IndirectObjCoordinates {objectId})) = + objectById objectId +castObject directObject = return $ Direct directObject + +(//) :: Except m => PDFContent m Object -> [Component] -> PDFContent m Object +(//) object [] = object +(//) object (key:keys) = object ./ key // keys + +(>./) :: Except m => Object -> Component -> PDFContent m Object +(>./) object = (return object ./) + +(>//) :: Except m => Object -> [Component] -> PDFContent m Object +(>//) object = (return object //) + +origin :: Except m => PDFContent m Object +origin = Direct . Dictionary . trailer . docStructure <$> ask + +data Clear = Clear +data Raw = Raw + +onLazy :: (Lazy.ByteString -> Lazy.ByteString) -> ByteString -> ByteString +onLazy f = Lazy.toStrict . f . Lazy.fromStrict + +contains :: String -> DirectObject -> Bool +contains needle (NameObject (Name n)) = needle == n +contains needle (Array directObjects) = oneOf directObjects (contains needle) + where + oneOf [] _ = False + oneOf (x:xs) p = p x || oneOf xs p +contains _ _ = False + +instance MonadFail m => Box m Clear Object ByteString where + r Clear (Stream {header, streamContent}) = return $ + case Map.lookup (Name "Filter") header of + Just directObject + | contains "FlateDecode" directObject -> onLazy decompress streamContent + _ -> streamContent + r _ obj = expected "stream" obj + + w Clear streamContent obj@(Stream {header}) = return $ + case Map.lookup (Name "Filter") header of + Just directObject + | contains "FlateDecode" directObject -> + obj {streamContent = onLazy compress streamContent} + _ -> obj {streamContent} + w _ _ obj = expected "stream" obj + +instance MonadFail m => Box m Raw Object ByteString where + r Raw (Stream {streamContent}) = return streamContent + r _ obj = expected "stream" obj + + w Raw streamContent obj@(Stream {}) = return $ obj {streamContent} + w _ _ obj = expected "stream" obj diff --git a/src/PDF/Output.hs b/src/PDF/Output.hs index c5ea5d5..f349ab7 100644 --- a/src/PDF/Output.hs +++ b/src/PDF/Output.hs @@ -5,7 +5,6 @@ {-# LANGUAGE GeneralizedNewtypeDeriving #-} module PDF.Output ( OBuilder - , ObjectId(..) , OContext(..) , Offset(..) , Output(..) @@ -32,10 +31,9 @@ import Data.String (IsString(..)) import Control.Monad.RWS (RWS, runRWS, ask, get, listen, modify, tell) import qualified PDF.EOL as EOL (Style(..)) -newtype ObjectId = ObjectId {getObjectId :: Int} deriving (Enum, Eq, Ord, Show) newtype Offset = Offset {getOffset :: Int} deriving (Show) -data Resource = StartXRef | Object ObjectId deriving (Eq, Ord) +data Resource = StartXRef | ObjectId Int deriving (Eq, Ord) newtype OContext a = OContext (RWS EOL.Style (Map Resource Offset) Offset a) type OBuilder = OContext Builder diff --git a/src/PDF/Pages.hs b/src/PDF/Pages.hs index 7bb6130..e313dc4 100755 --- a/src/PDF/Pages.hs +++ b/src/PDF/Pages.hs @@ -19,9 +19,10 @@ import Control.Monad.Fail (MonadFail(..)) import Control.Monad.Reader (ReaderT, runReaderT) import Control.Monad.State (StateT(..), evalStateT, gets, modify) import Control.Monad.Trans (lift) -import Data.OrderedMap (OrderedMap, build) +import Data.Id (Id) import Data.Map (Map) import qualified Data.Map as Map (empty, fromList, insert, lookup, toList) +import Data.OrderedMap (OrderedMap, build) import PDF.Box (Box(..)) import PDF.CMap (cMap) import PDF.Content (Content(..)) @@ -37,26 +38,25 @@ import PDF.Object.Navigation ( Clear(..), PDFContent, (//), (>./), (>//), getDictionary , getKey, objectById, origin ) -import PDF.Output (ObjectId(..)) import Prelude hiding (fail) import Text.Printf (printf) type Except m = (Alternative m, MonadFail m) type InLayer m = ReaderT Layer m -type CachedFonts = Map ObjectId Font +type CachedFonts = Map (Id Object) Font type FontCache m = StateT CachedFonts (InLayer m) data Page = Page { - contents :: OrderedMap ObjectId Content + contents :: OrderedMap (Id Object) Content , resources :: Dictionary - , source :: ObjectId + , source :: (Id Object) } -loadContents :: Except m => DirectObject -> InLayer m (OrderedMap ObjectId Content) +loadContents :: Except m => DirectObject -> InLayer m (OrderedMap (Id Object) Content) loadContents directObject = sequenceA . build loadContent $ objectIds directObject where - loadContent :: Except m => ObjectId -> InLayer m Content + loadContent :: Except m => (Id Object) -> InLayer m Content loadContent objectId = objectById objectId >>= r Clear >>= Content.parse objectIds (Array l) = l >>= getReference objectIds dirObj = getReference dirObj @@ -65,7 +65,7 @@ getFontDictionary :: Except m => Object -> InLayer m Dictionary getFontDictionary pageObj = (pageObj >// ["Resources", "Font"] >>= getDictionary) <|> return Map.empty -cache :: Except m => (ObjectId -> FontCache m Font) -> ObjectId -> FontCache m Font +cache :: Except m => ((Id Object) -> FontCache m Font) -> (Id Object) -> FontCache m Font cache loader objectId = gets (Map.lookup objectId) >>= maybe load return where @@ -74,7 +74,7 @@ cache loader objectId = modify $ Map.insert objectId value return value -loadFont :: Except m => ObjectId -> FontCache m Font +loadFont :: Except m => (Id Object) -> FontCache m Font loadFont objectId = lift $ objectById objectId >>= tryMappings where tryMappings object = @@ -94,18 +94,18 @@ loadResources = foldM addFont Map.empty . Map.toList flip (Map.insert name) output <$> cache loadFont objectId addFont output _ = return output -getReference :: DirectObject -> [ObjectId] +getReference :: DirectObject -> [(Id Object)] getReference (Reference (IndirectObjCoordinates {objectId})) = [objectId] getReference _ = [] -loadPage :: Except m => ObjectId -> InLayer m Page +loadPage :: Except m => (Id Object) -> InLayer m Page loadPage source = do page <- objectById source contents <- getKey "Contents" page >>= loadContents resources <- getFontDictionary page return $ Page {contents, resources, source} -pagesList :: Except m => InLayer m [ObjectId] +pagesList :: Except m => InLayer m [(Id Object)] pagesList = (origin // ["Root", "Pages"] >>= getKey "Kids" >>= return . getReferences) <|> return [] @@ -131,7 +131,7 @@ instance (Alternative m, MonadFail m) => Box m PageNumber Layer Page where firstPage [] = fail "Page is out of bounds" firstPage (p:_) = loadPage p -instance Monad m => Box m Contents Page (OrderedMap ObjectId Content) where +instance Monad m => Box m Contents Page (OrderedMap (Id Object) Content) where r Contents = return . contents cacheFonts :: Monad m => StateT CachedFonts m a -> m a