Generalize Ids out of Content to handle Object Ids too

This commit is contained in:
Tissevert 2020-03-14 22:30:28 +01:00
parent 0f857c457d
commit f31e9eb38b
13 changed files with 203 additions and 62 deletions

View file

@ -17,6 +17,7 @@ cabal-version: >=1.10
library library
exposed-modules: Data.OrderedMap exposed-modules: Data.OrderedMap
, Data.Id
, PDF , PDF
, PDF.Box , PDF.Box
, PDF.CMap , PDF.CMap
@ -88,6 +89,7 @@ Test-Suite unitTests
main-is: Main.hs main-is: Main.hs
other-modules: Object other-modules: Object
, Data.ByteString.Char8.Util , Data.ByteString.Char8.Util
, Data.Id
, PDF.EOL , PDF.EOL
, PDF.Parser , PDF.Parser
, PDF.Object , PDF.Object

View file

@ -7,6 +7,7 @@ import Control.Monad.Reader (ReaderT, runReaderT)
import Data.ByteString.Lazy.Char8 (ByteString) import Data.ByteString.Lazy.Char8 (ByteString)
import qualified Data.ByteString.Char8 as BS (readFile) import qualified Data.ByteString.Char8 as BS (readFile)
import qualified Data.ByteString.Lazy.Char8 as Lazy (putStrLn) import qualified Data.ByteString.Lazy.Char8 as Lazy (putStrLn)
import Data.Id (Id(..))
import PDF (Document(..), parseDocument) import PDF (Document(..), parseDocument)
import PDF.Box (Box(..)) import PDF.Box (Box(..))
import PDF.Layer (Layer(..), unify) import PDF.Layer (Layer(..), unify)
@ -14,7 +15,7 @@ import PDF.Object (Object(..))
import PDF.Object.Navigation ( import PDF.Object.Navigation (
Clear(..), Raw(..), (//), objectById, origin Clear(..), Raw(..), (//), objectById, origin
) )
import PDF.Output (ObjectId(..), Output) import PDF.Output (Output)
import qualified PDF.Output as Output (render) import qualified PDF.Output as Output (render)
import Prelude hiding (fail) import Prelude hiding (fail)
import System.Environment (getArgs, getProgName) import System.Environment (getArgs, getProgName)
@ -41,7 +42,7 @@ parse [inputFile] = return (inputFile, display origin)
parse [inputFile, key] = parse [inputFile, key] =
return (inputFile, clear . maybe (byPath key) byId $ readMaybe key) return (inputFile, clear . maybe (byPath key) byId $ readMaybe key)
where where
byId = objectById . ObjectId byId = objectById . Id
byPath path = origin // (explode path) byPath path = origin // (explode path)
explode "" = [] explode "" = []
explode path = explode path =

View file

@ -3,6 +3,7 @@ import Control.Monad ((>=>))
import Control.Monad.IO.Class (liftIO) import Control.Monad.IO.Class (liftIO)
import Control.Monad.Reader (runReaderT) import Control.Monad.Reader (runReaderT)
import qualified Data.ByteString.Char8 as BS (readFile) import qualified Data.ByteString.Char8 as BS (readFile)
import Data.Id (Id(..))
import Data.Map (mapWithKey) import Data.Map (mapWithKey)
import qualified Data.Map as Map (toList) import qualified Data.Map as Map (toList)
import Data.OrderedMap (mapi) import Data.OrderedMap (mapi)
@ -11,7 +12,6 @@ import PDF (UnifiedLayers(..), parseDocument)
import PDF.Box (Box(..)) import PDF.Box (Box(..))
import PDF.Content.Text (Chunks(..)) import PDF.Content.Text (Chunks(..))
import PDF.Layer (Layer) import PDF.Layer (Layer)
import PDF.Output (ObjectId(..))
import PDF.Pages ( import PDF.Pages (
Contents(..), FontCache, Page(..), PageNumber(..), Pages(..), cacheFonts, withResources Contents(..), FontCache, Page(..), PageNumber(..), Pages(..), cacheFonts, withResources
) )
@ -30,7 +30,7 @@ displayPage = withResources (
where where
display a b v = display a b v =
liftIO . putStrLn $ liftIO . putStrLn $
printf "%d@%s: %s" (getObjectId a) (show b) (Text.unpack v) printf "%d@%d: %s" (getId a) (getId b) (Text.unpack v)
getAll :: Layer -> IO () getAll :: Layer -> IO ()
getAll layer = getAll layer =

12
src/Data/Id.hs Normal file
View file

@ -0,0 +1,12 @@
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
module Data.Id (
Id(..)
, Indexed
) where
import Data.Map (Map)
newtype Id a = Id {
getId :: Int
} deriving (Eq, Enum, Ord, Show)
type Indexed a = Map (Id a) a

View file

@ -9,6 +9,7 @@ import Control.Monad.State (get, gets, modify)
import Data.Attoparsec.ByteString.Char8 (option) import Data.Attoparsec.ByteString.Char8 (option)
import Data.ByteString.Char8 (ByteString) import Data.ByteString.Char8 (ByteString)
import qualified Data.ByteString.Char8 as BS (cons, drop, unpack) import qualified Data.ByteString.Char8 as BS (cons, drop, unpack)
import Data.Id (Id(..))
import Data.Map ((!)) import Data.Map ((!))
import qualified Data.Map as Map (empty, insert, lookup) import qualified Data.Map as Map (empty, insert, lookup)
import qualified PDF.EOL as EOL (charset, parser) import qualified PDF.EOL as EOL (charset, parser)
@ -19,7 +20,7 @@ import PDF.Object (
, Structure(..), XRefEntry(..), XRefSection , Structure(..), XRefEntry(..), XRefSection
, blank, dictionary, directObject, integer, line , blank, dictionary, directObject, integer, line
) )
import PDF.Output (ObjectId(..), Offset(..)) import PDF.Output (Offset(..))
import PDF.Parser (MonadParser(..), Parser, (<?>), evalParser, on) import PDF.Parser (MonadParser(..), Parser, (<?>), evalParser, on)
data UserState = UserState { data UserState = UserState {
@ -33,7 +34,7 @@ type SParser = Parser UserState
modifyFlow :: (Flow -> Flow) -> SParser () modifyFlow :: (Flow -> Flow) -> SParser ()
modifyFlow f = modify $ \state -> state {flow = f $ flow state} modifyFlow f = modify $ \state -> state {flow = f $ flow state}
addObject :: ObjectId -> Object -> SParser () addObject :: (Id Object) -> Object -> SParser ()
addObject objectId newObject = modifyFlow $ \flow -> flow { addObject objectId newObject = modifyFlow $ \flow -> flow {
tmpObjects = Map.insert objectId newObject $ tmpObjects flow tmpObjects = Map.insert objectId newObject $ tmpObjects flow
} }
@ -48,7 +49,7 @@ comment = BS.unpack <$> (option "" afterPercent <* EOL.parser)
where where
afterPercent = BS.cons <$> char '%' <*> takeAll (not . (`elem` EOL.charset)) afterPercent = BS.cons <$> char '%' <*> takeAll (not . (`elem` EOL.charset))
lookupOffset :: ObjectId -> SParser Offset lookupOffset :: (Id Object) -> SParser Offset
lookupOffset objectId = do lookupOffset objectId = do
table <- gets xreferences table <- gets xreferences
case Map.lookup objectId table >>= entryOffset of case Map.lookup objectId table >>= entryOffset of
@ -59,7 +60,7 @@ lookupOffset objectId = do
entryOffset (InUse {offset}) = Just offset entryOffset (InUse {offset}) = Just offset
entryOffset _ = Nothing entryOffset _ = Nothing
loadNumber :: ObjectId -> SParser Double loadNumber :: (Id Object) -> SParser Double
loadNumber objectId = do loadNumber objectId = do
offset <- getOffset <$> lookupOffset objectId offset <- getOffset <$> lookupOffset objectId
objectStart <- BS.drop offset <$> gets input objectStart <- BS.drop offset <$> gets input
@ -98,7 +99,7 @@ object = streamObject <|> Direct <$> directObject
indirectObjCoordinates :: SParser IndirectObjCoordinates indirectObjCoordinates :: SParser IndirectObjCoordinates
indirectObjCoordinates = do indirectObjCoordinates = do
objectId <- ObjectId <$> integer objectId <- Id <$> integer
coordinates <- IndirectObjCoordinates objectId <$> integer coordinates <- IndirectObjCoordinates objectId <$> integer
objectValue <- line "obj" *> object <* blank <* line "endobj" objectValue <- line "obj" *> object <* blank <* line "endobj"
addObject objectId objectValue addObject objectId objectValue

View file

@ -9,8 +9,6 @@ module PDF.Content (
Content(..) Content(..)
, ContentUnit(..) , ContentUnit(..)
, GraphicContextUnit(..) , GraphicContextUnit(..)
, Id(..)
, Indexed
, TextContext , TextContext
, parse , parse
) where ) where
@ -21,7 +19,8 @@ import Control.Monad.Reader (asks, runReader)
import Control.Monad.State (evalStateT, gets, modify) import Control.Monad.State (evalStateT, gets, modify)
import Data.Attoparsec.ByteString.Char8 (sepBy) import Data.Attoparsec.ByteString.Char8 (sepBy)
import Data.ByteString (ByteString) import Data.ByteString (ByteString)
import Data.Map (Map, (!)) import Data.Id (Id(..), Indexed)
import Data.Map ((!))
import qualified Data.Map as Map (empty, insert, size) import qualified Data.Map as Map (empty, insert, size)
import PDF.Box (Box(..)) import PDF.Box (Box(..))
import PDF.Content.Operator (Instruction, operator) import PDF.Content.Operator (Instruction, operator)
@ -29,9 +28,7 @@ import PDF.Object (blank, directObject)
import PDF.Output (Output(..), line) import PDF.Output (Output(..), line)
import PDF.Parser (Parser, runParser, string) import PDF.Parser (Parser, runParser, string)
newtype Id a = Id Int deriving (Eq, Ord, Show)
data Instructions = Instructions data Instructions = Instructions
type Indexed a = Map (Id a) a
data GraphicContextUnit = data GraphicContextUnit =
GraphicInstruction (Id Instruction) GraphicInstruction (Id Instruction)

View file

@ -15,14 +15,14 @@ import Control.Monad (foldM)
import Control.Monad.Fail (MonadFail(..)) import Control.Monad.Fail (MonadFail(..))
import Control.Monad.Reader (MonadReader(..), ReaderT, asks, runReaderT) import Control.Monad.Reader (MonadReader(..), ReaderT, asks, runReaderT)
import Control.Monad.State (MonadState(..), StateT, evalStateT) import Control.Monad.State (MonadState(..), StateT, evalStateT)
import Data.Id (Id(..), Indexed)
import Data.Map ((!)) import Data.Map ((!))
import qualified Data.Map as Map (empty, singleton) import qualified Data.Map as Map (empty, singleton)
import Data.Text (Text, breakOn) import Data.Text (Text, breakOn)
import qualified Data.Text as Text (drop) import qualified Data.Text as Text (drop)
import PDF.Box (Box(..)) import PDF.Box (Box(..))
import PDF.Content ( import PDF.Content (
Content, ContentUnit(..), Id(..), Indexed, GraphicContextUnit(..) Content, ContentUnit(..), GraphicContextUnit(..), contentUnits
, contentUnits
) )
import qualified PDF.Content as Content (Content(..)) import qualified PDF.Content as Content (Content(..))
import PDF.Content.Operator (Instruction, Operator(..)) import PDF.Content.Operator (Instruction, Operator(..))

View file

@ -7,31 +7,33 @@ module PDF.Layer (
, unify , unify
) where ) where
import Data.Id (Id(..), Indexed)
import Data.Map (Map, (!), mapWithKey, member) import Data.Map (Map, (!), mapWithKey, member)
import qualified Data.Map as Map (empty, keysSet, member, union) import qualified Data.Map as Map (empty, keysSet, member, union)
import qualified Data.Set as Set (delete, toList) import qualified Data.Set as Set (delete, toList)
import PDF.Box (Box(..)) import PDF.Box (Box(..))
import PDF.Object ( import PDF.Object (
IndexedObjects, IndirectObjCoordinates(..), Object, Occurrence(..) IndirectObjCoordinates(..), Object, Occurrence(..)
, Structure(..), XRefEntry(..), XRefSection, eofMarker, outputBody , Structure(..), XRefEntry(..), XRefSection, eofMarker, outputBody
) )
import qualified PDF.Output as Output (line) import qualified PDF.Output as Output (line)
import PDF.Output ( import PDF.Output (
ObjectId, Offset(..), Output(..), Resource(..), byteString, getOffset Offset(..), Output(..), Resource(..), byteString, getOffset
, getOffsets, newLine , getOffsets, newLine
) )
import Text.Printf (printf) import Text.Printf (printf)
data Layer = Layer { data Layer = Layer {
occurrences :: [Occurrence] occurrences :: [Occurrence]
, objects :: IndexedObjects , objects :: (Indexed Object)
, docStructure :: Structure , docStructure :: Structure
} deriving Show } deriving Show
updateXrefs :: XRefSection -> Map Resource Offset -> (XRefSection, Offset) updateXrefs :: XRefSection -> Map Resource Offset -> (XRefSection, Offset)
updateXrefs xRef offsets = (mapWithKey updateEntry xRef, offsets ! StartXRef) updateXrefs xRef offsets = (mapWithKey updateEntry xRef, offsets ! StartXRef)
where where
updateEntry objectId e@(InUse {}) = e {offset = offsets ! (Object objectId)} updateEntry objectId e@(InUse {}) =
e {offset = offsets ! (ObjectId $ getId objectId)}
updateEntry _ e = e updateEntry _ e = e
instance Output Layer where instance Output Layer where
@ -52,7 +54,7 @@ instance Output Layer where
data Objects = Objects data Objects = Objects
instance Monad m => Box m Objects Layer (Map ObjectId Object) where instance Monad m => Box m Objects Layer (Indexed Object) where
r Objects = return . objects r Objects = return . objects
w Objects newObjects layer@(Layer {occurrences, docStructure}) = w Objects newObjects layer@(Layer {occurrences, docStructure}) =
return $ layer { return $ layer {
@ -99,7 +101,7 @@ unifyDocStructure new old = Structure {
, trailer = Map.union (trailer new) (trailer old) , trailer = Map.union (trailer new) (trailer old)
} }
unifyOccurrences :: IndexedObjects -> [Occurrence] -> [Occurrence] -> [Occurrence] unifyOccurrences :: (Indexed Object) -> [Occurrence] -> [Occurrence] -> [Occurrence]
unifyOccurrences objects new = foldr addOlder new unifyOccurrences objects new = foldr addOlder new
where where
addOlder occurrence@(Comment _) existing = occurrence : existing addOlder occurrence@(Comment _) existing = occurrence : existing

View file

@ -5,7 +5,6 @@ module PDF.Object (
Dictionary Dictionary
, DirectObject(..) , DirectObject(..)
, Flow(..) , Flow(..)
, IndexedObjects
, IndirectObjCoordinates(..) , IndirectObjCoordinates(..)
, InputStructure(..) , InputStructure(..)
, Name(..) , Name(..)
@ -41,6 +40,7 @@ import qualified Data.ByteString.Char8 as Char8 (
cons, length, pack, singleton, snoc, unpack cons, length, pack, singleton, snoc, unpack
) )
import Data.ByteString.Char8.Util (B16Int(..), b16ToBytes, unescape) import Data.ByteString.Char8.Util (B16Int(..), b16ToBytes, unescape)
import Data.Id (Id(..), Indexed)
import Data.Map (Map, (!)) import Data.Map (Map, (!))
import qualified Data.Map as Map ( import qualified Data.Map as Map (
delete, empty, fromList, lookup, minViewWithKey, toList, union delete, empty, fromList, lookup, minViewWithKey, toList, union
@ -49,8 +49,8 @@ import qualified Data.Set as Set (fromList, member)
import qualified PDF.EOL as EOL (charset, parser) import qualified PDF.EOL as EOL (charset, parser)
import qualified PDF.Output as Output (line, string) import qualified PDF.Output as Output (line, string)
import PDF.Output ( import PDF.Output (
OBuilder, ObjectId(..), Offset(..), Output(..), Resource(..) OBuilder, Offset(..), Output(..), Resource(..)
, byteString, getObjectId, getOffset, join, newLine, saveOffset , byteString, getOffset, join, newLine, saveOffset
) )
import PDF.Parser (MonadParser(..), Parser, (<?>), octDigit, oneOf) import PDF.Parser (MonadParser(..), Parser, (<?>), octDigit, oneOf)
import Text.Printf (printf) import Text.Printf (printf)
@ -83,8 +83,6 @@ integer = decNumber <* blank <?> "decimal integer"
-- OBJECTS -- OBJECTS
------------------------------------- -------------------------------------
type IndexedObjects = Map ObjectId Object
-- --
-- Boolean -- Boolean
-- --
@ -190,13 +188,13 @@ nullObject = string "null" *> return () <?> "null object"
-- Reference -- Reference
-- --
data IndirectObjCoordinates = IndirectObjCoordinates { data IndirectObjCoordinates = IndirectObjCoordinates {
objectId :: ObjectId objectId :: (Id Object)
, versionNumber :: Int , versionNumber :: Int
} deriving Show } deriving Show
reference :: MonadParser m => m IndirectObjCoordinates reference :: MonadParser m => m IndirectObjCoordinates
reference = IndirectObjCoordinates reference = IndirectObjCoordinates
<$> (fmap ObjectId integer) <*> integer <* char 'R' <?> "reference to an object" <$> (fmap Id integer) <*> integer <* char 'R' <?> "reference to an object"
-- --
-- DirectObject -- DirectObject
@ -221,7 +219,7 @@ instance Output DirectObject where
output (Dictionary d) = output d output (Dictionary d) = output d
output (Null) = "null" output (Null) = "null"
output (Reference (IndirectObjCoordinates {objectId, versionNumber})) = output (Reference (IndirectObjCoordinates {objectId, versionNumber})) =
Output.string (printf "%d %d R" (getObjectId objectId) versionNumber) Output.string (printf "%d %d R" (getId objectId) versionNumber)
directObject :: MonadParser m => m DirectObject directObject :: MonadParser m => m DirectObject
directObject = (peek >>= dispatch) <?> "direct object" directObject = (peek >>= dispatch) <?> "direct object"
@ -262,16 +260,16 @@ instance Output Object where
-- --
data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show
outputOccurrence :: IndexedObjects -> Occurrence -> OBuilder outputOccurrence :: (Indexed Object) -> Occurrence -> OBuilder
outputOccurrence _ (Comment c) = Output.line c outputOccurrence _ (Comment c) = Output.line c
outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) = outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) =
saveOffset (Object objectId) >> mconcat [ saveOffset (ObjectId $ getId objectId) >> mconcat [
Output.line (printf "%d %d obj" (getObjectId objectId) versionNumber) Output.line (printf "%d %d obj" (getId objectId) versionNumber)
, output (objects ! objectId), newLine , output (objects ! objectId), newLine
, Output.line "endobj" , Output.line "endobj"
] ]
outputBody :: ([Occurrence], IndexedObjects) -> OBuilder outputBody :: ([Occurrence], (Indexed Object)) -> OBuilder
outputBody (occurrences, objects) = outputBody (occurrences, objects) =
output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef
@ -286,7 +284,7 @@ data XRefEntry = InUse {
offset :: Offset offset :: Offset
, generation :: Int , generation :: Int
} | Free { } | Free {
nextFree :: ObjectId nextFree :: (Id Object)
, generation :: Int , generation :: Int
} deriving Show } deriving Show
@ -294,7 +292,7 @@ instance Output XRefEntry where
output (InUse {offset, generation}) = output (InUse {offset, generation}) =
Output.line (printf "%010d %05d n " (getOffset offset) generation) Output.line (printf "%010d %05d n " (getOffset offset) generation)
output (Free {nextFree, generation}) = output (Free {nextFree, generation}) =
Output.line (printf "%010d %05d f " (getObjectId nextFree) generation) Output.line (printf "%010d %05d f " (getId nextFree) generation)
entry :: Parser u XRefEntry entry :: Parser u XRefEntry
entry = do entry = do
@ -306,28 +304,28 @@ entry = do
char 'n' *> return (InUse {offset = Offset big, generation}) char 'n' *> return (InUse {offset = Offset big, generation})
free :: Int -> Int -> Parser u XRefEntry free :: Int -> Int -> Parser u XRefEntry
free big generation = free big generation =
char 'f' *> return (Free {nextFree = ObjectId big, generation}) char 'f' *> return (Free {nextFree = Id big, generation})
-- --
-- XRefSubSection -- XRefSubSection
-- --
data XRefSubSection = XRefSubSection { data XRefSubSection = XRefSubSection {
firstObjectId :: ObjectId firstObjectId :: (Id Object)
, entries :: [XRefEntry] , entries :: [XRefEntry]
} deriving Show } deriving Show
instance Output XRefSubSection where instance Output XRefSubSection where
output (XRefSubSection {firstObjectId, entries}) = output (XRefSubSection {firstObjectId, entries}) =
Output.line (printf "%d %d" (getObjectId firstObjectId) (length entries)) Output.line (printf "%d %d" (getId firstObjectId) (length entries))
`mappend` output entries `mappend` output entries
xRefSubSection :: Parser u XRefSubSection xRefSubSection :: Parser u XRefSubSection
xRefSubSection = do xRefSubSection = do
(firstId, entriesNumber) <- (,) <$> integer <*> integer <?> "XRef subsection" (firstId, entriesNumber) <- (,) <$> integer <*> integer <?> "XRef subsection"
entries <- count entriesNumber entry entries <- count entriesNumber entry
return $ XRefSubSection {firstObjectId = ObjectId firstId, entries} return $ XRefSubSection {firstObjectId = Id firstId, entries}
type XRefSection = Map ObjectId XRefEntry type XRefSection = Map (Id Object) XRefEntry
instance Output XRefSection where instance Output XRefSection where
output = output . sections output = output . sections
@ -335,11 +333,11 @@ instance Output XRefSection where
sections tmp = sections tmp =
case Map.minViewWithKey tmp of case Map.minViewWithKey tmp of
Nothing -> [] Nothing -> []
Just ((objectId@(ObjectId value), firstEntry), rest) -> Just ((objectId@(Id value), firstEntry), rest) ->
let (subSection, sndRest) = section objectId [firstEntry] (value + 1) rest in let (subSection, sndRest) = section objectId [firstEntry] (value + 1) rest in
subSection : sections sndRest subSection : sections sndRest
section firstObjectId stack nextValue tmp = section firstObjectId stack nextValue tmp =
let nextId = ObjectId nextValue in let nextId = Id nextValue in
case Map.lookup nextId tmp of case Map.lookup nextId tmp of
Nothing -> (XRefSubSection {firstObjectId, entries = reverse stack}, tmp) Nothing -> (XRefSubSection {firstObjectId, entries = reverse stack}, tmp)
Just nextEntry -> Just nextEntry ->
@ -377,5 +375,5 @@ structure =
-- --
data Flow = Flow { data Flow = Flow {
occurrencesStack :: [Occurrence] occurrencesStack :: [Occurrence]
, tmpObjects :: IndexedObjects , tmpObjects :: (Indexed Object)
} deriving Show } deriving Show

View file

@ -23,6 +23,7 @@ import Control.Monad.Reader (MonadReader(..))
import Control.Monad.Fail (MonadFail(..)) import Control.Monad.Fail (MonadFail(..))
import Data.ByteString (ByteString) import Data.ByteString (ByteString)
import qualified Data.ByteString.Lazy as Lazy (ByteString, fromStrict, toStrict) import qualified Data.ByteString.Lazy as Lazy (ByteString, fromStrict, toStrict)
import Data.Id (Id)
import Data.Map ((!)) import Data.Map ((!))
import qualified Data.Map as Map (lookup) import qualified Data.Map as Map (lookup)
import PDF.Box (Box(..)) import PDF.Box (Box(..))
@ -31,7 +32,6 @@ import PDF.Object (
Dictionary, DirectObject(..), IndirectObjCoordinates(..) Dictionary, DirectObject(..), IndirectObjCoordinates(..)
, Name(..), Object(..), Structure(..) , Name(..), Object(..), Structure(..)
) )
import PDF.Output (ObjectId)
import Prelude hiding (fail) import Prelude hiding (fail)
import Text.Printf (printf) import Text.Printf (printf)
@ -55,7 +55,7 @@ getKey key object = getDictionary object >>= catchMaybe . Map.lookup (Name key)
printf "Key %s not found in object %s" key (show object) printf "Key %s not found in object %s" key (show object)
catchMaybe = maybe (fail errorMessage) return catchMaybe = maybe (fail errorMessage) return
objectById :: PDFContent m => ObjectId -> m Object objectById :: PDFContent m => (Id Object) -> m Object
objectById objectId = do objectById objectId = do
layer <- ask layer <- ask
return (objects layer ! objectId) return (objects layer ! objectId)

View file

@ -0,0 +1,130 @@
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE FlexibleInstances #-}
module PDF.Object.Navigation (
Error(..)
, Except
, PDFContent
, Clear(..)
, Raw(..)
, (./)
, (//)
, (>./)
, (>//)
, castObject
, getDictionary
, getKey
, objectById
, origin
) where
import Codec.Compression.Zlib (compress, decompress)
import Control.Applicative (Alternative(..))
import Control.Monad (MonadPlus(..))
import Control.Monad.Reader (MonadReader(..), ReaderT)
import Control.Monad.Fail (MonadFail(..))
import Data.ByteString (ByteString)
import qualified Data.ByteString.Lazy as Lazy (ByteString, fromStrict, toStrict)
import Data.Map ((!))
import qualified Data.Map as Map (lookup)
import PDF.Box (Box(..))
import PDF.Layer (Layer(..))
import PDF.Object (
Dictionary, DirectObject(..), IndirectObjCoordinates(..)
, Name(..), Object(..), Structure(..)
)
import PDF.Output (ObjectId)
import Prelude hiding (fail)
import Text.Printf (printf)
type Except m = (Alternative m, MonadFail m)
type PDFContent m = ReaderT Layer m
newtype Error a = Error {
runError :: Either String a
} deriving (Alternative, Functor, Applicative, Monad, MonadPlus)
instance MonadFail Error where
fail = Error . Left
type Component = String
getDictionary :: Except m => Object -> PDFContent m Dictionary
getDictionary (Direct (Dictionary aDict)) = return aDict
getDictionary (Direct (Reference (IndirectObjCoordinates {objectId}))) =
objectById objectId >>= getDictionary
getDictionary (Stream {header}) = return header
getDictionary obj = expected "dictionary : " obj
expected :: (MonadFail m, Show a) => String -> a -> m b
expected name = fail . printf "Not a %s: %s" name . show
getKey :: Except m => String -> Object -> PDFContent m DirectObject
getKey key object = getDictionary object >>= catchMaybe . Map.lookup (Name key)
where
errorMessage =
printf "Key %s not found in object %s" key (show object)
catchMaybe = maybe (fail errorMessage) return
objectById :: Except m => ObjectId -> PDFContent m Object
objectById objectId = do
layer <- ask
return (objects layer ! objectId)
(./) :: Except m => PDFContent m Object -> Component -> PDFContent m Object
(./) object key = (object >>= getKey key >>= castObject)
castObject :: Except m => DirectObject -> PDFContent m Object
castObject (Reference (IndirectObjCoordinates {objectId})) =
objectById objectId
castObject directObject = return $ Direct directObject
(//) :: Except m => PDFContent m Object -> [Component] -> PDFContent m Object
(//) object [] = object
(//) object (key:keys) = object ./ key // keys
(>./) :: Except m => Object -> Component -> PDFContent m Object
(>./) object = (return object ./)
(>//) :: Except m => Object -> [Component] -> PDFContent m Object
(>//) object = (return object //)
origin :: Except m => PDFContent m Object
origin = Direct . Dictionary . trailer . docStructure <$> ask
data Clear = Clear
data Raw = Raw
onLazy :: (Lazy.ByteString -> Lazy.ByteString) -> ByteString -> ByteString
onLazy f = Lazy.toStrict . f . Lazy.fromStrict
contains :: String -> DirectObject -> Bool
contains needle (NameObject (Name n)) = needle == n
contains needle (Array directObjects) = oneOf directObjects (contains needle)
where
oneOf [] _ = False
oneOf (x:xs) p = p x || oneOf xs p
contains _ _ = False
instance MonadFail m => Box m Clear Object ByteString where
r Clear (Stream {header, streamContent}) = return $
case Map.lookup (Name "Filter") header of
Just directObject
| contains "FlateDecode" directObject -> onLazy decompress streamContent
_ -> streamContent
r _ obj = expected "stream" obj
w Clear streamContent obj@(Stream {header}) = return $
case Map.lookup (Name "Filter") header of
Just directObject
| contains "FlateDecode" directObject ->
obj {streamContent = onLazy compress streamContent}
_ -> obj {streamContent}
w _ _ obj = expected "stream" obj
instance MonadFail m => Box m Raw Object ByteString where
r Raw (Stream {streamContent}) = return streamContent
r _ obj = expected "stream" obj
w Raw streamContent obj@(Stream {}) = return $ obj {streamContent}
w _ _ obj = expected "stream" obj

View file

@ -5,7 +5,6 @@
{-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-}
module PDF.Output ( module PDF.Output (
OBuilder OBuilder
, ObjectId(..)
, OContext(..) , OContext(..)
, Offset(..) , Offset(..)
, Output(..) , Output(..)
@ -32,10 +31,9 @@ import Data.String (IsString(..))
import Control.Monad.RWS (RWS, runRWS, ask, get, listen, modify, tell) import Control.Monad.RWS (RWS, runRWS, ask, get, listen, modify, tell)
import qualified PDF.EOL as EOL (Style(..)) import qualified PDF.EOL as EOL (Style(..))
newtype ObjectId = ObjectId {getObjectId :: Int} deriving (Enum, Eq, Ord, Show)
newtype Offset = Offset {getOffset :: Int} deriving (Show) newtype Offset = Offset {getOffset :: Int} deriving (Show)
data Resource = StartXRef | Object ObjectId deriving (Eq, Ord) data Resource = StartXRef | ObjectId Int deriving (Eq, Ord)
newtype OContext a = OContext (RWS EOL.Style (Map Resource Offset) Offset a) newtype OContext a = OContext (RWS EOL.Style (Map Resource Offset) Offset a)
type OBuilder = OContext Builder type OBuilder = OContext Builder

View file

@ -19,9 +19,10 @@ import Control.Monad.Fail (MonadFail(..))
import Control.Monad.Reader (ReaderT, runReaderT) import Control.Monad.Reader (ReaderT, runReaderT)
import Control.Monad.State (StateT(..), evalStateT, gets, modify) import Control.Monad.State (StateT(..), evalStateT, gets, modify)
import Control.Monad.Trans (lift) import Control.Monad.Trans (lift)
import Data.OrderedMap (OrderedMap, build) import Data.Id (Id)
import Data.Map (Map) import Data.Map (Map)
import qualified Data.Map as Map (empty, fromList, insert, lookup, toList) import qualified Data.Map as Map (empty, fromList, insert, lookup, toList)
import Data.OrderedMap (OrderedMap, build)
import PDF.Box (Box(..)) import PDF.Box (Box(..))
import PDF.CMap (cMap) import PDF.CMap (cMap)
import PDF.Content (Content(..)) import PDF.Content (Content(..))
@ -37,26 +38,25 @@ import PDF.Object.Navigation (
Clear(..), PDFContent, (//), (>./), (>//), getDictionary Clear(..), PDFContent, (//), (>./), (>//), getDictionary
, getKey, objectById, origin , getKey, objectById, origin
) )
import PDF.Output (ObjectId(..))
import Prelude hiding (fail) import Prelude hiding (fail)
import Text.Printf (printf) import Text.Printf (printf)
type Except m = (Alternative m, MonadFail m) type Except m = (Alternative m, MonadFail m)
type InLayer m = ReaderT Layer m type InLayer m = ReaderT Layer m
type CachedFonts = Map ObjectId Font type CachedFonts = Map (Id Object) Font
type FontCache m = StateT CachedFonts (InLayer m) type FontCache m = StateT CachedFonts (InLayer m)
data Page = Page { data Page = Page {
contents :: OrderedMap ObjectId Content contents :: OrderedMap (Id Object) Content
, resources :: Dictionary , resources :: Dictionary
, source :: ObjectId , source :: (Id Object)
} }
loadContents :: Except m => DirectObject -> InLayer m (OrderedMap ObjectId Content) loadContents :: Except m => DirectObject -> InLayer m (OrderedMap (Id Object) Content)
loadContents directObject = loadContents directObject =
sequenceA . build loadContent $ objectIds directObject sequenceA . build loadContent $ objectIds directObject
where where
loadContent :: Except m => ObjectId -> InLayer m Content loadContent :: Except m => (Id Object) -> InLayer m Content
loadContent objectId = objectById objectId >>= r Clear >>= Content.parse loadContent objectId = objectById objectId >>= r Clear >>= Content.parse
objectIds (Array l) = l >>= getReference objectIds (Array l) = l >>= getReference
objectIds dirObj = getReference dirObj objectIds dirObj = getReference dirObj
@ -65,7 +65,7 @@ getFontDictionary :: Except m => Object -> InLayer m Dictionary
getFontDictionary pageObj = getFontDictionary pageObj =
(pageObj >// ["Resources", "Font"] >>= getDictionary) <|> return Map.empty (pageObj >// ["Resources", "Font"] >>= getDictionary) <|> return Map.empty
cache :: Except m => (ObjectId -> FontCache m Font) -> ObjectId -> FontCache m Font cache :: Except m => ((Id Object) -> FontCache m Font) -> (Id Object) -> FontCache m Font
cache loader objectId = cache loader objectId =
gets (Map.lookup objectId) >>= maybe load return gets (Map.lookup objectId) >>= maybe load return
where where
@ -74,7 +74,7 @@ cache loader objectId =
modify $ Map.insert objectId value modify $ Map.insert objectId value
return value return value
loadFont :: Except m => ObjectId -> FontCache m Font loadFont :: Except m => (Id Object) -> FontCache m Font
loadFont objectId = lift $ objectById objectId >>= tryMappings loadFont objectId = lift $ objectById objectId >>= tryMappings
where where
tryMappings object = tryMappings object =
@ -94,18 +94,18 @@ loadResources = foldM addFont Map.empty . Map.toList
flip (Map.insert name) output <$> cache loadFont objectId flip (Map.insert name) output <$> cache loadFont objectId
addFont output _ = return output addFont output _ = return output
getReference :: DirectObject -> [ObjectId] getReference :: DirectObject -> [(Id Object)]
getReference (Reference (IndirectObjCoordinates {objectId})) = [objectId] getReference (Reference (IndirectObjCoordinates {objectId})) = [objectId]
getReference _ = [] getReference _ = []
loadPage :: Except m => ObjectId -> InLayer m Page loadPage :: Except m => (Id Object) -> InLayer m Page
loadPage source = do loadPage source = do
page <- objectById source page <- objectById source
contents <- getKey "Contents" page >>= loadContents contents <- getKey "Contents" page >>= loadContents
resources <- getFontDictionary page resources <- getFontDictionary page
return $ Page {contents, resources, source} return $ Page {contents, resources, source}
pagesList :: Except m => InLayer m [ObjectId] pagesList :: Except m => InLayer m [(Id Object)]
pagesList = pagesList =
(origin // ["Root", "Pages"] >>= getKey "Kids" >>= return . getReferences) (origin // ["Root", "Pages"] >>= getKey "Kids" >>= return . getReferences)
<|> return [] <|> return []
@ -131,7 +131,7 @@ instance (Alternative m, MonadFail m) => Box m PageNumber Layer Page where
firstPage [] = fail "Page is out of bounds" firstPage [] = fail "Page is out of bounds"
firstPage (p:_) = loadPage p firstPage (p:_) = loadPage p
instance Monad m => Box m Contents Page (OrderedMap ObjectId Content) where instance Monad m => Box m Contents Page (OrderedMap (Id Object) Content) where
r Contents = return . contents r Contents = return . contents
cacheFonts :: Monad m => StateT CachedFonts m a -> m a cacheFonts :: Monad m => StateT CachedFonts m a -> m a