Generalize Ids out of Content to handle Object Ids too
This commit is contained in:
parent
0f857c457d
commit
f31e9eb38b
13 changed files with 203 additions and 62 deletions
|
@ -17,6 +17,7 @@ cabal-version: >=1.10
|
|||
|
||||
library
|
||||
exposed-modules: Data.OrderedMap
|
||||
, Data.Id
|
||||
, PDF
|
||||
, PDF.Box
|
||||
, PDF.CMap
|
||||
|
@ -88,6 +89,7 @@ Test-Suite unitTests
|
|||
main-is: Main.hs
|
||||
other-modules: Object
|
||||
, Data.ByteString.Char8.Util
|
||||
, Data.Id
|
||||
, PDF.EOL
|
||||
, PDF.Parser
|
||||
, PDF.Object
|
||||
|
|
|
@ -7,6 +7,7 @@ import Control.Monad.Reader (ReaderT, runReaderT)
|
|||
import Data.ByteString.Lazy.Char8 (ByteString)
|
||||
import qualified Data.ByteString.Char8 as BS (readFile)
|
||||
import qualified Data.ByteString.Lazy.Char8 as Lazy (putStrLn)
|
||||
import Data.Id (Id(..))
|
||||
import PDF (Document(..), parseDocument)
|
||||
import PDF.Box (Box(..))
|
||||
import PDF.Layer (Layer(..), unify)
|
||||
|
@ -14,7 +15,7 @@ import PDF.Object (Object(..))
|
|||
import PDF.Object.Navigation (
|
||||
Clear(..), Raw(..), (//), objectById, origin
|
||||
)
|
||||
import PDF.Output (ObjectId(..), Output)
|
||||
import PDF.Output (Output)
|
||||
import qualified PDF.Output as Output (render)
|
||||
import Prelude hiding (fail)
|
||||
import System.Environment (getArgs, getProgName)
|
||||
|
@ -41,7 +42,7 @@ parse [inputFile] = return (inputFile, display origin)
|
|||
parse [inputFile, key] =
|
||||
return (inputFile, clear . maybe (byPath key) byId $ readMaybe key)
|
||||
where
|
||||
byId = objectById . ObjectId
|
||||
byId = objectById . Id
|
||||
byPath path = origin // (explode path)
|
||||
explode "" = []
|
||||
explode path =
|
||||
|
|
|
@ -3,6 +3,7 @@ import Control.Monad ((>=>))
|
|||
import Control.Monad.IO.Class (liftIO)
|
||||
import Control.Monad.Reader (runReaderT)
|
||||
import qualified Data.ByteString.Char8 as BS (readFile)
|
||||
import Data.Id (Id(..))
|
||||
import Data.Map (mapWithKey)
|
||||
import qualified Data.Map as Map (toList)
|
||||
import Data.OrderedMap (mapi)
|
||||
|
@ -11,7 +12,6 @@ import PDF (UnifiedLayers(..), parseDocument)
|
|||
import PDF.Box (Box(..))
|
||||
import PDF.Content.Text (Chunks(..))
|
||||
import PDF.Layer (Layer)
|
||||
import PDF.Output (ObjectId(..))
|
||||
import PDF.Pages (
|
||||
Contents(..), FontCache, Page(..), PageNumber(..), Pages(..), cacheFonts, withResources
|
||||
)
|
||||
|
@ -30,7 +30,7 @@ displayPage = withResources (
|
|||
where
|
||||
display a b v =
|
||||
liftIO . putStrLn $
|
||||
printf "%d@%s: %s" (getObjectId a) (show b) (Text.unpack v)
|
||||
printf "%d@%d: %s" (getId a) (getId b) (Text.unpack v)
|
||||
|
||||
getAll :: Layer -> IO ()
|
||||
getAll layer =
|
||||
|
|
12
src/Data/Id.hs
Normal file
12
src/Data/Id.hs
Normal file
|
@ -0,0 +1,12 @@
|
|||
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
||||
module Data.Id (
|
||||
Id(..)
|
||||
, Indexed
|
||||
) where
|
||||
|
||||
import Data.Map (Map)
|
||||
|
||||
newtype Id a = Id {
|
||||
getId :: Int
|
||||
} deriving (Eq, Enum, Ord, Show)
|
||||
type Indexed a = Map (Id a) a
|
|
@ -9,6 +9,7 @@ import Control.Monad.State (get, gets, modify)
|
|||
import Data.Attoparsec.ByteString.Char8 (option)
|
||||
import Data.ByteString.Char8 (ByteString)
|
||||
import qualified Data.ByteString.Char8 as BS (cons, drop, unpack)
|
||||
import Data.Id (Id(..))
|
||||
import Data.Map ((!))
|
||||
import qualified Data.Map as Map (empty, insert, lookup)
|
||||
import qualified PDF.EOL as EOL (charset, parser)
|
||||
|
@ -19,7 +20,7 @@ import PDF.Object (
|
|||
, Structure(..), XRefEntry(..), XRefSection
|
||||
, blank, dictionary, directObject, integer, line
|
||||
)
|
||||
import PDF.Output (ObjectId(..), Offset(..))
|
||||
import PDF.Output (Offset(..))
|
||||
import PDF.Parser (MonadParser(..), Parser, (<?>), evalParser, on)
|
||||
|
||||
data UserState = UserState {
|
||||
|
@ -33,7 +34,7 @@ type SParser = Parser UserState
|
|||
modifyFlow :: (Flow -> Flow) -> SParser ()
|
||||
modifyFlow f = modify $ \state -> state {flow = f $ flow state}
|
||||
|
||||
addObject :: ObjectId -> Object -> SParser ()
|
||||
addObject :: (Id Object) -> Object -> SParser ()
|
||||
addObject objectId newObject = modifyFlow $ \flow -> flow {
|
||||
tmpObjects = Map.insert objectId newObject $ tmpObjects flow
|
||||
}
|
||||
|
@ -48,7 +49,7 @@ comment = BS.unpack <$> (option "" afterPercent <* EOL.parser)
|
|||
where
|
||||
afterPercent = BS.cons <$> char '%' <*> takeAll (not . (`elem` EOL.charset))
|
||||
|
||||
lookupOffset :: ObjectId -> SParser Offset
|
||||
lookupOffset :: (Id Object) -> SParser Offset
|
||||
lookupOffset objectId = do
|
||||
table <- gets xreferences
|
||||
case Map.lookup objectId table >>= entryOffset of
|
||||
|
@ -59,7 +60,7 @@ lookupOffset objectId = do
|
|||
entryOffset (InUse {offset}) = Just offset
|
||||
entryOffset _ = Nothing
|
||||
|
||||
loadNumber :: ObjectId -> SParser Double
|
||||
loadNumber :: (Id Object) -> SParser Double
|
||||
loadNumber objectId = do
|
||||
offset <- getOffset <$> lookupOffset objectId
|
||||
objectStart <- BS.drop offset <$> gets input
|
||||
|
@ -98,7 +99,7 @@ object = streamObject <|> Direct <$> directObject
|
|||
|
||||
indirectObjCoordinates :: SParser IndirectObjCoordinates
|
||||
indirectObjCoordinates = do
|
||||
objectId <- ObjectId <$> integer
|
||||
objectId <- Id <$> integer
|
||||
coordinates <- IndirectObjCoordinates objectId <$> integer
|
||||
objectValue <- line "obj" *> object <* blank <* line "endobj"
|
||||
addObject objectId objectValue
|
||||
|
|
|
@ -9,8 +9,6 @@ module PDF.Content (
|
|||
Content(..)
|
||||
, ContentUnit(..)
|
||||
, GraphicContextUnit(..)
|
||||
, Id(..)
|
||||
, Indexed
|
||||
, TextContext
|
||||
, parse
|
||||
) where
|
||||
|
@ -21,7 +19,8 @@ import Control.Monad.Reader (asks, runReader)
|
|||
import Control.Monad.State (evalStateT, gets, modify)
|
||||
import Data.Attoparsec.ByteString.Char8 (sepBy)
|
||||
import Data.ByteString (ByteString)
|
||||
import Data.Map (Map, (!))
|
||||
import Data.Id (Id(..), Indexed)
|
||||
import Data.Map ((!))
|
||||
import qualified Data.Map as Map (empty, insert, size)
|
||||
import PDF.Box (Box(..))
|
||||
import PDF.Content.Operator (Instruction, operator)
|
||||
|
@ -29,9 +28,7 @@ import PDF.Object (blank, directObject)
|
|||
import PDF.Output (Output(..), line)
|
||||
import PDF.Parser (Parser, runParser, string)
|
||||
|
||||
newtype Id a = Id Int deriving (Eq, Ord, Show)
|
||||
data Instructions = Instructions
|
||||
type Indexed a = Map (Id a) a
|
||||
|
||||
data GraphicContextUnit =
|
||||
GraphicInstruction (Id Instruction)
|
||||
|
|
|
@ -15,14 +15,14 @@ import Control.Monad (foldM)
|
|||
import Control.Monad.Fail (MonadFail(..))
|
||||
import Control.Monad.Reader (MonadReader(..), ReaderT, asks, runReaderT)
|
||||
import Control.Monad.State (MonadState(..), StateT, evalStateT)
|
||||
import Data.Id (Id(..), Indexed)
|
||||
import Data.Map ((!))
|
||||
import qualified Data.Map as Map (empty, singleton)
|
||||
import Data.Text (Text, breakOn)
|
||||
import qualified Data.Text as Text (drop)
|
||||
import PDF.Box (Box(..))
|
||||
import PDF.Content (
|
||||
Content, ContentUnit(..), Id(..), Indexed, GraphicContextUnit(..)
|
||||
, contentUnits
|
||||
Content, ContentUnit(..), GraphicContextUnit(..), contentUnits
|
||||
)
|
||||
import qualified PDF.Content as Content (Content(..))
|
||||
import PDF.Content.Operator (Instruction, Operator(..))
|
||||
|
|
|
@ -7,31 +7,33 @@ module PDF.Layer (
|
|||
, unify
|
||||
) where
|
||||
|
||||
import Data.Id (Id(..), Indexed)
|
||||
import Data.Map (Map, (!), mapWithKey, member)
|
||||
import qualified Data.Map as Map (empty, keysSet, member, union)
|
||||
import qualified Data.Set as Set (delete, toList)
|
||||
import PDF.Box (Box(..))
|
||||
import PDF.Object (
|
||||
IndexedObjects, IndirectObjCoordinates(..), Object, Occurrence(..)
|
||||
IndirectObjCoordinates(..), Object, Occurrence(..)
|
||||
, Structure(..), XRefEntry(..), XRefSection, eofMarker, outputBody
|
||||
)
|
||||
import qualified PDF.Output as Output (line)
|
||||
import PDF.Output (
|
||||
ObjectId, Offset(..), Output(..), Resource(..), byteString, getOffset
|
||||
Offset(..), Output(..), Resource(..), byteString, getOffset
|
||||
, getOffsets, newLine
|
||||
)
|
||||
import Text.Printf (printf)
|
||||
|
||||
data Layer = Layer {
|
||||
occurrences :: [Occurrence]
|
||||
, objects :: IndexedObjects
|
||||
, objects :: (Indexed Object)
|
||||
, docStructure :: Structure
|
||||
} deriving Show
|
||||
|
||||
updateXrefs :: XRefSection -> Map Resource Offset -> (XRefSection, Offset)
|
||||
updateXrefs xRef offsets = (mapWithKey updateEntry xRef, offsets ! StartXRef)
|
||||
where
|
||||
updateEntry objectId e@(InUse {}) = e {offset = offsets ! (Object objectId)}
|
||||
updateEntry objectId e@(InUse {}) =
|
||||
e {offset = offsets ! (ObjectId $ getId objectId)}
|
||||
updateEntry _ e = e
|
||||
|
||||
instance Output Layer where
|
||||
|
@ -52,7 +54,7 @@ instance Output Layer where
|
|||
|
||||
data Objects = Objects
|
||||
|
||||
instance Monad m => Box m Objects Layer (Map ObjectId Object) where
|
||||
instance Monad m => Box m Objects Layer (Indexed Object) where
|
||||
r Objects = return . objects
|
||||
w Objects newObjects layer@(Layer {occurrences, docStructure}) =
|
||||
return $ layer {
|
||||
|
@ -99,7 +101,7 @@ unifyDocStructure new old = Structure {
|
|||
, trailer = Map.union (trailer new) (trailer old)
|
||||
}
|
||||
|
||||
unifyOccurrences :: IndexedObjects -> [Occurrence] -> [Occurrence] -> [Occurrence]
|
||||
unifyOccurrences :: (Indexed Object) -> [Occurrence] -> [Occurrence] -> [Occurrence]
|
||||
unifyOccurrences objects new = foldr addOlder new
|
||||
where
|
||||
addOlder occurrence@(Comment _) existing = occurrence : existing
|
||||
|
|
|
@ -5,7 +5,6 @@ module PDF.Object (
|
|||
Dictionary
|
||||
, DirectObject(..)
|
||||
, Flow(..)
|
||||
, IndexedObjects
|
||||
, IndirectObjCoordinates(..)
|
||||
, InputStructure(..)
|
||||
, Name(..)
|
||||
|
@ -41,6 +40,7 @@ import qualified Data.ByteString.Char8 as Char8 (
|
|||
cons, length, pack, singleton, snoc, unpack
|
||||
)
|
||||
import Data.ByteString.Char8.Util (B16Int(..), b16ToBytes, unescape)
|
||||
import Data.Id (Id(..), Indexed)
|
||||
import Data.Map (Map, (!))
|
||||
import qualified Data.Map as Map (
|
||||
delete, empty, fromList, lookup, minViewWithKey, toList, union
|
||||
|
@ -49,8 +49,8 @@ import qualified Data.Set as Set (fromList, member)
|
|||
import qualified PDF.EOL as EOL (charset, parser)
|
||||
import qualified PDF.Output as Output (line, string)
|
||||
import PDF.Output (
|
||||
OBuilder, ObjectId(..), Offset(..), Output(..), Resource(..)
|
||||
, byteString, getObjectId, getOffset, join, newLine, saveOffset
|
||||
OBuilder, Offset(..), Output(..), Resource(..)
|
||||
, byteString, getOffset, join, newLine, saveOffset
|
||||
)
|
||||
import PDF.Parser (MonadParser(..), Parser, (<?>), octDigit, oneOf)
|
||||
import Text.Printf (printf)
|
||||
|
@ -83,8 +83,6 @@ integer = decNumber <* blank <?> "decimal integer"
|
|||
-- OBJECTS
|
||||
-------------------------------------
|
||||
|
||||
type IndexedObjects = Map ObjectId Object
|
||||
|
||||
--
|
||||
-- Boolean
|
||||
--
|
||||
|
@ -190,13 +188,13 @@ nullObject = string "null" *> return () <?> "null object"
|
|||
-- Reference
|
||||
--
|
||||
data IndirectObjCoordinates = IndirectObjCoordinates {
|
||||
objectId :: ObjectId
|
||||
objectId :: (Id Object)
|
||||
, versionNumber :: Int
|
||||
} deriving Show
|
||||
|
||||
reference :: MonadParser m => m IndirectObjCoordinates
|
||||
reference = IndirectObjCoordinates
|
||||
<$> (fmap ObjectId integer) <*> integer <* char 'R' <?> "reference to an object"
|
||||
<$> (fmap Id integer) <*> integer <* char 'R' <?> "reference to an object"
|
||||
|
||||
--
|
||||
-- DirectObject
|
||||
|
@ -221,7 +219,7 @@ instance Output DirectObject where
|
|||
output (Dictionary d) = output d
|
||||
output (Null) = "null"
|
||||
output (Reference (IndirectObjCoordinates {objectId, versionNumber})) =
|
||||
Output.string (printf "%d %d R" (getObjectId objectId) versionNumber)
|
||||
Output.string (printf "%d %d R" (getId objectId) versionNumber)
|
||||
|
||||
directObject :: MonadParser m => m DirectObject
|
||||
directObject = (peek >>= dispatch) <?> "direct object"
|
||||
|
@ -262,16 +260,16 @@ instance Output Object where
|
|||
--
|
||||
data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show
|
||||
|
||||
outputOccurrence :: IndexedObjects -> Occurrence -> OBuilder
|
||||
outputOccurrence :: (Indexed Object) -> Occurrence -> OBuilder
|
||||
outputOccurrence _ (Comment c) = Output.line c
|
||||
outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) =
|
||||
saveOffset (Object objectId) >> mconcat [
|
||||
Output.line (printf "%d %d obj" (getObjectId objectId) versionNumber)
|
||||
saveOffset (ObjectId $ getId objectId) >> mconcat [
|
||||
Output.line (printf "%d %d obj" (getId objectId) versionNumber)
|
||||
, output (objects ! objectId), newLine
|
||||
, Output.line "endobj"
|
||||
]
|
||||
|
||||
outputBody :: ([Occurrence], IndexedObjects) -> OBuilder
|
||||
outputBody :: ([Occurrence], (Indexed Object)) -> OBuilder
|
||||
outputBody (occurrences, objects) =
|
||||
output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef
|
||||
|
||||
|
@ -286,7 +284,7 @@ data XRefEntry = InUse {
|
|||
offset :: Offset
|
||||
, generation :: Int
|
||||
} | Free {
|
||||
nextFree :: ObjectId
|
||||
nextFree :: (Id Object)
|
||||
, generation :: Int
|
||||
} deriving Show
|
||||
|
||||
|
@ -294,7 +292,7 @@ instance Output XRefEntry where
|
|||
output (InUse {offset, generation}) =
|
||||
Output.line (printf "%010d %05d n " (getOffset offset) generation)
|
||||
output (Free {nextFree, generation}) =
|
||||
Output.line (printf "%010d %05d f " (getObjectId nextFree) generation)
|
||||
Output.line (printf "%010d %05d f " (getId nextFree) generation)
|
||||
|
||||
entry :: Parser u XRefEntry
|
||||
entry = do
|
||||
|
@ -306,28 +304,28 @@ entry = do
|
|||
char 'n' *> return (InUse {offset = Offset big, generation})
|
||||
free :: Int -> Int -> Parser u XRefEntry
|
||||
free big generation =
|
||||
char 'f' *> return (Free {nextFree = ObjectId big, generation})
|
||||
char 'f' *> return (Free {nextFree = Id big, generation})
|
||||
|
||||
--
|
||||
-- XRefSubSection
|
||||
--
|
||||
data XRefSubSection = XRefSubSection {
|
||||
firstObjectId :: ObjectId
|
||||
firstObjectId :: (Id Object)
|
||||
, entries :: [XRefEntry]
|
||||
} deriving Show
|
||||
|
||||
instance Output XRefSubSection where
|
||||
output (XRefSubSection {firstObjectId, entries}) =
|
||||
Output.line (printf "%d %d" (getObjectId firstObjectId) (length entries))
|
||||
Output.line (printf "%d %d" (getId firstObjectId) (length entries))
|
||||
`mappend` output entries
|
||||
|
||||
xRefSubSection :: Parser u XRefSubSection
|
||||
xRefSubSection = do
|
||||
(firstId, entriesNumber) <- (,) <$> integer <*> integer <?> "XRef subsection"
|
||||
entries <- count entriesNumber entry
|
||||
return $ XRefSubSection {firstObjectId = ObjectId firstId, entries}
|
||||
return $ XRefSubSection {firstObjectId = Id firstId, entries}
|
||||
|
||||
type XRefSection = Map ObjectId XRefEntry
|
||||
type XRefSection = Map (Id Object) XRefEntry
|
||||
|
||||
instance Output XRefSection where
|
||||
output = output . sections
|
||||
|
@ -335,11 +333,11 @@ instance Output XRefSection where
|
|||
sections tmp =
|
||||
case Map.minViewWithKey tmp of
|
||||
Nothing -> []
|
||||
Just ((objectId@(ObjectId value), firstEntry), rest) ->
|
||||
Just ((objectId@(Id value), firstEntry), rest) ->
|
||||
let (subSection, sndRest) = section objectId [firstEntry] (value + 1) rest in
|
||||
subSection : sections sndRest
|
||||
section firstObjectId stack nextValue tmp =
|
||||
let nextId = ObjectId nextValue in
|
||||
let nextId = Id nextValue in
|
||||
case Map.lookup nextId tmp of
|
||||
Nothing -> (XRefSubSection {firstObjectId, entries = reverse stack}, tmp)
|
||||
Just nextEntry ->
|
||||
|
@ -377,5 +375,5 @@ structure =
|
|||
--
|
||||
data Flow = Flow {
|
||||
occurrencesStack :: [Occurrence]
|
||||
, tmpObjects :: IndexedObjects
|
||||
, tmpObjects :: (Indexed Object)
|
||||
} deriving Show
|
||||
|
|
|
@ -23,6 +23,7 @@ import Control.Monad.Reader (MonadReader(..))
|
|||
import Control.Monad.Fail (MonadFail(..))
|
||||
import Data.ByteString (ByteString)
|
||||
import qualified Data.ByteString.Lazy as Lazy (ByteString, fromStrict, toStrict)
|
||||
import Data.Id (Id)
|
||||
import Data.Map ((!))
|
||||
import qualified Data.Map as Map (lookup)
|
||||
import PDF.Box (Box(..))
|
||||
|
@ -31,7 +32,6 @@ import PDF.Object (
|
|||
Dictionary, DirectObject(..), IndirectObjCoordinates(..)
|
||||
, Name(..), Object(..), Structure(..)
|
||||
)
|
||||
import PDF.Output (ObjectId)
|
||||
import Prelude hiding (fail)
|
||||
import Text.Printf (printf)
|
||||
|
||||
|
@ -55,7 +55,7 @@ getKey key object = getDictionary object >>= catchMaybe . Map.lookup (Name key)
|
|||
printf "Key %s not found in object %s" key (show object)
|
||||
catchMaybe = maybe (fail errorMessage) return
|
||||
|
||||
objectById :: PDFContent m => ObjectId -> m Object
|
||||
objectById :: PDFContent m => (Id Object) -> m Object
|
||||
objectById objectId = do
|
||||
layer <- ask
|
||||
return (objects layer ! objectId)
|
||||
|
|
130
src/PDF/Object/Navigation.hs.bak
Normal file
130
src/PDF/Object/Navigation.hs.bak
Normal file
|
@ -0,0 +1,130 @@
|
|||
{-# LANGUAGE NamedFieldPuns #-}
|
||||
{-# LANGUAGE ConstraintKinds #-}
|
||||
{-# LANGUAGE FlexibleContexts #-}
|
||||
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
||||
{-# LANGUAGE MultiParamTypeClasses #-}
|
||||
{-# LANGUAGE FlexibleInstances #-}
|
||||
module PDF.Object.Navigation (
|
||||
Error(..)
|
||||
, Except
|
||||
, PDFContent
|
||||
, Clear(..)
|
||||
, Raw(..)
|
||||
, (./)
|
||||
, (//)
|
||||
, (>./)
|
||||
, (>//)
|
||||
, castObject
|
||||
, getDictionary
|
||||
, getKey
|
||||
, objectById
|
||||
, origin
|
||||
) where
|
||||
|
||||
import Codec.Compression.Zlib (compress, decompress)
|
||||
import Control.Applicative (Alternative(..))
|
||||
import Control.Monad (MonadPlus(..))
|
||||
import Control.Monad.Reader (MonadReader(..), ReaderT)
|
||||
import Control.Monad.Fail (MonadFail(..))
|
||||
import Data.ByteString (ByteString)
|
||||
import qualified Data.ByteString.Lazy as Lazy (ByteString, fromStrict, toStrict)
|
||||
import Data.Map ((!))
|
||||
import qualified Data.Map as Map (lookup)
|
||||
import PDF.Box (Box(..))
|
||||
import PDF.Layer (Layer(..))
|
||||
import PDF.Object (
|
||||
Dictionary, DirectObject(..), IndirectObjCoordinates(..)
|
||||
, Name(..), Object(..), Structure(..)
|
||||
)
|
||||
import PDF.Output (ObjectId)
|
||||
import Prelude hiding (fail)
|
||||
import Text.Printf (printf)
|
||||
|
||||
type Except m = (Alternative m, MonadFail m)
|
||||
type PDFContent m = ReaderT Layer m
|
||||
newtype Error a = Error {
|
||||
runError :: Either String a
|
||||
} deriving (Alternative, Functor, Applicative, Monad, MonadPlus)
|
||||
instance MonadFail Error where
|
||||
fail = Error . Left
|
||||
type Component = String
|
||||
|
||||
getDictionary :: Except m => Object -> PDFContent m Dictionary
|
||||
getDictionary (Direct (Dictionary aDict)) = return aDict
|
||||
getDictionary (Direct (Reference (IndirectObjCoordinates {objectId}))) =
|
||||
objectById objectId >>= getDictionary
|
||||
getDictionary (Stream {header}) = return header
|
||||
getDictionary obj = expected "dictionary : " obj
|
||||
|
||||
expected :: (MonadFail m, Show a) => String -> a -> m b
|
||||
expected name = fail . printf "Not a %s: %s" name . show
|
||||
|
||||
getKey :: Except m => String -> Object -> PDFContent m DirectObject
|
||||
getKey key object = getDictionary object >>= catchMaybe . Map.lookup (Name key)
|
||||
where
|
||||
errorMessage =
|
||||
printf "Key %s not found in object %s" key (show object)
|
||||
catchMaybe = maybe (fail errorMessage) return
|
||||
|
||||
objectById :: Except m => ObjectId -> PDFContent m Object
|
||||
objectById objectId = do
|
||||
layer <- ask
|
||||
return (objects layer ! objectId)
|
||||
|
||||
(./) :: Except m => PDFContent m Object -> Component -> PDFContent m Object
|
||||
(./) object key = (object >>= getKey key >>= castObject)
|
||||
|
||||
castObject :: Except m => DirectObject -> PDFContent m Object
|
||||
castObject (Reference (IndirectObjCoordinates {objectId})) =
|
||||
objectById objectId
|
||||
castObject directObject = return $ Direct directObject
|
||||
|
||||
(//) :: Except m => PDFContent m Object -> [Component] -> PDFContent m Object
|
||||
(//) object [] = object
|
||||
(//) object (key:keys) = object ./ key // keys
|
||||
|
||||
(>./) :: Except m => Object -> Component -> PDFContent m Object
|
||||
(>./) object = (return object ./)
|
||||
|
||||
(>//) :: Except m => Object -> [Component] -> PDFContent m Object
|
||||
(>//) object = (return object //)
|
||||
|
||||
origin :: Except m => PDFContent m Object
|
||||
origin = Direct . Dictionary . trailer . docStructure <$> ask
|
||||
|
||||
data Clear = Clear
|
||||
data Raw = Raw
|
||||
|
||||
onLazy :: (Lazy.ByteString -> Lazy.ByteString) -> ByteString -> ByteString
|
||||
onLazy f = Lazy.toStrict . f . Lazy.fromStrict
|
||||
|
||||
contains :: String -> DirectObject -> Bool
|
||||
contains needle (NameObject (Name n)) = needle == n
|
||||
contains needle (Array directObjects) = oneOf directObjects (contains needle)
|
||||
where
|
||||
oneOf [] _ = False
|
||||
oneOf (x:xs) p = p x || oneOf xs p
|
||||
contains _ _ = False
|
||||
|
||||
instance MonadFail m => Box m Clear Object ByteString where
|
||||
r Clear (Stream {header, streamContent}) = return $
|
||||
case Map.lookup (Name "Filter") header of
|
||||
Just directObject
|
||||
| contains "FlateDecode" directObject -> onLazy decompress streamContent
|
||||
_ -> streamContent
|
||||
r _ obj = expected "stream" obj
|
||||
|
||||
w Clear streamContent obj@(Stream {header}) = return $
|
||||
case Map.lookup (Name "Filter") header of
|
||||
Just directObject
|
||||
| contains "FlateDecode" directObject ->
|
||||
obj {streamContent = onLazy compress streamContent}
|
||||
_ -> obj {streamContent}
|
||||
w _ _ obj = expected "stream" obj
|
||||
|
||||
instance MonadFail m => Box m Raw Object ByteString where
|
||||
r Raw (Stream {streamContent}) = return streamContent
|
||||
r _ obj = expected "stream" obj
|
||||
|
||||
w Raw streamContent obj@(Stream {}) = return $ obj {streamContent}
|
||||
w _ _ obj = expected "stream" obj
|
|
@ -5,7 +5,6 @@
|
|||
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
||||
module PDF.Output (
|
||||
OBuilder
|
||||
, ObjectId(..)
|
||||
, OContext(..)
|
||||
, Offset(..)
|
||||
, Output(..)
|
||||
|
@ -32,10 +31,9 @@ import Data.String (IsString(..))
|
|||
import Control.Monad.RWS (RWS, runRWS, ask, get, listen, modify, tell)
|
||||
import qualified PDF.EOL as EOL (Style(..))
|
||||
|
||||
newtype ObjectId = ObjectId {getObjectId :: Int} deriving (Enum, Eq, Ord, Show)
|
||||
newtype Offset = Offset {getOffset :: Int} deriving (Show)
|
||||
|
||||
data Resource = StartXRef | Object ObjectId deriving (Eq, Ord)
|
||||
data Resource = StartXRef | ObjectId Int deriving (Eq, Ord)
|
||||
|
||||
newtype OContext a = OContext (RWS EOL.Style (Map Resource Offset) Offset a)
|
||||
type OBuilder = OContext Builder
|
||||
|
|
|
@ -19,9 +19,10 @@ import Control.Monad.Fail (MonadFail(..))
|
|||
import Control.Monad.Reader (ReaderT, runReaderT)
|
||||
import Control.Monad.State (StateT(..), evalStateT, gets, modify)
|
||||
import Control.Monad.Trans (lift)
|
||||
import Data.OrderedMap (OrderedMap, build)
|
||||
import Data.Id (Id)
|
||||
import Data.Map (Map)
|
||||
import qualified Data.Map as Map (empty, fromList, insert, lookup, toList)
|
||||
import Data.OrderedMap (OrderedMap, build)
|
||||
import PDF.Box (Box(..))
|
||||
import PDF.CMap (cMap)
|
||||
import PDF.Content (Content(..))
|
||||
|
@ -37,26 +38,25 @@ import PDF.Object.Navigation (
|
|||
Clear(..), PDFContent, (//), (>./), (>//), getDictionary
|
||||
, getKey, objectById, origin
|
||||
)
|
||||
import PDF.Output (ObjectId(..))
|
||||
import Prelude hiding (fail)
|
||||
import Text.Printf (printf)
|
||||
|
||||
type Except m = (Alternative m, MonadFail m)
|
||||
type InLayer m = ReaderT Layer m
|
||||
|
||||
type CachedFonts = Map ObjectId Font
|
||||
type CachedFonts = Map (Id Object) Font
|
||||
type FontCache m = StateT CachedFonts (InLayer m)
|
||||
data Page = Page {
|
||||
contents :: OrderedMap ObjectId Content
|
||||
contents :: OrderedMap (Id Object) Content
|
||||
, resources :: Dictionary
|
||||
, source :: ObjectId
|
||||
, source :: (Id Object)
|
||||
}
|
||||
|
||||
loadContents :: Except m => DirectObject -> InLayer m (OrderedMap ObjectId Content)
|
||||
loadContents :: Except m => DirectObject -> InLayer m (OrderedMap (Id Object) Content)
|
||||
loadContents directObject =
|
||||
sequenceA . build loadContent $ objectIds directObject
|
||||
where
|
||||
loadContent :: Except m => ObjectId -> InLayer m Content
|
||||
loadContent :: Except m => (Id Object) -> InLayer m Content
|
||||
loadContent objectId = objectById objectId >>= r Clear >>= Content.parse
|
||||
objectIds (Array l) = l >>= getReference
|
||||
objectIds dirObj = getReference dirObj
|
||||
|
@ -65,7 +65,7 @@ getFontDictionary :: Except m => Object -> InLayer m Dictionary
|
|||
getFontDictionary pageObj =
|
||||
(pageObj >// ["Resources", "Font"] >>= getDictionary) <|> return Map.empty
|
||||
|
||||
cache :: Except m => (ObjectId -> FontCache m Font) -> ObjectId -> FontCache m Font
|
||||
cache :: Except m => ((Id Object) -> FontCache m Font) -> (Id Object) -> FontCache m Font
|
||||
cache loader objectId =
|
||||
gets (Map.lookup objectId) >>= maybe load return
|
||||
where
|
||||
|
@ -74,7 +74,7 @@ cache loader objectId =
|
|||
modify $ Map.insert objectId value
|
||||
return value
|
||||
|
||||
loadFont :: Except m => ObjectId -> FontCache m Font
|
||||
loadFont :: Except m => (Id Object) -> FontCache m Font
|
||||
loadFont objectId = lift $ objectById objectId >>= tryMappings
|
||||
where
|
||||
tryMappings object =
|
||||
|
@ -94,18 +94,18 @@ loadResources = foldM addFont Map.empty . Map.toList
|
|||
flip (Map.insert name) output <$> cache loadFont objectId
|
||||
addFont output _ = return output
|
||||
|
||||
getReference :: DirectObject -> [ObjectId]
|
||||
getReference :: DirectObject -> [(Id Object)]
|
||||
getReference (Reference (IndirectObjCoordinates {objectId})) = [objectId]
|
||||
getReference _ = []
|
||||
|
||||
loadPage :: Except m => ObjectId -> InLayer m Page
|
||||
loadPage :: Except m => (Id Object) -> InLayer m Page
|
||||
loadPage source = do
|
||||
page <- objectById source
|
||||
contents <- getKey "Contents" page >>= loadContents
|
||||
resources <- getFontDictionary page
|
||||
return $ Page {contents, resources, source}
|
||||
|
||||
pagesList :: Except m => InLayer m [ObjectId]
|
||||
pagesList :: Except m => InLayer m [(Id Object)]
|
||||
pagesList =
|
||||
(origin // ["Root", "Pages"] >>= getKey "Kids" >>= return . getReferences)
|
||||
<|> return []
|
||||
|
@ -131,7 +131,7 @@ instance (Alternative m, MonadFail m) => Box m PageNumber Layer Page where
|
|||
firstPage [] = fail "Page is out of bounds"
|
||||
firstPage (p:_) = loadPage p
|
||||
|
||||
instance Monad m => Box m Contents Page (OrderedMap ObjectId Content) where
|
||||
instance Monad m => Box m Contents Page (OrderedMap (Id Object) Content) where
|
||||
r Contents = return . contents
|
||||
|
||||
cacheFonts :: Monad m => StateT CachedFonts m a -> m a
|
||||
|
|
Loading…
Reference in a new issue