From 6096a1a2379b785b95c3431927f7ace5f5815a72 Mon Sep 17 00:00:00 2001 From: Tissevert Date: Sat, 15 Feb 2020 13:51:24 +0100 Subject: [PATCH] Simplify navigations by centering everything on Objects to avoid needing to many conversion tools between DirectObject / Object / Dictionary --- examples/getObj.hs | 4 +-- src/PDF/Object/Navigation.hs | 60 ++++++++++++++++-------------------- src/PDF/Pages.hs | 57 ++++++++++++++++------------------ 3 files changed, 55 insertions(+), 66 deletions(-) diff --git a/examples/getObj.hs b/examples/getObj.hs index 1f79821..1b05412 100644 --- a/examples/getObj.hs +++ b/examples/getObj.hs @@ -8,7 +8,7 @@ import qualified Data.ByteString.Lazy.Char8 as Lazy (putStrLn) import PDF (Document(..), parseDocument) import PDF.Object (Content(..), Object(..)) import PDF.Object.Navigation ( - Error(..), (//), castObject, objectById, openStream, origin + Error(..), (//), objectById, openStream, origin ) import PDF.Output (ObjectId(..), Output) import qualified PDF.Output as Output (render) @@ -34,7 +34,7 @@ parse [inputFile, key] = return (inputFile, clear . maybe (byPath key) byId $ readMaybe key) where byId = objectById . ObjectId - byPath path = (origin >>= (// (explode path)) >>= castObject) + byPath path = origin // (explode path) explode "" = [] explode path = case break (== '.') path of diff --git a/src/PDF/Object/Navigation.hs b/src/PDF/Object/Navigation.hs index ad52520..02c0568 100644 --- a/src/PDF/Object/Navigation.hs +++ b/src/PDF/Object/Navigation.hs @@ -4,12 +4,11 @@ {-# LANGUAGE GeneralizedNewtypeDeriving #-} module PDF.Object.Navigation ( Error(..) + , (./) , (//) - , castObject - , dictionaryById + , (>./) + , (>//) , getDictionary - , getField - , follow , objectById , openStream , origin @@ -39,21 +38,12 @@ newtype Error a = Error { instance MonadFail Error where fail = Error . Left -castDictionary :: MonadFail m => Object -> m Dictionary -castDictionary (Direct (Dictionary aDict)) = return aDict -castDictionary (Stream {header}) = return header -castDictionary obj = expected "dictionary : " obj - -castObject :: PDFContent m => DirectObject -> m Object -castObject directObject = - (castObjectId directObject >>= objectById) <|> return (Direct directObject) - -castObjectId :: MonadFail m => DirectObject -> m ObjectId -castObjectId (Reference (IndirectObjCoordinates {objectId})) = return objectId -castObjectId directObject = expected "reference" directObject - -dictionaryById :: PDFContent m => ObjectId -> m Dictionary -dictionaryById objectId = objectById objectId >>= castDictionary +getDictionary :: PDFContent m => Object -> m Dictionary +getDictionary (Direct (Dictionary aDict)) = return aDict +getDictionary (Direct (Reference (IndirectObjCoordinates {objectId}))) = + objectById objectId >>= getDictionary +getDictionary (Stream {header}) = return header +getDictionary obj = expected "dictionary : " obj expected :: (MonadFail m, Show a) => String -> a -> m b expected name = fail . printf "Not a %s: %s" name . show @@ -65,28 +55,30 @@ getField key aDictionary = errorMessage = printf "Key %s not found in dictionary %s" key (show aDictionary) -follow :: PDFContent m => DirectObject -> m Object -follow directObject = castObjectId directObject >>= objectById - objectById :: PDFContent m => ObjectId -> m Object objectById objectId = do content <- ask return (objects content ! objectId) -getDictionary :: PDFContent m => DirectObject -> m Dictionary -getDictionary (Dictionary aDictionary) = return aDictionary -getDictionary (Reference (IndirectObjCoordinates {objectId})) = - objectById objectId >>= castDictionary -getDictionary aDirectObject = - expected "resource (dictionary or reference)" aDirectObject +(./) :: PDFContent m => m Object -> String -> m Object +(./) object key = (object >>= getDictionary >>= getField key >>= castObject) + where + castObject (Reference (IndirectObjCoordinates {objectId})) = + objectById objectId + castObject directObject = return $ Direct directObject -(//) :: PDFContent m => Dictionary -> [String] -> m DirectObject -(//) aDict [] = return $ Dictionary aDict -(//) aDict [key] = getField key aDict -(//) aDict (key:keys) = getField key aDict >>= getDictionary >>= (// keys) +(//) :: PDFContent m => m Object -> [String] -> m Object +(//) object [] = object +(//) object (key:keys) = object ./ key // keys -origin :: PDFContent m => m Dictionary -origin = trailer . docStructure <$> ask +(>./) :: PDFContent m => Object -> String -> m Object +(>./) object = (return object ./) + +(>//) :: PDFContent m => Object -> [String] -> m Object +(>//) object = (return object //) + +origin :: PDFContent m => m Object +origin = Direct . Dictionary . trailer . docStructure <$> ask openStream :: MonadFail m => Object -> m ByteString openStream (Stream {header, streamContent}) = return $ diff --git a/src/PDF/Pages.hs b/src/PDF/Pages.hs index 7c9b0c2..b332f8d 100755 --- a/src/PDF/Pages.hs +++ b/src/PDF/Pages.hs @@ -20,10 +20,11 @@ import PDF.Encoding (encoding) import PDF.Font (Font, FontSet) import PDF.Object ( Content(..), Dictionary, DirectObject(..), IndirectObjCoordinates(..) - , Name(..) + , Name(..), Object(..) ,) import PDF.Object.Navigation ( - Error(..), (//), dictionaryById, getDictionary, getField, follow, openStream, origin + Error(..), (//), (>./), (>//), getDictionary, objectById, openStream + , origin ) import PDF.Output (ObjectId(..)) import Prelude hiding (fail) @@ -36,9 +37,9 @@ data Page = Page { , source :: ObjectId } -getFontDictionary :: Dictionary -> T Dictionary -getFontDictionary pageDict = - ((pageDict // ["Resources", "Font"]) >>= getDictionary) +getFontDictionary :: Object -> T Dictionary +getFontDictionary pageObj = + (pageObj >// ["Resources", "Font"] >>= getDictionary) <|> return Map.empty cache :: (ObjectId -> T Font) -> ObjectId -> T Font @@ -51,20 +52,17 @@ cache loader objectId = return value loadFont :: ObjectId -> T Font -loadFont objectId = dictionaryById objectId >>= tryMappings +loadFont objectId = objectById objectId >>= tryMappings where - tryMappings dictionary = - loadCMap dictionary - <|> (getField "Encoding" dictionary >>= loadEncoding) - <|> (fail $ unknownFormat (show objectId) (show dictionary)) + tryMappings object = + (object >./ "ToUnicode" >>= openStream >>= cMap) + <|> (object >./ "Encoding" >>= loadEncoding) + <|> (fail $ unknownFormat (show objectId) (show object)) unknownFormat = printf "Unknown font format for object #%s : %s" - loadCMap :: Dictionary -> T Font - loadCMap dictionary = - getField "ToUnicode" dictionary >>= follow >>= openStream >>= cMap - loadEncoding :: DirectObject -> T Font - loadEncoding (NameObject (Name name)) = encoding name - loadEncoding directObject = - fail $ printf "Encoding must be a name, not that : %s" $ show directObject + loadEncoding :: Object -> T Font + loadEncoding (Direct (NameObject (Name name))) = encoding name + loadEncoding object = + fail $ printf "Encoding must be a name, not that : %s" $ show object loadFonts :: Dictionary -> T FontSet loadFonts = foldM addFont Map.empty . Map.toList @@ -74,13 +72,13 @@ loadFonts = foldM addFont Map.empty . Map.toList flip (Map.insert name) output <$> cache loadFont objectId addFont output _ = return output -several :: DirectObject -> [DirectObject] -several (Array l) = l -several directObject = [directObject] +several :: Object -> [Object] +several (Direct (Array l)) = Direct <$> l +several object = [object] pagesList :: T [ObjectId] pagesList = do - pages <- (origin :: T Dictionary) >>= (// ["Root", "Pages"]) >>= getDictionary + pages <- origin // ["Root", "Pages"] >>= getDictionary case Map.lookup (Name "Kids") pages of Just (Array kids) -> return $ getReferences kids _ -> return [] @@ -92,22 +90,21 @@ getReferences objects = do Reference (IndirectObjCoordinates {objectId}) -> [objectId] _ -> [] -extractText :: Dictionary -> T [Text] -extractText pageDict = do - fonts <- loadFonts =<< getFontDictionary pageDict - objects <- several <$> getField "Contents" pageDict +extractText :: Object -> T [Text] +extractText pageObj = do + fonts <- loadFonts =<< getFontDictionary pageObj + objects <- several <$> pageObj >./ "Contents" concat <$> mapM (loadContent fonts) objects where - loadContent :: FontSet -> DirectObject -> T [Text] - loadContent fonts directObject = - follow directObject - >>= openStream + loadContent :: FontSet -> Object -> T [Text] + loadContent fonts object = + openStream object >>= (either fail return . Content.parse) >>= renderText fonts loadPage :: ObjectId -> T Page loadPage source = do - contents <- extractText =<< dictionaryById source + contents <- extractText =<< objectById source return $ Page {contents, source} getAll :: Content -> Either String (Map Int Page)