From 23186100a8b825c2841c36217c23c56f1dabe622 Mon Sep 17 00:00:00 2001 From: Tissevert Date: Sat, 15 Feb 2020 10:25:09 +0100 Subject: [PATCH] Reimplement getObj with the newest tools in PDF.Object.Navigation, in particular implement browsing by paths or random objectId access --- Hufflepdf.cabal | 2 +- examples/getObj.hs | 66 +++++++++++++++++------------------- src/PDF/Object/Navigation.hs | 5 +++ 3 files changed, 38 insertions(+), 35 deletions(-) diff --git a/Hufflepdf.cabal b/Hufflepdf.cabal index aa84f0c..8279b94 100644 --- a/Hufflepdf.cabal +++ b/Hufflepdf.cabal @@ -65,7 +65,7 @@ executable getObj , bytestring , containers , Hufflepdf - , zlib + , mtl ghc-options: -Wall default-language: Haskell2010 diff --git a/examples/getObj.hs b/examples/getObj.hs index 778d989..1f79821 100644 --- a/examples/getObj.hs +++ b/examples/getObj.hs @@ -1,53 +1,51 @@ {-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE FlexibleContexts #-} -import Codec.Compression.Zlib (decompress) +import Control.Monad.Reader (ReaderT, runReaderT) import Data.ByteString.Lazy.Char8 (ByteString) import qualified Data.ByteString.Char8 as BS (readFile) -import qualified Data.ByteString.Lazy.Char8 as Lazy (fromStrict, putStr, toStrict) -import Data.Map ((!?)) -import qualified Data.Map as Map (keys, lookup) +import qualified Data.ByteString.Lazy.Char8 as Lazy (putStrLn) import PDF (Document(..), parseDocument) -import qualified PDF.EOL as EOL (Style) -import PDF.Object (Content(..), DirectObject(..), Object(..), Name(..)) -import PDF.Output (ObjectId(..)) +import PDF.Object (Content(..), Object(..)) +import PDF.Object.Navigation ( + Error(..), (//), castObject, objectById, openStream, origin + ) +import PDF.Output (ObjectId(..), Output) import qualified PDF.Output as Output (render) import PDF.Update (unify) import System.Environment (getArgs, getProgName) import System.Exit (die) import Text.Printf (printf) +import Text.Read (readMaybe) -display :: EOL.Style -> Object -> ByteString -display eolStyle d@(Direct _) = Output.render eolStyle d -display eolStyle s@(Stream {header, streamContent}) = Output.render eolStyle $ - case Map.lookup (Name "Filter") header of - Just (NameObject (Name "FlateDecode")) -> Stream { - header - , streamContent = Lazy.toStrict . decompress $ Lazy.fromStrict streamContent - } - _ -> s - -extractObject :: ObjectId -> Document -> Either String ByteString -extractObject objectId (Document {eolStyle, updates}) = - case objects content !? objectId of - Nothing -> Left $ "No object has ID " ++ show (getObjectId objectId) - Just o -> Right $ display eolStyle o +decodedStream :: Object -> Object +decodedStream object = + maybe object replaceContent $ openStream object where - content = unify updates + replaceContent streamContent = object {streamContent} -listObjectIds :: Document -> Either String [String] -listObjectIds = - Right . prependTitle . toString . Map.keys . objects . unify . updates +display :: Output a => ReaderT Content Error a -> Document -> Either String ByteString +display getter (Document {eolStyle, updates}) = + Output.render eolStyle <$> runError (runReaderT getter (unify updates)) + +parse :: [String] -> IO (FilePath, Document -> Either String ByteString) +parse [inputFile] = return (inputFile, display origin) +parse [inputFile, key] = + return (inputFile, clear . maybe (byPath key) byId $ readMaybe key) where - toString = fmap (show . getObjectId) - prependTitle = ("ObjectIds defined in this PDF:":) + byId = objectById . ObjectId + byPath path = (origin >>= (// (explode path)) >>= castObject) + explode "" = [] + explode path = + case break (== '.') path of + (name, "") -> [name] + (name, rest) -> name : explode (drop 1 rest) + clear = display . fmap decodedStream +parse _ = + die . printf "Syntax: %s inputFile [OBJECT_ID | PATH_TO_OBJ]\n" =<< getProgName main :: IO () main = do (inputFile, getData) <- parse =<< getArgs input <- BS.readFile inputFile - either die id $ (parseDocument input >>= getData) - where - parse [inputFile] = return (inputFile, fmap (mapM_ putStrLn) . listObjectIds) - parse [inputFile, objectId] = return - (inputFile, fmap Lazy.putStr . extractObject (ObjectId (read objectId))) - parse _ = die . printf "Syntax: %s inputFile [OBJECT_ID]\n" =<< getProgName + either die Lazy.putStrLn $ (parseDocument input >>= getData) diff --git a/src/PDF/Object/Navigation.hs b/src/PDF/Object/Navigation.hs index 3640704..ad52520 100644 --- a/src/PDF/Object/Navigation.hs +++ b/src/PDF/Object/Navigation.hs @@ -5,6 +5,7 @@ module PDF.Object.Navigation ( Error(..) , (//) + , castObject , dictionaryById , getDictionary , getField @@ -43,6 +44,10 @@ castDictionary (Direct (Dictionary aDict)) = return aDict castDictionary (Stream {header}) = return header castDictionary obj = expected "dictionary : " obj +castObject :: PDFContent m => DirectObject -> m Object +castObject directObject = + (castObjectId directObject >>= objectById) <|> return (Direct directObject) + castObjectId :: MonadFail m => DirectObject -> m ObjectId castObjectId (Reference (IndirectObjCoordinates {objectId})) = return objectId castObjectId directObject = expected "reference" directObject