Reimplement getObj with the newest tools in PDF.Object.Navigation, in particular implement browsing by paths or random objectId access
This commit is contained in:
parent
b916ab5206
commit
23186100a8
3 changed files with 38 additions and 35 deletions
|
@ -65,7 +65,7 @@ executable getObj
|
|||
, bytestring
|
||||
, containers
|
||||
, Hufflepdf
|
||||
, zlib
|
||||
, mtl
|
||||
ghc-options: -Wall
|
||||
default-language: Haskell2010
|
||||
|
||||
|
|
|
@ -1,53 +1,51 @@
|
|||
{-# LANGUAGE NamedFieldPuns #-}
|
||||
{-# LANGUAGE FlexibleContexts #-}
|
||||
|
||||
import Codec.Compression.Zlib (decompress)
|
||||
import Control.Monad.Reader (ReaderT, runReaderT)
|
||||
import Data.ByteString.Lazy.Char8 (ByteString)
|
||||
import qualified Data.ByteString.Char8 as BS (readFile)
|
||||
import qualified Data.ByteString.Lazy.Char8 as Lazy (fromStrict, putStr, toStrict)
|
||||
import Data.Map ((!?))
|
||||
import qualified Data.Map as Map (keys, lookup)
|
||||
import qualified Data.ByteString.Lazy.Char8 as Lazy (putStrLn)
|
||||
import PDF (Document(..), parseDocument)
|
||||
import qualified PDF.EOL as EOL (Style)
|
||||
import PDF.Object (Content(..), DirectObject(..), Object(..), Name(..))
|
||||
import PDF.Output (ObjectId(..))
|
||||
import PDF.Object (Content(..), Object(..))
|
||||
import PDF.Object.Navigation (
|
||||
Error(..), (//), castObject, objectById, openStream, origin
|
||||
)
|
||||
import PDF.Output (ObjectId(..), Output)
|
||||
import qualified PDF.Output as Output (render)
|
||||
import PDF.Update (unify)
|
||||
import System.Environment (getArgs, getProgName)
|
||||
import System.Exit (die)
|
||||
import Text.Printf (printf)
|
||||
import Text.Read (readMaybe)
|
||||
|
||||
display :: EOL.Style -> Object -> ByteString
|
||||
display eolStyle d@(Direct _) = Output.render eolStyle d
|
||||
display eolStyle s@(Stream {header, streamContent}) = Output.render eolStyle $
|
||||
case Map.lookup (Name "Filter") header of
|
||||
Just (NameObject (Name "FlateDecode")) -> Stream {
|
||||
header
|
||||
, streamContent = Lazy.toStrict . decompress $ Lazy.fromStrict streamContent
|
||||
}
|
||||
_ -> s
|
||||
|
||||
extractObject :: ObjectId -> Document -> Either String ByteString
|
||||
extractObject objectId (Document {eolStyle, updates}) =
|
||||
case objects content !? objectId of
|
||||
Nothing -> Left $ "No object has ID " ++ show (getObjectId objectId)
|
||||
Just o -> Right $ display eolStyle o
|
||||
decodedStream :: Object -> Object
|
||||
decodedStream object =
|
||||
maybe object replaceContent $ openStream object
|
||||
where
|
||||
content = unify updates
|
||||
replaceContent streamContent = object {streamContent}
|
||||
|
||||
listObjectIds :: Document -> Either String [String]
|
||||
listObjectIds =
|
||||
Right . prependTitle . toString . Map.keys . objects . unify . updates
|
||||
display :: Output a => ReaderT Content Error a -> Document -> Either String ByteString
|
||||
display getter (Document {eolStyle, updates}) =
|
||||
Output.render eolStyle <$> runError (runReaderT getter (unify updates))
|
||||
|
||||
parse :: [String] -> IO (FilePath, Document -> Either String ByteString)
|
||||
parse [inputFile] = return (inputFile, display origin)
|
||||
parse [inputFile, key] =
|
||||
return (inputFile, clear . maybe (byPath key) byId $ readMaybe key)
|
||||
where
|
||||
toString = fmap (show . getObjectId)
|
||||
prependTitle = ("ObjectIds defined in this PDF:":)
|
||||
byId = objectById . ObjectId
|
||||
byPath path = (origin >>= (// (explode path)) >>= castObject)
|
||||
explode "" = []
|
||||
explode path =
|
||||
case break (== '.') path of
|
||||
(name, "") -> [name]
|
||||
(name, rest) -> name : explode (drop 1 rest)
|
||||
clear = display . fmap decodedStream
|
||||
parse _ =
|
||||
die . printf "Syntax: %s inputFile [OBJECT_ID | PATH_TO_OBJ]\n" =<< getProgName
|
||||
|
||||
main :: IO ()
|
||||
main = do
|
||||
(inputFile, getData) <- parse =<< getArgs
|
||||
input <- BS.readFile inputFile
|
||||
either die id $ (parseDocument input >>= getData)
|
||||
where
|
||||
parse [inputFile] = return (inputFile, fmap (mapM_ putStrLn) . listObjectIds)
|
||||
parse [inputFile, objectId] = return
|
||||
(inputFile, fmap Lazy.putStr . extractObject (ObjectId (read objectId)))
|
||||
parse _ = die . printf "Syntax: %s inputFile [OBJECT_ID]\n" =<< getProgName
|
||||
either die Lazy.putStrLn $ (parseDocument input >>= getData)
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
module PDF.Object.Navigation (
|
||||
Error(..)
|
||||
, (//)
|
||||
, castObject
|
||||
, dictionaryById
|
||||
, getDictionary
|
||||
, getField
|
||||
|
@ -43,6 +44,10 @@ castDictionary (Direct (Dictionary aDict)) = return aDict
|
|||
castDictionary (Stream {header}) = return header
|
||||
castDictionary obj = expected "dictionary : " obj
|
||||
|
||||
castObject :: PDFContent m => DirectObject -> m Object
|
||||
castObject directObject =
|
||||
(castObjectId directObject >>= objectById) <|> return (Direct directObject)
|
||||
|
||||
castObjectId :: MonadFail m => DirectObject -> m ObjectId
|
||||
castObjectId (Reference (IndirectObjCoordinates {objectId})) = return objectId
|
||||
castObjectId directObject = expected "reference" directObject
|
||||
|
|
Loading…
Reference in a new issue