Hufflepdf/examples/getObj.hs

54 lines
2 KiB
Haskell
Raw Permalink Normal View History

{-# LANGUAGE NamedFieldPuns #-}
import Codec.Compression.Zlib (decompress)
import Data.ByteString.Lazy.Char8 (ByteString)
import qualified Data.ByteString.Char8 as BS (readFile)
import qualified Data.ByteString.Lazy.Char8 as Lazy (fromStrict, putStr, toStrict)
import Data.Map ((!?))
import qualified Data.Map as Map (keys, lookup)
import PDF (Document(..), parseDocument)
import qualified PDF.EOL as EOL (Style)
import PDF.Object (Content(..), DirectObject(..), Object(..), Name(..))
import PDF.Output (ObjectId(..))
import qualified PDF.Output as Output (render)
import PDF.Update (unify)
import System.Environment (getArgs, getProgName)
import System.Exit (die)
import Text.Printf (printf)
display :: EOL.Style -> Object -> ByteString
display eolStyle d@(Direct _) = Output.render eolStyle d
display eolStyle s@(Stream {header, streamContent}) = Output.render eolStyle $
case Map.lookup (Name "Filter") header of
Just (NameObject (Name "FlateDecode")) -> Stream {
header
, streamContent = Lazy.toStrict . decompress $ Lazy.fromStrict streamContent
}
_ -> s
extractObject :: ObjectId -> Document -> Either String ByteString
extractObject objectId (Document {eolStyle, updates}) =
case objects content !? objectId of
Nothing -> Left $ "No object has ID " ++ show (getObjectId objectId)
Just o -> Right $ display eolStyle o
where
content = unify updates
listObjectIds :: Document -> Either String [String]
listObjectIds =
Right . prependTitle . toString . Map.keys . objects . unify . updates
where
toString = fmap (show . getObjectId)
prependTitle = ("ObjectIds defined in this PDF:":)
main :: IO ()
main = do
(inputFile, getData) <- parse =<< getArgs
input <- BS.readFile inputFile
either die id $ (parseDocument input >>= getData)
where
parse [inputFile] = return (inputFile, fmap (mapM_ putStrLn) . listObjectIds)
parse [inputFile, objectId] = return
(inputFile, fmap Lazy.putStr . extractObject (ObjectId (read objectId)))
parse _ = die . printf "Syntax: %s inputFile [OBJECT_ID]\n" =<< getProgName