Hufflepdf/examples/getObj.hs

38 lines
1.4 KiB
Haskell

{-# LANGUAGE NamedFieldPuns #-}
import Codec.Compression.Zlib (decompress)
import Data.ByteString.Lazy.Char8 (ByteString)
import qualified Data.ByteString.Char8 as BS (readFile)
import qualified Data.ByteString.Lazy.Char8 as Lazy (concat, fromStrict, putStr, toStrict)
import Data.Map ((!))
import qualified Data.Map as Map (lookup)
import PDF (Document(..), parseDocument)
import qualified PDF.EOL as EOL (Style)
import PDF.Object (Content(..), DirectObject(..), Object(..), Name(..))
import PDF.Output (ObjectId(..))
import qualified PDF.Output as Output (render)
import System.Environment (getArgs)
import System.IO (hPutStrLn, stderr)
display :: EOL.Style -> Object -> ByteString
display eolStyle d@(Direct _) = Output.render eolStyle d
display eolStyle s@(Stream {header, streamContent}) = Output.render eolStyle $
case Map.lookup (Name "Filter") header of
Just (NameObject (Name "FlateDecode")) -> Stream {
header
, streamContent = Lazy.toStrict . decompress $ Lazy.fromStrict streamContent
}
_ -> s
extractObject :: ObjectId -> Document -> ByteString
extractObject objectId (Document {eolStyle, updates}) =
Lazy.concat $ display eolStyle . (!objectId) . objects <$> updates
main :: IO ()
main = do
[inputFile, objectId] <- getArgs
result <- parseDocument <$> BS.readFile inputFile
case result of
Left parseError -> hPutStrLn stderr $ show parseError
Right doc -> Lazy.putStr $ extractObject (ObjectId (read objectId)) doc