53 lines
2 KiB
Haskell
53 lines
2 KiB
Haskell
{-# LANGUAGE NamedFieldPuns #-}
|
|
|
|
import Codec.Compression.Zlib (decompress)
|
|
import Data.ByteString.Lazy.Char8 (ByteString)
|
|
import qualified Data.ByteString.Char8 as BS (readFile)
|
|
import qualified Data.ByteString.Lazy.Char8 as Lazy (fromStrict, putStr, toStrict)
|
|
import Data.Map ((!?))
|
|
import qualified Data.Map as Map (keys, lookup)
|
|
import PDF (Document(..), parseDocument)
|
|
import qualified PDF.EOL as EOL (Style)
|
|
import PDF.Object (Content(..), DirectObject(..), Object(..), Name(..))
|
|
import PDF.Output (ObjectId(..))
|
|
import qualified PDF.Output as Output (render)
|
|
import PDF.Update (unify)
|
|
import System.Environment (getArgs, getProgName)
|
|
import System.Exit (die)
|
|
import Text.Printf (printf)
|
|
|
|
display :: EOL.Style -> Object -> ByteString
|
|
display eolStyle d@(Direct _) = Output.render eolStyle d
|
|
display eolStyle s@(Stream {header, streamContent}) = Output.render eolStyle $
|
|
case Map.lookup (Name "Filter") header of
|
|
Just (NameObject (Name "FlateDecode")) -> Stream {
|
|
header
|
|
, streamContent = Lazy.toStrict . decompress $ Lazy.fromStrict streamContent
|
|
}
|
|
_ -> s
|
|
|
|
extractObject :: ObjectId -> Document -> Either String ByteString
|
|
extractObject objectId (Document {eolStyle, updates}) =
|
|
case objects content !? objectId of
|
|
Nothing -> Left $ "No object has ID " ++ show (getObjectId objectId)
|
|
Just o -> Right $ display eolStyle o
|
|
where
|
|
content = unify updates
|
|
|
|
listObjectIds :: Document -> Either String [String]
|
|
listObjectIds =
|
|
Right . prependTitle . toString . Map.keys . objects . unify . updates
|
|
where
|
|
toString = fmap (show . getObjectId)
|
|
prependTitle = ("ObjectIds defined in this PDF:":)
|
|
|
|
main :: IO ()
|
|
main = do
|
|
(inputFile, getData) <- parse =<< getArgs
|
|
input <- BS.readFile inputFile
|
|
either die id $ (parseDocument input >>= getData)
|
|
where
|
|
parse [inputFile] = return (inputFile, fmap (mapM_ putStrLn) . listObjectIds)
|
|
parse [inputFile, objectId] = return
|
|
(inputFile, fmap Lazy.putStr . extractObject (ObjectId (read objectId)))
|
|
parse _ = die . printf "Syntax: %s inputFile [OBJECT_ID]\n" =<< getProgName
|