diff --git a/examples/getObj.hs b/examples/getObj.hs index a758179..778d989 100644 --- a/examples/getObj.hs +++ b/examples/getObj.hs @@ -4,16 +4,17 @@ import Codec.Compression.Zlib (decompress) import Data.ByteString.Lazy.Char8 (ByteString) import qualified Data.ByteString.Char8 as BS (readFile) import qualified Data.ByteString.Lazy.Char8 as Lazy (fromStrict, putStr, toStrict) -import Data.Map ((!)) -import qualified Data.Map as Map (lookup) +import Data.Map ((!?)) +import qualified Data.Map as Map (keys, lookup) import PDF (Document(..), parseDocument) import qualified PDF.EOL as EOL (Style) import PDF.Object (Content(..), DirectObject(..), Object(..), Name(..)) import PDF.Output (ObjectId(..)) import qualified PDF.Output as Output (render) import PDF.Update (unify) -import System.Environment (getArgs) -import System.IO (hPutStrLn, stderr) +import System.Environment (getArgs, getProgName) +import System.Exit (die) +import Text.Printf (printf) display :: EOL.Style -> Object -> ByteString display eolStyle d@(Direct _) = Output.render eolStyle d @@ -25,16 +26,28 @@ display eolStyle s@(Stream {header, streamContent}) = Output.render eolStyle $ } _ -> s -extractObject :: ObjectId -> Document -> ByteString +extractObject :: ObjectId -> Document -> Either String ByteString extractObject objectId (Document {eolStyle, updates}) = - display eolStyle . (!objectId) $ objects content + case objects content !? objectId of + Nothing -> Left $ "No object has ID " ++ show (getObjectId objectId) + Just o -> Right $ display eolStyle o where content = unify updates +listObjectIds :: Document -> Either String [String] +listObjectIds = + Right . prependTitle . toString . Map.keys . objects . unify . updates + where + toString = fmap (show . getObjectId) + prependTitle = ("ObjectIds defined in this PDF:":) + main :: IO () main = do - [inputFile, objectId] <- getArgs - result <- parseDocument <$> BS.readFile inputFile - case result of - Left parseError -> hPutStrLn stderr parseError - Right doc -> Lazy.putStr $ extractObject (ObjectId (read objectId)) doc + (inputFile, getData) <- parse =<< getArgs + input <- BS.readFile inputFile + either die id $ (parseDocument input >>= getData) + where + parse [inputFile] = return (inputFile, fmap (mapM_ putStrLn) . listObjectIds) + parse [inputFile, objectId] = return + (inputFile, fmap Lazy.putStr . extractObject (ObjectId (read objectId))) + parse _ = die . printf "Syntax: %s inputFile [OBJECT_ID]\n" =<< getProgName diff --git a/src/PDF/Object.hs b/src/PDF/Object.hs index 4510686..f4d077b 100644 --- a/src/PDF/Object.hs +++ b/src/PDF/Object.hs @@ -220,16 +220,18 @@ instance Output DirectObject where Output.string (printf "%d %d R" (getObjectId objectId) versionNumber) directObject :: MonadParser m => m DirectObject -directObject = - Boolean <$> boolean - <|> Reference <$> reference {- defined before Number because Number is a prefix of it -} - <|> NumberObject <$> number - <|> StringObject <$> stringObject - <|> NameObject <$> name - <|> Array <$> array - <|> Dictionary <$> dictionary - <|> const Null <$> nullObject - "direct object" +directObject = (peek >>= dispatch) "direct object" + where + dispatch 't' = Boolean <$> boolean + dispatch 'f' = Boolean <$> boolean + dispatch '(' = StringObject <$> stringObject + dispatch '<' = StringObject <$> stringObject <|> Dictionary <$> dictionary + dispatch '/' = NameObject <$> name + dispatch '[' = Array <$> array + dispatch 'n' = nullObject *> return Null + dispatch _ = + Reference <$> reference {- defined before Number because Number is a prefix of it -} + <|> NumberObject <$> number -- -- Object diff --git a/src/PDF/Parser.hs b/src/PDF/Parser.hs index 8dc0fc0..34a0432 100644 --- a/src/PDF/Parser.hs +++ b/src/PDF/Parser.hs @@ -17,8 +17,8 @@ import Control.Monad.Fail (MonadFail(..)) import Control.Monad.State (StateT(..), evalStateT) import Control.Monad.Trans (MonadTrans(..)) import qualified Data.Attoparsec.ByteString.Char8 as Atto ( - Parser, char, endOfInput, parseOnly, satisfy, string, take, takeWhile - , takeWhile1 + Parser, char, endOfInput, parseOnly, peekChar', satisfy, string, take + , takeWhile, takeWhile1 ) import Data.ByteString (ByteString) import Data.ByteString.Char8.Util (B16Int(..)) @@ -36,6 +36,7 @@ class MonadDeps m => MonadParser m where endOfInput :: m () hexNumber :: m B16Int oneOf :: String -> m Char + peek :: m Char string :: ByteString -> m ByteString takeAll :: (Char -> Bool) -> m ByteString takeAll1 :: (Char -> Bool) -> m ByteString @@ -47,6 +48,7 @@ instance MonadParser Atto.Parser where decNumber = Atto.takeWhile1 (`Set.member` digits) hexNumber = B16Int <$> Atto.takeWhile1 (`Set.member` hexDigits) oneOf charSet = Atto.satisfy (`elem` charSet) + peek = Atto.peekChar' string s = Atto.string s show s takeAll = Atto.takeWhile takeAll1 = Atto.takeWhile1 @@ -58,6 +60,7 @@ instance (MonadParser m, MonadTrans t, MonadDeps (t m)) => MonadParser (t m) whe decNumber = lift $ decNumber hexNumber = lift $ hexNumber oneOf = lift . oneOf + peek = lift $ peek string = lift . string takeAll = lift . takeAll takeAll1 = lift . takeAll1