From 7eca8759003e4a143402b848342a2515b1b40704 Mon Sep 17 00:00:00 2001 From: Tissevert Date: Fri, 29 Nov 2019 11:53:08 +0100 Subject: [PATCH 1/2] Improve getObj example to catch no-existing ObjectId and default to listing existing ObjectIds when none is provided --- examples/getObj.hs | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/examples/getObj.hs b/examples/getObj.hs index a758179..778d989 100644 --- a/examples/getObj.hs +++ b/examples/getObj.hs @@ -4,16 +4,17 @@ import Codec.Compression.Zlib (decompress) import Data.ByteString.Lazy.Char8 (ByteString) import qualified Data.ByteString.Char8 as BS (readFile) import qualified Data.ByteString.Lazy.Char8 as Lazy (fromStrict, putStr, toStrict) -import Data.Map ((!)) -import qualified Data.Map as Map (lookup) +import Data.Map ((!?)) +import qualified Data.Map as Map (keys, lookup) import PDF (Document(..), parseDocument) import qualified PDF.EOL as EOL (Style) import PDF.Object (Content(..), DirectObject(..), Object(..), Name(..)) import PDF.Output (ObjectId(..)) import qualified PDF.Output as Output (render) import PDF.Update (unify) -import System.Environment (getArgs) -import System.IO (hPutStrLn, stderr) +import System.Environment (getArgs, getProgName) +import System.Exit (die) +import Text.Printf (printf) display :: EOL.Style -> Object -> ByteString display eolStyle d@(Direct _) = Output.render eolStyle d @@ -25,16 +26,28 @@ display eolStyle s@(Stream {header, streamContent}) = Output.render eolStyle $ } _ -> s -extractObject :: ObjectId -> Document -> ByteString +extractObject :: ObjectId -> Document -> Either String ByteString extractObject objectId (Document {eolStyle, updates}) = - display eolStyle . (!objectId) $ objects content + case objects content !? objectId of + Nothing -> Left $ "No object has ID " ++ show (getObjectId objectId) + Just o -> Right $ display eolStyle o where content = unify updates +listObjectIds :: Document -> Either String [String] +listObjectIds = + Right . prependTitle . toString . Map.keys . objects . unify . updates + where + toString = fmap (show . getObjectId) + prependTitle = ("ObjectIds defined in this PDF:":) + main :: IO () main = do - [inputFile, objectId] <- getArgs - result <- parseDocument <$> BS.readFile inputFile - case result of - Left parseError -> hPutStrLn stderr parseError - Right doc -> Lazy.putStr $ extractObject (ObjectId (read objectId)) doc + (inputFile, getData) <- parse =<< getArgs + input <- BS.readFile inputFile + either die id $ (parseDocument input >>= getData) + where + parse [inputFile] = return (inputFile, fmap (mapM_ putStrLn) . listObjectIds) + parse [inputFile, objectId] = return + (inputFile, fmap Lazy.putStr . extractObject (ObjectId (read objectId))) + parse _ = die . printf "Syntax: %s inputFile [OBJECT_ID]\n" =<< getProgName From 32f98661062ce2a10aca400e7f6c002e5f96c25a Mon Sep 17 00:00:00 2001 From: Tissevert Date: Wed, 12 Feb 2020 17:34:27 +0100 Subject: [PATCH 2/2] Use peek to improve directObject parser avoiding a large <|> disjunction --- src/PDF/Object.hs | 22 ++++++++++++---------- src/PDF/Parser.hs | 7 +++++-- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/src/PDF/Object.hs b/src/PDF/Object.hs index dba172d..32f62ce 100644 --- a/src/PDF/Object.hs +++ b/src/PDF/Object.hs @@ -220,16 +220,18 @@ instance Output DirectObject where Output.string (printf "%d %d R" (getObjectId objectId) versionNumber) directObject :: MonadParser m => m DirectObject -directObject = - Boolean <$> boolean - <|> Reference <$> reference {- defined before Number because Number is a prefix of it -} - <|> NumberObject <$> number - <|> StringObject <$> stringObject - <|> NameObject <$> name - <|> Array <$> array - <|> Dictionary <$> dictionary - <|> const Null <$> nullObject - "direct object" +directObject = (peek >>= dispatch) "direct object" + where + dispatch 't' = Boolean <$> boolean + dispatch 'f' = Boolean <$> boolean + dispatch '(' = StringObject <$> stringObject + dispatch '<' = StringObject <$> stringObject <|> Dictionary <$> dictionary + dispatch '/' = NameObject <$> name + dispatch '[' = Array <$> array + dispatch 'n' = nullObject *> return Null + dispatch _ = + Reference <$> reference {- defined before Number because Number is a prefix of it -} + <|> NumberObject <$> number -- -- Object diff --git a/src/PDF/Parser.hs b/src/PDF/Parser.hs index 8dc0fc0..34a0432 100644 --- a/src/PDF/Parser.hs +++ b/src/PDF/Parser.hs @@ -17,8 +17,8 @@ import Control.Monad.Fail (MonadFail(..)) import Control.Monad.State (StateT(..), evalStateT) import Control.Monad.Trans (MonadTrans(..)) import qualified Data.Attoparsec.ByteString.Char8 as Atto ( - Parser, char, endOfInput, parseOnly, satisfy, string, take, takeWhile - , takeWhile1 + Parser, char, endOfInput, parseOnly, peekChar', satisfy, string, take + , takeWhile, takeWhile1 ) import Data.ByteString (ByteString) import Data.ByteString.Char8.Util (B16Int(..)) @@ -36,6 +36,7 @@ class MonadDeps m => MonadParser m where endOfInput :: m () hexNumber :: m B16Int oneOf :: String -> m Char + peek :: m Char string :: ByteString -> m ByteString takeAll :: (Char -> Bool) -> m ByteString takeAll1 :: (Char -> Bool) -> m ByteString @@ -47,6 +48,7 @@ instance MonadParser Atto.Parser where decNumber = Atto.takeWhile1 (`Set.member` digits) hexNumber = B16Int <$> Atto.takeWhile1 (`Set.member` hexDigits) oneOf charSet = Atto.satisfy (`elem` charSet) + peek = Atto.peekChar' string s = Atto.string s show s takeAll = Atto.takeWhile takeAll1 = Atto.takeWhile1 @@ -58,6 +60,7 @@ instance (MonadParser m, MonadTrans t, MonadDeps (t m)) => MonadParser (t m) whe decNumber = lift $ decNumber hexNumber = lift $ hexNumber oneOf = lift . oneOf + peek = lift $ peek string = lift . string takeAll = lift . takeAll takeAll1 = lift . takeAll1