Compare commits

...

2 Commits

4 changed files with 30 additions and 13 deletions

View File

@ -1,5 +1,9 @@
# Revision history for Hufflepdf
## 0.2.0.1 -- 2019-11-27
* Fix bug discovered while running Hufflepdf on a PDF output from pdftk : magic keywords like `obj`, `stream` or `xref` can have spaces after them before the EOL
## 0.2.0.0 -- 2019-10-14
* Implement PDF's multilayer update mechanism

View File

@ -2,7 +2,7 @@
-- documentation, see http://haskell.org/cabal/users-guide/
name: Hufflepdf
version: 0.2.0.0
version: 0.2.0.1
synopsis: A PDF parser
-- description:
license: BSD3

View File

@ -4,16 +4,17 @@ import Codec.Compression.Zlib (decompress)
import Data.ByteString.Lazy.Char8 (ByteString)
import qualified Data.ByteString.Char8 as BS (readFile)
import qualified Data.ByteString.Lazy.Char8 as Lazy (fromStrict, putStr, toStrict)
import Data.Map ((!))
import qualified Data.Map as Map (lookup)
import Data.Map ((!?))
import qualified Data.Map as Map (keys, lookup)
import PDF (Document(..), parseDocument)
import qualified PDF.EOL as EOL (Style)
import PDF.Object (Content(..), DirectObject(..), Object(..), Name(..))
import PDF.Output (ObjectId(..))
import qualified PDF.Output as Output (render)
import PDF.Update (unify)
import System.Environment (getArgs)
import System.IO (hPutStrLn, stderr)
import System.Environment (getArgs, getProgName)
import System.Exit (die)
import Text.Printf (printf)
display :: EOL.Style -> Object -> ByteString
display eolStyle d@(Direct _) = Output.render eolStyle d
@ -25,16 +26,28 @@ display eolStyle s@(Stream {header, streamContent}) = Output.render eolStyle $
}
_ -> s
extractObject :: ObjectId -> Document -> ByteString
extractObject :: ObjectId -> Document -> Either String ByteString
extractObject objectId (Document {eolStyle, updates}) =
display eolStyle . (!objectId) $ objects content
case objects content !? objectId of
Nothing -> Left $ "No object has ID " ++ show (getObjectId objectId)
Just o -> Right $ display eolStyle o
where
content = unify updates
listObjectIds :: Document -> Either String [String]
listObjectIds =
Right . prependTitle . toString . Map.keys . objects . unify . updates
where
toString = fmap (show . getObjectId)
prependTitle = ("ObjectIds defined in this PDF:":)
main :: IO ()
main = do
[inputFile, objectId] <- getArgs
result <- parseDocument <$> BS.readFile inputFile
case result of
Left parseError -> hPutStrLn stderr parseError
Right doc -> Lazy.putStr $ extractObject (ObjectId (read objectId)) doc
(inputFile, getData) <- parse =<< getArgs
input <- BS.readFile inputFile
either die id $ (parseDocument input >>= getData)
where
parse [inputFile] = return (inputFile, fmap (mapM_ putStrLn) . listObjectIds)
parse [inputFile, objectId] = return
(inputFile, fmap Lazy.putStr . extractObject (ObjectId (read objectId)))
parse _ = die . printf "Syntax: %s inputFile [OBJECT_ID]\n" =<< getProgName

View File

@ -49,7 +49,7 @@ import PDF.Parser (
import Text.Printf (printf)
line :: String -> Parser u ()
line l = (string (BS.pack l) *> EOL.parser *> return ()) <?> printf "line «%s»" l
line l = (string (BS.pack l) *> blank *> return ()) <?> printf "line «%s»" l
magicNumber :: ByteString
magicNumber = "%PDF-"