{-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE OverloadedStrings #-} module Main where import Codec.Compression.Zlib (compress, decompress) import Data.ByteString.Char8 (ByteString) import qualified Data.ByteString.Char8 as BS ( length, readFile ) import qualified Data.ByteString.Lazy.Char8 as Lazy ( fromStrict, intercalate, split, toStrict, writeFile ) import qualified Data.Map as Map (insert, lookup) import PDF (Document(..), parseDocument, render) import PDF.Object (Content(..), DirectObject(..), Object(..), Name(..), Number(..)) import PDF.TextRendering (TextRendering(..), update) import Prelude hiding (lines, unlines) import System.Environment (getArgs) import System.IO (hPutStrLn, stderr) revealFlateEncodedText :: ByteString -> ByteString revealFlateEncodedText = Lazy.toStrict . compress . revealText . decompress . Lazy.fromStrict where lines = Lazy.split '\n' unlines = Lazy.intercalate "\n" revealText = unlines . fmap fill . lines fill = update $ const Fill revealObject :: Object -> Object revealObject obj@(Stream {header, streamContent}) = case Map.lookup (Name "Filter") header of Just (NameObject (Name "FlateDecode")) -> let newStreamContent = revealFlateEncodedText streamContent in let newLength = Number . fromIntegral $ BS.length newStreamContent in Stream { header = Map.insert (Name "Length") (NumberObject newLength) header , streamContent = newStreamContent } _ -> obj revealObject obj = obj revealContent :: Content -> Content revealContent content = content { objects = revealObject <$> (objects content) } reveal :: Document -> Document reveal document = document { updates = revealContent <$> (updates document) } main :: IO () main = do [inputPath, outputPath] <- getArgs result <- parseDocument <$> BS.readFile inputPath case result of Left parseError -> hPutStrLn stderr $ show parseError Right doc -> Lazy.writeFile outputPath . render $ reveal doc