{-# LANGUAGE FlexibleContexts #-}
-- Command-line tool printing the text chunks found in a PDF file, either for
-- every page or for a single page selected on the command line.
--
-- Usage: getText INPUT_FILE [PAGE_NUMBER]

import Control.Monad ((>=>))
import Control.Monad.Except (ExceptT(..))
import Control.Monad.Except.IOH (handle)
import Control.Monad.IO.Class (liftIO)
import qualified Data.ByteString.Char8 as BS (readFile)
import Data.Id (Id(..), mapWithKey)
import qualified Data.Map as Map (mapWithKey)
import Data.OrderedMap (mapi)
import qualified Data.Text as Text (unpack)
import PDF (UnifiedLayers(..), parseDocument)
import PDF.Box (Box(..))
import PDF.Content.Text (Chunks(..))
import PDF.Layer (Layer, LayerReader)
import PDF.Pages
  ( Contents(..)
  , FontCache
  , Page(..)
  , PageNumber(..)
  , Pages(..)
  , withFonts
  , withResources
  )
import System.Environment (getArgs)
import System.Exit (die)
import System.IO (BufferMode(..), hSetBuffering, stdout)
import Text.Printf (printf)
import Text.Read (readMaybe)

-- | Print every text chunk of one page on stdout, one line per chunk, as
-- @p#\<page\> obj#\<object id\> instr#\<instruction id\>: \<text\>@.
--
-- The first argument is only used as the page label in the output; the page
-- data itself comes from the 'Page' argument.
--
-- NOTE(review): 'r' is presumably the read accessor of the @Box@ class
-- imported from "PDF.Box" — confirm against that module.
displayPage :: Int -> Page -> FontCache (LayerReader (ExceptT String IO)) ()
displayPage n =
  withResources
    ( r Contents
        >=> sequence_
          . mapi
            ( \objectId ->
                r Chunks >=> sequence_ . mapWithKey (display objectId)
            )
    )
  where
    -- Emit a single chunk, tagged with the ids of the object and of the
    -- instruction it was found under.
    display a b v =
      liftIO . putStrLn $
        printf "p#%d obj#%d instr#%d: %s" n (getId a) (getId b) (Text.unpack v)

-- | Display all the pages of a layer: each entry of the map obtained from
-- 'Pages' is passed to 'displayPage' together with its key.
getAll :: Layer -> ExceptT String IO ()
getAll = withFonts $ r Pages >=> sequence_ . Map.mapWithKey displayPage

-- | Display only the page designated by @P n@ in the given layer.
get :: Int -> Layer -> ExceptT String IO ()
get n = withFonts $ r (P n) >=> displayPage n

-- | Read @inputFile@, parse it with 'parseDocument', extract its
-- 'UnifiedLayers' and run the action @f@ on the result.
--
-- A 'Left' returned by 'parseDocument' becomes a failure of the resulting
-- 'ExceptT' computation.
onDoc :: FilePath -> (Layer -> ExceptT String IO ()) -> ExceptT String IO ()
onDoc inputFile f =
  ExceptT (parseDocument <$> BS.readFile inputFile) >>= r UnifiedLayers >>= f

-- | Entry point: dispatch on the command-line arguments.
--
-- * one argument: print the text of every page of the file;
-- * two arguments: print the text of the given page only;
-- * anything else: exit with the usage message.
--
-- Errors reported through 'ExceptT' are handed to 'die'.
main :: IO ()
main = do
  hSetBuffering stdout LineBuffering
  args <- getArgs
  case args of
    [inputFile] -> onDoc inputFile getAll `handle` die
    [inputFile, pageNumber] ->
      -- 'read' is partial and would raise a lazy "no parse" exception on a
      -- non-numeric argument; validate it up front and fail cleanly instead.
      case readMaybe pageNumber of
        Just n -> onDoc inputFile (get n) `handle` die
        Nothing -> die $ "Invalid page number: " ++ pageNumber
    _ -> die "Syntax: getText INPUT_FILE [PAGE_NUMBER]"