diff --git a/examples/getText.hs b/examples/getText.hs index 506b751..ba73131 100644 --- a/examples/getText.hs +++ b/examples/getText.hs @@ -1,37 +1,34 @@ +import Control.Monad ((>=>)) import qualified Data.ByteString.Char8 as BS (readFile) import qualified Data.Map as Map (toList) import qualified Data.Text.IO as Text (putStrLn) -import PDF (Document(..), parseDocument) -import PDF.Layer (Layer, unify) -import PDF.Pages (Page(..), get, getAll) +import PDF (UnifiedLayers(..), parseDocument) +import PDF.Box (Box(..)) +import PDF.Layer (Layer) +import PDF.Pages (Page(..), PageNumber(..), Pages(..), cacheFonts) import System.Environment (getArgs) import System.Exit (die) import System.IO (BufferMode(..), hSetBuffering, stdout) -onDoc :: FilePath -> (Layer -> Either String a) -> IO a -onDoc inputFile f = do - layer <- fmap (unify . layers) . parseDocument <$> BS.readFile inputFile - case layer >>= f of - Left someError -> die someError - Right value -> return value - displayPage :: Page -> IO () displayPage = mapM_ Text.putStrLn . contents -wholeDoc :: FilePath -> IO () -wholeDoc inputFile = do - pages <- onDoc inputFile getAll - mapM_ (displayPage . snd) $ Map.toList pages +getAll :: Layer -> IO () +getAll layer = (cacheFonts $ r Pages layer) >>= mapM_ (displayPage . snd) . Map.toList -singlePage :: FilePath -> Int -> IO () -singlePage inputFile pageNumber = - onDoc inputFile (`get` pageNumber) >>= displayPage +get :: Int -> Layer -> IO () +get n layer = (cacheFonts $ r (P n) layer) >>= displayPage + +onDoc :: FilePath -> (Layer -> IO ()) -> IO () +onDoc inputFile f = do + (parseDocument <$> BS.readFile inputFile) + >>= either die (r UnifiedLayers >=> f) main :: IO () main = do hSetBuffering stdout LineBuffering args <- getArgs case args of - [inputFile] -> wholeDoc inputFile - [inputFile, pageNumber] -> singlePage inputFile (read pageNumber) + [inputFile] -> onDoc inputFile getAll + [inputFile, pageNumber] -> onDoc inputFile (get $ read pageNumber) _ -> die "Syntax: getText INPUT_FILE [PAGE_NUMBER]" diff --git a/src/PDF/Pages.hs b/src/PDF/Pages.hs index 0eb8e36..c8fdc4d 100755 --- a/src/PDF/Pages.hs +++ b/src/PDF/Pages.hs @@ -8,10 +8,8 @@ module PDF.Pages ( Page(..) , PageNumber(..) - , Pages + , Pages(..) , cacheFonts - --, get - --, getAll ) where import Control.Applicative (Alternative(..)) @@ -48,7 +46,6 @@ type CachedFonts = Map ObjectId Font type T = RWST Layer () CachedFonts Error type PageContext = StateT CachedFonts type FontCache m = (MonadState CachedFonts m, PDFContent m) ---type PDFContent m = (MonadState Layer m, MonadFail m, MonadState CachedFonts m) data Page = Page { contents :: [Text] , source :: ObjectId @@ -131,24 +128,7 @@ instance (MonadFail m, Alternative m, MonadState CachedFonts m) => Box m Pages L numbered :: [Page] -> Map Int Page numbered = Map.fromList . zip [1..] -instance Box T PageNumber Layer Page +instance (MonadFail m, Alternative m, MonadState CachedFonts m) => Box m PageNumber Layer Page where cacheFonts :: Monad m => StateT CachedFonts m a -> m a cacheFonts = flip evalStateT Map.empty - -{- -getAll :: Layer -> Either String (Map Int Page) -getAll content = runError $ fst <$> evalRWST getPages content Map.empty - where - numbered = Map.fromList . zip [1..] - getPages = numbered <$> (mapM loadPage =<< pagesList) - -get :: Layer -> Int -> Either String Page -get content pageNumber - | pageNumber < 1 = Left "Pages start at 1" - | otherwise = runError $ fst <$> evalRWST getPage content Map.empty - where - firstPage [] = fail "Page is out of bounds" - firstPage (p:_) = loadPage p - getPage = drop (pageNumber - 1) <$> pagesList >>= firstPage --}