Actually re-implement getText with the simpler Box instance

This commit is contained in:
Tissevert 2020-03-04 18:19:10 +01:00
parent 93c9863426
commit 309f6ed461
2 changed files with 18 additions and 41 deletions

View file

@ -1,37 +1,34 @@
import Control.Monad ((>=>))
import qualified Data.ByteString.Char8 as BS (readFile) import qualified Data.ByteString.Char8 as BS (readFile)
import qualified Data.Map as Map (toList) import qualified Data.Map as Map (toList)
import qualified Data.Text.IO as Text (putStrLn) import qualified Data.Text.IO as Text (putStrLn)
import PDF (Document(..), parseDocument) import PDF (UnifiedLayers(..), parseDocument)
import PDF.Layer (Layer, unify) import PDF.Box (Box(..))
import PDF.Pages (Page(..), get, getAll) import PDF.Layer (Layer)
import PDF.Pages (Page(..), PageNumber(..), Pages(..), cacheFonts)
import System.Environment (getArgs) import System.Environment (getArgs)
import System.Exit (die) import System.Exit (die)
import System.IO (BufferMode(..), hSetBuffering, stdout) import System.IO (BufferMode(..), hSetBuffering, stdout)
onDoc :: FilePath -> (Layer -> Either String a) -> IO a
onDoc inputFile f = do
layer <- fmap (unify . layers) . parseDocument <$> BS.readFile inputFile
case layer >>= f of
Left someError -> die someError
Right value -> return value
displayPage :: Page -> IO () displayPage :: Page -> IO ()
displayPage = mapM_ Text.putStrLn . contents displayPage = mapM_ Text.putStrLn . contents
wholeDoc :: FilePath -> IO () getAll :: Layer -> IO ()
wholeDoc inputFile = do getAll layer = (cacheFonts $ r Pages layer) >>= mapM_ (displayPage . snd) . Map.toList
pages <- onDoc inputFile getAll
mapM_ (displayPage . snd) $ Map.toList pages
singlePage :: FilePath -> Int -> IO () get :: Int -> Layer -> IO ()
singlePage inputFile pageNumber = get n layer = (cacheFonts $ r (P n) layer) >>= displayPage
onDoc inputFile (`get` pageNumber) >>= displayPage
onDoc :: FilePath -> (Layer -> IO ()) -> IO ()
onDoc inputFile f = do
(parseDocument <$> BS.readFile inputFile)
>>= either die (r UnifiedLayers >=> f)
main :: IO () main :: IO ()
main = do main = do
hSetBuffering stdout LineBuffering hSetBuffering stdout LineBuffering
args <- getArgs args <- getArgs
case args of case args of
[inputFile] -> wholeDoc inputFile [inputFile] -> onDoc inputFile getAll
[inputFile, pageNumber] -> singlePage inputFile (read pageNumber) [inputFile, pageNumber] -> onDoc inputFile (get $ read pageNumber)
_ -> die "Syntax: getText INPUT_FILE [PAGE_NUMBER]" _ -> die "Syntax: getText INPUT_FILE [PAGE_NUMBER]"

View file

@ -8,10 +8,8 @@
module PDF.Pages ( module PDF.Pages (
Page(..) Page(..)
, PageNumber(..) , PageNumber(..)
, Pages , Pages(..)
, cacheFonts , cacheFonts
--, get
--, getAll
) where ) where
import Control.Applicative (Alternative(..)) import Control.Applicative (Alternative(..))
@ -48,7 +46,6 @@ type CachedFonts = Map ObjectId Font
type T = RWST Layer () CachedFonts Error type T = RWST Layer () CachedFonts Error
type PageContext = StateT CachedFonts type PageContext = StateT CachedFonts
type FontCache m = (MonadState CachedFonts m, PDFContent m) type FontCache m = (MonadState CachedFonts m, PDFContent m)
--type PDFContent m = (MonadState Layer m, MonadFail m, MonadState CachedFonts m)
data Page = Page { data Page = Page {
contents :: [Text] contents :: [Text]
, source :: ObjectId , source :: ObjectId
@ -131,24 +128,7 @@ instance (MonadFail m, Alternative m, MonadState CachedFonts m) => Box m Pages L
numbered :: [Page] -> Map Int Page numbered :: [Page] -> Map Int Page
numbered = Map.fromList . zip [1..] numbered = Map.fromList . zip [1..]
instance Box T PageNumber Layer Page instance (MonadFail m, Alternative m, MonadState CachedFonts m) => Box m PageNumber Layer Page where
cacheFonts :: Monad m => StateT CachedFonts m a -> m a cacheFonts :: Monad m => StateT CachedFonts m a -> m a
cacheFonts = flip evalStateT Map.empty cacheFonts = flip evalStateT Map.empty
{-
getAll :: Layer -> Either String (Map Int Page)
getAll content = runError $ fst <$> evalRWST getPages content Map.empty
where
numbered = Map.fromList . zip [1..]
getPages = numbered <$> (mapM loadPage =<< pagesList)
get :: Layer -> Int -> Either String Page
get content pageNumber
| pageNumber < 1 = Left "Pages start at 1"
| otherwise = runError $ fst <$> evalRWST getPage content Map.empty
where
firstPage [] = fail "Page is out of bounds"
firstPage (p:_) = loadPage p
getPage = drop (pageNumber - 1) <$> pagesList >>= firstPage
-}