Actually re-implement getText with the simpler Box instance

This commit is contained in:
Tissevert 2020-03-04 18:19:10 +01:00
parent 93c9863426
commit 309f6ed461
2 changed files with 18 additions and 41 deletions

View file

@ -1,37 +1,34 @@
import Control.Monad ((>=>))
import qualified Data.ByteString.Char8 as BS (readFile)
import qualified Data.Map as Map (toList)
import qualified Data.Text.IO as Text (putStrLn)
import PDF (Document(..), parseDocument)
import PDF.Layer (Layer, unify)
import PDF.Pages (Page(..), get, getAll)
import PDF (UnifiedLayers(..), parseDocument)
import PDF.Box (Box(..))
import PDF.Layer (Layer)
import PDF.Pages (Page(..), PageNumber(..), Pages(..), cacheFonts)
import System.Environment (getArgs)
import System.Exit (die)
import System.IO (BufferMode(..), hSetBuffering, stdout)
onDoc :: FilePath -> (Layer -> Either String a) -> IO a
onDoc inputFile f = do
layer <- fmap (unify . layers) . parseDocument <$> BS.readFile inputFile
case layer >>= f of
Left someError -> die someError
Right value -> return value
displayPage :: Page -> IO ()
displayPage = mapM_ Text.putStrLn . contents
wholeDoc :: FilePath -> IO ()
wholeDoc inputFile = do
pages <- onDoc inputFile getAll
mapM_ (displayPage . snd) $ Map.toList pages
getAll :: Layer -> IO ()
getAll layer = (cacheFonts $ r Pages layer) >>= mapM_ (displayPage . snd) . Map.toList
singlePage :: FilePath -> Int -> IO ()
singlePage inputFile pageNumber =
onDoc inputFile (`get` pageNumber) >>= displayPage
get :: Int -> Layer -> IO ()
get n layer = (cacheFonts $ r (P n) layer) >>= displayPage
onDoc :: FilePath -> (Layer -> IO ()) -> IO ()
onDoc inputFile f = do
(parseDocument <$> BS.readFile inputFile)
>>= either die (r UnifiedLayers >=> f)
main :: IO ()
main = do
hSetBuffering stdout LineBuffering
args <- getArgs
case args of
[inputFile] -> wholeDoc inputFile
[inputFile, pageNumber] -> singlePage inputFile (read pageNumber)
[inputFile] -> onDoc inputFile getAll
[inputFile, pageNumber] -> onDoc inputFile (get $ read pageNumber)
_ -> die "Syntax: getText INPUT_FILE [PAGE_NUMBER]"

View file

@ -8,10 +8,8 @@
module PDF.Pages (
Page(..)
, PageNumber(..)
, Pages
, Pages(..)
, cacheFonts
--, get
--, getAll
) where
import Control.Applicative (Alternative(..))
@ -48,7 +46,6 @@ type CachedFonts = Map ObjectId Font
type T = RWST Layer () CachedFonts Error
type PageContext = StateT CachedFonts
type FontCache m = (MonadState CachedFonts m, PDFContent m)
--type PDFContent m = (MonadState Layer m, MonadFail m, MonadState CachedFonts m)
data Page = Page {
contents :: [Text]
, source :: ObjectId
@ -131,24 +128,7 @@ instance (MonadFail m, Alternative m, MonadState CachedFonts m) => Box m Pages L
numbered :: [Page] -> Map Int Page
numbered = Map.fromList . zip [1..]
instance Box T PageNumber Layer Page
instance (MonadFail m, Alternative m, MonadState CachedFonts m) => Box m PageNumber Layer Page where
cacheFonts :: Monad m => StateT CachedFonts m a -> m a
cacheFonts = flip evalStateT Map.empty
{-
getAll :: Layer -> Either String (Map Int Page)
getAll content = runError $ fst <$> evalRWST getPages content Map.empty
where
numbered = Map.fromList . zip [1..]
getPages = numbered <$> (mapM loadPage =<< pagesList)
get :: Layer -> Int -> Either String Page
get content pageNumber
| pageNumber < 1 = Left "Pages start at 1"
| otherwise = runError $ fst <$> evalRWST getPage content Map.empty
where
firstPage [] = fail "Page is out of bounds"
firstPage (p:_) = loadPage p
getPage = drop (pageNumber - 1) <$> pagesList >>= firstPage
-}