Actually re-implement getText with the simpler Box instance
This commit is contained in:
parent
93c9863426
commit
309f6ed461
2 changed files with 18 additions and 41 deletions
|
@ -1,37 +1,34 @@
|
|||
import Control.Monad ((>=>))
|
||||
import qualified Data.ByteString.Char8 as BS (readFile)
|
||||
import qualified Data.Map as Map (toList)
|
||||
import qualified Data.Text.IO as Text (putStrLn)
|
||||
import PDF (Document(..), parseDocument)
|
||||
import PDF.Layer (Layer, unify)
|
||||
import PDF.Pages (Page(..), get, getAll)
|
||||
import PDF (UnifiedLayers(..), parseDocument)
|
||||
import PDF.Box (Box(..))
|
||||
import PDF.Layer (Layer)
|
||||
import PDF.Pages (Page(..), PageNumber(..), Pages(..), cacheFonts)
|
||||
import System.Environment (getArgs)
|
||||
import System.Exit (die)
|
||||
import System.IO (BufferMode(..), hSetBuffering, stdout)
|
||||
|
||||
onDoc :: FilePath -> (Layer -> Either String a) -> IO a
|
||||
onDoc inputFile f = do
|
||||
layer <- fmap (unify . layers) . parseDocument <$> BS.readFile inputFile
|
||||
case layer >>= f of
|
||||
Left someError -> die someError
|
||||
Right value -> return value
|
||||
|
||||
displayPage :: Page -> IO ()
|
||||
displayPage = mapM_ Text.putStrLn . contents
|
||||
|
||||
wholeDoc :: FilePath -> IO ()
|
||||
wholeDoc inputFile = do
|
||||
pages <- onDoc inputFile getAll
|
||||
mapM_ (displayPage . snd) $ Map.toList pages
|
||||
getAll :: Layer -> IO ()
|
||||
getAll layer = (cacheFonts $ r Pages layer) >>= mapM_ (displayPage . snd) . Map.toList
|
||||
|
||||
singlePage :: FilePath -> Int -> IO ()
|
||||
singlePage inputFile pageNumber =
|
||||
onDoc inputFile (`get` pageNumber) >>= displayPage
|
||||
get :: Int -> Layer -> IO ()
|
||||
get n layer = (cacheFonts $ r (P n) layer) >>= displayPage
|
||||
|
||||
onDoc :: FilePath -> (Layer -> IO ()) -> IO ()
|
||||
onDoc inputFile f = do
|
||||
(parseDocument <$> BS.readFile inputFile)
|
||||
>>= either die (r UnifiedLayers >=> f)
|
||||
|
||||
main :: IO ()
|
||||
main = do
|
||||
hSetBuffering stdout LineBuffering
|
||||
args <- getArgs
|
||||
case args of
|
||||
[inputFile] -> wholeDoc inputFile
|
||||
[inputFile, pageNumber] -> singlePage inputFile (read pageNumber)
|
||||
[inputFile] -> onDoc inputFile getAll
|
||||
[inputFile, pageNumber] -> onDoc inputFile (get $ read pageNumber)
|
||||
_ -> die "Syntax: getText INPUT_FILE [PAGE_NUMBER]"
|
||||
|
|
|
@ -8,10 +8,8 @@
|
|||
module PDF.Pages (
|
||||
Page(..)
|
||||
, PageNumber(..)
|
||||
, Pages
|
||||
, Pages(..)
|
||||
, cacheFonts
|
||||
--, get
|
||||
--, getAll
|
||||
) where
|
||||
|
||||
import Control.Applicative (Alternative(..))
|
||||
|
@ -48,7 +46,6 @@ type CachedFonts = Map ObjectId Font
|
|||
type T = RWST Layer () CachedFonts Error
|
||||
type PageContext = StateT CachedFonts
|
||||
type FontCache m = (MonadState CachedFonts m, PDFContent m)
|
||||
--type PDFContent m = (MonadState Layer m, MonadFail m, MonadState CachedFonts m)
|
||||
data Page = Page {
|
||||
contents :: [Text]
|
||||
, source :: ObjectId
|
||||
|
@ -131,24 +128,7 @@ instance (MonadFail m, Alternative m, MonadState CachedFonts m) => Box m Pages L
|
|||
numbered :: [Page] -> Map Int Page
|
||||
numbered = Map.fromList . zip [1..]
|
||||
|
||||
instance Box T PageNumber Layer Page
|
||||
instance (MonadFail m, Alternative m, MonadState CachedFonts m) => Box m PageNumber Layer Page where
|
||||
|
||||
cacheFonts :: Monad m => StateT CachedFonts m a -> m a
|
||||
cacheFonts = flip evalStateT Map.empty
|
||||
|
||||
{-
|
||||
getAll :: Layer -> Either String (Map Int Page)
|
||||
getAll content = runError $ fst <$> evalRWST getPages content Map.empty
|
||||
where
|
||||
numbered = Map.fromList . zip [1..]
|
||||
getPages = numbered <$> (mapM loadPage =<< pagesList)
|
||||
|
||||
get :: Layer -> Int -> Either String Page
|
||||
get content pageNumber
|
||||
| pageNumber < 1 = Left "Pages start at 1"
|
||||
| otherwise = runError $ fst <$> evalRWST getPage content Map.empty
|
||||
where
|
||||
firstPage [] = fail "Page is out of bounds"
|
||||
firstPage (p:_) = loadPage p
|
||||
getPage = drop (pageNumber - 1) <$> pagesList >>= firstPage
|
||||
-}
|
||||
|
|
Loading…
Reference in a new issue