diff --git a/examples/getText.hs b/examples/getText.hs index 8326002..c70598e 100644 --- a/examples/getText.hs +++ b/examples/getText.hs @@ -1,47 +1,36 @@ {-# LANGUAGE FlexibleContexts #-} import Control.Monad ((>=>)) -import Control.Monad.IO.Class (MonadIO, liftIO) +import Control.Monad.IO.Class (liftIO) import Control.Monad.Reader (runReaderT) import qualified Data.ByteString.Char8 as BS (readFile) -import Data.Map (Map, foldlWithKey, mapWithKey) -import qualified Data.Map as Map (empty, insert, toList, union) +import Data.Map (mapWithKey) +import qualified Data.Map as Map (toList) import Data.OrderedMap (mapi) import qualified Data.Text as Text (unpack) -import qualified Data.Text.IO as Text (putStrLn) import PDF (UnifiedLayers(..), parseDocument) import PDF.Box (Box(..)) import PDF.Content.Text (Chunks(..)) import PDF.Layer (Layer) import PDF.Output (ObjectId(..)) import PDF.Pages ( - Contents(..), FontCache, Page(..), PageNumber(..), Pages(..), Text_(..), cacheFonts, withResources + Contents(..), FontCache, Page(..), PageNumber(..), Pages(..), cacheFonts, withResources ) import System.Environment (getArgs) import System.Exit (die) import System.IO (BufferMode(..), hSetBuffering, stdout) import Text.Printf (printf) -displayPage :: (MonadIO m, FontCache m) => Page -> m () -displayPage = - withResources (r Contents) - >=> sequence_ . mapi (\objectId content -> - r Chunks content -- >=> sequence_ . mapWithKey (display objectId) - ) - where - display = undefined - -{- -( - r Contents :: ReaderT FontSet m (OrderedMap ObjectId Content) - -- >=> sequenceA . mapi $ \(objectId, content) -> undefined +displayPage :: Page -> FontCache IO () +displayPage = withResources ( + r Contents + >=> sequence_ . mapi (\objectId -> + r Chunks >=> sequence_ . mapWithKey (display objectId) + ) ) - --sequenceA $ mapWithKey (display objectId) (r Chunks content) - -- >=> mapM_ Text.putStrLn where display a b v = liftIO . putStrLn $ printf "%d@%s: %s" (getObjectId a) (show b) (Text.unpack v) --} getAll :: Layer -> IO () getAll layer = diff --git a/src/PDF/Pages.hs b/src/PDF/Pages.hs index 2ec20a7..7bb6130 100755 --- a/src/PDF/Pages.hs +++ b/src/PDF/Pages.hs @@ -1,17 +1,14 @@ {-# LANGUAGE NamedFieldPuns #-} -{-# LANGUAGE TypeSynonymInstances #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE UndecidableInstances #-} module PDF.Pages ( Contents(..) , FontCache , Page(..) , PageNumber(..) , Pages(..) - , Text_(..) , cacheFonts , withResources ) where @@ -20,17 +17,15 @@ import Control.Applicative (Alternative, (<|>)) import Control.Monad (foldM) import Control.Monad.Fail (MonadFail(..)) import Control.Monad.Reader (ReaderT, runReaderT) -import Control.Monad.State (MonadState, StateT(..), evalStateT, modify) -import qualified Control.Monad.RWS as RWS (get) +import Control.Monad.State (StateT(..), evalStateT, gets, modify) +import Control.Monad.Trans (lift) import Data.OrderedMap (OrderedMap, build) -import Data.Map (Map, foldrWithKey, mapWithKey) +import Data.Map (Map) import qualified Data.Map as Map (empty, fromList, insert, lookup, toList) -import Data.Text (Text) import PDF.Box (Box(..)) import PDF.CMap (cMap) -import PDF.Content (Content(..), Id(..), Indexed) +import PDF.Content (Content(..)) import qualified PDF.Content as Content (parse) -import PDF.Content.Text (Chunks(..)) import PDF.Encoding (encoding) import PDF.Font (Font, FontSet) import PDF.Layer (Layer(..)) @@ -46,41 +41,41 @@ import PDF.Output (ObjectId(..)) import Prelude hiding (fail) import Text.Printf (printf) +type Except m = (Alternative m, MonadFail m) +type InLayer m = ReaderT Layer m + type CachedFonts = Map ObjectId Font -type FontCache m = (MonadState CachedFonts m, PDFContent m) +type FontCache m = StateT CachedFonts (InLayer m) data Page = Page { - --contents :: [(ObjectId, Content)] contents :: OrderedMap ObjectId Content , resources :: Dictionary , source :: ObjectId } ---loadContents :: PDFContent m => DirectObject -> m [(ObjectId, Content)] -loadContents :: PDFContent m => DirectObject -> m (OrderedMap ObjectId Content) +loadContents :: Except m => DirectObject -> InLayer m (OrderedMap ObjectId Content) loadContents directObject = sequenceA . build loadContent $ objectIds directObject where - loadContent :: PDFContent m => ObjectId -> m Content + loadContent :: Except m => ObjectId -> InLayer m Content loadContent objectId = objectById objectId >>= r Clear >>= Content.parse objectIds (Array l) = l >>= getReference objectIds dirObj = getReference dirObj - --build f = fmap (\k -> f k >>= \v -> return (k, v)) -getFontDictionary :: PDFContent m => Object -> m Dictionary +getFontDictionary :: Except m => Object -> InLayer m Dictionary getFontDictionary pageObj = (pageObj >// ["Resources", "Font"] >>= getDictionary) <|> return Map.empty -cache :: FontCache m => (ObjectId -> m Font) -> ObjectId -> m Font +cache :: Except m => (ObjectId -> FontCache m Font) -> ObjectId -> FontCache m Font cache loader objectId = - (maybe load return . Map.lookup objectId) =<< RWS.get + gets (Map.lookup objectId) >>= maybe load return where load = do value <- loader objectId modify $ Map.insert objectId value return value -loadFont :: FontCache m => ObjectId -> m Font -loadFont objectId = objectById objectId >>= tryMappings +loadFont :: Except m => ObjectId -> FontCache m Font +loadFont objectId = lift $ objectById objectId >>= tryMappings where tryMappings object = (object >./ "ToUnicode" >>= r Clear >>= cMap) @@ -92,7 +87,7 @@ loadFont objectId = objectById objectId >>= tryMappings loadEncoding object = fail $ printf "Encoding must be a name, not that : %s" $ show object -loadResources :: FontCache m => Dictionary -> m FontSet +loadResources :: Except m => Dictionary -> FontCache m FontSet loadResources = foldM addFont Map.empty . Map.toList where addFont output (name, Reference (IndirectObjCoordinates {objectId})) = @@ -103,14 +98,14 @@ getReference :: DirectObject -> [ObjectId] getReference (Reference (IndirectObjCoordinates {objectId})) = [objectId] getReference _ = [] -loadPage :: PDFContent m => ObjectId -> m Page +loadPage :: Except m => ObjectId -> InLayer m Page loadPage source = do page <- objectById source contents <- getKey "Contents" page >>= loadContents resources <- getFontDictionary page return $ Page {contents, resources, source} -pagesList :: PDFContent m => m [ObjectId] +pagesList :: Except m => InLayer m [ObjectId] pagesList = (origin // ["Root", "Pages"] >>= getKey "Kids" >>= return . getReferences) <|> return [] @@ -121,7 +116,6 @@ pagesList = data Pages = Pages newtype PageNumber = P Int data Contents = Contents -data Text_ = Text_ instance (Alternative m, MonadFail m) => Box m Pages Layer (Map Int Page) where r Pages layer = runReaderT (numbered <$> (mapM loadPage =<< pagesList)) layer @@ -137,25 +131,12 @@ instance (Alternative m, MonadFail m) => Box m PageNumber Layer Page where firstPage [] = fail "Page is out of bounds" firstPage (p:_) = loadPage p ---instance Monad m => Box m Contents Page [(ObjectId, Content)] where instance Monad m => Box m Contents Page (OrderedMap ObjectId Content) where r Contents = return . contents -{- -instance FontCache m => Box m Text_ Page (OrderedMap ObjectId (Indexed Text)) where ---instance FontCache m => Box m Text_ Page [((ObjectId, Id Text), Text)] where - r Text_ (Page {contents, resources}) = - loadResources resources - >>= runReaderT (mapM (r Chunks) contents) - -- >>= runReaderT (concat <$> mapM (r Chunks . snd) contents) - where - mixIn prefix = foldrWithKey (\k v m -> ((prefix, k), v):m) [] - extractChunks (objectId, content) = mixIn objectId <$> r Chunks content --} - cacheFonts :: Monad m => StateT CachedFonts m a -> m a cacheFonts = flip evalStateT Map.empty -withResources :: FontCache m => (Page -> ReaderT FontSet m b) -> Page -> m b +withResources :: Except m => (Page -> ReaderT FontSet m b) -> Page -> FontCache m b withResources f p = - loadResources (resources p) >>= runReaderT (f p) + loadResources (resources p) >>= lift . lift . runReaderT (f p)