diff --git a/src/PDF/Pages.hs b/src/PDF/Pages.hs index deee4ab..c4fd585 100755 --- a/src/PDF/Pages.hs +++ b/src/PDF/Pages.hs @@ -2,15 +2,24 @@ {-# LANGUAGE TypeSynonymInstances #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE ConstraintKinds #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE UndecidableInstances #-} module PDF.Pages ( Page(..) - , get - , getAll + , PageNumber(..) + , Pages + , cacheFonts + --, get + --, getAll ) where +import Control.Applicative (Alternative(..)) import Control.Applicative ((<|>)) import Control.Monad (foldM) import Control.Monad.Fail (MonadFail(..)) +import Control.Monad.Reader (runReaderT) +import Control.Monad.State (MonadState, StateT(..), evalStateT) import Control.Monad.RWS (RWST(..), evalRWST, modify) import qualified Control.Monad.RWS as RWS (get) import Data.Map (Map) @@ -28,7 +37,7 @@ import PDF.Object ( , Name(..), Object(..) ,) import PDF.Object.Navigation ( - Error(..), StreamContent(..), (//), (>./), (>//), getDictionary, objectById + Error(..), PDFContent, StreamContent(..), (//), (>./), (>//), getDictionary, objectById , origin ) import PDF.Output (ObjectId(..)) @@ -37,17 +46,20 @@ import Text.Printf (printf) type CachedFonts = Map ObjectId Font type T = RWST Layer () CachedFonts Error +type PageContext = StateT CachedFonts +type FontCache m = (MonadState CachedFonts m, PDFContent m) +--type PDFContent m = (MonadState Layer m, MonadFail m, MonadState CachedFonts m) data Page = Page { contents :: [Text] , source :: ObjectId } -getFontDictionary :: Object -> T Dictionary +getFontDictionary :: PDFContent m => Object -> m Dictionary getFontDictionary pageObj = (pageObj >// ["Resources", "Font"] >>= getDictionary) <|> return Map.empty -cache :: (ObjectId -> T Font) -> ObjectId -> T Font +cache :: FontCache m => (ObjectId -> m Font) -> ObjectId -> m Font cache loader objectId = (maybe load return . Map.lookup objectId) =<< RWS.get where @@ -56,7 +68,7 @@ cache loader objectId = modify $ Map.insert objectId value return value -loadFont :: ObjectId -> T Font +loadFont :: FontCache m => ObjectId -> m Font loadFont objectId = objectById objectId >>= tryMappings where tryMappings object = @@ -64,15 +76,14 @@ loadFont objectId = objectById objectId >>= tryMappings <|> (object >./ "Encoding" >>= loadEncoding) <|> (fail $ unknownFormat (show objectId) (show object)) unknownFormat = printf "Unknown font format for object #%s : %s" - loadEncoding :: Object -> T Font + loadEncoding :: MonadFail m => Object -> m Font loadEncoding (Direct (NameObject (Name name))) = encoding name loadEncoding object = fail $ printf "Encoding must be a name, not that : %s" $ show object -loadFonts :: Dictionary -> T FontSet +loadFonts :: FontCache m => Dictionary -> m FontSet loadFonts = foldM addFont Map.empty . Map.toList where - addFont :: FontSet -> (Name, DirectObject) -> T FontSet addFont output (name, Reference (IndirectObjCoordinates {objectId})) = flip (Map.insert name) output <$> cache loadFont objectId addFont output _ = return output @@ -81,7 +92,7 @@ several :: Object -> [Object] several (Direct (Array l)) = Direct <$> l several object = [object] -pagesList :: T [ObjectId] +pagesList :: PDFContent m => m [ObjectId] pagesList = do pages <- origin // ["Root", "Pages"] >>= getDictionary case Map.lookup (Name "Kids") pages of @@ -95,28 +106,37 @@ getReferences objects = do Reference (IndirectObjCoordinates {objectId}) -> [objectId] _ -> [] -extractText :: Object -> T [Text] +extractText :: FontCache m => Object -> m [Text] extractText pageObj = do fonts <- loadFonts =<< getFontDictionary pageObj objects <- several <$> pageObj >./ "Contents" concat <$> mapM (loadContent fonts) objects where - loadContent :: FontSet -> Object -> T [Text] loadContent fonts object = r StreamContent object >>= (either fail return . Content.parse) >>= renderText fonts -loadPage :: ObjectId -> T Page +loadPage :: FontCache m => ObjectId -> m Page loadPage source = do contents <- extractText =<< objectById source return $ Page {contents, source} data Pages = Pages +newtype PageNumber = P Int -instance Box T Pages Layer (Map ObjectId Page) -instance Box T ObjectId Layer Page +instance (MonadFail m, Alternative m, MonadState CachedFonts m) => Box m Pages Layer (Map Int Page) where + r Pages layer = runReaderT (numbered <$> (mapM loadPage =<< pagesList)) layer + where + numbered :: [Page] -> Map Int Page + numbered = Map.fromList . zip [1..] +instance Box T PageNumber Layer Page + +cacheFonts :: Monad m => StateT CachedFonts m a -> m a +cacheFonts = flip evalStateT Map.empty + +{- getAll :: Layer -> Either String (Map Int Page) getAll content = runError $ fst <$> evalRWST getPages content Map.empty where @@ -131,3 +151,4 @@ get content pageNumber firstPage [] = fail "Page is out of bounds" firstPage (p:_) = loadPage p getPage = drop (pageNumber - 1) <$> pagesList >>= firstPage +-}