Use a defined monadic stack in Pages to lift the MonadReader ambiguity and allow finishing to reimplement getText demo
This commit is contained in:
parent
40475a3093
commit
0f857c457d
2 changed files with 30 additions and 60 deletions
|
@ -1,47 +1,36 @@
|
|||
{-# LANGUAGE FlexibleContexts #-}
|
||||
import Control.Monad ((>=>))
|
||||
import Control.Monad.IO.Class (MonadIO, liftIO)
|
||||
import Control.Monad.IO.Class (liftIO)
|
||||
import Control.Monad.Reader (runReaderT)
|
||||
import qualified Data.ByteString.Char8 as BS (readFile)
|
||||
import Data.Map (Map, foldlWithKey, mapWithKey)
|
||||
import qualified Data.Map as Map (empty, insert, toList, union)
|
||||
import Data.Map (mapWithKey)
|
||||
import qualified Data.Map as Map (toList)
|
||||
import Data.OrderedMap (mapi)
|
||||
import qualified Data.Text as Text (unpack)
|
||||
import qualified Data.Text.IO as Text (putStrLn)
|
||||
import PDF (UnifiedLayers(..), parseDocument)
|
||||
import PDF.Box (Box(..))
|
||||
import PDF.Content.Text (Chunks(..))
|
||||
import PDF.Layer (Layer)
|
||||
import PDF.Output (ObjectId(..))
|
||||
import PDF.Pages (
|
||||
Contents(..), FontCache, Page(..), PageNumber(..), Pages(..), Text_(..), cacheFonts, withResources
|
||||
Contents(..), FontCache, Page(..), PageNumber(..), Pages(..), cacheFonts, withResources
|
||||
)
|
||||
import System.Environment (getArgs)
|
||||
import System.Exit (die)
|
||||
import System.IO (BufferMode(..), hSetBuffering, stdout)
|
||||
import Text.Printf (printf)
|
||||
|
||||
displayPage :: (MonadIO m, FontCache m) => Page -> m ()
|
||||
displayPage =
|
||||
withResources (r Contents)
|
||||
>=> sequence_ . mapi (\objectId content ->
|
||||
r Chunks content -- >=> sequence_ . mapWithKey (display objectId)
|
||||
)
|
||||
where
|
||||
display = undefined
|
||||
|
||||
{-
|
||||
(
|
||||
r Contents :: ReaderT FontSet m (OrderedMap ObjectId Content)
|
||||
-- >=> sequenceA . mapi $ \(objectId, content) -> undefined
|
||||
displayPage :: Page -> FontCache IO ()
|
||||
displayPage = withResources (
|
||||
r Contents
|
||||
>=> sequence_ . mapi (\objectId ->
|
||||
r Chunks >=> sequence_ . mapWithKey (display objectId)
|
||||
)
|
||||
)
|
||||
--sequenceA $ mapWithKey (display objectId) (r Chunks content)
|
||||
-- >=> mapM_ Text.putStrLn
|
||||
where
|
||||
display a b v =
|
||||
liftIO . putStrLn $
|
||||
printf "%d@%s: %s" (getObjectId a) (show b) (Text.unpack v)
|
||||
-}
|
||||
|
||||
getAll :: Layer -> IO ()
|
||||
getAll layer =
|
||||
|
|
|
@ -1,17 +1,14 @@
|
|||
{-# LANGUAGE NamedFieldPuns #-}
|
||||
{-# LANGUAGE TypeSynonymInstances #-}
|
||||
{-# LANGUAGE FlexibleInstances #-}
|
||||
{-# LANGUAGE MultiParamTypeClasses #-}
|
||||
{-# LANGUAGE ConstraintKinds #-}
|
||||
{-# LANGUAGE FlexibleContexts #-}
|
||||
{-# LANGUAGE UndecidableInstances #-}
|
||||
module PDF.Pages (
|
||||
Contents(..)
|
||||
, FontCache
|
||||
, Page(..)
|
||||
, PageNumber(..)
|
||||
, Pages(..)
|
||||
, Text_(..)
|
||||
, cacheFonts
|
||||
, withResources
|
||||
) where
|
||||
|
@ -20,17 +17,15 @@ import Control.Applicative (Alternative, (<|>))
|
|||
import Control.Monad (foldM)
|
||||
import Control.Monad.Fail (MonadFail(..))
|
||||
import Control.Monad.Reader (ReaderT, runReaderT)
|
||||
import Control.Monad.State (MonadState, StateT(..), evalStateT, modify)
|
||||
import qualified Control.Monad.RWS as RWS (get)
|
||||
import Control.Monad.State (StateT(..), evalStateT, gets, modify)
|
||||
import Control.Monad.Trans (lift)
|
||||
import Data.OrderedMap (OrderedMap, build)
|
||||
import Data.Map (Map, foldrWithKey, mapWithKey)
|
||||
import Data.Map (Map)
|
||||
import qualified Data.Map as Map (empty, fromList, insert, lookup, toList)
|
||||
import Data.Text (Text)
|
||||
import PDF.Box (Box(..))
|
||||
import PDF.CMap (cMap)
|
||||
import PDF.Content (Content(..), Id(..), Indexed)
|
||||
import PDF.Content (Content(..))
|
||||
import qualified PDF.Content as Content (parse)
|
||||
import PDF.Content.Text (Chunks(..))
|
||||
import PDF.Encoding (encoding)
|
||||
import PDF.Font (Font, FontSet)
|
||||
import PDF.Layer (Layer(..))
|
||||
|
@ -46,41 +41,41 @@ import PDF.Output (ObjectId(..))
|
|||
import Prelude hiding (fail)
|
||||
import Text.Printf (printf)
|
||||
|
||||
type Except m = (Alternative m, MonadFail m)
|
||||
type InLayer m = ReaderT Layer m
|
||||
|
||||
type CachedFonts = Map ObjectId Font
|
||||
type FontCache m = (MonadState CachedFonts m, PDFContent m)
|
||||
type FontCache m = StateT CachedFonts (InLayer m)
|
||||
data Page = Page {
|
||||
--contents :: [(ObjectId, Content)]
|
||||
contents :: OrderedMap ObjectId Content
|
||||
, resources :: Dictionary
|
||||
, source :: ObjectId
|
||||
}
|
||||
|
||||
--loadContents :: PDFContent m => DirectObject -> m [(ObjectId, Content)]
|
||||
loadContents :: PDFContent m => DirectObject -> m (OrderedMap ObjectId Content)
|
||||
loadContents :: Except m => DirectObject -> InLayer m (OrderedMap ObjectId Content)
|
||||
loadContents directObject =
|
||||
sequenceA . build loadContent $ objectIds directObject
|
||||
where
|
||||
loadContent :: PDFContent m => ObjectId -> m Content
|
||||
loadContent :: Except m => ObjectId -> InLayer m Content
|
||||
loadContent objectId = objectById objectId >>= r Clear >>= Content.parse
|
||||
objectIds (Array l) = l >>= getReference
|
||||
objectIds dirObj = getReference dirObj
|
||||
--build f = fmap (\k -> f k >>= \v -> return (k, v))
|
||||
|
||||
getFontDictionary :: PDFContent m => Object -> m Dictionary
|
||||
getFontDictionary :: Except m => Object -> InLayer m Dictionary
|
||||
getFontDictionary pageObj =
|
||||
(pageObj >// ["Resources", "Font"] >>= getDictionary) <|> return Map.empty
|
||||
|
||||
cache :: FontCache m => (ObjectId -> m Font) -> ObjectId -> m Font
|
||||
cache :: Except m => (ObjectId -> FontCache m Font) -> ObjectId -> FontCache m Font
|
||||
cache loader objectId =
|
||||
(maybe load return . Map.lookup objectId) =<< RWS.get
|
||||
gets (Map.lookup objectId) >>= maybe load return
|
||||
where
|
||||
load = do
|
||||
value <- loader objectId
|
||||
modify $ Map.insert objectId value
|
||||
return value
|
||||
|
||||
loadFont :: FontCache m => ObjectId -> m Font
|
||||
loadFont objectId = objectById objectId >>= tryMappings
|
||||
loadFont :: Except m => ObjectId -> FontCache m Font
|
||||
loadFont objectId = lift $ objectById objectId >>= tryMappings
|
||||
where
|
||||
tryMappings object =
|
||||
(object >./ "ToUnicode" >>= r Clear >>= cMap)
|
||||
|
@ -92,7 +87,7 @@ loadFont objectId = objectById objectId >>= tryMappings
|
|||
loadEncoding object =
|
||||
fail $ printf "Encoding must be a name, not that : %s" $ show object
|
||||
|
||||
loadResources :: FontCache m => Dictionary -> m FontSet
|
||||
loadResources :: Except m => Dictionary -> FontCache m FontSet
|
||||
loadResources = foldM addFont Map.empty . Map.toList
|
||||
where
|
||||
addFont output (name, Reference (IndirectObjCoordinates {objectId})) =
|
||||
|
@ -103,14 +98,14 @@ getReference :: DirectObject -> [ObjectId]
|
|||
getReference (Reference (IndirectObjCoordinates {objectId})) = [objectId]
|
||||
getReference _ = []
|
||||
|
||||
loadPage :: PDFContent m => ObjectId -> m Page
|
||||
loadPage :: Except m => ObjectId -> InLayer m Page
|
||||
loadPage source = do
|
||||
page <- objectById source
|
||||
contents <- getKey "Contents" page >>= loadContents
|
||||
resources <- getFontDictionary page
|
||||
return $ Page {contents, resources, source}
|
||||
|
||||
pagesList :: PDFContent m => m [ObjectId]
|
||||
pagesList :: Except m => InLayer m [ObjectId]
|
||||
pagesList =
|
||||
(origin // ["Root", "Pages"] >>= getKey "Kids" >>= return . getReferences)
|
||||
<|> return []
|
||||
|
@ -121,7 +116,6 @@ pagesList =
|
|||
data Pages = Pages
|
||||
newtype PageNumber = P Int
|
||||
data Contents = Contents
|
||||
data Text_ = Text_
|
||||
|
||||
instance (Alternative m, MonadFail m) => Box m Pages Layer (Map Int Page) where
|
||||
r Pages layer = runReaderT (numbered <$> (mapM loadPage =<< pagesList)) layer
|
||||
|
@ -137,25 +131,12 @@ instance (Alternative m, MonadFail m) => Box m PageNumber Layer Page where
|
|||
firstPage [] = fail "Page is out of bounds"
|
||||
firstPage (p:_) = loadPage p
|
||||
|
||||
--instance Monad m => Box m Contents Page [(ObjectId, Content)] where
|
||||
instance Monad m => Box m Contents Page (OrderedMap ObjectId Content) where
|
||||
r Contents = return . contents
|
||||
|
||||
{-
|
||||
instance FontCache m => Box m Text_ Page (OrderedMap ObjectId (Indexed Text)) where
|
||||
--instance FontCache m => Box m Text_ Page [((ObjectId, Id Text), Text)] where
|
||||
r Text_ (Page {contents, resources}) =
|
||||
loadResources resources
|
||||
>>= runReaderT (mapM (r Chunks) contents)
|
||||
-- >>= runReaderT (concat <$> mapM (r Chunks . snd) contents)
|
||||
where
|
||||
mixIn prefix = foldrWithKey (\k v m -> ((prefix, k), v):m) []
|
||||
extractChunks (objectId, content) = mixIn objectId <$> r Chunks content
|
||||
-}
|
||||
|
||||
cacheFonts :: Monad m => StateT CachedFonts m a -> m a
|
||||
cacheFonts = flip evalStateT Map.empty
|
||||
|
||||
withResources :: FontCache m => (Page -> ReaderT FontSet m b) -> Page -> m b
|
||||
withResources :: Except m => (Page -> ReaderT FontSet m b) -> Page -> FontCache m b
|
||||
withResources f p =
|
||||
loadResources (resources p) >>= runReaderT (f p)
|
||||
loadResources (resources p) >>= lift . lift . runReaderT (f p)
|
||||
|
|
Loading…
Reference in a new issue