Use a defined monadic stack in Pages to lift the MonadReader ambiguity and allow finishing to reimplement getText demo

This commit is contained in:
Tissevert 2020-03-14 16:57:16 +01:00
parent 40475a3093
commit 0f857c457d
2 changed files with 30 additions and 60 deletions

View file

@ -1,47 +1,36 @@
{-# LANGUAGE FlexibleContexts #-}
import Control.Monad ((>=>))
import Control.Monad.IO.Class (MonadIO, liftIO)
import Control.Monad.IO.Class (liftIO)
import Control.Monad.Reader (runReaderT)
import qualified Data.ByteString.Char8 as BS (readFile)
import Data.Map (Map, foldlWithKey, mapWithKey)
import qualified Data.Map as Map (empty, insert, toList, union)
import Data.Map (mapWithKey)
import qualified Data.Map as Map (toList)
import Data.OrderedMap (mapi)
import qualified Data.Text as Text (unpack)
import qualified Data.Text.IO as Text (putStrLn)
import PDF (UnifiedLayers(..), parseDocument)
import PDF.Box (Box(..))
import PDF.Content.Text (Chunks(..))
import PDF.Layer (Layer)
import PDF.Output (ObjectId(..))
import PDF.Pages (
Contents(..), FontCache, Page(..), PageNumber(..), Pages(..), Text_(..), cacheFonts, withResources
Contents(..), FontCache, Page(..), PageNumber(..), Pages(..), cacheFonts, withResources
)
import System.Environment (getArgs)
import System.Exit (die)
import System.IO (BufferMode(..), hSetBuffering, stdout)
import Text.Printf (printf)
displayPage :: (MonadIO m, FontCache m) => Page -> m ()
displayPage =
withResources (r Contents)
>=> sequence_ . mapi (\objectId content ->
r Chunks content -- >=> sequence_ . mapWithKey (display objectId)
)
where
display = undefined
{-
(
r Contents :: ReaderT FontSet m (OrderedMap ObjectId Content)
-- >=> sequenceA . mapi $ \(objectId, content) -> undefined
displayPage :: Page -> FontCache IO ()
displayPage = withResources (
r Contents
>=> sequence_ . mapi (\objectId ->
r Chunks >=> sequence_ . mapWithKey (display objectId)
)
)
--sequenceA $ mapWithKey (display objectId) (r Chunks content)
-- >=> mapM_ Text.putStrLn
where
display a b v =
liftIO . putStrLn $
printf "%d@%s: %s" (getObjectId a) (show b) (Text.unpack v)
-}
getAll :: Layer -> IO ()
getAll layer =

View file

@ -1,17 +1,14 @@
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE UndecidableInstances #-}
module PDF.Pages (
Contents(..)
, FontCache
, Page(..)
, PageNumber(..)
, Pages(..)
, Text_(..)
, cacheFonts
, withResources
) where
@ -20,17 +17,15 @@ import Control.Applicative (Alternative, (<|>))
import Control.Monad (foldM)
import Control.Monad.Fail (MonadFail(..))
import Control.Monad.Reader (ReaderT, runReaderT)
import Control.Monad.State (MonadState, StateT(..), evalStateT, modify)
import qualified Control.Monad.RWS as RWS (get)
import Control.Monad.State (StateT(..), evalStateT, gets, modify)
import Control.Monad.Trans (lift)
import Data.OrderedMap (OrderedMap, build)
import Data.Map (Map, foldrWithKey, mapWithKey)
import Data.Map (Map)
import qualified Data.Map as Map (empty, fromList, insert, lookup, toList)
import Data.Text (Text)
import PDF.Box (Box(..))
import PDF.CMap (cMap)
import PDF.Content (Content(..), Id(..), Indexed)
import PDF.Content (Content(..))
import qualified PDF.Content as Content (parse)
import PDF.Content.Text (Chunks(..))
import PDF.Encoding (encoding)
import PDF.Font (Font, FontSet)
import PDF.Layer (Layer(..))
@ -46,41 +41,41 @@ import PDF.Output (ObjectId(..))
import Prelude hiding (fail)
import Text.Printf (printf)
type Except m = (Alternative m, MonadFail m)
type InLayer m = ReaderT Layer m
type CachedFonts = Map ObjectId Font
type FontCache m = (MonadState CachedFonts m, PDFContent m)
type FontCache m = StateT CachedFonts (InLayer m)
data Page = Page {
--contents :: [(ObjectId, Content)]
contents :: OrderedMap ObjectId Content
, resources :: Dictionary
, source :: ObjectId
}
--loadContents :: PDFContent m => DirectObject -> m [(ObjectId, Content)]
loadContents :: PDFContent m => DirectObject -> m (OrderedMap ObjectId Content)
loadContents :: Except m => DirectObject -> InLayer m (OrderedMap ObjectId Content)
loadContents directObject =
sequenceA . build loadContent $ objectIds directObject
where
loadContent :: PDFContent m => ObjectId -> m Content
loadContent :: Except m => ObjectId -> InLayer m Content
loadContent objectId = objectById objectId >>= r Clear >>= Content.parse
objectIds (Array l) = l >>= getReference
objectIds dirObj = getReference dirObj
--build f = fmap (\k -> f k >>= \v -> return (k, v))
getFontDictionary :: PDFContent m => Object -> m Dictionary
getFontDictionary :: Except m => Object -> InLayer m Dictionary
getFontDictionary pageObj =
(pageObj >// ["Resources", "Font"] >>= getDictionary) <|> return Map.empty
cache :: FontCache m => (ObjectId -> m Font) -> ObjectId -> m Font
cache :: Except m => (ObjectId -> FontCache m Font) -> ObjectId -> FontCache m Font
cache loader objectId =
(maybe load return . Map.lookup objectId) =<< RWS.get
gets (Map.lookup objectId) >>= maybe load return
where
load = do
value <- loader objectId
modify $ Map.insert objectId value
return value
loadFont :: FontCache m => ObjectId -> m Font
loadFont objectId = objectById objectId >>= tryMappings
loadFont :: Except m => ObjectId -> FontCache m Font
loadFont objectId = lift $ objectById objectId >>= tryMappings
where
tryMappings object =
(object >./ "ToUnicode" >>= r Clear >>= cMap)
@ -92,7 +87,7 @@ loadFont objectId = objectById objectId >>= tryMappings
loadEncoding object =
fail $ printf "Encoding must be a name, not that : %s" $ show object
loadResources :: FontCache m => Dictionary -> m FontSet
loadResources :: Except m => Dictionary -> FontCache m FontSet
loadResources = foldM addFont Map.empty . Map.toList
where
addFont output (name, Reference (IndirectObjCoordinates {objectId})) =
@ -103,14 +98,14 @@ getReference :: DirectObject -> [ObjectId]
getReference (Reference (IndirectObjCoordinates {objectId})) = [objectId]
getReference _ = []
loadPage :: PDFContent m => ObjectId -> m Page
loadPage :: Except m => ObjectId -> InLayer m Page
loadPage source = do
page <- objectById source
contents <- getKey "Contents" page >>= loadContents
resources <- getFontDictionary page
return $ Page {contents, resources, source}
pagesList :: PDFContent m => m [ObjectId]
pagesList :: Except m => InLayer m [ObjectId]
pagesList =
(origin // ["Root", "Pages"] >>= getKey "Kids" >>= return . getReferences)
<|> return []
@ -121,7 +116,6 @@ pagesList =
data Pages = Pages
newtype PageNumber = P Int
data Contents = Contents
data Text_ = Text_
instance (Alternative m, MonadFail m) => Box m Pages Layer (Map Int Page) where
r Pages layer = runReaderT (numbered <$> (mapM loadPage =<< pagesList)) layer
@ -137,25 +131,12 @@ instance (Alternative m, MonadFail m) => Box m PageNumber Layer Page where
firstPage [] = fail "Page is out of bounds"
firstPage (p:_) = loadPage p
--instance Monad m => Box m Contents Page [(ObjectId, Content)] where
instance Monad m => Box m Contents Page (OrderedMap ObjectId Content) where
r Contents = return . contents
{-
instance FontCache m => Box m Text_ Page (OrderedMap ObjectId (Indexed Text)) where
--instance FontCache m => Box m Text_ Page [((ObjectId, Id Text), Text)] where
r Text_ (Page {contents, resources}) =
loadResources resources
>>= runReaderT (mapM (r Chunks) contents)
-- >>= runReaderT (concat <$> mapM (r Chunks . snd) contents)
where
mixIn prefix = foldrWithKey (\k v m -> ((prefix, k), v):m) []
extractChunks (objectId, content) = mixIn objectId <$> r Chunks content
-}
cacheFonts :: Monad m => StateT CachedFonts m a -> m a
cacheFonts = flip evalStateT Map.empty
withResources :: FontCache m => (Page -> ReaderT FontSet m b) -> Page -> m b
withResources :: Except m => (Page -> ReaderT FontSet m b) -> Page -> FontCache m b
withResources f p =
loadResources (resources p) >>= runReaderT (f p)
loadResources (resources p) >>= lift . lift . runReaderT (f p)