{-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE FlexibleContexts #-} module PDF.Pages ( Contents(..) , FontCache , Page(..) , PageNumber(..) , Pages(..) , cacheFonts , withResources ) where import Control.Applicative (Alternative, (<|>)) import Control.Monad (foldM) import Control.Monad.Fail (MonadFail(..)) import Control.Monad.Reader (ReaderT, runReaderT) import Control.Monad.State (StateT(..), evalStateT, execStateT, gets, modify) import Control.Monad.Trans (lift) import Data.ByteString.Lazy (toStrict) import Data.Id (Id) import Data.Map (Map) import qualified Data.Map as Map (empty, fromList, insert, lookup, toList) import Data.OrderedMap (OrderedMap, build, mapi) import PDF.Box (Box(..), at, edit) import PDF.CMap (cMap) import PDF.Content (Content(..)) import qualified PDF.Content as Content (parse) import PDF.Encoding (encoding) import PDF.EOL (Style(..)) import PDF.Font (Font, FontSet) import PDF.Layer (Layer(..), Objects(..)) import PDF.Object ( Dictionary, DirectObject(..), IndirectObjCoordinates(..) , Name(..), Object(..) ,) import PDF.Object.Navigation ( Clear(..), (//), (>./), (>//), getDictionary , getKey, objectById, origin ) import PDF.Output (render) import Prelude hiding (fail) import Text.Printf (printf) type Except m = (Alternative m, MonadFail m) type InLayer m = ReaderT Layer m type CachedFonts = Map (Id Object) Font type FontCache m = StateT CachedFonts (InLayer m) data Page = Page { contents :: OrderedMap (Id Object) Content , resources :: Dictionary , source :: (Id Object) } loadContents :: Except m => DirectObject -> InLayer m (OrderedMap (Id Object) Content) loadContents directObject = sequenceA . build loadContent $ objectIds directObject where loadContent :: Except m => (Id Object) -> InLayer m Content loadContent objectId = objectById objectId >>= r Clear >>= Content.parse objectIds (Array l) = l >>= getReference objectIds dirObj = getReference dirObj getFontDictionary :: Except m => Object -> InLayer m Dictionary getFontDictionary pageObj = (pageObj >// ["Resources", "Font"] >>= getDictionary) <|> return Map.empty cache :: Except m => ((Id Object) -> FontCache m Font) -> (Id Object) -> FontCache m Font cache loader objectId = gets (Map.lookup objectId) >>= maybe load return where load = do value <- loader objectId modify $ Map.insert objectId value return value loadFont :: Except m => (Id Object) -> FontCache m Font loadFont objectId = lift $ objectById objectId >>= tryMappings where tryMappings object = (object >./ "ToUnicode" >>= r Clear >>= cMap) <|> (object >./ "Encoding" >>= loadEncoding) <|> (fail $ unknownFormat (show objectId) (show object)) unknownFormat = printf "Unknown font format for object #%s : %s" loadEncoding :: MonadFail m => Object -> m Font loadEncoding (Direct (NameObject (Name name))) = encoding name loadEncoding object = fail $ printf "Encoding must be a name, not that : %s" $ show object loadResources :: Except m => Dictionary -> FontCache m FontSet loadResources = foldM addFont Map.empty . Map.toList where addFont output (name, Reference (IndirectObjCoordinates {objectId})) = flip (Map.insert name) output <$> cache loadFont objectId addFont output _ = return output getReference :: DirectObject -> [(Id Object)] getReference (Reference (IndirectObjCoordinates {objectId})) = [objectId] getReference _ = [] loadPage :: Except m => (Id Object) -> InLayer m Page loadPage source = do page <- objectById source contents <- getKey "Contents" page >>= loadContents resources <- getFontDictionary page return $ Page {contents, resources, source} pagesList :: Except m => InLayer m [(Id Object)] pagesList = (origin // ["Root", "Pages"] >>= getKey "Kids" >>= return . getReferences) <|> return [] where getReferences (Array kids) = kids >>= getReference getReferences _ = fail "Not a pages array" updatePage :: MonadFail m => Page -> StateT Layer m () updatePage (Page {contents}) = sequence_ $ mapi updateContent contents where updateContent source content = edit . at Objects . at source . at Clear $ setContent content setContent content _ = return . toStrict $ render LF content data Pages = Pages newtype PageNumber = P Int data Contents = Contents instance (Alternative m, MonadFail m) => Box m Pages Layer (Map Int Page) where r Pages layer = runReaderT (numbered <$> (mapM loadPage =<< pagesList)) layer where numbered :: [Page] -> Map Int Page numbered = Map.fromList . zip [1..] w Pages pages = execStateT $ mapM_ updatePage pages instance (Alternative m, MonadFail m) => Box m PageNumber Layer Page where r (P i) layer | i < 1 = fail "Pages start at 1" | otherwise = runReaderT (drop (i - 1) <$> pagesList >>= firstPage) layer where firstPage [] = fail "Page is out of bounds" firstPage (p:_) = loadPage p w _ page = execStateT $ updatePage page instance Monad m => Box m Contents Page (OrderedMap (Id Object) Content) where r Contents = return . contents w _ contents page = return $ page {contents} cacheFonts :: Monad m => Layer -> FontCache m a -> m a cacheFonts layer = flip runReaderT layer . flip evalStateT Map.empty withResources :: Except m => (Page -> ReaderT FontSet m b) -> Page -> FontCache m b withResources f p = loadResources (resources p) >>= lift . lift . runReaderT (f p)