{-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE FlexibleContexts #-} module PDF.Pages ( Contents(..) , FontCache , Page(..) , PageNumber(..) , Pages(..) , withFonts , withResources ) where import Control.Applicative (Alternative) import Control.Applicative ((<|>)) import Control.Monad (foldM) import Control.Monad.Except (MonadError(..)) import Control.Monad.Reader (ReaderT, runReaderT) import Control.Monad.State (StateT(..), evalStateT, execStateT, gets, modify) import Control.Monad.Trans (lift) import Data.ByteString (ByteString) import Data.ByteString.Lazy (toStrict) import Data.Id (Id(..), IdMap) import qualified Data.Id as Id (empty, insert, lookup) import Data.Map (Map) import qualified Data.Map as Map (empty, elems, fromList, insert, toList) import Data.Maybe (listToMaybe) import Data.OrderedMap (OrderedMap, build, mapi) import PDF.Box (Box(..), at, edit, runRO) import qualified PDF.CMap as CMap (parse) import PDF.Content (Content(..)) import qualified PDF.Content as Content (parse) import PDF.Encoding (encoding) import PDF.EOL (Style(..)) import PDF.Font (Font, FontSet) import PDF.Layer (Layer(..), LayerReader) import PDF.Object ( Dictionary, DirectObject(..), IndirectObjCoordinates(..) , Name(..), Number(..), Object(..) ,) import PDF.Object.Navigation ( Nav(..), PPath(..), ROLayer, RWLayer, StreamContent(..), (//), (>./) , (>//), catalog, getDictionary, getKey, objectById, save ) import PDF.Output (render) import Text.Printf (printf) type CachedFonts = IdMap Object Font type FontCache m = StateT CachedFonts m data Page = Page { byteContents :: OrderedMap (Id Object) ByteString , resources :: Dictionary , source :: (Id Object) } loadByteContents :: ROLayer m => DirectObject -> m (OrderedMap (Id Object) ByteString) loadByteContents directObject = do objs <- sequence . build objectById $ objectIds directObject mapM (r Clear) objs where objectIds (Array l) = l >>= getReference objectIds dirObj = getReference dirObj getFontDictionary :: (Alternative m, ROLayer m) => Nav Object -> m Dictionary getFontDictionary pageObj = (pageObj >// PPath ["Resources", "Font"] >>= fmap value . getDictionary) <|> return Map.empty cache :: ROLayer m => (Id Object -> FontCache m Font) -> Id Object -> FontCache m Font cache loader objectId = gets (Id.lookup objectId) >>= maybe load return where load = do value <- loader objectId modify $ Id.insert objectId value return value loadFont :: (Alternative m, ROLayer m) => Id Object -> FontCache m Font loadFont objectId = lift $ objectById objectId >>= tryMappings where tryMappings object = (object >./ "ToUnicode" >>= r Clear >>= CMap.parse) <|> (object >./ "Encoding" >>= loadEncoding . value) <|> (throwError $ unknownFormat (show objectId) (show object)) unknownFormat = printf "Unknown font format for object #%s : %s" loadEncoding :: MonadError String m => Object -> m Font loadEncoding (Direct (NameObject (Name name))) = encoding name loadEncoding object = throwError $ printf "Encoding must be a name, not that : %s" $ show object loadResources :: (Alternative m, ROLayer m) => Dictionary -> FontCache m FontSet loadResources = foldM addFont Map.empty . Map.toList where addFont output (name, Reference (IndirectObjCoordinates {objectId})) = flip (Map.insert name) output <$> cache loadFont objectId addFont output _ = return output getReference :: DirectObject -> [Id Object] getReference (Reference (IndirectObjCoordinates {objectId})) = [objectId] getReference _ = [] loadPage :: (Alternative m, ROLayer m) => Id Object -> m Page loadPage source = do page <- objectById source byteContents <- loadByteContents . value =<< getKey "Contents" page resources <- getFontDictionary page return $ Page {byteContents, resources, source} pagesList :: (Alternative m, ROLayer m) => m [Id Object] pagesList = (catalog // PPath ["Root", "Pages", "Kids"] >>= getReferences . value) <|> return [] where getReferences (Direct (Array kids)) = return $ getReference =<< kids getReferences _ = throwError "Not a pages array" editPagesList :: RWLayer m => ([DirectObject] -> [DirectObject]) -> m () editPagesList f = do pages <- runRO (catalog // PPath ["Root", "Pages"]) kids <- runRO (pages >./ "Kids") count <- runRO (pages >./ "Count") (newSize, newKids) <- editKids (value kids) save $ kids {value = newKids} save $ count {value = Direct $ NumberObject newSize} where editKids (Direct (Array pageRefs)) = let result = f pageRefs in return (Number . fromIntegral $ length result, Direct $ Array result) editKids _ = throwError "Invalid format for Root.Pages.Kids (not an array)" updatePage :: RWLayer m => Page -> m () updatePage (Page {byteContents}) = sequence_ $ mapi updateByteContent byteContents where updateByteContent source byteContent = edit .at source .at Clear $ \_ -> return byteContent data Pages = Pages newtype PageNumber = P Int data Contents = Contents instance (Alternative m, MonadError String m) => Box m Pages Layer (Map Int Page) where r Pages = runReaderT (numbered <$> pagesList >>= mapM loadPage) where numbered :: [Id Object] -> Map Int (Id Object) numbered = Map.fromList . zip [1..] w Pages pages = execStateT $ do mapM_ updatePage pages setPagesList $ Map.elems (source <$> pages) where setPagesList = editPagesList . const . fmap (Reference . flip IndirectObjCoordinates 0) instance (Alternative m, MonadError String m) => Box m PageNumber Layer Page where r (P p) layer | p < 1 = throwError "Pages start at 1" | otherwise = runReaderT (drop (p - 1) <$> pagesList >>= firstPage) layer where firstPage = maybe (throwError "Page is out of bounds") loadPage . listToMaybe w (P p) page = execStateT $ do updatePage page editPagesList $ setPage (Reference $ IndirectObjCoordinates (source page) 0) where setPage ref l = take (p-1) l ++ ref : drop p l instance Monad m => Box m Contents Page (OrderedMap (Id Object) Content) where r Contents = return . fmap Content.parse . byteContents w Contents contents page = return $ page {byteContents} where byteContents = toStrict . render LF <$> contents withFonts :: MonadError String m => (Layer -> FontCache (LayerReader m) a) -> Layer -> m a withFonts f layer = runReaderT (evalStateT (f layer) Id.empty) layer withResources :: (Alternative m, MonadError String m) => (Page -> ReaderT FontSet m b) -> Page -> FontCache (LayerReader m) b withResources f p = loadResources (resources p) >>= lift . lift . runReaderT (f p)