Hufflepdf/src/PDF/Pages.hs

182 lines
6.7 KiB
Haskell
Executable File

{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE FlexibleContexts #-}
module PDF.Pages (
Contents(..)
, FontCache
, Page(..)
, PageNumber(..)
, Pages(..)
, withFonts
, withResources
) where
import Control.Applicative (Alternative)
import Control.Applicative ((<|>))
import Control.Monad (foldM)
import Control.Monad.Except (MonadError(..))
import Control.Monad.Reader (ReaderT, runReaderT)
import Control.Monad.State (StateT(..), evalStateT, execStateT, gets, modify)
import Control.Monad.Trans (lift)
import Data.ByteString (ByteString)
import Data.ByteString.Lazy (toStrict)
import Data.Id (Id(..), IdMap)
import qualified Data.Id as Id (empty, insert, lookup)
import Data.Map (Map)
import qualified Data.Map as Map (empty, elems, fromList, insert, toList)
import Data.Maybe (listToMaybe)
import Data.OrderedMap (OrderedMap, build, mapi)
import PDF.Box (Box(..), at, edit, runRO)
import qualified PDF.CMap as CMap (parse)
import PDF.Content (Content(..))
import qualified PDF.Content as Content (parse)
import PDF.Encoding (encoding)
import PDF.EOL (Style(..))
import PDF.Font (Font, FontSet)
import PDF.Layer (Layer(..), LayerReader)
import PDF.Object (
Dictionary, DirectObject(..), IndirectObjCoordinates(..)
, Name(..), Number(..), Object(..)
,)
import PDF.Object.Navigation (
Nav(..), PPath(..), ROLayer, RWLayer, StreamContent(..), (//), (>./)
, (>//), catalog, getDictionary, getKey, objectById, save
)
import PDF.Output (render)
import Text.Printf (printf)
type CachedFonts = IdMap Object Font
type FontCache m = StateT CachedFonts m
data Page = Page {
byteContents :: OrderedMap (Id Object) ByteString
, resources :: Dictionary
, source :: (Id Object)
}
loadByteContents :: ROLayer m => DirectObject -> m (OrderedMap (Id Object) ByteString)
loadByteContents directObject = do
objs <- sequence . build objectById $ objectIds directObject
mapM (r Clear) objs
where
objectIds (Array l) = l >>= getReference
objectIds dirObj = getReference dirObj
getFontDictionary :: (Alternative m, ROLayer m) => Nav Object -> m Dictionary
getFontDictionary pageObj =
(pageObj >// PPath ["Resources", "Font"] >>= fmap value . getDictionary)
<|> return Map.empty
cache :: ROLayer m => (Id Object -> FontCache m Font) -> Id Object -> FontCache m Font
cache loader objectId =
gets (Id.lookup objectId) >>= maybe load return
where
load = do
value <- loader objectId
modify $ Id.insert objectId value
return value
loadFont :: (Alternative m, ROLayer m) => Id Object -> FontCache m Font
loadFont objectId = lift $ objectById objectId >>= tryMappings
where
tryMappings object =
(object >./ "ToUnicode" >>= r Clear >>= CMap.parse)
<|> (object >./ "Encoding" >>= loadEncoding . value)
<|> (throwError $ unknownFormat (show objectId) (show object))
unknownFormat = printf "Unknown font format for object #%s : %s"
loadEncoding :: MonadError String m => Object -> m Font
loadEncoding (Direct (NameObject (Name name))) = encoding name
loadEncoding object =
throwError $ printf "Encoding must be a name, not that : %s" $ show object
loadResources :: (Alternative m, ROLayer m) => Dictionary -> FontCache m FontSet
loadResources = foldM addFont Map.empty . Map.toList
where
addFont output (name, Reference (IndirectObjCoordinates {objectId})) =
flip (Map.insert name) output <$> cache loadFont objectId
addFont output _ = return output
getReference :: DirectObject -> [Id Object]
getReference (Reference (IndirectObjCoordinates {objectId})) = [objectId]
getReference _ = []
loadPage :: (Alternative m, ROLayer m) => Id Object -> m Page
loadPage source = do
page <- objectById source
byteContents <- loadByteContents . value =<< getKey "Contents" page
resources <- getFontDictionary page
return $ Page {byteContents, resources, source}
pagesList :: (Alternative m, ROLayer m) => m [Id Object]
pagesList =
(catalog // PPath ["Root", "Pages", "Kids"] >>= getReferences . value)
<|> return []
where
getReferences (Direct (Array kids)) = return $ getReference =<< kids
getReferences _ = throwError "Not a pages array"
editPagesList :: RWLayer m => ([DirectObject] -> [DirectObject]) -> m ()
editPagesList f = do
pages <- runRO (catalog // PPath ["Root", "Pages"])
kids <- runRO (pages >./ "Kids")
count <- runRO (pages >./ "Count")
(newSize, newKids) <- editKids (value kids)
save $ kids {value = newKids}
save $ count {value = Direct $ NumberObject newSize}
where
editKids (Direct (Array pageRefs)) =
let result = f pageRefs in
return (Number . fromIntegral $ length result, Direct $ Array result)
editKids _ = throwError "Invalid format for Root.Pages.Kids (not an array)"
updatePage :: RWLayer m => Page -> m ()
updatePage (Page {byteContents}) = sequence_ $ mapi updateByteContent byteContents
where
updateByteContent source byteContent =
edit .at source .at Clear $ \_ -> return byteContent
data Pages = Pages
newtype PageNumber = P Int
data Contents = Contents
instance (Alternative m, MonadError String m) => Box m Pages Layer (Map Int Page) where
r Pages = runReaderT (numbered <$> pagesList >>= mapM loadPage)
where
numbered :: [Id Object] -> Map Int (Id Object)
numbered = Map.fromList . zip [1..]
w Pages pages = execStateT $ do
mapM_ updatePage pages
setPagesList $ Map.elems (source <$> pages)
where
setPagesList =
editPagesList . const . fmap (Reference . flip IndirectObjCoordinates 0)
instance (Alternative m, MonadError String m) => Box m PageNumber Layer Page where
r (P p) layer
| p < 1 = throwError "Pages start at 1"
| otherwise = runReaderT (drop (p - 1) <$> pagesList >>= firstPage) layer
where
firstPage =
maybe (throwError "Page is out of bounds") loadPage . listToMaybe
w (P p) page = execStateT $ do
updatePage page
editPagesList $ setPage (Reference $ IndirectObjCoordinates (source page) 0)
where
setPage ref l = take (p-1) l ++ ref : drop p l
instance Monad m => Box m Contents Page (OrderedMap (Id Object) Content) where
r Contents = return . fmap Content.parse . byteContents
w Contents contents page = return $ page {byteContents}
where
byteContents = toStrict . render LF <$> contents
withFonts :: MonadError String m => (Layer -> FontCache (LayerReader m) a) -> Layer -> m a
withFonts f layer = runReaderT (evalStateT (f layer) Id.empty) layer
withResources :: (Alternative m, MonadError String m) => (Page -> ReaderT FontSet m b) -> Page -> FontCache (LayerReader m) b
withResources f p =
loadResources (resources p) >>= lift . lift . runReaderT (f p)