182 lines
6.7 KiB
Haskell
Executable File
182 lines
6.7 KiB
Haskell
Executable File
{-# LANGUAGE NamedFieldPuns #-}
|
|
{-# LANGUAGE FlexibleInstances #-}
|
|
{-# LANGUAGE MultiParamTypeClasses #-}
|
|
{-# LANGUAGE ConstraintKinds #-}
|
|
{-# LANGUAGE FlexibleContexts #-}
|
|
module PDF.Pages (
|
|
Contents(..)
|
|
, FontCache
|
|
, Page(..)
|
|
, PageNumber(..)
|
|
, Pages(..)
|
|
, withFonts
|
|
, withResources
|
|
) where
|
|
|
|
import Control.Applicative (Alternative)
|
|
import Control.Applicative ((<|>))
|
|
import Control.Monad (foldM)
|
|
import Control.Monad.Except (MonadError(..))
|
|
import Control.Monad.Reader (ReaderT, runReaderT)
|
|
import Control.Monad.State (StateT(..), evalStateT, execStateT, gets, modify)
|
|
import Control.Monad.Trans (lift)
|
|
import Data.ByteString (ByteString)
|
|
import Data.ByteString.Lazy (toStrict)
|
|
import Data.Id (Id(..), IdMap)
|
|
import qualified Data.Id as Id (empty, insert, lookup)
|
|
import Data.Map (Map)
|
|
import qualified Data.Map as Map (empty, elems, fromList, insert, toList)
|
|
import Data.Maybe (listToMaybe)
|
|
import Data.OrderedMap (OrderedMap, build, mapi)
|
|
import PDF.Box (Box(..), at, edit, runRO)
|
|
import qualified PDF.CMap as CMap (parse)
|
|
import PDF.Content (Content(..))
|
|
import qualified PDF.Content as Content (parse)
|
|
import PDF.Encoding (encoding)
|
|
import PDF.EOL (Style(..))
|
|
import PDF.Font (Font, FontSet)
|
|
import PDF.Layer (Layer(..), LayerReader)
|
|
import PDF.Object (
|
|
Dictionary, DirectObject(..), IndirectObjCoordinates(..)
|
|
, Name(..), Number(..), Object(..)
|
|
,)
|
|
import PDF.Object.Navigation (
|
|
Nav(..), PPath(..), ROLayer, RWLayer, StreamContent(..), (//), (>./)
|
|
, (>//), catalog, getDictionary, getKey, objectById, save
|
|
)
|
|
import PDF.Output (render)
|
|
import Text.Printf (printf)
|
|
|
|
type CachedFonts = IdMap Object Font
|
|
type FontCache m = StateT CachedFonts m
|
|
data Page = Page {
|
|
byteContents :: OrderedMap (Id Object) ByteString
|
|
, resources :: Dictionary
|
|
, source :: (Id Object)
|
|
}
|
|
|
|
loadByteContents :: ROLayer m => DirectObject -> m (OrderedMap (Id Object) ByteString)
|
|
loadByteContents directObject = do
|
|
objs <- sequence . build objectById $ objectIds directObject
|
|
mapM (r Clear) objs
|
|
where
|
|
objectIds (Array l) = l >>= getReference
|
|
objectIds dirObj = getReference dirObj
|
|
|
|
getFontDictionary :: (Alternative m, ROLayer m) => Nav Object -> m Dictionary
|
|
getFontDictionary pageObj =
|
|
(pageObj >// PPath ["Resources", "Font"] >>= fmap value . getDictionary)
|
|
<|> return Map.empty
|
|
|
|
cache :: ROLayer m => (Id Object -> FontCache m Font) -> Id Object -> FontCache m Font
|
|
cache loader objectId =
|
|
gets (Id.lookup objectId) >>= maybe load return
|
|
where
|
|
load = do
|
|
value <- loader objectId
|
|
modify $ Id.insert objectId value
|
|
return value
|
|
|
|
loadFont :: (Alternative m, ROLayer m) => Id Object -> FontCache m Font
|
|
loadFont objectId = lift $ objectById objectId >>= tryMappings
|
|
where
|
|
tryMappings object =
|
|
(object >./ "ToUnicode" >>= r Clear >>= CMap.parse)
|
|
<|> (object >./ "Encoding" >>= loadEncoding . value)
|
|
<|> (throwError $ unknownFormat (show objectId) (show object))
|
|
unknownFormat = printf "Unknown font format for object #%s : %s"
|
|
loadEncoding :: MonadError String m => Object -> m Font
|
|
loadEncoding (Direct (NameObject (Name name))) = encoding name
|
|
loadEncoding object =
|
|
throwError $ printf "Encoding must be a name, not that : %s" $ show object
|
|
|
|
loadResources :: (Alternative m, ROLayer m) => Dictionary -> FontCache m FontSet
|
|
loadResources = foldM addFont Map.empty . Map.toList
|
|
where
|
|
addFont output (name, Reference (IndirectObjCoordinates {objectId})) =
|
|
flip (Map.insert name) output <$> cache loadFont objectId
|
|
addFont output _ = return output
|
|
|
|
getReference :: DirectObject -> [Id Object]
|
|
getReference (Reference (IndirectObjCoordinates {objectId})) = [objectId]
|
|
getReference _ = []
|
|
|
|
loadPage :: (Alternative m, ROLayer m) => Id Object -> m Page
|
|
loadPage source = do
|
|
page <- objectById source
|
|
byteContents <- loadByteContents . value =<< getKey "Contents" page
|
|
resources <- getFontDictionary page
|
|
return $ Page {byteContents, resources, source}
|
|
|
|
pagesList :: (Alternative m, ROLayer m) => m [Id Object]
|
|
pagesList =
|
|
(catalog // PPath ["Root", "Pages", "Kids"] >>= getReferences . value)
|
|
<|> return []
|
|
where
|
|
getReferences (Direct (Array kids)) = return $ getReference =<< kids
|
|
getReferences _ = throwError "Not a pages array"
|
|
|
|
editPagesList :: RWLayer m => ([DirectObject] -> [DirectObject]) -> m ()
|
|
editPagesList f = do
|
|
pages <- runRO (catalog // PPath ["Root", "Pages"])
|
|
kids <- runRO (pages >./ "Kids")
|
|
count <- runRO (pages >./ "Count")
|
|
(newSize, newKids) <- editKids (value kids)
|
|
save $ kids {value = newKids}
|
|
save $ count {value = Direct $ NumberObject newSize}
|
|
where
|
|
editKids (Direct (Array pageRefs)) =
|
|
let result = f pageRefs in
|
|
return (Number . fromIntegral $ length result, Direct $ Array result)
|
|
editKids _ = throwError "Invalid format for Root.Pages.Kids (not an array)"
|
|
|
|
updatePage :: RWLayer m => Page -> m ()
|
|
updatePage (Page {byteContents}) = sequence_ $ mapi updateByteContent byteContents
|
|
where
|
|
updateByteContent source byteContent =
|
|
edit .at source .at Clear $ \_ -> return byteContent
|
|
|
|
data Pages = Pages
|
|
newtype PageNumber = P Int
|
|
data Contents = Contents
|
|
|
|
instance (Alternative m, MonadError String m) => Box m Pages Layer (Map Int Page) where
|
|
r Pages = runReaderT (numbered <$> pagesList >>= mapM loadPage)
|
|
where
|
|
numbered :: [Id Object] -> Map Int (Id Object)
|
|
numbered = Map.fromList . zip [1..]
|
|
|
|
w Pages pages = execStateT $ do
|
|
mapM_ updatePage pages
|
|
setPagesList $ Map.elems (source <$> pages)
|
|
where
|
|
setPagesList =
|
|
editPagesList . const . fmap (Reference . flip IndirectObjCoordinates 0)
|
|
|
|
instance (Alternative m, MonadError String m) => Box m PageNumber Layer Page where
|
|
r (P p) layer
|
|
| p < 1 = throwError "Pages start at 1"
|
|
| otherwise = runReaderT (drop (p - 1) <$> pagesList >>= firstPage) layer
|
|
where
|
|
firstPage =
|
|
maybe (throwError "Page is out of bounds") loadPage . listToMaybe
|
|
|
|
w (P p) page = execStateT $ do
|
|
updatePage page
|
|
editPagesList $ setPage (Reference $ IndirectObjCoordinates (source page) 0)
|
|
where
|
|
setPage ref l = take (p-1) l ++ ref : drop p l
|
|
|
|
instance Monad m => Box m Contents Page (OrderedMap (Id Object) Content) where
|
|
r Contents = return . fmap Content.parse . byteContents
|
|
w Contents contents page = return $ page {byteContents}
|
|
where
|
|
byteContents = toStrict . render LF <$> contents
|
|
|
|
withFonts :: MonadError String m => (Layer -> FontCache (LayerReader m) a) -> Layer -> m a
|
|
withFonts f layer = runReaderT (evalStateT (f layer) Id.empty) layer
|
|
|
|
withResources :: (Alternative m, MonadError String m) => (Page -> ReaderT FontSet m b) -> Page -> FontCache (LayerReader m) b
|
|
withResources f p =
|
|
loadResources (resources p) >>= lift . lift . runReaderT (f p)
|