Hufflepdf/src/PDF/Pages.hs

158 lines
5.4 KiB
Haskell
Executable File

{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE FlexibleContexts #-}
module PDF.Pages (
Contents(..)
, FontCache
, Page(..)
, PageNumber(..)
, Pages(..)
, cacheFonts
, withResources
) where
import Control.Applicative (Alternative, (<|>))
import Control.Monad (foldM)
import Control.Monad.Fail (MonadFail(..))
import Control.Monad.Reader (ReaderT, runReaderT)
import Control.Monad.State (StateT(..), evalStateT, execStateT, gets, modify)
import Control.Monad.Trans (lift)
import Data.ByteString.Lazy (toStrict)
import Data.Id (Id)
import Data.Map (Map)
import qualified Data.Map as Map (empty, fromList, insert, lookup, toList)
import Data.OrderedMap (OrderedMap, build, mapi)
import PDF.Box (Box(..), at, edit)
import PDF.CMap (cMap)
import PDF.Content (Content(..))
import qualified PDF.Content as Content (parse)
import PDF.Encoding (encoding)
import PDF.EOL (Style(..))
import PDF.Font (Font, FontSet)
import PDF.Layer (Layer(..), Objects(..))
import PDF.Object (
Dictionary, DirectObject(..), IndirectObjCoordinates(..)
, Name(..), Object(..)
,)
import PDF.Object.Navigation (
Clear(..), (//), (>./), (>//), getDictionary
, getKey, objectById, origin
)
import PDF.Output (render)
import Prelude hiding (fail)
import Text.Printf (printf)
type Except m = (Alternative m, MonadFail m)
type InLayer m = ReaderT Layer m
type CachedFonts = Map (Id Object) Font
type FontCache m = StateT CachedFonts (InLayer m)
data Page = Page {
contents :: OrderedMap (Id Object) Content
, resources :: Dictionary
, source :: (Id Object)
}
loadContents :: Except m => DirectObject -> InLayer m (OrderedMap (Id Object) Content)
loadContents directObject =
sequenceA . build loadContent $ objectIds directObject
where
loadContent :: Except m => (Id Object) -> InLayer m Content
loadContent objectId = objectById objectId >>= r Clear >>= Content.parse
objectIds (Array l) = l >>= getReference
objectIds dirObj = getReference dirObj
getFontDictionary :: Except m => Object -> InLayer m Dictionary
getFontDictionary pageObj =
(pageObj >// ["Resources", "Font"] >>= getDictionary) <|> return Map.empty
cache :: Except m => ((Id Object) -> FontCache m Font) -> (Id Object) -> FontCache m Font
cache loader objectId =
gets (Map.lookup objectId) >>= maybe load return
where
load = do
value <- loader objectId
modify $ Map.insert objectId value
return value
loadFont :: Except m => (Id Object) -> FontCache m Font
loadFont objectId = lift $ objectById objectId >>= tryMappings
where
tryMappings object =
(object >./ "ToUnicode" >>= r Clear >>= cMap)
<|> (object >./ "Encoding" >>= loadEncoding)
<|> (fail $ unknownFormat (show objectId) (show object))
unknownFormat = printf "Unknown font format for object #%s : %s"
loadEncoding :: MonadFail m => Object -> m Font
loadEncoding (Direct (NameObject (Name name))) = encoding name
loadEncoding object =
fail $ printf "Encoding must be a name, not that : %s" $ show object
loadResources :: Except m => Dictionary -> FontCache m FontSet
loadResources = foldM addFont Map.empty . Map.toList
where
addFont output (name, Reference (IndirectObjCoordinates {objectId})) =
flip (Map.insert name) output <$> cache loadFont objectId
addFont output _ = return output
getReference :: DirectObject -> [(Id Object)]
getReference (Reference (IndirectObjCoordinates {objectId})) = [objectId]
getReference _ = []
loadPage :: Except m => (Id Object) -> InLayer m Page
loadPage source = do
page <- objectById source
contents <- getKey "Contents" page >>= loadContents
resources <- getFontDictionary page
return $ Page {contents, resources, source}
pagesList :: Except m => InLayer m [(Id Object)]
pagesList =
(origin // ["Root", "Pages"] >>= getKey "Kids" >>= return . getReferences)
<|> return []
where
getReferences (Array kids) = kids >>= getReference
getReferences _ = fail "Not a pages array"
updatePage :: MonadFail m => Page -> StateT Layer m ()
updatePage (Page {contents}) = sequence_ $ mapi updateContent contents
where
updateContent source content =
edit . at Objects . at source . at Clear $ setContent content
setContent content _ = return . toStrict $ render LF content
data Pages = Pages
newtype PageNumber = P Int
data Contents = Contents
instance (Alternative m, MonadFail m) => Box m Pages Layer (Map Int Page) where
r Pages layer = runReaderT (numbered <$> (mapM loadPage =<< pagesList)) layer
where
numbered :: [Page] -> Map Int Page
numbered = Map.fromList . zip [1..]
w Pages pages = execStateT $ mapM_ updatePage pages
instance (Alternative m, MonadFail m) => Box m PageNumber Layer Page where
r (P i) layer
| i < 1 = fail "Pages start at 1"
| otherwise = runReaderT (drop (i - 1) <$> pagesList >>= firstPage) layer
where
firstPage [] = fail "Page is out of bounds"
firstPage (p:_) = loadPage p
w _ page = execStateT $ updatePage page
instance Monad m => Box m Contents Page (OrderedMap (Id Object) Content) where
r Contents = return . contents
w _ contents page = return $ page {contents}
cacheFonts :: Monad m => StateT CachedFonts m a -> m a
cacheFonts = flip evalStateT Map.empty
withResources :: Except m => (Page -> ReaderT FontSet m b) -> Page -> FontCache m b
withResources f p =
loadResources (resources p) >>= lift . lift . runReaderT (f p)