Hufflepdf/src/PDF/Pages.hs

159 lines
5.5 KiB
Haskell
Raw Normal View History

{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE FlexibleContexts #-}
module PDF.Pages (
Contents(..)
, FontCache
, Page(..)
, PageNumber(..)
, Pages(..)
, withFonts
, withResources
) where
import Control.Applicative (Alternative, (<|>))
import Control.Monad (foldM)
import Control.Monad.Fail (MonadFail(..))
import Control.Monad.Reader (ReaderT, runReaderT)
2020-03-17 08:46:37 +01:00
import Control.Monad.State (StateT(..), evalStateT, execStateT, gets, modify)
import Control.Monad.Trans (lift)
2020-03-17 08:46:37 +01:00
import Data.ByteString.Lazy (toStrict)
import Data.Id (Id, IdMap)
import qualified Data.Id as Id (empty, insert, lookup)
import Data.Map (Map)
import qualified Data.Map as Map (empty, fromList, insert, toList)
2020-03-17 08:46:37 +01:00
import Data.OrderedMap (OrderedMap, build, mapi)
import PDF.Box (Box(..), at, edit)
import PDF.CMap (cMap)
import PDF.Content (Content(..))
import qualified PDF.Content as Content (parse)
import PDF.Encoding (encoding)
2020-03-17 08:46:37 +01:00
import PDF.EOL (Style(..))
import PDF.Font (Font, FontSet)
2020-03-17 08:46:37 +01:00
import PDF.Layer (Layer(..), Objects(..))
import PDF.Object (
Dictionary, DirectObject(..), IndirectObjCoordinates(..)
, Name(..), Object(..)
,)
import PDF.Object.Navigation (
2020-03-17 08:46:37 +01:00
Clear(..), (//), (>./), (>//), getDictionary
, getKey, objectById, origin
)
2020-03-17 08:46:37 +01:00
import PDF.Output (render)
import Prelude hiding (fail)
import Text.Printf (printf)
type Except m = (Alternative m, MonadFail m)
type InLayer m = ReaderT Layer m
type CachedFonts = IdMap Object Font
type FontCache m = StateT CachedFonts (InLayer m)
data Page = Page {
contents :: OrderedMap (Id Object) Content
, resources :: Dictionary
, source :: (Id Object)
}
loadContents :: Except m => DirectObject -> InLayer m (OrderedMap (Id Object) Content)
loadContents directObject =
sequenceA . build loadContent $ objectIds directObject
where
loadContent :: Except m => (Id Object) -> InLayer m Content
loadContent objectId = objectById objectId >>= r Clear >>= Content.parse
objectIds (Array l) = l >>= getReference
objectIds dirObj = getReference dirObj
getFontDictionary :: Except m => Object -> InLayer m Dictionary
getFontDictionary pageObj =
(pageObj >// ["Resources", "Font"] >>= getDictionary) <|> return Map.empty
cache :: Except m => ((Id Object) -> FontCache m Font) -> (Id Object) -> FontCache m Font
cache loader objectId =
gets (Id.lookup objectId) >>= maybe load return
where
load = do
value <- loader objectId
modify $ Id.insert objectId value
return value
loadFont :: Except m => (Id Object) -> FontCache m Font
loadFont objectId = lift $ objectById objectId >>= tryMappings
where
tryMappings object =
(object >./ "ToUnicode" >>= r Clear >>= cMap)
<|> (object >./ "Encoding" >>= loadEncoding)
<|> (fail $ unknownFormat (show objectId) (show object))
unknownFormat = printf "Unknown font format for object #%s : %s"
loadEncoding :: MonadFail m => Object -> m Font
loadEncoding (Direct (NameObject (Name name))) = encoding name
loadEncoding object =
fail $ printf "Encoding must be a name, not that : %s" $ show object
loadResources :: Except m => Dictionary -> FontCache m FontSet
loadResources = foldM addFont Map.empty . Map.toList
where
addFont output (name, Reference (IndirectObjCoordinates {objectId})) =
flip (Map.insert name) output <$> cache loadFont objectId
addFont output _ = return output
getReference :: DirectObject -> [(Id Object)]
getReference (Reference (IndirectObjCoordinates {objectId})) = [objectId]
getReference _ = []
loadPage :: Except m => (Id Object) -> InLayer m Page
loadPage source = do
page <- objectById source
contents <- getKey "Contents" page >>= loadContents
resources <- getFontDictionary page
return $ Page {contents, resources, source}
pagesList :: Except m => InLayer m [(Id Object)]
pagesList =
(origin // ["Root", "Pages"] >>= getKey "Kids" >>= return . getReferences)
<|> return []
where
getReferences (Array kids) = kids >>= getReference
getReferences _ = fail "Not a pages array"
2020-03-17 08:46:37 +01:00
updatePage :: MonadFail m => Page -> StateT Layer m ()
updatePage (Page {contents}) = sequence_ $ mapi updateContent contents
where
updateContent source content =
edit . at Objects . at source . at Clear $ setContent content
setContent content _ = return . toStrict $ render LF content
data Pages = Pages
newtype PageNumber = P Int
data Contents = Contents
instance (Alternative m, MonadFail m) => Box m Pages Layer (Map Int Page) where
r Pages layer = runReaderT (numbered <$> (mapM loadPage =<< pagesList)) layer
where
numbered :: [Page] -> Map Int Page
numbered = Map.fromList . zip [1..]
2020-03-17 08:46:37 +01:00
w Pages pages = execStateT $ mapM_ updatePage pages
instance (Alternative m, MonadFail m) => Box m PageNumber Layer Page where
r (P i) layer
| i < 1 = fail "Pages start at 1"
| otherwise = runReaderT (drop (i - 1) <$> pagesList >>= firstPage) layer
where
firstPage [] = fail "Page is out of bounds"
firstPage (p:_) = loadPage p
2020-03-17 08:46:37 +01:00
w _ page = execStateT $ updatePage page
instance Monad m => Box m Contents Page (OrderedMap (Id Object) Content) where
r Contents = return . contents
2020-03-17 08:46:37 +01:00
w _ contents page = return $ page {contents}
withFonts :: Monad m => (Layer -> FontCache m a) -> Layer -> m a
withFonts f layer = runReaderT (evalStateT (f layer) Id.empty) layer
withResources :: Except m => (Page -> ReaderT FontSet m b) -> Page -> FontCache m b
withResources f p =
loadResources (resources p) >>= lift . lift . runReaderT (f p)