2019-11-29 11:51:35 +01:00
|
|
|
{-# LANGUAGE NamedFieldPuns #-}
|
2020-02-28 18:15:40 +01:00
|
|
|
{-# LANGUAGE FlexibleInstances #-}
|
|
|
|
{-# LANGUAGE MultiParamTypeClasses #-}
|
2020-03-03 18:17:44 +01:00
|
|
|
{-# LANGUAGE ConstraintKinds #-}
|
|
|
|
{-# LANGUAGE FlexibleContexts #-}
|
2019-11-29 11:51:35 +01:00
|
|
|
module PDF.Pages (
|
2020-03-10 22:57:11 +01:00
|
|
|
Contents(..)
|
2020-03-11 18:55:18 +01:00
|
|
|
, FontCache
|
2020-03-10 22:57:11 +01:00
|
|
|
, Page(..)
|
2020-03-03 18:17:44 +01:00
|
|
|
, PageNumber(..)
|
2020-03-04 18:19:10 +01:00
|
|
|
, Pages(..)
|
2020-03-03 18:17:44 +01:00
|
|
|
, cacheFonts
|
2020-03-11 18:55:18 +01:00
|
|
|
, withResources
|
2019-11-29 11:51:35 +01:00
|
|
|
) where
|
|
|
|
|
2020-03-05 10:09:09 +01:00
|
|
|
import Control.Applicative (Alternative, (<|>))
|
2020-02-15 10:22:42 +01:00
|
|
|
import Control.Monad (foldM)
|
2020-02-11 08:29:08 +01:00
|
|
|
import Control.Monad.Fail (MonadFail(..))
|
2020-03-11 18:55:18 +01:00
|
|
|
import Control.Monad.Reader (ReaderT, runReaderT)
|
2020-03-14 16:57:16 +01:00
|
|
|
import Control.Monad.State (StateT(..), evalStateT, gets, modify)
|
|
|
|
import Control.Monad.Trans (lift)
|
2020-03-14 22:30:28 +01:00
|
|
|
import Data.Id (Id)
|
2020-03-14 16:57:16 +01:00
|
|
|
import Data.Map (Map)
|
2019-11-29 11:51:35 +01:00
|
|
|
import qualified Data.Map as Map (empty, fromList, insert, lookup, toList)
|
2020-03-14 22:30:28 +01:00
|
|
|
import Data.OrderedMap (OrderedMap, build)
|
2020-02-26 22:13:29 +01:00
|
|
|
import PDF.Box (Box(..))
|
2020-02-08 08:15:32 +01:00
|
|
|
import PDF.CMap (cMap)
|
2020-03-14 16:57:16 +01:00
|
|
|
import PDF.Content (Content(..))
|
2020-02-10 10:54:44 +01:00
|
|
|
import qualified PDF.Content as Content (parse)
|
2020-02-08 08:15:32 +01:00
|
|
|
import PDF.Encoding (encoding)
|
|
|
|
import PDF.Font (Font, FontSet)
|
2020-02-17 15:29:59 +01:00
|
|
|
import PDF.Layer (Layer(..))
|
2019-11-29 11:51:35 +01:00
|
|
|
import PDF.Object (
|
2020-02-17 15:29:59 +01:00
|
|
|
Dictionary, DirectObject(..), IndirectObjCoordinates(..)
|
2020-02-15 13:51:24 +01:00
|
|
|
, Name(..), Object(..)
|
2019-11-29 11:51:35 +01:00
|
|
|
,)
|
2020-02-11 17:59:15 +01:00
|
|
|
import PDF.Object.Navigation (
|
2020-03-08 22:18:47 +01:00
|
|
|
Clear(..), PDFContent, (//), (>./), (>//), getDictionary
|
|
|
|
, getKey, objectById, origin
|
2020-02-11 17:59:15 +01:00
|
|
|
)
|
2020-02-11 08:29:08 +01:00
|
|
|
import Prelude hiding (fail)
|
2019-11-29 11:51:35 +01:00
|
|
|
import Text.Printf (printf)
|
|
|
|
|
2020-03-14 16:57:16 +01:00
|
|
|
type Except m = (Alternative m, MonadFail m)
|
|
|
|
type InLayer m = ReaderT Layer m
|
|
|
|
|
2020-03-14 22:30:28 +01:00
|
|
|
type CachedFonts = Map (Id Object) Font
|
2020-03-14 16:57:16 +01:00
|
|
|
type FontCache m = StateT CachedFonts (InLayer m)
|
2019-11-29 11:51:35 +01:00
|
|
|
data Page = Page {
|
2020-03-14 22:30:28 +01:00
|
|
|
contents :: OrderedMap (Id Object) Content
|
2020-03-08 22:18:47 +01:00
|
|
|
, resources :: Dictionary
|
2020-03-14 22:30:28 +01:00
|
|
|
, source :: (Id Object)
|
2019-11-29 11:51:35 +01:00
|
|
|
}
|
|
|
|
|
2020-03-14 22:30:28 +01:00
|
|
|
loadContents :: Except m => DirectObject -> InLayer m (OrderedMap (Id Object) Content)
|
2020-03-08 22:18:47 +01:00
|
|
|
loadContents directObject =
|
|
|
|
sequenceA . build loadContent $ objectIds directObject
|
|
|
|
where
|
2020-03-14 22:30:28 +01:00
|
|
|
loadContent :: Except m => (Id Object) -> InLayer m Content
|
2020-03-08 22:18:47 +01:00
|
|
|
loadContent objectId = objectById objectId >>= r Clear >>= Content.parse
|
|
|
|
objectIds (Array l) = l >>= getReference
|
|
|
|
objectIds dirObj = getReference dirObj
|
|
|
|
|
2020-03-14 16:57:16 +01:00
|
|
|
getFontDictionary :: Except m => Object -> InLayer m Dictionary
|
2020-02-15 13:51:24 +01:00
|
|
|
getFontDictionary pageObj =
|
2020-03-08 22:18:47 +01:00
|
|
|
(pageObj >// ["Resources", "Font"] >>= getDictionary) <|> return Map.empty
|
2019-11-29 11:51:35 +01:00
|
|
|
|
2020-03-14 22:30:28 +01:00
|
|
|
cache :: Except m => ((Id Object) -> FontCache m Font) -> (Id Object) -> FontCache m Font
|
2019-11-29 11:51:35 +01:00
|
|
|
cache loader objectId =
|
2020-03-14 16:57:16 +01:00
|
|
|
gets (Map.lookup objectId) >>= maybe load return
|
2019-11-29 11:51:35 +01:00
|
|
|
where
|
|
|
|
load = do
|
|
|
|
value <- loader objectId
|
|
|
|
modify $ Map.insert objectId value
|
|
|
|
return value
|
|
|
|
|
2020-03-14 22:30:28 +01:00
|
|
|
loadFont :: Except m => (Id Object) -> FontCache m Font
|
2020-03-14 16:57:16 +01:00
|
|
|
loadFont objectId = lift $ objectById objectId >>= tryMappings
|
2020-02-08 08:15:32 +01:00
|
|
|
where
|
2020-02-15 13:51:24 +01:00
|
|
|
tryMappings object =
|
2020-03-04 18:31:30 +01:00
|
|
|
(object >./ "ToUnicode" >>= r Clear >>= cMap)
|
2020-02-15 13:51:24 +01:00
|
|
|
<|> (object >./ "Encoding" >>= loadEncoding)
|
|
|
|
<|> (fail $ unknownFormat (show objectId) (show object))
|
2020-02-08 08:15:32 +01:00
|
|
|
unknownFormat = printf "Unknown font format for object #%s : %s"
|
2020-03-03 18:17:44 +01:00
|
|
|
loadEncoding :: MonadFail m => Object -> m Font
|
2020-02-15 13:51:24 +01:00
|
|
|
loadEncoding (Direct (NameObject (Name name))) = encoding name
|
|
|
|
loadEncoding object =
|
|
|
|
fail $ printf "Encoding must be a name, not that : %s" $ show object
|
2020-02-08 08:15:32 +01:00
|
|
|
|
2020-03-14 16:57:16 +01:00
|
|
|
loadResources :: Except m => Dictionary -> FontCache m FontSet
|
2020-03-08 22:18:47 +01:00
|
|
|
loadResources = foldM addFont Map.empty . Map.toList
|
2019-11-29 11:51:35 +01:00
|
|
|
where
|
2020-02-08 08:15:32 +01:00
|
|
|
addFont output (name, Reference (IndirectObjCoordinates {objectId})) =
|
2019-11-29 11:51:35 +01:00
|
|
|
flip (Map.insert name) output <$> cache loadFont objectId
|
2020-02-08 08:15:32 +01:00
|
|
|
addFont output _ = return output
|
2019-11-29 11:51:35 +01:00
|
|
|
|
2020-03-14 22:30:28 +01:00
|
|
|
getReference :: DirectObject -> [(Id Object)]
|
2020-03-08 22:18:47 +01:00
|
|
|
getReference (Reference (IndirectObjCoordinates {objectId})) = [objectId]
|
|
|
|
getReference _ = []
|
2019-11-29 11:51:35 +01:00
|
|
|
|
2020-03-14 22:30:28 +01:00
|
|
|
loadPage :: Except m => (Id Object) -> InLayer m Page
|
2020-02-11 17:59:15 +01:00
|
|
|
loadPage source = do
|
2020-03-08 22:18:47 +01:00
|
|
|
page <- objectById source
|
|
|
|
contents <- getKey "Contents" page >>= loadContents
|
|
|
|
resources <- getFontDictionary page
|
|
|
|
return $ Page {contents, resources, source}
|
|
|
|
|
2020-03-14 22:30:28 +01:00
|
|
|
pagesList :: Except m => InLayer m [(Id Object)]
|
2020-03-08 22:18:47 +01:00
|
|
|
pagesList =
|
|
|
|
(origin // ["Root", "Pages"] >>= getKey "Kids" >>= return . getReferences)
|
|
|
|
<|> return []
|
|
|
|
where
|
|
|
|
getReferences (Array kids) = kids >>= getReference
|
|
|
|
getReferences _ = fail "Not a pages array"
|
2019-11-29 11:51:35 +01:00
|
|
|
|
2020-02-28 18:15:40 +01:00
|
|
|
data Pages = Pages
|
2020-03-03 18:17:44 +01:00
|
|
|
newtype PageNumber = P Int
|
2020-03-08 22:18:47 +01:00
|
|
|
data Contents = Contents
|
2020-02-28 18:15:40 +01:00
|
|
|
|
2020-03-11 18:55:18 +01:00
|
|
|
instance (Alternative m, MonadFail m) => Box m Pages Layer (Map Int Page) where
|
2020-03-03 18:17:44 +01:00
|
|
|
r Pages layer = runReaderT (numbered <$> (mapM loadPage =<< pagesList)) layer
|
|
|
|
where
|
|
|
|
numbered :: [Page] -> Map Int Page
|
|
|
|
numbered = Map.fromList . zip [1..]
|
2020-02-28 18:15:40 +01:00
|
|
|
|
2020-03-11 18:55:18 +01:00
|
|
|
instance (Alternative m, MonadFail m) => Box m PageNumber Layer Page where
|
2020-03-05 10:09:09 +01:00
|
|
|
r (P i) layer
|
|
|
|
| i < 1 = fail "Pages start at 1"
|
|
|
|
| otherwise = runReaderT (drop (i - 1) <$> pagesList >>= firstPage) layer
|
|
|
|
where
|
|
|
|
firstPage [] = fail "Page is out of bounds"
|
|
|
|
firstPage (p:_) = loadPage p
|
2020-03-03 18:17:44 +01:00
|
|
|
|
2020-03-14 22:30:28 +01:00
|
|
|
instance Monad m => Box m Contents Page (OrderedMap (Id Object) Content) where
|
2020-03-10 22:57:11 +01:00
|
|
|
r Contents = return . contents
|
|
|
|
|
2020-03-03 18:17:44 +01:00
|
|
|
cacheFonts :: Monad m => StateT CachedFonts m a -> m a
|
|
|
|
cacheFonts = flip evalStateT Map.empty
|
2020-03-11 18:55:18 +01:00
|
|
|
|
2020-03-14 16:57:16 +01:00
|
|
|
withResources :: Except m => (Page -> ReaderT FontSet m b) -> Page -> FontCache m b
|
2020-03-11 18:55:18 +01:00
|
|
|
withResources f p =
|
2020-03-14 16:57:16 +01:00
|
|
|
loadResources (resources p) >>= lift . lift . runReaderT (f p)
|