Hufflepdf/src/PDF/Pages.hs

135 lines
4.5 KiB
Haskell
Executable File

{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE UndecidableInstances #-}
module PDF.Pages (
Page(..)
, PageNumber(..)
, Pages(..)
, cacheFonts
) where
import Control.Applicative (Alternative, (<|>))
import Control.Monad (foldM)
import Control.Monad.Fail (MonadFail(..))
import Control.Monad.Reader (runReaderT)
import Control.Monad.State (MonadState, StateT(..), evalStateT, modify)
import qualified Control.Monad.RWS as RWS (get)
import Data.Map (Map)
import qualified Data.Map as Map (empty, fromList, insert, lookup, toList)
import Data.Text (Text)
import PDF.Box (Box(..))
import PDF.CMap (cMap)
import PDF.Content (Content(..))
import qualified PDF.Content as Content (parse)
import PDF.Content.Text (renderText)
import PDF.Encoding (encoding)
import PDF.Font (Font, FontSet)
import PDF.Layer (Layer(..))
import PDF.Object (
Dictionary, DirectObject(..), IndirectObjCoordinates(..)
, Name(..), Object(..)
,)
import PDF.Object.Navigation (
Clear(..), PDFContent, (//), (>./), (>//), castObject, getDictionary
, objectById, origin
)
import PDF.Output (ObjectId(..))
import Prelude hiding (fail)
import Text.Printf (printf)
type CachedFonts = Map ObjectId Font
type FontCache m = (MonadState CachedFonts m, PDFContent m)
data Page = Page {
contents :: [Content]
, source :: ObjectId
}
{-
getFontDictionary :: PDFContent m => Object -> m Dictionary
getFontDictionary pageObj =
(pageObj >// ["Resources", "Font"] >>= getDictionary)
<|> return Map.empty
cache :: FontCache m => (ObjectId -> m Font) -> ObjectId -> m Font
cache loader objectId =
(maybe load return . Map.lookup objectId) =<< RWS.get
where
load = do
value <- loader objectId
modify $ Map.insert objectId value
return value
loadFont :: FontCache m => ObjectId -> m Font
loadFont objectId = objectById objectId >>= tryMappings
where
tryMappings object =
(object >./ "ToUnicode" >>= r Clear >>= cMap)
<|> (object >./ "Encoding" >>= loadEncoding)
<|> (fail $ unknownFormat (show objectId) (show object))
unknownFormat = printf "Unknown font format for object #%s : %s"
loadEncoding :: MonadFail m => Object -> m Font
loadEncoding (Direct (NameObject (Name name))) = encoding name
loadEncoding object =
fail $ printf "Encoding must be a name, not that : %s" $ show object
loadFonts :: FontCache m => Dictionary -> m FontSet
loadFonts = foldM addFont Map.empty . Map.toList
where
addFont output (name, Reference (IndirectObjCoordinates {objectId})) =
flip (Map.insert name) output <$> cache loadFont objectId
addFont output _ = return output
-}
pagesList :: PDFContent m => m [ObjectId]
pagesList = do
pages <- origin // ["Root", "Pages"] >>= getDictionary
case Map.lookup (Name "Kids") pages of
Just (Array kids) -> return $ getReferences kids
_ -> return []
getReferences :: [DirectObject] -> [ObjectId]
getReferences objects = do
object <- objects
case object of
Reference (IndirectObjCoordinates {objectId}) -> [objectId]
_ -> []
extractContent :: FontCache m => Object -> m [Content]
extractContent pageObj = do
--fonts <- loadFonts =<< getFontDictionary pageObj
pageObj >./ "Contents" >>= several >>= mapM loadContent
where
loadContent object = r Clear object >>= (either fail return . Content.parse)
several (Direct (Array l)) = mapM castObject l
several object = return [object]
-- >>= renderText fonts
loadPage :: FontCache m => ObjectId -> m Page
loadPage source = do
contents <- extractContent =<< objectById source
return $ Page {contents, source}
data Pages = Pages
newtype PageNumber = P Int
instance (MonadFail m, Alternative m, MonadState CachedFonts m) => Box m Pages Layer (Map Int Page) where
r Pages layer = runReaderT (numbered <$> (mapM loadPage =<< pagesList)) layer
where
numbered :: [Page] -> Map Int Page
numbered = Map.fromList . zip [1..]
instance (MonadFail m, Alternative m, MonadState CachedFonts m) => Box m PageNumber Layer Page where
r (P i) layer
| i < 1 = fail "Pages start at 1"
| otherwise = runReaderT (drop (i - 1) <$> pagesList >>= firstPage) layer
where
firstPage [] = fail "Page is out of bounds"
firstPage (p:_) = loadPage p
cacheFonts :: Monad m => StateT CachedFonts m a -> m a
cacheFonts = flip evalStateT Map.empty