Hufflepdf/src/PDF/Object/Navigation.hs

181 lines
6.3 KiB
Haskell

{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveFunctor #-}
module PDF.Object.Navigation (
Nav(..)
, PPath(..)
, ROLayer
, RWLayer
, StreamContent(..)
, (./)
, (//)
, (>./)
, (>//)
, castObject
, catalog
, getDictionary
, getKey
, objectById
, save
) where
import Codec.Compression.Zlib (compress, decompress)
import Control.Monad.Except (MonadError(..))
import Control.Monad.Reader (MonadReader(..))
import Control.Monad.State (MonadState)
import Data.ByteString (ByteString)
import qualified Data.ByteString as BS (length)
import qualified Data.ByteString.Lazy as Lazy (ByteString, fromStrict, toStrict)
import Data.Id (Id)
import qualified Data.Id as Id (at)
import qualified Data.Map as Map (adjust, insert, lookup)
import PDF.Box (Box(..), at, edit{-, runRO-})
import PDF.Layer (Layer(..))
import PDF.Object (
Dictionary, DirectObject(..), IndirectObjCoordinates(..)
, Name(..), Number(..), Object(..), Structure(..)
)
import Prelude hiding (fail)
import Text.Printf (printf)
newtype PPath = PPath [Component]
data DPath = DPath {
root :: Id Object
, offset :: [Component]
} deriving Show
push :: Component -> DPath -> DPath
push component dPath = dPath {offset = (offset dPath) ++ [component]}
data Nav a = Nav {
dPath :: DPath
, value :: a
} deriving (Functor)
instance Show a => Show (Nav a) where
show (Nav {dPath, value}) = "Nav {dPath = " ++ show dPath ++ ", value = " ++ show value ++ "}"
type ROLayer m = (MonadReader Layer m, MonadError String m)
type RWLayer m = (MonadState Layer m, MonadError String m)
type Component = String
getDictionary :: ROLayer m => Nav Object -> m (Nav Dictionary)
getDictionary (Nav {dPath, value}) =
case value of
(Direct (Dictionary aDict)) -> return $ Nav {dPath, value = aDict}
(Direct (Reference ref)) -> objectById (objectId ref) >>= getDictionary
(Stream {header}) -> return $ Nav {dPath, value = header}
obj -> expected "dictionary : " obj
expected :: (MonadError String m, Show a) => String -> a -> m b
expected name = throwError . printf "Not a %s: %s" name . show
getKey :: ROLayer m => String -> Nav Object -> m (Nav DirectObject)
getKey key navObject = getDictionary navObject >>= f
where
errorMessage =
printf "Key %s not found in object %s" key (show navObject)
f (Nav {dPath, value}) =
case Map.lookup (Name key) value of
Nothing -> throwError errorMessage
Just dObj -> return $ Nav {dPath = push key dPath, value = dObj}
objectById :: ROLayer m => (Id Object) -> m (Nav Object)
objectById objectId = do
layer <- ask
return $ Nav {
dPath = DPath {root = objectId, offset = []}
, value = objects layer `Id.at` objectId
}
castObject :: ROLayer m => Nav DirectObject -> m (Nav Object)
castObject (Nav {value = !(Reference (IndirectObjCoordinates {objectId}))}) =
objectById objectId
castObject (Nav {dPath, value}) = return $ Nav {dPath, value = Direct value}
(./) :: ROLayer m => m (Nav Object) -> Component -> m (Nav Object)
(./) navObject key = (navObject >>= getKey key >>= castObject)
(//) :: ROLayer m => m (Nav Object) -> PPath -> m (Nav Object)
(//) navObject (PPath []) = navObject
(//) navObject (PPath (key:keys)) = navObject ./ key // (PPath keys)
(>./) :: ROLayer m => Nav Object -> Component -> m (Nav Object)
(>./) navObject = (return navObject ./)
(>//) :: ROLayer m => Nav Object -> PPath -> m (Nav Object)
(>//) navObject = (return navObject //)
catalog :: ROLayer m => m (Nav Object)
catalog = do
value <- Direct . Dictionary . trailer . docStructure <$> ask
return $ Nav {dPath = undefined, value}
setAt :: [Component] -> DirectObject -> Dictionary -> Dictionary
setAt [] _ dict = dict
setAt [component] directObject dict =
Map.insert (Name component) directObject dict
setAt (component:components) directObject dict =
Map.adjust setDirObj (Name component) dict
where
setDirObj (Dictionary subDict) =
Dictionary $ setAt components directObject subDict
setDirObj x = x
save :: RWLayer m => Nav Object -> m ()
save nav@(Nav {dPath, value = Direct dObj}) =
edit .at (root dPath) $ return . setObj
where
setObj obj@(Stream {header}) =
obj {header = setAt (offset dPath) dObj header}
setObj (Direct (Dictionary dict)) =
Direct . Dictionary $ setAt (offset dPath) dObj dict
setObj _ = value nav
save (Nav {dPath = DPath {root, offset = []}, value}) = edit $ w root value
save _ = throwError "Streams can't be properties of PDF objects"
data StreamContent = Clear | Raw
onLazy :: (Lazy.ByteString -> Lazy.ByteString) -> ByteString -> ByteString
onLazy f = Lazy.toStrict . f . Lazy.fromStrict
contains :: String -> DirectObject -> Bool
contains needle !(NameObject (Name n)) = needle == n
contains needle !(Array directObjects) = oneOf directObjects (contains needle)
where
oneOf [] _ = False
oneOf (x:xs) p = p x || oneOf xs p
contains _ _ = False
instance MonadError String m => Box m StreamContent (Nav Object) ByteString where
r sc = r sc . value
w sc newStreamContent nav = setValue <$> w sc newStreamContent (value nav)
where
setValue value = nav {value}
instance MonadError String m => Box m StreamContent Object ByteString where
r Raw (Stream {streamContent}) = return streamContent
r Clear (Stream {header, streamContent}) = return $
case Map.lookup (Name "Filter") header of
Just directObject
| contains "FlateDecode" directObject -> onLazy decompress streamContent
_ -> streamContent
r _ obj = expected "stream" obj
w Raw streamContent obj@(Stream {}) = return $ obj {streamContent}
w Clear newStreamContent (Stream {header}) =
let streamContent = getStreamContent (Map.lookup (Name "Filter") header) in
return $ Stream {header = fixLength streamContent, streamContent}
where
getStreamContent (Just directObject)
| contains "FlateDecode" directObject = onLazy compress newStreamContent
getStreamContent _ = newStreamContent
fixLength sc =
let newLength = NumberObject . Number . fromIntegral $ BS.length sc in
Map.insert (Name "Length") newLength header
w _ _ obj = expected "stream" obj