2020-02-11 08:29:08 +01:00
|
|
|
{-# LANGUAGE NamedFieldPuns #-}
|
|
|
|
{-# LANGUAGE ConstraintKinds #-}
|
|
|
|
{-# LANGUAGE FlexibleContexts #-}
|
2020-02-26 22:13:29 +01:00
|
|
|
{-# LANGUAGE MultiParamTypeClasses #-}
|
|
|
|
{-# LANGUAGE FlexibleInstances #-}
|
2020-02-11 08:29:08 +01:00
|
|
|
module PDF.Object.Navigation (
|
2020-03-14 16:55:05 +01:00
|
|
|
Clear(..)
|
2020-03-03 18:16:12 +01:00
|
|
|
, PDFContent
|
2020-03-04 18:31:30 +01:00
|
|
|
, Raw(..)
|
2020-02-15 13:51:24 +01:00
|
|
|
, (./)
|
2020-02-15 10:22:42 +01:00
|
|
|
, (//)
|
2020-02-15 13:51:24 +01:00
|
|
|
, (>./)
|
|
|
|
, (>//)
|
2020-03-04 18:14:33 +01:00
|
|
|
, castObject
|
2020-02-11 08:29:08 +01:00
|
|
|
, getDictionary
|
2020-03-08 22:18:47 +01:00
|
|
|
, getKey
|
2020-02-11 17:35:35 +01:00
|
|
|
, objectById
|
2020-02-11 08:29:08 +01:00
|
|
|
, origin
|
|
|
|
) where
|
|
|
|
|
2020-02-26 22:13:29 +01:00
|
|
|
import Codec.Compression.Zlib (compress, decompress)
|
2020-02-11 08:29:08 +01:00
|
|
|
import Control.Monad.Reader (MonadReader(..))
|
|
|
|
import Control.Monad.Fail (MonadFail(..))
|
|
|
|
import Data.ByteString (ByteString)
|
2020-02-26 22:13:29 +01:00
|
|
|
import qualified Data.ByteString.Lazy as Lazy (ByteString, fromStrict, toStrict)
|
2020-03-15 15:13:00 +01:00
|
|
|
import Data.Id (Id, at)
|
2020-02-11 08:29:08 +01:00
|
|
|
import qualified Data.Map as Map (lookup)
|
2020-02-26 22:13:29 +01:00
|
|
|
import PDF.Box (Box(..))
|
2020-02-17 15:29:59 +01:00
|
|
|
import PDF.Layer (Layer(..))
|
2020-02-11 08:29:08 +01:00
|
|
|
import PDF.Object (
|
2020-02-17 15:29:59 +01:00
|
|
|
Dictionary, DirectObject(..), IndirectObjCoordinates(..)
|
2020-02-11 08:29:08 +01:00
|
|
|
, Name(..), Object(..), Structure(..)
|
|
|
|
)
|
|
|
|
import Prelude hiding (fail)
|
|
|
|
import Text.Printf (printf)
|
|
|
|
|
2020-03-14 16:55:05 +01:00
|
|
|
type PDFContent m = (MonadReader Layer m, MonadFail m)
|
2020-02-28 18:14:27 +01:00
|
|
|
type Component = String
|
2020-02-11 08:29:08 +01:00
|
|
|
|
2020-02-15 13:51:24 +01:00
|
|
|
getDictionary :: PDFContent m => Object -> m Dictionary
|
|
|
|
getDictionary (Direct (Dictionary aDict)) = return aDict
|
|
|
|
getDictionary (Direct (Reference (IndirectObjCoordinates {objectId}))) =
|
|
|
|
objectById objectId >>= getDictionary
|
|
|
|
getDictionary (Stream {header}) = return header
|
|
|
|
getDictionary obj = expected "dictionary : " obj
|
2020-02-11 17:35:35 +01:00
|
|
|
|
|
|
|
expected :: (MonadFail m, Show a) => String -> a -> m b
|
2020-02-11 08:29:08 +01:00
|
|
|
expected name = fail . printf "Not a %s: %s" name . show
|
|
|
|
|
2020-03-08 22:18:47 +01:00
|
|
|
getKey :: PDFContent m => String -> Object -> m DirectObject
|
|
|
|
getKey key object = getDictionary object >>= catchMaybe . Map.lookup (Name key)
|
2020-02-11 08:29:08 +01:00
|
|
|
where
|
|
|
|
errorMessage =
|
2020-03-11 10:47:52 +01:00
|
|
|
printf "Key %s not found in object %s" key (show object)
|
2020-03-08 22:18:47 +01:00
|
|
|
catchMaybe = maybe (fail errorMessage) return
|
2020-02-11 08:29:08 +01:00
|
|
|
|
2020-03-14 22:30:28 +01:00
|
|
|
objectById :: PDFContent m => (Id Object) -> m Object
|
2020-02-11 17:35:35 +01:00
|
|
|
objectById objectId = do
|
2020-02-23 22:15:52 +01:00
|
|
|
layer <- ask
|
2020-03-15 15:13:00 +01:00
|
|
|
return (objects layer `at` objectId)
|
2020-02-11 08:29:08 +01:00
|
|
|
|
2020-02-28 18:14:27 +01:00
|
|
|
(./) :: PDFContent m => m Object -> Component -> m Object
|
2020-03-08 22:18:47 +01:00
|
|
|
(./) object key = (object >>= getKey key >>= castObject)
|
2020-03-04 18:14:33 +01:00
|
|
|
|
|
|
|
castObject :: PDFContent m => DirectObject -> m Object
|
|
|
|
castObject (Reference (IndirectObjCoordinates {objectId})) =
|
|
|
|
objectById objectId
|
|
|
|
castObject directObject = return $ Direct directObject
|
2020-02-15 13:51:24 +01:00
|
|
|
|
2020-02-28 18:14:27 +01:00
|
|
|
(//) :: PDFContent m => m Object -> [Component] -> m Object
|
2020-02-15 13:51:24 +01:00
|
|
|
(//) object [] = object
|
|
|
|
(//) object (key:keys) = object ./ key // keys
|
|
|
|
|
2020-02-28 18:14:27 +01:00
|
|
|
(>./) :: PDFContent m => Object -> Component -> m Object
|
2020-02-15 13:51:24 +01:00
|
|
|
(>./) object = (return object ./)
|
2020-02-11 08:29:08 +01:00
|
|
|
|
2020-02-28 18:14:27 +01:00
|
|
|
(>//) :: PDFContent m => Object -> [Component] -> m Object
|
2020-02-15 13:51:24 +01:00
|
|
|
(>//) object = (return object //)
|
2020-02-11 08:29:08 +01:00
|
|
|
|
2020-02-15 13:51:24 +01:00
|
|
|
origin :: PDFContent m => m Object
|
|
|
|
origin = Direct . Dictionary . trailer . docStructure <$> ask
|
2020-02-11 08:29:08 +01:00
|
|
|
|
2020-03-04 18:31:30 +01:00
|
|
|
data Clear = Clear
|
|
|
|
data Raw = Raw
|
2020-02-26 22:13:29 +01:00
|
|
|
|
|
|
|
onLazy :: (Lazy.ByteString -> Lazy.ByteString) -> ByteString -> ByteString
|
|
|
|
onLazy f = Lazy.toStrict . f . Lazy.fromStrict
|
|
|
|
|
2020-03-11 10:47:52 +01:00
|
|
|
contains :: String -> DirectObject -> Bool
|
|
|
|
contains needle (NameObject (Name n)) = needle == n
|
|
|
|
contains needle (Array directObjects) = oneOf directObjects (contains needle)
|
|
|
|
where
|
|
|
|
oneOf [] _ = False
|
|
|
|
oneOf (x:xs) p = p x || oneOf xs p
|
|
|
|
contains _ _ = False
|
|
|
|
|
2020-03-04 18:31:30 +01:00
|
|
|
instance MonadFail m => Box m Clear Object ByteString where
|
|
|
|
r Clear (Stream {header, streamContent}) = return $
|
2020-02-26 22:13:29 +01:00
|
|
|
case Map.lookup (Name "Filter") header of
|
2020-03-11 10:47:52 +01:00
|
|
|
Just directObject
|
|
|
|
| contains "FlateDecode" directObject -> onLazy decompress streamContent
|
2020-02-26 22:13:29 +01:00
|
|
|
_ -> streamContent
|
2020-03-04 18:31:30 +01:00
|
|
|
r _ obj = expected "stream" obj
|
2020-02-26 22:13:29 +01:00
|
|
|
|
2020-03-04 18:31:30 +01:00
|
|
|
w Clear streamContent obj@(Stream {header}) = return $
|
2020-02-26 22:13:29 +01:00
|
|
|
case Map.lookup (Name "Filter") header of
|
2020-03-11 10:47:52 +01:00
|
|
|
Just directObject
|
|
|
|
| contains "FlateDecode" directObject ->
|
|
|
|
obj {streamContent = onLazy compress streamContent}
|
2020-02-26 22:13:29 +01:00
|
|
|
_ -> obj {streamContent}
|
|
|
|
w _ _ obj = expected "stream" obj
|
2020-03-04 18:31:30 +01:00
|
|
|
|
|
|
|
instance MonadFail m => Box m Raw Object ByteString where
|
|
|
|
r Raw (Stream {streamContent}) = return streamContent
|
|
|
|
r _ obj = expected "stream" obj
|
|
|
|
|
|
|
|
w Raw streamContent obj@(Stream {}) = return $ obj {streamContent}
|
|
|
|
w _ _ obj = expected "stream" obj
|