Recognize openStream was just an implementation of r for the Box m () Object ByteString, and extend it implementing the w operation while we're at it
This commit is contained in:
parent
f4df4aab22
commit
99014ff30d
3 changed files with 31 additions and 15 deletions
|
@ -6,10 +6,11 @@ import Data.ByteString.Lazy.Char8 (ByteString)
|
||||||
import qualified Data.ByteString.Char8 as BS (readFile)
|
import qualified Data.ByteString.Char8 as BS (readFile)
|
||||||
import qualified Data.ByteString.Lazy.Char8 as Lazy (putStrLn)
|
import qualified Data.ByteString.Lazy.Char8 as Lazy (putStrLn)
|
||||||
import PDF (Document(..), parseDocument)
|
import PDF (Document(..), parseDocument)
|
||||||
|
import PDF.Box (Box(..))
|
||||||
import PDF.Layer (Layer(..), unify)
|
import PDF.Layer (Layer(..), unify)
|
||||||
import PDF.Object (Object(..))
|
import PDF.Object (Object(..))
|
||||||
import PDF.Object.Navigation (
|
import PDF.Object.Navigation (
|
||||||
Error(..), (//), objectById, openStream, origin
|
Error(..), StreamContent(..), (//), objectById, origin
|
||||||
)
|
)
|
||||||
import PDF.Output (ObjectId(..), Output)
|
import PDF.Output (ObjectId(..), Output)
|
||||||
import qualified PDF.Output as Output (render)
|
import qualified PDF.Output as Output (render)
|
||||||
|
@ -20,7 +21,7 @@ import Text.Read (readMaybe)
|
||||||
|
|
||||||
decodedStream :: Object -> Object
|
decodedStream :: Object -> Object
|
||||||
decodedStream object =
|
decodedStream object =
|
||||||
maybe object replaceContent $ openStream object
|
maybe object replaceContent $ r StreamContent object
|
||||||
where
|
where
|
||||||
replaceContent streamContent = object {streamContent}
|
replaceContent streamContent = object {streamContent}
|
||||||
|
|
||||||
|
|
|
@ -2,27 +2,30 @@
|
||||||
{-# LANGUAGE ConstraintKinds #-}
|
{-# LANGUAGE ConstraintKinds #-}
|
||||||
{-# LANGUAGE FlexibleContexts #-}
|
{-# LANGUAGE FlexibleContexts #-}
|
||||||
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
||||||
|
{-# LANGUAGE MultiParamTypeClasses #-}
|
||||||
|
{-# LANGUAGE FlexibleInstances #-}
|
||||||
module PDF.Object.Navigation (
|
module PDF.Object.Navigation (
|
||||||
Error(..)
|
Error(..)
|
||||||
|
, StreamContent(..)
|
||||||
, (./)
|
, (./)
|
||||||
, (//)
|
, (//)
|
||||||
, (>./)
|
, (>./)
|
||||||
, (>//)
|
, (>//)
|
||||||
, getDictionary
|
, getDictionary
|
||||||
, objectById
|
, objectById
|
||||||
, openStream
|
|
||||||
, origin
|
, origin
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Codec.Compression.Zlib (decompress)
|
import Codec.Compression.Zlib (compress, decompress)
|
||||||
import Control.Applicative (Alternative(..))
|
import Control.Applicative (Alternative(..))
|
||||||
import Control.Monad (MonadPlus(..))
|
import Control.Monad (MonadPlus(..))
|
||||||
import Control.Monad.Reader (MonadReader(..))
|
import Control.Monad.Reader (MonadReader(..))
|
||||||
import Control.Monad.Fail (MonadFail(..))
|
import Control.Monad.Fail (MonadFail(..))
|
||||||
import Data.ByteString (ByteString)
|
import Data.ByteString (ByteString)
|
||||||
import qualified Data.ByteString.Lazy as Lazy (fromStrict, toStrict)
|
import qualified Data.ByteString.Lazy as Lazy (ByteString, fromStrict, toStrict)
|
||||||
import Data.Map ((!))
|
import Data.Map ((!))
|
||||||
import qualified Data.Map as Map (lookup)
|
import qualified Data.Map as Map (lookup)
|
||||||
|
import PDF.Box (Box(..))
|
||||||
import PDF.Layer (Layer(..))
|
import PDF.Layer (Layer(..))
|
||||||
import PDF.Object (
|
import PDF.Object (
|
||||||
Dictionary, DirectObject(..), IndirectObjCoordinates(..)
|
Dictionary, DirectObject(..), IndirectObjCoordinates(..)
|
||||||
|
@ -81,10 +84,21 @@ objectById objectId = do
|
||||||
origin :: PDFContent m => m Object
|
origin :: PDFContent m => m Object
|
||||||
origin = Direct . Dictionary . trailer . docStructure <$> ask
|
origin = Direct . Dictionary . trailer . docStructure <$> ask
|
||||||
|
|
||||||
openStream :: MonadFail m => Object -> m ByteString
|
data StreamContent = StreamContent
|
||||||
openStream (Stream {header, streamContent}) = return $
|
|
||||||
case Map.lookup (Name "Filter") header of
|
onLazy :: (Lazy.ByteString -> Lazy.ByteString) -> ByteString -> ByteString
|
||||||
Just (NameObject (Name "FlateDecode")) ->
|
onLazy f = Lazy.toStrict . f . Lazy.fromStrict
|
||||||
Lazy.toStrict . decompress . Lazy.fromStrict $ streamContent
|
|
||||||
_ -> streamContent
|
instance MonadFail m => Box m StreamContent Object ByteString where
|
||||||
openStream obj = expected "stream" obj
|
r StreamContent (Stream {header, streamContent}) = return $
|
||||||
|
case Map.lookup (Name "Filter") header of
|
||||||
|
Just (NameObject (Name "FlateDecode")) -> onLazy decompress streamContent
|
||||||
|
_ -> streamContent
|
||||||
|
r StreamContent obj = expected "stream" obj
|
||||||
|
|
||||||
|
w StreamContent streamContent obj@(Stream {header}) = return $
|
||||||
|
case Map.lookup (Name "Filter") header of
|
||||||
|
Just (NameObject (Name "FlateDecode")) ->
|
||||||
|
obj {streamContent = onLazy compress streamContent}
|
||||||
|
_ -> obj {streamContent}
|
||||||
|
w _ _ obj = expected "stream" obj
|
||||||
|
|
|
@ -13,6 +13,7 @@ import qualified Control.Monad.RWS as RWS (get)
|
||||||
import Data.Map (Map)
|
import Data.Map (Map)
|
||||||
import qualified Data.Map as Map (empty, fromList, insert, lookup, toList)
|
import qualified Data.Map as Map (empty, fromList, insert, lookup, toList)
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
|
import PDF.Box (Box(..))
|
||||||
import PDF.CMap (cMap)
|
import PDF.CMap (cMap)
|
||||||
import qualified PDF.Content as Content (parse)
|
import qualified PDF.Content as Content (parse)
|
||||||
import PDF.Content.Text (renderText)
|
import PDF.Content.Text (renderText)
|
||||||
|
@ -24,7 +25,7 @@ import PDF.Object (
|
||||||
, Name(..), Object(..)
|
, Name(..), Object(..)
|
||||||
,)
|
,)
|
||||||
import PDF.Object.Navigation (
|
import PDF.Object.Navigation (
|
||||||
Error(..), (//), (>./), (>//), getDictionary, objectById, openStream
|
Error(..), StreamContent(..), (//), (>./), (>//), getDictionary, objectById
|
||||||
, origin
|
, origin
|
||||||
)
|
)
|
||||||
import PDF.Output (ObjectId(..))
|
import PDF.Output (ObjectId(..))
|
||||||
|
@ -56,7 +57,7 @@ loadFont :: ObjectId -> T Font
|
||||||
loadFont objectId = objectById objectId >>= tryMappings
|
loadFont objectId = objectById objectId >>= tryMappings
|
||||||
where
|
where
|
||||||
tryMappings object =
|
tryMappings object =
|
||||||
(object >./ "ToUnicode" >>= openStream >>= cMap)
|
(object >./ "ToUnicode" >>= r StreamContent >>= cMap)
|
||||||
<|> (object >./ "Encoding" >>= loadEncoding)
|
<|> (object >./ "Encoding" >>= loadEncoding)
|
||||||
<|> (fail $ unknownFormat (show objectId) (show object))
|
<|> (fail $ unknownFormat (show objectId) (show object))
|
||||||
unknownFormat = printf "Unknown font format for object #%s : %s"
|
unknownFormat = printf "Unknown font format for object #%s : %s"
|
||||||
|
@ -99,7 +100,7 @@ extractText pageObj = do
|
||||||
where
|
where
|
||||||
loadContent :: FontSet -> Object -> T [Text]
|
loadContent :: FontSet -> Object -> T [Text]
|
||||||
loadContent fonts object =
|
loadContent fonts object =
|
||||||
openStream object
|
r StreamContent object
|
||||||
>>= (either fail return . Content.parse)
|
>>= (either fail return . Content.parse)
|
||||||
>>= renderText fonts
|
>>= renderText fonts
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue