Recognize openStream was just an implementation of r for the Box m () Object ByteString, and extend it implementing the w operation while we're at it
This commit is contained in:
parent
f4df4aab22
commit
99014ff30d
3 changed files with 31 additions and 15 deletions
|
@ -6,10 +6,11 @@ import Data.ByteString.Lazy.Char8 (ByteString)
|
|||
import qualified Data.ByteString.Char8 as BS (readFile)
|
||||
import qualified Data.ByteString.Lazy.Char8 as Lazy (putStrLn)
|
||||
import PDF (Document(..), parseDocument)
|
||||
import PDF.Box (Box(..))
|
||||
import PDF.Layer (Layer(..), unify)
|
||||
import PDF.Object (Object(..))
|
||||
import PDF.Object.Navigation (
|
||||
Error(..), (//), objectById, openStream, origin
|
||||
Error(..), StreamContent(..), (//), objectById, origin
|
||||
)
|
||||
import PDF.Output (ObjectId(..), Output)
|
||||
import qualified PDF.Output as Output (render)
|
||||
|
@ -20,7 +21,7 @@ import Text.Read (readMaybe)
|
|||
|
||||
decodedStream :: Object -> Object
|
||||
decodedStream object =
|
||||
maybe object replaceContent $ openStream object
|
||||
maybe object replaceContent $ r StreamContent object
|
||||
where
|
||||
replaceContent streamContent = object {streamContent}
|
||||
|
||||
|
|
|
@ -2,27 +2,30 @@
|
|||
{-# LANGUAGE ConstraintKinds #-}
|
||||
{-# LANGUAGE FlexibleContexts #-}
|
||||
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
||||
{-# LANGUAGE MultiParamTypeClasses #-}
|
||||
{-# LANGUAGE FlexibleInstances #-}
|
||||
module PDF.Object.Navigation (
|
||||
Error(..)
|
||||
, StreamContent(..)
|
||||
, (./)
|
||||
, (//)
|
||||
, (>./)
|
||||
, (>//)
|
||||
, getDictionary
|
||||
, objectById
|
||||
, openStream
|
||||
, origin
|
||||
) where
|
||||
|
||||
import Codec.Compression.Zlib (decompress)
|
||||
import Codec.Compression.Zlib (compress, decompress)
|
||||
import Control.Applicative (Alternative(..))
|
||||
import Control.Monad (MonadPlus(..))
|
||||
import Control.Monad.Reader (MonadReader(..))
|
||||
import Control.Monad.Fail (MonadFail(..))
|
||||
import Data.ByteString (ByteString)
|
||||
import qualified Data.ByteString.Lazy as Lazy (fromStrict, toStrict)
|
||||
import qualified Data.ByteString.Lazy as Lazy (ByteString, fromStrict, toStrict)
|
||||
import Data.Map ((!))
|
||||
import qualified Data.Map as Map (lookup)
|
||||
import PDF.Box (Box(..))
|
||||
import PDF.Layer (Layer(..))
|
||||
import PDF.Object (
|
||||
Dictionary, DirectObject(..), IndirectObjCoordinates(..)
|
||||
|
@ -81,10 +84,21 @@ objectById objectId = do
|
|||
origin :: PDFContent m => m Object
|
||||
origin = Direct . Dictionary . trailer . docStructure <$> ask
|
||||
|
||||
openStream :: MonadFail m => Object -> m ByteString
|
||||
openStream (Stream {header, streamContent}) = return $
|
||||
case Map.lookup (Name "Filter") header of
|
||||
Just (NameObject (Name "FlateDecode")) ->
|
||||
Lazy.toStrict . decompress . Lazy.fromStrict $ streamContent
|
||||
_ -> streamContent
|
||||
openStream obj = expected "stream" obj
|
||||
data StreamContent = StreamContent
|
||||
|
||||
onLazy :: (Lazy.ByteString -> Lazy.ByteString) -> ByteString -> ByteString
|
||||
onLazy f = Lazy.toStrict . f . Lazy.fromStrict
|
||||
|
||||
instance MonadFail m => Box m StreamContent Object ByteString where
|
||||
r StreamContent (Stream {header, streamContent}) = return $
|
||||
case Map.lookup (Name "Filter") header of
|
||||
Just (NameObject (Name "FlateDecode")) -> onLazy decompress streamContent
|
||||
_ -> streamContent
|
||||
r StreamContent obj = expected "stream" obj
|
||||
|
||||
w StreamContent streamContent obj@(Stream {header}) = return $
|
||||
case Map.lookup (Name "Filter") header of
|
||||
Just (NameObject (Name "FlateDecode")) ->
|
||||
obj {streamContent = onLazy compress streamContent}
|
||||
_ -> obj {streamContent}
|
||||
w _ _ obj = expected "stream" obj
|
||||
|
|
|
@ -13,6 +13,7 @@ import qualified Control.Monad.RWS as RWS (get)
|
|||
import Data.Map (Map)
|
||||
import qualified Data.Map as Map (empty, fromList, insert, lookup, toList)
|
||||
import Data.Text (Text)
|
||||
import PDF.Box (Box(..))
|
||||
import PDF.CMap (cMap)
|
||||
import qualified PDF.Content as Content (parse)
|
||||
import PDF.Content.Text (renderText)
|
||||
|
@ -24,7 +25,7 @@ import PDF.Object (
|
|||
, Name(..), Object(..)
|
||||
,)
|
||||
import PDF.Object.Navigation (
|
||||
Error(..), (//), (>./), (>//), getDictionary, objectById, openStream
|
||||
Error(..), StreamContent(..), (//), (>./), (>//), getDictionary, objectById
|
||||
, origin
|
||||
)
|
||||
import PDF.Output (ObjectId(..))
|
||||
|
@ -56,7 +57,7 @@ loadFont :: ObjectId -> T Font
|
|||
loadFont objectId = objectById objectId >>= tryMappings
|
||||
where
|
||||
tryMappings object =
|
||||
(object >./ "ToUnicode" >>= openStream >>= cMap)
|
||||
(object >./ "ToUnicode" >>= r StreamContent >>= cMap)
|
||||
<|> (object >./ "Encoding" >>= loadEncoding)
|
||||
<|> (fail $ unknownFormat (show objectId) (show object))
|
||||
unknownFormat = printf "Unknown font format for object #%s : %s"
|
||||
|
@ -99,7 +100,7 @@ extractText pageObj = do
|
|||
where
|
||||
loadContent :: FontSet -> Object -> T [Text]
|
||||
loadContent fonts object =
|
||||
openStream object
|
||||
r StreamContent object
|
||||
>>= (either fail return . Content.parse)
|
||||
>>= renderText fonts
|
||||
|
||||
|
|
Loading…
Reference in a new issue