Recognize openStream was just an implementation of r for the Box m () Object ByteString, and extend it implementing the w operation while we're at it

This commit is contained in:
Tissevert 2020-02-26 22:13:29 +01:00
parent f4df4aab22
commit 99014ff30d
3 changed files with 31 additions and 15 deletions

View file

@ -6,10 +6,11 @@ import Data.ByteString.Lazy.Char8 (ByteString)
import qualified Data.ByteString.Char8 as BS (readFile) import qualified Data.ByteString.Char8 as BS (readFile)
import qualified Data.ByteString.Lazy.Char8 as Lazy (putStrLn) import qualified Data.ByteString.Lazy.Char8 as Lazy (putStrLn)
import PDF (Document(..), parseDocument) import PDF (Document(..), parseDocument)
import PDF.Box (Box(..))
import PDF.Layer (Layer(..), unify) import PDF.Layer (Layer(..), unify)
import PDF.Object (Object(..)) import PDF.Object (Object(..))
import PDF.Object.Navigation ( import PDF.Object.Navigation (
Error(..), (//), objectById, openStream, origin Error(..), StreamContent(..), (//), objectById, origin
) )
import PDF.Output (ObjectId(..), Output) import PDF.Output (ObjectId(..), Output)
import qualified PDF.Output as Output (render) import qualified PDF.Output as Output (render)
@ -20,7 +21,7 @@ import Text.Read (readMaybe)
decodedStream :: Object -> Object decodedStream :: Object -> Object
decodedStream object = decodedStream object =
maybe object replaceContent $ openStream object maybe object replaceContent $ r StreamContent object
where where
replaceContent streamContent = object {streamContent} replaceContent streamContent = object {streamContent}

View file

@ -2,27 +2,30 @@
{-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE FlexibleInstances #-}
module PDF.Object.Navigation ( module PDF.Object.Navigation (
Error(..) Error(..)
, StreamContent(..)
, (./) , (./)
, (//) , (//)
, (>./) , (>./)
, (>//) , (>//)
, getDictionary , getDictionary
, objectById , objectById
, openStream
, origin , origin
) where ) where
import Codec.Compression.Zlib (decompress) import Codec.Compression.Zlib (compress, decompress)
import Control.Applicative (Alternative(..)) import Control.Applicative (Alternative(..))
import Control.Monad (MonadPlus(..)) import Control.Monad (MonadPlus(..))
import Control.Monad.Reader (MonadReader(..)) import Control.Monad.Reader (MonadReader(..))
import Control.Monad.Fail (MonadFail(..)) import Control.Monad.Fail (MonadFail(..))
import Data.ByteString (ByteString) import Data.ByteString (ByteString)
import qualified Data.ByteString.Lazy as Lazy (fromStrict, toStrict) import qualified Data.ByteString.Lazy as Lazy (ByteString, fromStrict, toStrict)
import Data.Map ((!)) import Data.Map ((!))
import qualified Data.Map as Map (lookup) import qualified Data.Map as Map (lookup)
import PDF.Box (Box(..))
import PDF.Layer (Layer(..)) import PDF.Layer (Layer(..))
import PDF.Object ( import PDF.Object (
Dictionary, DirectObject(..), IndirectObjCoordinates(..) Dictionary, DirectObject(..), IndirectObjCoordinates(..)
@ -81,10 +84,21 @@ objectById objectId = do
origin :: PDFContent m => m Object origin :: PDFContent m => m Object
origin = Direct . Dictionary . trailer . docStructure <$> ask origin = Direct . Dictionary . trailer . docStructure <$> ask
openStream :: MonadFail m => Object -> m ByteString data StreamContent = StreamContent
openStream (Stream {header, streamContent}) = return $
onLazy :: (Lazy.ByteString -> Lazy.ByteString) -> ByteString -> ByteString
onLazy f = Lazy.toStrict . f . Lazy.fromStrict
instance MonadFail m => Box m StreamContent Object ByteString where
r StreamContent (Stream {header, streamContent}) = return $
case Map.lookup (Name "Filter") header of
Just (NameObject (Name "FlateDecode")) -> onLazy decompress streamContent
_ -> streamContent
r StreamContent obj = expected "stream" obj
w StreamContent streamContent obj@(Stream {header}) = return $
case Map.lookup (Name "Filter") header of case Map.lookup (Name "Filter") header of
Just (NameObject (Name "FlateDecode")) -> Just (NameObject (Name "FlateDecode")) ->
Lazy.toStrict . decompress . Lazy.fromStrict $ streamContent obj {streamContent = onLazy compress streamContent}
_ -> streamContent _ -> obj {streamContent}
openStream obj = expected "stream" obj w _ _ obj = expected "stream" obj

View file

@ -13,6 +13,7 @@ import qualified Control.Monad.RWS as RWS (get)
import Data.Map (Map) import Data.Map (Map)
import qualified Data.Map as Map (empty, fromList, insert, lookup, toList) import qualified Data.Map as Map (empty, fromList, insert, lookup, toList)
import Data.Text (Text) import Data.Text (Text)
import PDF.Box (Box(..))
import PDF.CMap (cMap) import PDF.CMap (cMap)
import qualified PDF.Content as Content (parse) import qualified PDF.Content as Content (parse)
import PDF.Content.Text (renderText) import PDF.Content.Text (renderText)
@ -24,7 +25,7 @@ import PDF.Object (
, Name(..), Object(..) , Name(..), Object(..)
,) ,)
import PDF.Object.Navigation ( import PDF.Object.Navigation (
Error(..), (//), (>./), (>//), getDictionary, objectById, openStream Error(..), StreamContent(..), (//), (>./), (>//), getDictionary, objectById
, origin , origin
) )
import PDF.Output (ObjectId(..)) import PDF.Output (ObjectId(..))
@ -56,7 +57,7 @@ loadFont :: ObjectId -> T Font
loadFont objectId = objectById objectId >>= tryMappings loadFont objectId = objectById objectId >>= tryMappings
where where
tryMappings object = tryMappings object =
(object >./ "ToUnicode" >>= openStream >>= cMap) (object >./ "ToUnicode" >>= r StreamContent >>= cMap)
<|> (object >./ "Encoding" >>= loadEncoding) <|> (object >./ "Encoding" >>= loadEncoding)
<|> (fail $ unknownFormat (show objectId) (show object)) <|> (fail $ unknownFormat (show objectId) (show object))
unknownFormat = printf "Unknown font format for object #%s : %s" unknownFormat = printf "Unknown font format for object #%s : %s"
@ -99,7 +100,7 @@ extractText pageObj = do
where where
loadContent :: FontSet -> Object -> T [Text] loadContent :: FontSet -> Object -> T [Text]
loadContent fonts object = loadContent fonts object =
openStream object r StreamContent object
>>= (either fail return . Content.parse) >>= (either fail return . Content.parse)
>>= renderText fonts >>= renderText fonts