Add a separate instance for Raw streams that don't try to decode them

This commit is contained in:
Tissevert 2020-03-04 18:31:30 +01:00
parent 309f6ed461
commit 2b9abc24b6
3 changed files with 21 additions and 14 deletions

View File

@ -10,7 +10,7 @@ import PDF.Box (Box(..))
import PDF.Layer (Layer(..), unify)
import PDF.Object (Object(..))
import PDF.Object.Navigation (
Error(..), StreamContent(..), (//), objectById, origin
Clear(..), Error(..), Raw(..), (//), objectById, origin
)
import PDF.Output (ObjectId(..), Output)
import qualified PDF.Output as Output (render)
@ -21,9 +21,7 @@ import Text.Read (readMaybe)
decodedStream :: Object -> Object
decodedStream object =
maybe object forceClear $ r StreamContent object
where
forceClear streamContent = object {streamContent}
maybe object id $ r Clear object >>= flip (w Raw) object
display :: Output a => ReaderT Layer Error a -> Document -> Either String ByteString
display getter (Document {eolStyle, layers}) =

View File

@ -7,7 +7,8 @@
module PDF.Object.Navigation (
Error(..)
, PDFContent
, StreamContent(..)
, Clear(..)
, Raw(..)
, (./)
, (//)
, (>./)
@ -88,21 +89,29 @@ castObject directObject = return $ Direct directObject
origin :: PDFContent m => m Object
origin = Direct . Dictionary . trailer . docStructure <$> ask
data StreamContent = StreamContent
data Clear = Clear
data Raw = Raw
onLazy :: (Lazy.ByteString -> Lazy.ByteString) -> ByteString -> ByteString
onLazy f = Lazy.toStrict . f . Lazy.fromStrict
instance MonadFail m => Box m StreamContent Object ByteString where
r StreamContent (Stream {header, streamContent}) = return $
instance MonadFail m => Box m Clear Object ByteString where
r Clear (Stream {header, streamContent}) = return $
case Map.lookup (Name "Filter") header of
Just (NameObject (Name "FlateDecode")) -> onLazy decompress streamContent
_ -> streamContent
r StreamContent obj = expected "stream" obj
r _ obj = expected "stream" obj
w StreamContent streamContent obj@(Stream {header}) = return $
w Clear streamContent obj@(Stream {header}) = return $
case Map.lookup (Name "Filter") header of
Just (NameObject (Name "FlateDecode")) ->
obj {streamContent = onLazy compress streamContent}
_ -> obj {streamContent}
w _ _ obj = expected "stream" obj
instance MonadFail m => Box m Raw Object ByteString where
r Raw (Stream {streamContent}) = return streamContent
r _ obj = expected "stream" obj
w Raw streamContent obj@(Stream {}) = return $ obj {streamContent}
w _ _ obj = expected "stream" obj

View File

@ -35,8 +35,8 @@ import PDF.Object (
, Name(..), Object(..)
,)
import PDF.Object.Navigation (
Error(..), PDFContent, StreamContent(..), (./), (//), (>./), (>//)
, castObject, getDictionary, objectById, origin
Clear(..), Error(..), PDFContent, (./), (//), (>./), (>//), castObject
, getDictionary, objectById, origin
)
import PDF.Output (ObjectId(..))
import Prelude hiding (fail)
@ -69,7 +69,7 @@ loadFont :: FontCache m => ObjectId -> m Font
loadFont objectId = objectById objectId >>= tryMappings
where
tryMappings object =
(object >./ "ToUnicode" >>= r StreamContent >>= cMap)
(object >./ "ToUnicode" >>= r Clear >>= cMap)
<|> (object >./ "Encoding" >>= loadEncoding)
<|> (fail $ unknownFormat (show objectId) (show object))
unknownFormat = printf "Unknown font format for object #%s : %s"
@ -110,7 +110,7 @@ extractText pageObj = do
concat <$> mapM (loadContent fonts) objects
where
loadContent fonts object =
r StreamContent object
r Clear object
>>= (either fail return . Content.parse)
>>= renderText fonts