From 379a8215504d69dc8b5de6fee26445668802ba99 Mon Sep 17 00:00:00 2001 From: Tissevert Date: Wed, 15 May 2019 15:03:55 +0200 Subject: [PATCH] Fix bugs preventing the objects from loading --- src/PDF/Body.hs | 58 ++++++++++++++++++++++++++--------------------- src/PDF/Object.hs | 8 ++----- 2 files changed, 34 insertions(+), 32 deletions(-) diff --git a/src/PDF/Body.hs b/src/PDF/Body.hs index c0a79b0..7e43763 100644 --- a/src/PDF/Body.hs +++ b/src/PDF/Body.hs @@ -4,11 +4,12 @@ module PDF.Body where import Data.ByteString.Lazy.Char8 (ByteString) import qualified Data.ByteString.Lazy.Char8 as BS (drop, pack) import Data.Int (Int64) +import Data.Map ((!)) import qualified Data.Map as Map (insert, lookup) import PDF.Object ( Content(..), DirectObject(..), IndirectObjCoordinates(..), Object(..) , Occurrence(..), Parser, XRefEntry(..), XRefSection, XRefSubSection(..) - , eol, eolCharset, dictionary, directObject, integer, line + , blank, eol, eolCharset, dictionary, directObject, integer, line ) import Text.Parsec @@ -55,17 +56,20 @@ getOffset objectId = do "obj " ++ show objectId ++ " is referenced but missing in XRef table" Just offset -> return offset +on :: Monad m => ParsecT s u m a -> s -> ParsecT s u m a +on parser input = do + originalInput <- getInput + setInput input >> parser <* setInput originalInput + loadNumber :: Int -> SParser Float loadNumber objectId = do offset <- getOffset objectId - state <- getState - let objectStart = BS.drop offset (input state) - case runParser object state ("object@" ++ show offset) objectStart of - Left _ -> fail $ "Could not parse length object@" ++ show offset - Right number@(Direct (Number n)) -> - addObject objectId number >> return n - Right obj -> fail $ - "Expected obj@" ++ show offset ++ " (" ++ show obj ++ ") to be a Number" + objectStart <- BS.drop offset . input <$> getState + indirectObjCoordinates `on` objectStart >> return () + objectValue <- (!objectId) . objects . content <$> getState + case objectValue of + Direct (Number n) -> return n + obj -> fail $ "Expected obj@" ++ show offset ++ " (" ++ show obj ++ ") to be a Number" invalidValue :: Object -> String invalidValue v = "Invalid value " ++ show v @@ -82,26 +86,28 @@ getSize (Just (Reference (IndirectObjCoordinates {objectId}))) = do invalidValue v ++ " for obj " ++ show objectId ++ "used as /Length" getSize (Just v) = fail $ invalidValue (Direct v) ++ " for /Length" -object :: SParser Object -object = - Direct <$> directObject - <|> do - header <- dictionary - size <- getSize (Map.lookup "Length" header) - streamContent <- BS.pack <$> stream (truncate size) - return $ Stream {header, streamContent} +streamObject :: SParser Object +streamObject = try $ do + header <- dictionary <* blank + size <- getSize (Map.lookup "Length" header) + streamContent <- BS.pack <$> stream (truncate size) + return $ Stream {header, streamContent} where - stream size = line "stream" *> count size anyChar <* eol <* line "endstream" + stream size = line "stream" *> count size anyChar <* blank <* line "endstream" + +object :: SParser Object +object = streamObject <|> Direct <$> directObject + +indirectObjCoordinates :: SParser IndirectObjCoordinates +indirectObjCoordinates = do + objectId <- integer + coordinates <- IndirectObjCoordinates objectId <$> integer + objectValue <- line "obj" *> object <* blank <* line "endobj" + addObject objectId objectValue + return coordinates occurrence :: SParser Occurrence -occurrence = Comment <$> comment <|> indirectObj - where - indirectObj = do - objectId <- integer - coordinates <- IndirectObjCoordinates objectId <$> integer - objectValue <- line "obj" *> object <* eol <* line "endobj" - addObject objectId objectValue - return $ Indirect coordinates +occurrence = Comment <$> comment <|> Indirect <$> indirectObjCoordinates populate :: ByteString -> Content -> Content populate input initialContent = diff --git a/src/PDF/Object.hs b/src/PDF/Object.hs index 2de386d..bc96161 100644 --- a/src/PDF/Object.hs +++ b/src/PDF/Object.hs @@ -10,6 +10,7 @@ module PDF.Object ( , XRefEntry(..) , XRefSection , XRefSubSection(..) + , blank , content , dictionary , directObject @@ -108,11 +109,6 @@ blank = skipMany whiteSpace delimiterCharset :: String delimiterCharset = "()<>[]{}/%" -{- -delimiter :: Parser u Char -delimiter = oneOf delimiterCharset --} - regular :: Parser u Char regular = noneOf $ eolCharset ++ whiteSpaceCharset ++ delimiterCharset @@ -145,7 +141,7 @@ stringObj = Literal <$> (char '(' *> (concat <$> many literalStringBlock) <* char ')') <|> Hexadecimal <$> (char '<' *> many hexDigit <* char '>') where - literalStringBlock = many (noneOf "\\(") <|> matchingParenthesis <|> escapedChar + literalStringBlock = many1 (noneOf "\\()") <|> matchingParenthesis <|> escapedChar matchingParenthesis = (++) <$> ((:) <$> char '(' <*> literalStringBlock) <*> string ")" escapedChar = (:) <$> char '\\' <*> ((:[]) <$> oneOf "nrtbf()\\" <|> octalCode)