Fix bugs preventing the objects from loading

This commit is contained in:
Tissevert 2019-05-15 15:03:55 +02:00
parent 44508a204c
commit 379a821550
2 changed files with 34 additions and 32 deletions

View file

@ -4,11 +4,12 @@ module PDF.Body where
import Data.ByteString.Lazy.Char8 (ByteString) import Data.ByteString.Lazy.Char8 (ByteString)
import qualified Data.ByteString.Lazy.Char8 as BS (drop, pack) import qualified Data.ByteString.Lazy.Char8 as BS (drop, pack)
import Data.Int (Int64) import Data.Int (Int64)
import Data.Map ((!))
import qualified Data.Map as Map (insert, lookup) import qualified Data.Map as Map (insert, lookup)
import PDF.Object ( import PDF.Object (
Content(..), DirectObject(..), IndirectObjCoordinates(..), Object(..) Content(..), DirectObject(..), IndirectObjCoordinates(..), Object(..)
, Occurrence(..), Parser, XRefEntry(..), XRefSection, XRefSubSection(..) , Occurrence(..), Parser, XRefEntry(..), XRefSection, XRefSubSection(..)
, eol, eolCharset, dictionary, directObject, integer, line , blank, eol, eolCharset, dictionary, directObject, integer, line
) )
import Text.Parsec import Text.Parsec
@ -55,17 +56,20 @@ getOffset objectId = do
"obj " ++ show objectId ++ " is referenced but missing in XRef table" "obj " ++ show objectId ++ " is referenced but missing in XRef table"
Just offset -> return offset Just offset -> return offset
on :: Monad m => ParsecT s u m a -> s -> ParsecT s u m a
on parser input = do
originalInput <- getInput
setInput input >> parser <* setInput originalInput
loadNumber :: Int -> SParser Float loadNumber :: Int -> SParser Float
loadNumber objectId = do loadNumber objectId = do
offset <- getOffset objectId offset <- getOffset objectId
state <- getState objectStart <- BS.drop offset . input <$> getState
let objectStart = BS.drop offset (input state) indirectObjCoordinates `on` objectStart >> return ()
case runParser object state ("object@" ++ show offset) objectStart of objectValue <- (!objectId) . objects . content <$> getState
Left _ -> fail $ "Could not parse length object@" ++ show offset case objectValue of
Right number@(Direct (Number n)) -> Direct (Number n) -> return n
addObject objectId number >> return n obj -> fail $ "Expected obj@" ++ show offset ++ " (" ++ show obj ++ ") to be a Number"
Right obj -> fail $
"Expected obj@" ++ show offset ++ " (" ++ show obj ++ ") to be a Number"
invalidValue :: Object -> String invalidValue :: Object -> String
invalidValue v = "Invalid value " ++ show v invalidValue v = "Invalid value " ++ show v
@ -82,26 +86,28 @@ getSize (Just (Reference (IndirectObjCoordinates {objectId}))) = do
invalidValue v ++ " for obj " ++ show objectId ++ "used as /Length" invalidValue v ++ " for obj " ++ show objectId ++ "used as /Length"
getSize (Just v) = fail $ invalidValue (Direct v) ++ " for /Length" getSize (Just v) = fail $ invalidValue (Direct v) ++ " for /Length"
object :: SParser Object streamObject :: SParser Object
object = streamObject = try $ do
Direct <$> directObject header <- dictionary <* blank
<|> do size <- getSize (Map.lookup "Length" header)
header <- dictionary streamContent <- BS.pack <$> stream (truncate size)
size <- getSize (Map.lookup "Length" header) return $ Stream {header, streamContent}
streamContent <- BS.pack <$> stream (truncate size)
return $ Stream {header, streamContent}
where where
stream size = line "stream" *> count size anyChar <* eol <* line "endstream" stream size = line "stream" *> count size anyChar <* blank <* line "endstream"
object :: SParser Object
object = streamObject <|> Direct <$> directObject
indirectObjCoordinates :: SParser IndirectObjCoordinates
indirectObjCoordinates = do
objectId <- integer
coordinates <- IndirectObjCoordinates objectId <$> integer
objectValue <- line "obj" *> object <* blank <* line "endobj"
addObject objectId objectValue
return coordinates
occurrence :: SParser Occurrence occurrence :: SParser Occurrence
occurrence = Comment <$> comment <|> indirectObj occurrence = Comment <$> comment <|> Indirect <$> indirectObjCoordinates
where
indirectObj = do
objectId <- integer
coordinates <- IndirectObjCoordinates objectId <$> integer
objectValue <- line "obj" *> object <* eol <* line "endobj"
addObject objectId objectValue
return $ Indirect coordinates
populate :: ByteString -> Content -> Content populate :: ByteString -> Content -> Content
populate input initialContent = populate input initialContent =

View file

@ -10,6 +10,7 @@ module PDF.Object (
, XRefEntry(..) , XRefEntry(..)
, XRefSection , XRefSection
, XRefSubSection(..) , XRefSubSection(..)
, blank
, content , content
, dictionary , dictionary
, directObject , directObject
@ -108,11 +109,6 @@ blank = skipMany whiteSpace
delimiterCharset :: String delimiterCharset :: String
delimiterCharset = "()<>[]{}/%" delimiterCharset = "()<>[]{}/%"
{-
delimiter :: Parser u Char
delimiter = oneOf delimiterCharset
-}
regular :: Parser u Char regular :: Parser u Char
regular = noneOf $ eolCharset ++ whiteSpaceCharset ++ delimiterCharset regular = noneOf $ eolCharset ++ whiteSpaceCharset ++ delimiterCharset
@ -145,7 +141,7 @@ stringObj =
Literal <$> (char '(' *> (concat <$> many literalStringBlock) <* char ')') Literal <$> (char '(' *> (concat <$> many literalStringBlock) <* char ')')
<|> Hexadecimal <$> (char '<' *> many hexDigit <* char '>') <|> Hexadecimal <$> (char '<' *> many hexDigit <* char '>')
where where
literalStringBlock = many (noneOf "\\(") <|> matchingParenthesis <|> escapedChar literalStringBlock = many1 (noneOf "\\()") <|> matchingParenthesis <|> escapedChar
matchingParenthesis = matchingParenthesis =
(++) <$> ((:) <$> char '(' <*> literalStringBlock) <*> string ")" (++) <$> ((:) <$> char '(' <*> literalStringBlock) <*> string ")"
escapedChar = (:) <$> char '\\' <*> ((:[]) <$> oneOf "nrtbf()\\" <|> octalCode) escapedChar = (:) <$> char '\\' <*> ((:[]) <$> oneOf "nrtbf()\\" <|> octalCode)