Fix bugs preventing the objects from loading

This commit is contained in:
Tissevert 2019-05-15 15:03:55 +02:00
parent 44508a204c
commit 379a821550
2 changed files with 34 additions and 32 deletions

View File

@ -4,11 +4,12 @@ module PDF.Body where
import Data.ByteString.Lazy.Char8 (ByteString)
import qualified Data.ByteString.Lazy.Char8 as BS (drop, pack)
import Data.Int (Int64)
import Data.Map ((!))
import qualified Data.Map as Map (insert, lookup)
import PDF.Object (
Content(..), DirectObject(..), IndirectObjCoordinates(..), Object(..)
, Occurrence(..), Parser, XRefEntry(..), XRefSection, XRefSubSection(..)
, eol, eolCharset, dictionary, directObject, integer, line
, blank, eol, eolCharset, dictionary, directObject, integer, line
)
import Text.Parsec
@ -55,17 +56,20 @@ getOffset objectId = do
"obj " ++ show objectId ++ " is referenced but missing in XRef table"
Just offset -> return offset
on :: Monad m => ParsecT s u m a -> s -> ParsecT s u m a
on parser input = do
originalInput <- getInput
setInput input >> parser <* setInput originalInput
loadNumber :: Int -> SParser Float
loadNumber objectId = do
offset <- getOffset objectId
state <- getState
let objectStart = BS.drop offset (input state)
case runParser object state ("object@" ++ show offset) objectStart of
Left _ -> fail $ "Could not parse length object@" ++ show offset
Right number@(Direct (Number n)) ->
addObject objectId number >> return n
Right obj -> fail $
"Expected obj@" ++ show offset ++ " (" ++ show obj ++ ") to be a Number"
objectStart <- BS.drop offset . input <$> getState
indirectObjCoordinates `on` objectStart >> return ()
objectValue <- (!objectId) . objects . content <$> getState
case objectValue of
Direct (Number n) -> return n
obj -> fail $ "Expected obj@" ++ show offset ++ " (" ++ show obj ++ ") to be a Number"
invalidValue :: Object -> String
invalidValue v = "Invalid value " ++ show v
@ -82,26 +86,28 @@ getSize (Just (Reference (IndirectObjCoordinates {objectId}))) = do
invalidValue v ++ " for obj " ++ show objectId ++ "used as /Length"
getSize (Just v) = fail $ invalidValue (Direct v) ++ " for /Length"
object :: SParser Object
object =
Direct <$> directObject
<|> do
header <- dictionary
size <- getSize (Map.lookup "Length" header)
streamContent <- BS.pack <$> stream (truncate size)
return $ Stream {header, streamContent}
streamObject :: SParser Object
streamObject = try $ do
header <- dictionary <* blank
size <- getSize (Map.lookup "Length" header)
streamContent <- BS.pack <$> stream (truncate size)
return $ Stream {header, streamContent}
where
stream size = line "stream" *> count size anyChar <* eol <* line "endstream"
stream size = line "stream" *> count size anyChar <* blank <* line "endstream"
object :: SParser Object
object = streamObject <|> Direct <$> directObject
indirectObjCoordinates :: SParser IndirectObjCoordinates
indirectObjCoordinates = do
objectId <- integer
coordinates <- IndirectObjCoordinates objectId <$> integer
objectValue <- line "obj" *> object <* blank <* line "endobj"
addObject objectId objectValue
return coordinates
occurrence :: SParser Occurrence
occurrence = Comment <$> comment <|> indirectObj
where
indirectObj = do
objectId <- integer
coordinates <- IndirectObjCoordinates objectId <$> integer
objectValue <- line "obj" *> object <* eol <* line "endobj"
addObject objectId objectValue
return $ Indirect coordinates
occurrence = Comment <$> comment <|> Indirect <$> indirectObjCoordinates
populate :: ByteString -> Content -> Content
populate input initialContent =

View File

@ -10,6 +10,7 @@ module PDF.Object (
, XRefEntry(..)
, XRefSection
, XRefSubSection(..)
, blank
, content
, dictionary
, directObject
@ -108,11 +109,6 @@ blank = skipMany whiteSpace
delimiterCharset :: String
delimiterCharset = "()<>[]{}/%"
{-
delimiter :: Parser u Char
delimiter = oneOf delimiterCharset
-}
regular :: Parser u Char
regular = noneOf $ eolCharset ++ whiteSpaceCharset ++ delimiterCharset
@ -145,7 +141,7 @@ stringObj =
Literal <$> (char '(' *> (concat <$> many literalStringBlock) <* char ')')
<|> Hexadecimal <$> (char '<' *> many hexDigit <* char '>')
where
literalStringBlock = many (noneOf "\\(") <|> matchingParenthesis <|> escapedChar
literalStringBlock = many1 (noneOf "\\()") <|> matchingParenthesis <|> escapedChar
matchingParenthesis =
(++) <$> ((:) <$> char '(' <*> literalStringBlock) <*> string ")"
escapedChar = (:) <$> char '\\' <*> ((:[]) <$> oneOf "nrtbf()\\" <|> octalCode)