Fix bugs preventing the objects from loading
This commit is contained in:
parent
44508a204c
commit
379a821550
2 changed files with 34 additions and 32 deletions
|
@ -4,11 +4,12 @@ module PDF.Body where
|
||||||
import Data.ByteString.Lazy.Char8 (ByteString)
|
import Data.ByteString.Lazy.Char8 (ByteString)
|
||||||
import qualified Data.ByteString.Lazy.Char8 as BS (drop, pack)
|
import qualified Data.ByteString.Lazy.Char8 as BS (drop, pack)
|
||||||
import Data.Int (Int64)
|
import Data.Int (Int64)
|
||||||
|
import Data.Map ((!))
|
||||||
import qualified Data.Map as Map (insert, lookup)
|
import qualified Data.Map as Map (insert, lookup)
|
||||||
import PDF.Object (
|
import PDF.Object (
|
||||||
Content(..), DirectObject(..), IndirectObjCoordinates(..), Object(..)
|
Content(..), DirectObject(..), IndirectObjCoordinates(..), Object(..)
|
||||||
, Occurrence(..), Parser, XRefEntry(..), XRefSection, XRefSubSection(..)
|
, Occurrence(..), Parser, XRefEntry(..), XRefSection, XRefSubSection(..)
|
||||||
, eol, eolCharset, dictionary, directObject, integer, line
|
, blank, eol, eolCharset, dictionary, directObject, integer, line
|
||||||
)
|
)
|
||||||
import Text.Parsec
|
import Text.Parsec
|
||||||
|
|
||||||
|
@ -55,17 +56,20 @@ getOffset objectId = do
|
||||||
"obj " ++ show objectId ++ " is referenced but missing in XRef table"
|
"obj " ++ show objectId ++ " is referenced but missing in XRef table"
|
||||||
Just offset -> return offset
|
Just offset -> return offset
|
||||||
|
|
||||||
|
on :: Monad m => ParsecT s u m a -> s -> ParsecT s u m a
|
||||||
|
on parser input = do
|
||||||
|
originalInput <- getInput
|
||||||
|
setInput input >> parser <* setInput originalInput
|
||||||
|
|
||||||
loadNumber :: Int -> SParser Float
|
loadNumber :: Int -> SParser Float
|
||||||
loadNumber objectId = do
|
loadNumber objectId = do
|
||||||
offset <- getOffset objectId
|
offset <- getOffset objectId
|
||||||
state <- getState
|
objectStart <- BS.drop offset . input <$> getState
|
||||||
let objectStart = BS.drop offset (input state)
|
indirectObjCoordinates `on` objectStart >> return ()
|
||||||
case runParser object state ("object@" ++ show offset) objectStart of
|
objectValue <- (!objectId) . objects . content <$> getState
|
||||||
Left _ -> fail $ "Could not parse length object@" ++ show offset
|
case objectValue of
|
||||||
Right number@(Direct (Number n)) ->
|
Direct (Number n) -> return n
|
||||||
addObject objectId number >> return n
|
obj -> fail $ "Expected obj@" ++ show offset ++ " (" ++ show obj ++ ") to be a Number"
|
||||||
Right obj -> fail $
|
|
||||||
"Expected obj@" ++ show offset ++ " (" ++ show obj ++ ") to be a Number"
|
|
||||||
|
|
||||||
invalidValue :: Object -> String
|
invalidValue :: Object -> String
|
||||||
invalidValue v = "Invalid value " ++ show v
|
invalidValue v = "Invalid value " ++ show v
|
||||||
|
@ -82,26 +86,28 @@ getSize (Just (Reference (IndirectObjCoordinates {objectId}))) = do
|
||||||
invalidValue v ++ " for obj " ++ show objectId ++ "used as /Length"
|
invalidValue v ++ " for obj " ++ show objectId ++ "used as /Length"
|
||||||
getSize (Just v) = fail $ invalidValue (Direct v) ++ " for /Length"
|
getSize (Just v) = fail $ invalidValue (Direct v) ++ " for /Length"
|
||||||
|
|
||||||
object :: SParser Object
|
streamObject :: SParser Object
|
||||||
object =
|
streamObject = try $ do
|
||||||
Direct <$> directObject
|
header <- dictionary <* blank
|
||||||
<|> do
|
size <- getSize (Map.lookup "Length" header)
|
||||||
header <- dictionary
|
streamContent <- BS.pack <$> stream (truncate size)
|
||||||
size <- getSize (Map.lookup "Length" header)
|
return $ Stream {header, streamContent}
|
||||||
streamContent <- BS.pack <$> stream (truncate size)
|
|
||||||
return $ Stream {header, streamContent}
|
|
||||||
where
|
where
|
||||||
stream size = line "stream" *> count size anyChar <* eol <* line "endstream"
|
stream size = line "stream" *> count size anyChar <* blank <* line "endstream"
|
||||||
|
|
||||||
|
object :: SParser Object
|
||||||
|
object = streamObject <|> Direct <$> directObject
|
||||||
|
|
||||||
|
indirectObjCoordinates :: SParser IndirectObjCoordinates
|
||||||
|
indirectObjCoordinates = do
|
||||||
|
objectId <- integer
|
||||||
|
coordinates <- IndirectObjCoordinates objectId <$> integer
|
||||||
|
objectValue <- line "obj" *> object <* blank <* line "endobj"
|
||||||
|
addObject objectId objectValue
|
||||||
|
return coordinates
|
||||||
|
|
||||||
occurrence :: SParser Occurrence
|
occurrence :: SParser Occurrence
|
||||||
occurrence = Comment <$> comment <|> indirectObj
|
occurrence = Comment <$> comment <|> Indirect <$> indirectObjCoordinates
|
||||||
where
|
|
||||||
indirectObj = do
|
|
||||||
objectId <- integer
|
|
||||||
coordinates <- IndirectObjCoordinates objectId <$> integer
|
|
||||||
objectValue <- line "obj" *> object <* eol <* line "endobj"
|
|
||||||
addObject objectId objectValue
|
|
||||||
return $ Indirect coordinates
|
|
||||||
|
|
||||||
populate :: ByteString -> Content -> Content
|
populate :: ByteString -> Content -> Content
|
||||||
populate input initialContent =
|
populate input initialContent =
|
||||||
|
|
|
@ -10,6 +10,7 @@ module PDF.Object (
|
||||||
, XRefEntry(..)
|
, XRefEntry(..)
|
||||||
, XRefSection
|
, XRefSection
|
||||||
, XRefSubSection(..)
|
, XRefSubSection(..)
|
||||||
|
, blank
|
||||||
, content
|
, content
|
||||||
, dictionary
|
, dictionary
|
||||||
, directObject
|
, directObject
|
||||||
|
@ -108,11 +109,6 @@ blank = skipMany whiteSpace
|
||||||
delimiterCharset :: String
|
delimiterCharset :: String
|
||||||
delimiterCharset = "()<>[]{}/%"
|
delimiterCharset = "()<>[]{}/%"
|
||||||
|
|
||||||
{-
|
|
||||||
delimiter :: Parser u Char
|
|
||||||
delimiter = oneOf delimiterCharset
|
|
||||||
-}
|
|
||||||
|
|
||||||
regular :: Parser u Char
|
regular :: Parser u Char
|
||||||
regular = noneOf $ eolCharset ++ whiteSpaceCharset ++ delimiterCharset
|
regular = noneOf $ eolCharset ++ whiteSpaceCharset ++ delimiterCharset
|
||||||
|
|
||||||
|
@ -145,7 +141,7 @@ stringObj =
|
||||||
Literal <$> (char '(' *> (concat <$> many literalStringBlock) <* char ')')
|
Literal <$> (char '(' *> (concat <$> many literalStringBlock) <* char ')')
|
||||||
<|> Hexadecimal <$> (char '<' *> many hexDigit <* char '>')
|
<|> Hexadecimal <$> (char '<' *> many hexDigit <* char '>')
|
||||||
where
|
where
|
||||||
literalStringBlock = many (noneOf "\\(") <|> matchingParenthesis <|> escapedChar
|
literalStringBlock = many1 (noneOf "\\()") <|> matchingParenthesis <|> escapedChar
|
||||||
matchingParenthesis =
|
matchingParenthesis =
|
||||||
(++) <$> ((:) <$> char '(' <*> literalStringBlock) <*> string ")"
|
(++) <$> ((:) <$> char '(' <*> literalStringBlock) <*> string ")"
|
||||||
escapedChar = (:) <$> char '\\' <*> ((:[]) <$> oneOf "nrtbf()\\" <|> octalCode)
|
escapedChar = (:) <$> char '\\' <*> ((:[]) <$> oneOf "nrtbf()\\" <|> octalCode)
|
||||||
|
|
Loading…
Reference in a new issue