{-# LANGUAGE NamedFieldPuns #-} module PDF.Body ( populate ) where import Data.ByteString.Lazy.Char8 (ByteString) import qualified Data.ByteString.Lazy.Char8 as BS (drop, pack) import Data.Int (Int64) import Data.Map ((!)) import qualified Data.Map as Map (insert, lookup) import PDF.Object ( Content(..), DirectObject(..), IndirectObjCoordinates(..), Object(..) , Occurrence(..), Parser, XRefEntry(..), XRefSection, XRefSubSection(..) , blank, eol, eolCharset, dictionary, directObject, integer, line ) import Text.Parsec data UserState = UserState { input :: ByteString , content :: Content } type SParser = Parser UserState modifyContent :: (Content -> Content) -> SParser () modifyContent f = modifyState $ \state -> state {content = f $ content state} addObject :: Int -> Object -> SParser () addObject objectId newObject = modifyContent $ \content -> content { objects = Map.insert objectId newObject $ objects content } pushOccurrence :: Occurrence -> SParser () pushOccurrence newOccurrence = modifyContent $ \content -> content { body = newOccurrence : (body content) } comment :: Parser u String comment = char '%' *> many (noneOf eolCharset) <* eol lookupOffset :: Int -> XRefSection -> Maybe Int64 lookupOffset _ [] = Nothing lookupOffset objectId (xrefSubSection:others) = let XRefSubSection {firstObjectId, entriesNumber, entries} = xrefSubSection in let index = objectId - firstObjectId in if index >= 0 && index < entriesNumber then case Map.lookup index entries of Just (InUse {offset}) -> Just offset _ -> Nothing else lookupOffset objectId others getOffset :: Int -> SParser Int64 getOffset objectId = do Content {xrefSection} <- content <$> getState case lookupOffset objectId xrefSection of Nothing -> fail $ "obj " ++ show objectId ++ " is referenced but missing in XRef table" Just offset -> return offset on :: Monad m => ParsecT s u m a -> s -> ParsecT s u m a on parser input = do originalInput <- getInput setInput input >> parser <* setInput originalInput loadNumber :: Int -> SParser Float loadNumber objectId = do offset <- getOffset objectId objectStart <- BS.drop offset . input <$> getState indirectObjCoordinates `on` objectStart >> return () objectValue <- (!objectId) . objects . content <$> getState case objectValue of Direct (Number n) -> return n obj -> fail $ "Expected obj@" ++ show offset ++ " (" ++ show obj ++ ") to be a Number" invalidValue :: Object -> String invalidValue v = "Invalid value " ++ show v getSize :: Maybe DirectObject -> SParser Float getSize Nothing = fail "Missing '/Length' key on stream" getSize (Just (Number size)) = return size getSize (Just (Reference (IndirectObjCoordinates {objectId}))) = do Content {objects} <- content <$> getState case Map.lookup objectId objects of Nothing -> loadNumber objectId Just (Direct (Number size)) -> return size Just v -> fail $ invalidValue v ++ " for obj " ++ show objectId ++ "used as /Length" getSize (Just v) = fail $ invalidValue (Direct v) ++ " for /Length" streamObject :: SParser Object streamObject = try $ do header <- dictionary <* blank size <- getSize (Map.lookup "Length" header) streamContent <- BS.pack <$> stream (truncate size) return $ Stream {header, streamContent} where stream size = line "stream" *> count size anyChar <* blank <* line "endstream" object :: SParser Object object = streamObject <|> Direct <$> directObject indirectObjCoordinates :: SParser IndirectObjCoordinates indirectObjCoordinates = do objectId <- integer coordinates <- IndirectObjCoordinates objectId <$> integer objectValue <- line "obj" *> object <* blank <* line "endobj" addObject objectId objectValue return coordinates occurrence :: SParser Occurrence occurrence = Comment <$> comment <|> Indirect <$> indirectObjCoordinates populate :: ByteString -> Content -> Content populate input initialContent = let bodyInput = BS.drop (startOffset initialContent) input in case runParser recurseOnOccurrences initialState "" bodyInput of Left _ -> initialContent Right finalState -> let finalContent = content finalState in finalContent {body = reverse (body finalContent)} where initialState = UserState {input, content = initialContent} recurseOnOccurrences :: SParser UserState recurseOnOccurrences = (occurrence >>= pushOccurrence >> recurseOnOccurrences) <|> getState