{-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE OverloadedStrings #-} module PDF.Body ( populate ) where import Control.Applicative ((<|>)) import Control.Monad.State (get, gets, modify) import Data.Attoparsec.ByteString.Char8 (option) import Data.ByteString.Char8 (ByteString) import qualified Data.ByteString.Char8 as BS (cons, drop, unpack) import Data.Map ((!)) import qualified Data.Map as Map (empty, insert, lookup) import qualified PDF.EOL as EOL (charset, parser) import PDF.Object ( Content(..), DirectObject(..), Flow(..), IndirectObjCoordinates(..) , InputStructure(..), Name(..), Number(..), Object(..), Occurrence(..) , Structure(..), XRefEntry(..), XRefSection , blank, dictionary, directObject, integer, line ) import PDF.Output (ObjectId(..), Offset(..)) import PDF.Parser (Parser, (), block, char, evalParser, on, takeAll) data UserState = UserState { input :: ByteString , xreferences :: XRefSection , flow :: Flow } type SParser = Parser UserState modifyFlow :: (Flow -> Flow) -> SParser () modifyFlow f = modify $ \state -> state {flow = f $ flow state} addObject :: ObjectId -> Object -> SParser () addObject objectId newObject = modifyFlow $ \flow -> flow { tmpObjects = Map.insert objectId newObject $ tmpObjects flow } pushOccurrence :: Occurrence -> SParser () pushOccurrence newOccurrence = modifyFlow $ \flow -> flow { occurrencesStack = newOccurrence : (occurrencesStack flow) } comment :: Parser u String comment = BS.unpack <$> (option "" afterPercent <* EOL.parser) where afterPercent = BS.cons <$> char '%' <*> takeAll (not . (`elem` EOL.charset)) lookupOffset :: ObjectId -> SParser Offset lookupOffset objectId = do table <- gets xreferences case Map.lookup objectId table >>= entryOffset of Nothing -> fail $ "obj " ++ show objectId ++ " is referenced but missing in XRef table" Just offset -> return offset where entryOffset (InUse {offset}) = Just offset entryOffset _ = Nothing loadNumber :: ObjectId -> SParser Double loadNumber objectId = do offset <- getOffset <$> lookupOffset objectId objectStart <- BS.drop offset <$> gets input indirectObjCoordinates `on` (objectStart :: ByteString) >> return () objectValue <- (!objectId) . tmpObjects <$> gets flow case objectValue of Direct (NumberObject (Number n)) -> return n obj -> fail $ "Expected obj@" ++ show offset ++ " (" ++ show obj ++ ") to be a Number" invalidValue :: Object -> String invalidValue v = "Invalid value " ++ show v getSize :: Maybe DirectObject -> SParser Double getSize Nothing = fail "Missing '/Length' key on stream" getSize (Just (NumberObject (Number size))) = return size getSize (Just (Reference (IndirectObjCoordinates {objectId}))) = do Flow {tmpObjects} <- gets flow case Map.lookup objectId tmpObjects of Nothing -> loadNumber objectId Just (Direct (NumberObject (Number size))) -> return size Just v -> fail $ invalidValue v ++ " for obj " ++ show objectId ++ "used as /Length" getSize (Just v) = fail $ invalidValue (Direct v) ++ " for /Length" streamObject :: SParser Object streamObject = do header <- dictionary <* blank size <- getSize (Map.lookup (Name "Length") header) streamContent <- stream (truncate size) return $ Stream {header, streamContent} where stream size = line "stream" *> block size <* blank <* line "endstream" object :: SParser Object object = streamObject <|> Direct <$> directObject indirectObjCoordinates :: SParser IndirectObjCoordinates indirectObjCoordinates = do objectId <- ObjectId <$> integer coordinates <- IndirectObjCoordinates objectId <$> integer objectValue <- line "obj" *> object <* blank <* line "endobj" addObject objectId objectValue return coordinates occurrence :: SParser Occurrence occurrence = Comment <$> comment <|> Indirect <$> indirectObjCoordinates "comment or object" populate :: ByteString -> InputStructure -> Content populate input structure = let bodyInput = BS.drop (startOffset structure) input in case evalParser recurseOnOccurrences initialState bodyInput of Left _ -> Content {occurrences = [], objects = Map.empty, docStructure} Right finalState -> let Flow {occurrencesStack, tmpObjects} = flow finalState in Content { occurrences = reverse occurrencesStack, objects = tmpObjects, docStructure } where docStructure = inputStructure structure xreferences = xRef docStructure initialState = UserState { input, xreferences, flow = Flow { occurrencesStack = [], tmpObjects = Map.empty } } recurseOnOccurrences :: SParser UserState recurseOnOccurrences = (occurrence >>= pushOccurrence >> recurseOnOccurrences) <|> get