Hufflepdf/src/PDF/Body.hs

129 lines
4.4 KiB
Haskell

{-# LANGUAGE NamedFieldPuns #-}
module PDF.Body (
populate
) where
import Data.ByteString.Lazy.Char8 (ByteString)
import qualified Data.ByteString.Lazy.Char8 as BS (drop, pack)
import Data.Int (Int64)
import Data.Map ((!))
import qualified Data.Map as Map (insert, lookup)
import qualified PDF.EOL as EOL (charset, parser)
import PDF.Object (
Content(..), DirectObject(..), IndirectObjCoordinates(..), Object(..)
, Occurrence(..), Parser, XRefEntry(..), XRefSection, XRefSubSection(..)
, blank, dictionary, directObject, integer, line
)
import Text.Parsec
data UserState = UserState {
input :: ByteString
, content :: Content
}
type SParser = Parser UserState
modifyContent :: (Content -> Content) -> SParser ()
modifyContent f = modifyState $ \state -> state {content = f $ content state}
addObject :: Int -> Object -> SParser ()
addObject objectId newObject = modifyContent $ \content -> content {
objects = Map.insert objectId newObject $ objects content
}
pushOccurrence :: Occurrence -> SParser ()
pushOccurrence newOccurrence = modifyContent $ \content -> content {
body = newOccurrence : (body content)
}
comment :: Parser u String
comment = char '%' *> many (noneOf EOL.charset) <* EOL.parser
lookupOffset :: Int -> XRefSection -> Maybe Int64
lookupOffset _ [] = Nothing
lookupOffset objectId (xrefSubSection:others) =
let XRefSubSection {firstObjectId, entriesNumber, entries} = xrefSubSection in
let index = objectId - firstObjectId in
if index >= 0 && index < entriesNumber
then
case Map.lookup index entries of
Just (InUse {offset}) -> Just offset
_ -> Nothing
else lookupOffset objectId others
getOffset :: Int -> SParser Int64
getOffset objectId = do
Content {xrefSection} <- content <$> getState
case lookupOffset objectId xrefSection of
Nothing -> fail $
"obj " ++ show objectId ++ " is referenced but missing in XRef table"
Just offset -> return offset
on :: Monad m => ParsecT s u m a -> s -> ParsecT s u m a
on parser input = do
originalInput <- getInput
setInput input >> parser <* setInput originalInput
loadNumber :: Int -> SParser Float
loadNumber objectId = do
offset <- getOffset objectId
objectStart <- BS.drop offset . input <$> getState
indirectObjCoordinates `on` objectStart >> return ()
objectValue <- (!objectId) . objects . content <$> getState
case objectValue of
Direct (Number n) -> return n
obj -> fail $ "Expected obj@" ++ show offset ++ " (" ++ show obj ++ ") to be a Number"
invalidValue :: Object -> String
invalidValue v = "Invalid value " ++ show v
getSize :: Maybe DirectObject -> SParser Float
getSize Nothing = fail "Missing '/Length' key on stream"
getSize (Just (Number size)) = return size
getSize (Just (Reference (IndirectObjCoordinates {objectId}))) = do
Content {objects} <- content <$> getState
case Map.lookup objectId objects of
Nothing -> loadNumber objectId
Just (Direct (Number size)) -> return size
Just v -> fail $
invalidValue v ++ " for obj " ++ show objectId ++ "used as /Length"
getSize (Just v) = fail $ invalidValue (Direct v) ++ " for /Length"
streamObject :: SParser Object
streamObject = try $ do
header <- dictionary <* blank
size <- getSize (Map.lookup "Length" header)
streamContent <- BS.pack <$> stream (truncate size)
return $ Stream {header, streamContent}
where
stream size = line "stream" *> count size anyChar <* blank <* line "endstream"
object :: SParser Object
object = streamObject <|> Direct <$> directObject
indirectObjCoordinates :: SParser IndirectObjCoordinates
indirectObjCoordinates = do
objectId <- integer
coordinates <- IndirectObjCoordinates objectId <$> integer
objectValue <- line "obj" *> object <* blank <* line "endobj"
addObject objectId objectValue
return coordinates
occurrence :: SParser Occurrence
occurrence = Comment <$> comment <|> Indirect <$> indirectObjCoordinates
populate :: ByteString -> Content -> Content
populate input initialContent =
let bodyInput = BS.drop (startOffset initialContent) input in
case runParser recurseOnOccurrences initialState "" bodyInput of
Left _ -> initialContent
Right finalState ->
let finalContent = content finalState in
finalContent {body = reverse (body finalContent)}
where
initialState = UserState {input, content = initialContent}
recurseOnOccurrences :: SParser UserState
recurseOnOccurrences =
(occurrence >>= pushOccurrence >> recurseOnOccurrences) <|> getState