Hufflepdf/src/PDF/Body.hs

125 lines
4.2 KiB
Haskell

{-# LANGUAGE NamedFieldPuns #-}
module PDF.Body (
populate
) where
import Data.ByteString.Lazy.Char8 (ByteString)
import qualified Data.ByteString.Lazy.Char8 as BS (drop, pack)
import Data.Int (Int64)
import Data.Map ((!))
import qualified Data.Map as Map (insert, lookup)
import PDF.Object (
Content(..), DirectObject(..), IndirectObjCoordinates(..), Object(..)
, Occurrence(..), Parser, XRefEntry(..), XRefSection, XRefSubSection(..)
, blank, eol, eolCharset, dictionary, directObject, integer, line
)
import Text.Parsec
data UserState = UserState {
input :: ByteString
, content :: Content
}
type SParser = Parser UserState
modifyContent :: (Content -> Content) -> SParser ()
modifyContent f = modifyState $ \state -> state {content = f $ content state}
addObject :: Int -> Object -> SParser ()
addObject objectId newObject = modifyContent $ \content -> content {
objects = Map.insert objectId newObject $ objects content
}
pushOccurrence :: Occurrence -> SParser ()
pushOccurrence newOccurrence = modifyContent $ \content -> content {
body = newOccurrence : (body content)
}
comment :: Parser u String
comment = char '%' *> many (noneOf eolCharset) <* eol
lookupOffset :: Int -> XRefSection -> Maybe Int64
lookupOffset _ [] = Nothing
lookupOffset objectId (xrefSubSection:others) =
let XRefSubSection {firstObjectId, entriesNumber, entries} = xrefSubSection in
let index = objectId - firstObjectId in
if index >= 0 && index < entriesNumber
then
case Map.lookup index entries of
Just (InUse {offset}) -> Just offset
_ -> Nothing
else lookupOffset objectId others
getOffset :: Int -> SParser Int64
getOffset objectId = do
Content {xrefSection} <- content <$> getState
case lookupOffset objectId xrefSection of
Nothing -> fail $
"obj " ++ show objectId ++ " is referenced but missing in XRef table"
Just offset -> return offset
on :: Monad m => ParsecT s u m a -> s -> ParsecT s u m a
on parser input = do
originalInput <- getInput
setInput input >> parser <* setInput originalInput
loadNumber :: Int -> SParser Float
loadNumber objectId = do
offset <- getOffset objectId
objectStart <- BS.drop offset . input <$> getState
indirectObjCoordinates `on` objectStart >> return ()
objectValue <- (!objectId) . objects . content <$> getState
case objectValue of
Direct (Number n) -> return n
obj -> fail $ "Expected obj@" ++ show offset ++ " (" ++ show obj ++ ") to be a Number"
invalidValue :: Object -> String
invalidValue v = "Invalid value " ++ show v
getSize :: Maybe DirectObject -> SParser Float
getSize Nothing = fail "Missing '/Length' key on stream"
getSize (Just (Number size)) = return size
getSize (Just (Reference (IndirectObjCoordinates {objectId}))) = do
Content {objects} <- content <$> getState
case Map.lookup objectId objects of
Nothing -> loadNumber objectId
Just (Direct (Number size)) -> return size
Just v -> fail $
invalidValue v ++ " for obj " ++ show objectId ++ "used as /Length"
getSize (Just v) = fail $ invalidValue (Direct v) ++ " for /Length"
streamObject :: SParser Object
streamObject = try $ do
header <- dictionary <* blank
size <- getSize (Map.lookup "Length" header)
streamContent <- BS.pack <$> stream (truncate size)
return $ Stream {header, streamContent}
where
stream size = line "stream" *> count size anyChar <* blank <* line "endstream"
object :: SParser Object
object = streamObject <|> Direct <$> directObject
indirectObjCoordinates :: SParser IndirectObjCoordinates
indirectObjCoordinates = do
objectId <- integer
coordinates <- IndirectObjCoordinates objectId <$> integer
objectValue <- line "obj" *> object <* blank <* line "endobj"
addObject objectId objectValue
return coordinates
occurrence :: SParser Occurrence
occurrence = Comment <$> comment <|> Indirect <$> indirectObjCoordinates
populate :: ByteString -> Content -> Content
populate input initialContent =
case runParser recurseOnOccurrences initialState "" input of
Left _ -> initialContent
Right finalState -> content finalState
where
initialState = UserState {input, content = initialContent}
recurseOnOccurrences :: SParser UserState
recurseOnOccurrences =
(occurrence >>= pushOccurrence >> recurseOnOccurrences) <|> getState