Hufflepdf/src/PDF/Body.hs
2019-05-18 09:01:13 +02:00

138 lines
4.8 KiB
Haskell

{-# LANGUAGE NamedFieldPuns #-}
module PDF.Body (
populate
) where
import Data.ByteString.Lazy.Char8 (ByteString)
import qualified Data.ByteString.Lazy.Char8 as BS (drop, pack)
import Data.Int (Int64)
import Data.Map ((!))
import qualified Data.Map as Map (empty, insert, lookup)
import qualified PDF.EOL as EOL (charset, parser)
import PDF.Object (
Content(..), DirectObject(..), Flow(..), IndirectObjCoordinates(..)
, InputStructure(..), Name(..), Number(..), Object(..), Occurrence(..)
, Parser, Structure(..), XRefEntry(..), XRefSection, XRefSubSection(..)
, blank, dictionary, directObject, integer, line
)
import Text.Parsec
data UserState = UserState {
input :: ByteString
, xreferences :: XRefSection
, flow :: Flow
}
type SParser = Parser UserState
modifyFlow :: (Flow -> Flow) -> SParser ()
modifyFlow f = modifyState $ \state -> state {flow = f $ flow state}
addObject :: Int -> Object -> SParser ()
addObject objectId newObject = modifyFlow $ \flow -> flow {
tmpObjects = Map.insert objectId newObject $ tmpObjects flow
}
pushOccurrence :: Occurrence -> SParser ()
pushOccurrence newOccurrence = modifyFlow $ \flow -> flow {
occurrencesStack = newOccurrence : (occurrencesStack flow)
}
comment :: Parser u String
comment = char '%' *> many (noneOf EOL.charset) <* EOL.parser
lookupOffset :: Int -> XRefSection -> Maybe Int64
lookupOffset _ [] = Nothing
lookupOffset objectId (xrefSubSection:others) =
let XRefSubSection {firstObjectId, entriesNumber, entries} = xrefSubSection in
let index = objectId - firstObjectId in
if index >= 0 && index < entriesNumber
then
case Map.lookup index entries of
Just (InUse {offset}) -> Just offset
_ -> Nothing
else lookupOffset objectId others
getOffset :: Int -> SParser Int64
getOffset objectId = do
table <- xreferences <$> getState
case lookupOffset objectId table of
Nothing -> fail $
"obj " ++ show objectId ++ " is referenced but missing in XRef table"
Just offset -> return offset
on :: Monad m => ParsecT s u m a -> s -> ParsecT s u m a
on parser input = do
originalInput <- getInput
setInput input >> parser <* setInput originalInput
loadNumber :: Int -> SParser Float
loadNumber objectId = do
offset <- getOffset objectId
objectStart <- BS.drop offset . input <$> getState
indirectObjCoordinates `on` objectStart >> return ()
objectValue <- (!objectId) . tmpObjects . flow <$> getState
case objectValue of
Direct (NumberObject (Number n)) -> return n
obj -> fail $ "Expected obj@" ++ show offset ++ " (" ++ show obj ++ ") to be a Number"
invalidValue :: Object -> String
invalidValue v = "Invalid value " ++ show v
getSize :: Maybe DirectObject -> SParser Float
getSize Nothing = fail "Missing '/Length' key on stream"
getSize (Just (NumberObject (Number size))) = return size
getSize (Just (Reference (IndirectObjCoordinates {objectId}))) = do
Flow {tmpObjects} <- flow <$> getState
case Map.lookup objectId tmpObjects of
Nothing -> loadNumber objectId
Just (Direct (NumberObject (Number size))) -> return size
Just v -> fail $
invalidValue v ++ " for obj " ++ show objectId ++ "used as /Length"
getSize (Just v) = fail $ invalidValue (Direct v) ++ " for /Length"
streamObject :: SParser Object
streamObject = try $ do
header <- dictionary <* blank
size <- getSize (Map.lookup (Name "Length") header)
streamContent <- BS.pack <$> stream (truncate size)
return $ Stream {header, streamContent}
where
stream size = line "stream" *> count size anyChar <* blank <* line "endstream"
object :: SParser Object
object = streamObject <|> Direct <$> directObject
indirectObjCoordinates :: SParser IndirectObjCoordinates
indirectObjCoordinates = do
objectId <- integer
coordinates <- IndirectObjCoordinates objectId <$> integer
objectValue <- line "obj" *> object <* blank <* line "endobj"
addObject objectId objectValue
return coordinates
occurrence :: SParser Occurrence
occurrence = Comment <$> comment <|> Indirect <$> indirectObjCoordinates
populate :: ByteString -> InputStructure -> Content
populate input structure =
let bodyInput = BS.drop (startOffset structure) input in
case runParser recurseOnOccurrences initialState "" bodyInput of
Left _ -> Content {occurrences = [], objects = Map.empty, docStructure}
Right finalState ->
let Flow {occurrencesStack, tmpObjects} = flow finalState in
Content {
occurrences = reverse occurrencesStack, objects = tmpObjects, docStructure
}
where
docStructure = inputStructure structure
xreferences = xrefSection docStructure
initialState = UserState {
input, xreferences, flow = Flow {
occurrencesStack = [], tmpObjects = Map.empty
}
}
recurseOnOccurrences :: SParser UserState
recurseOnOccurrences =
(occurrence >>= pushOccurrence >> recurseOnOccurrences) <|> getState