Hufflepdf/src/PDF/Body.hs

139 lines
4.9 KiB
Haskell

{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
module PDF.Body (
populate
) where
import Control.Applicative ((<|>))
import Control.Monad.State (get, gets, modify)
import Data.ByteString.Char8 (ByteString)
import qualified Data.ByteString.Char8 as BS (cons, drop, unpack)
import Data.Map ((!))
import qualified Data.Map as Map (empty, insert, lookup)
import qualified PDF.EOL as EOL (charset, parser)
import PDF.Object (
Content(..), DirectObject(..), Flow(..), IndirectObjCoordinates(..)
, InputStructure(..), Name(..), Number(..), Object(..), Occurrence(..)
, Structure(..), XRefEntry(..), XRefSection, XRefSubSection(..)
, blank, dictionary, directObject, integer, line
)
import PDF.Parser (Parser, (<?>), block, char, on, option, runParser, takeAll)
data UserState = UserState {
input :: ByteString
, xreferences :: XRefSection
, flow :: Flow
}
type SParser = Parser UserState
modifyFlow :: (Flow -> Flow) -> SParser ()
modifyFlow f = modify $ \state -> state {flow = f $ flow state}
addObject :: Int -> Object -> SParser ()
addObject objectId newObject = modifyFlow $ \flow -> flow {
tmpObjects = Map.insert objectId newObject $ tmpObjects flow
}
pushOccurrence :: Occurrence -> SParser ()
pushOccurrence newOccurrence = modifyFlow $ \flow -> flow {
occurrencesStack = newOccurrence : (occurrencesStack flow)
}
comment :: Parser u String
comment = BS.unpack <$> (option "" afterPercent <* EOL.parser)
where
afterPercent = BS.cons <$> char '%' <*> takeAll (not . (`elem` EOL.charset))
lookupOffset :: Int -> XRefSection -> Maybe Int
lookupOffset _ [] = Nothing
lookupOffset objectId (xrefSubSection:others) =
let XRefSubSection {firstObjectId, entriesNumber, entries} = xrefSubSection in
let index = objectId - firstObjectId in
if index >= 0 && index < entriesNumber
then
case Map.lookup index entries of
Just (InUse {offset}) -> Just offset
_ -> Nothing
else lookupOffset objectId others
getOffset :: Int -> SParser Int
getOffset objectId = do
table <- gets xreferences
case lookupOffset objectId table of
Nothing -> fail $
"obj " ++ show objectId ++ " is referenced but missing in XRef table"
Just offset -> return offset
loadNumber :: Int -> SParser Float
loadNumber objectId = do
offset <- getOffset objectId
objectStart <- BS.drop offset <$> gets input
indirectObjCoordinates `on` (objectStart :: ByteString) >> return ()
objectValue <- (!objectId) . tmpObjects <$> gets flow
case objectValue of
Direct (NumberObject (Number n)) -> return n
obj -> fail $ "Expected obj@" ++ show offset ++ " (" ++ show obj ++ ") to be a Number"
invalidValue :: Object -> String
invalidValue v = "Invalid value " ++ show v
getSize :: Maybe DirectObject -> SParser Float
getSize Nothing = fail "Missing '/Length' key on stream"
getSize (Just (NumberObject (Number size))) = return size
getSize (Just (Reference (IndirectObjCoordinates {objectId}))) = do
Flow {tmpObjects} <- gets flow
case Map.lookup objectId tmpObjects of
Nothing -> loadNumber objectId
Just (Direct (NumberObject (Number size))) -> return size
Just v -> fail $
invalidValue v ++ " for obj " ++ show objectId ++ "used as /Length"
getSize (Just v) = fail $ invalidValue (Direct v) ++ " for /Length"
streamObject :: SParser Object
streamObject = do
header <- dictionary <* blank
size <- getSize (Map.lookup (Name "Length") header)
streamContent <- stream (truncate size)
return $ Stream {header, streamContent}
where
stream size = line "stream" *> block size <* blank <* line "endstream"
object :: SParser Object
object = streamObject <|> Direct <$> directObject
indirectObjCoordinates :: SParser IndirectObjCoordinates
indirectObjCoordinates = do
objectId <- integer
coordinates <- IndirectObjCoordinates objectId <$> integer
objectValue <- line "obj" *> object <* blank <* line "endobj"
addObject objectId objectValue
return coordinates
occurrence :: SParser Occurrence
occurrence =
Comment <$> comment <|> Indirect <$> indirectObjCoordinates <?> "comment or object"
populate :: ByteString -> InputStructure -> Content
populate input structure =
let bodyInput = BS.drop (startOffset structure) input in
case runParser recurseOnOccurrences initialState bodyInput of
Left _ -> Content {occurrences = [], objects = Map.empty, docStructure}
Right finalState ->
let Flow {occurrencesStack, tmpObjects} = flow finalState in
Content {
occurrences = reverse occurrencesStack, objects = tmpObjects, docStructure
}
where
docStructure = inputStructure structure
xreferences = xrefSection docStructure
initialState = UserState {
input, xreferences, flow = Flow {
occurrencesStack = [], tmpObjects = Map.empty
}
}
recurseOnOccurrences :: SParser UserState
recurseOnOccurrences =
(occurrence >>= pushOccurrence >> recurseOnOccurrences) <|> get