125 lines
4.2 KiB
Haskell
125 lines
4.2 KiB
Haskell
{-# LANGUAGE NamedFieldPuns #-}
|
|
module PDF.Body (
|
|
populate
|
|
) where
|
|
|
|
import Data.ByteString.Lazy.Char8 (ByteString)
|
|
import qualified Data.ByteString.Lazy.Char8 as BS (drop, pack)
|
|
import Data.Int (Int64)
|
|
import Data.Map ((!))
|
|
import qualified Data.Map as Map (insert, lookup)
|
|
import PDF.Object (
|
|
Content(..), DirectObject(..), IndirectObjCoordinates(..), Object(..)
|
|
, Occurrence(..), Parser, XRefEntry(..), XRefSection, XRefSubSection(..)
|
|
, blank, eol, eolCharset, dictionary, directObject, integer, line
|
|
)
|
|
import Text.Parsec
|
|
|
|
data UserState = UserState {
|
|
input :: ByteString
|
|
, content :: Content
|
|
}
|
|
|
|
type SParser = Parser UserState
|
|
|
|
modifyContent :: (Content -> Content) -> SParser ()
|
|
modifyContent f = modifyState $ \state -> state {content = f $ content state}
|
|
|
|
addObject :: Int -> Object -> SParser ()
|
|
addObject objectId newObject = modifyContent $ \content -> content {
|
|
objects = Map.insert objectId newObject $ objects content
|
|
}
|
|
|
|
pushOccurrence :: Occurrence -> SParser ()
|
|
pushOccurrence newOccurrence = modifyContent $ \content -> content {
|
|
body = newOccurrence : (body content)
|
|
}
|
|
|
|
comment :: Parser u String
|
|
comment = char '%' *> many (noneOf eolCharset) <* eol
|
|
|
|
lookupOffset :: Int -> XRefSection -> Maybe Int64
|
|
lookupOffset _ [] = Nothing
|
|
lookupOffset objectId (xrefSubSection:others) =
|
|
let XRefSubSection {firstObjectId, entriesNumber, entries} = xrefSubSection in
|
|
let index = objectId - firstObjectId in
|
|
if index >= 0 && index < entriesNumber
|
|
then
|
|
case Map.lookup index entries of
|
|
Just (InUse {offset}) -> Just offset
|
|
_ -> Nothing
|
|
else lookupOffset objectId others
|
|
|
|
getOffset :: Int -> SParser Int64
|
|
getOffset objectId = do
|
|
Content {xrefSection} <- content <$> getState
|
|
case lookupOffset objectId xrefSection of
|
|
Nothing -> fail $
|
|
"obj " ++ show objectId ++ " is referenced but missing in XRef table"
|
|
Just offset -> return offset
|
|
|
|
on :: Monad m => ParsecT s u m a -> s -> ParsecT s u m a
|
|
on parser input = do
|
|
originalInput <- getInput
|
|
setInput input >> parser <* setInput originalInput
|
|
|
|
loadNumber :: Int -> SParser Float
|
|
loadNumber objectId = do
|
|
offset <- getOffset objectId
|
|
objectStart <- BS.drop offset . input <$> getState
|
|
indirectObjCoordinates `on` objectStart >> return ()
|
|
objectValue <- (!objectId) . objects . content <$> getState
|
|
case objectValue of
|
|
Direct (Number n) -> return n
|
|
obj -> fail $ "Expected obj@" ++ show offset ++ " (" ++ show obj ++ ") to be a Number"
|
|
|
|
invalidValue :: Object -> String
|
|
invalidValue v = "Invalid value " ++ show v
|
|
|
|
getSize :: Maybe DirectObject -> SParser Float
|
|
getSize Nothing = fail "Missing '/Length' key on stream"
|
|
getSize (Just (Number size)) = return size
|
|
getSize (Just (Reference (IndirectObjCoordinates {objectId}))) = do
|
|
Content {objects} <- content <$> getState
|
|
case Map.lookup objectId objects of
|
|
Nothing -> loadNumber objectId
|
|
Just (Direct (Number size)) -> return size
|
|
Just v -> fail $
|
|
invalidValue v ++ " for obj " ++ show objectId ++ "used as /Length"
|
|
getSize (Just v) = fail $ invalidValue (Direct v) ++ " for /Length"
|
|
|
|
streamObject :: SParser Object
|
|
streamObject = try $ do
|
|
header <- dictionary <* blank
|
|
size <- getSize (Map.lookup "Length" header)
|
|
streamContent <- BS.pack <$> stream (truncate size)
|
|
return $ Stream {header, streamContent}
|
|
where
|
|
stream size = line "stream" *> count size anyChar <* blank <* line "endstream"
|
|
|
|
object :: SParser Object
|
|
object = streamObject <|> Direct <$> directObject
|
|
|
|
indirectObjCoordinates :: SParser IndirectObjCoordinates
|
|
indirectObjCoordinates = do
|
|
objectId <- integer
|
|
coordinates <- IndirectObjCoordinates objectId <$> integer
|
|
objectValue <- line "obj" *> object <* blank <* line "endobj"
|
|
addObject objectId objectValue
|
|
return coordinates
|
|
|
|
occurrence :: SParser Occurrence
|
|
occurrence = Comment <$> comment <|> Indirect <$> indirectObjCoordinates
|
|
|
|
populate :: ByteString -> Content -> Content
|
|
populate input initialContent =
|
|
case runParser recurseOnOccurrences initialState "" input of
|
|
Left _ -> initialContent
|
|
Right finalState -> content finalState
|
|
where
|
|
initialState = UserState {input, content = initialContent}
|
|
|
|
recurseOnOccurrences :: SParser UserState
|
|
recurseOnOccurrences =
|
|
(occurrence >>= pushOccurrence >> recurseOnOccurrences) <|> getState
|