2019-05-14 18:42:11 +02:00
|
|
|
{-# LANGUAGE NamedFieldPuns #-}
|
2019-05-24 10:48:09 +02:00
|
|
|
{-# LANGUAGE OverloadedStrings #-}
|
2019-05-15 19:12:38 +02:00
|
|
|
module PDF.Body (
|
|
|
|
populate
|
|
|
|
) where
|
2019-05-14 18:42:11 +02:00
|
|
|
|
2019-05-24 10:48:09 +02:00
|
|
|
import Control.Applicative ((<|>))
|
|
|
|
import Control.Monad.State (get, gets, modify)
|
2019-09-24 18:36:17 +02:00
|
|
|
import Data.Attoparsec.ByteString.Char8 (option)
|
2019-05-24 10:48:09 +02:00
|
|
|
import Data.ByteString.Char8 (ByteString)
|
2019-05-31 15:07:41 +02:00
|
|
|
import qualified Data.ByteString.Char8 as BS (cons, drop, unpack)
|
2019-05-15 15:03:55 +02:00
|
|
|
import Data.Map ((!))
|
2019-05-17 16:14:06 +02:00
|
|
|
import qualified Data.Map as Map (empty, insert, lookup)
|
2019-05-16 22:41:14 +02:00
|
|
|
import qualified PDF.EOL as EOL (charset, parser)
|
2020-02-17 15:29:59 +01:00
|
|
|
import PDF.Layer (Layer(..))
|
2019-05-14 18:42:11 +02:00
|
|
|
import PDF.Object (
|
2020-02-17 15:29:59 +01:00
|
|
|
DirectObject(..), Flow(..), IndirectObjCoordinates(..)
|
2019-05-18 09:01:13 +02:00
|
|
|
, InputStructure(..), Name(..), Number(..), Object(..), Occurrence(..)
|
2019-09-20 22:39:14 +02:00
|
|
|
, Structure(..), XRefEntry(..), XRefSection
|
2019-05-16 22:41:14 +02:00
|
|
|
, blank, dictionary, directObject, integer, line
|
2019-05-14 18:42:11 +02:00
|
|
|
)
|
2019-09-20 22:39:14 +02:00
|
|
|
import PDF.Output (ObjectId(..), Offset(..))
|
2020-02-14 18:02:40 +01:00
|
|
|
import PDF.Parser (MonadParser(..), Parser, (<?>), evalParser, on)
|
2019-05-14 18:42:11 +02:00
|
|
|
|
|
|
|
data UserState = UserState {
|
|
|
|
input :: ByteString
|
2019-05-18 09:01:13 +02:00
|
|
|
, xreferences :: XRefSection
|
2019-05-17 16:14:06 +02:00
|
|
|
, flow :: Flow
|
2019-05-14 18:42:11 +02:00
|
|
|
}
|
|
|
|
|
2019-05-15 09:04:17 +02:00
|
|
|
type SParser = Parser UserState
|
2019-05-14 18:42:11 +02:00
|
|
|
|
2019-05-17 16:14:06 +02:00
|
|
|
modifyFlow :: (Flow -> Flow) -> SParser ()
|
2019-05-24 10:48:09 +02:00
|
|
|
modifyFlow f = modify $ \state -> state {flow = f $ flow state}
|
2019-05-14 18:42:11 +02:00
|
|
|
|
2019-09-20 22:39:14 +02:00
|
|
|
addObject :: ObjectId -> Object -> SParser ()
|
2019-05-17 16:14:06 +02:00
|
|
|
addObject objectId newObject = modifyFlow $ \flow -> flow {
|
|
|
|
tmpObjects = Map.insert objectId newObject $ tmpObjects flow
|
2019-05-14 18:42:11 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
pushOccurrence :: Occurrence -> SParser ()
|
2019-05-17 16:14:06 +02:00
|
|
|
pushOccurrence newOccurrence = modifyFlow $ \flow -> flow {
|
|
|
|
occurrencesStack = newOccurrence : (occurrencesStack flow)
|
2019-05-14 18:42:11 +02:00
|
|
|
}
|
|
|
|
|
2019-05-15 09:04:17 +02:00
|
|
|
comment :: Parser u String
|
2019-05-31 15:07:41 +02:00
|
|
|
comment = BS.unpack <$> (option "" afterPercent <* EOL.parser)
|
|
|
|
where
|
|
|
|
afterPercent = BS.cons <$> char '%' <*> takeAll (not . (`elem` EOL.charset))
|
2019-05-14 18:42:11 +02:00
|
|
|
|
2019-09-20 22:39:14 +02:00
|
|
|
lookupOffset :: ObjectId -> SParser Offset
|
|
|
|
lookupOffset objectId = do
|
2019-05-24 10:48:09 +02:00
|
|
|
table <- gets xreferences
|
2019-09-20 22:39:14 +02:00
|
|
|
case Map.lookup objectId table >>= entryOffset of
|
2019-05-14 18:42:11 +02:00
|
|
|
Nothing -> fail $
|
|
|
|
"obj " ++ show objectId ++ " is referenced but missing in XRef table"
|
|
|
|
Just offset -> return offset
|
2019-09-20 22:39:14 +02:00
|
|
|
where
|
|
|
|
entryOffset (InUse {offset}) = Just offset
|
|
|
|
entryOffset _ = Nothing
|
2019-05-14 18:42:11 +02:00
|
|
|
|
2019-09-22 01:34:17 +02:00
|
|
|
loadNumber :: ObjectId -> SParser Double
|
2019-05-14 18:42:11 +02:00
|
|
|
loadNumber objectId = do
|
2019-09-20 22:39:14 +02:00
|
|
|
offset <- getOffset <$> lookupOffset objectId
|
2019-05-24 10:48:09 +02:00
|
|
|
objectStart <- BS.drop offset <$> gets input
|
|
|
|
indirectObjCoordinates `on` (objectStart :: ByteString) >> return ()
|
|
|
|
objectValue <- (!objectId) . tmpObjects <$> gets flow
|
2019-05-15 15:03:55 +02:00
|
|
|
case objectValue of
|
2019-05-17 16:14:06 +02:00
|
|
|
Direct (NumberObject (Number n)) -> return n
|
2019-05-15 15:03:55 +02:00
|
|
|
obj -> fail $ "Expected obj@" ++ show offset ++ " (" ++ show obj ++ ") to be a Number"
|
2019-05-14 18:42:11 +02:00
|
|
|
|
|
|
|
invalidValue :: Object -> String
|
|
|
|
invalidValue v = "Invalid value " ++ show v
|
|
|
|
|
2019-09-22 01:34:17 +02:00
|
|
|
getSize :: Maybe DirectObject -> SParser Double
|
2019-05-14 18:42:11 +02:00
|
|
|
getSize Nothing = fail "Missing '/Length' key on stream"
|
2019-05-17 16:14:06 +02:00
|
|
|
getSize (Just (NumberObject (Number size))) = return size
|
2019-05-14 18:42:11 +02:00
|
|
|
getSize (Just (Reference (IndirectObjCoordinates {objectId}))) = do
|
2019-05-24 10:48:09 +02:00
|
|
|
Flow {tmpObjects} <- gets flow
|
2019-05-17 16:14:06 +02:00
|
|
|
case Map.lookup objectId tmpObjects of
|
2019-05-14 18:42:11 +02:00
|
|
|
Nothing -> loadNumber objectId
|
2019-05-17 16:14:06 +02:00
|
|
|
Just (Direct (NumberObject (Number size))) -> return size
|
2019-05-14 18:42:11 +02:00
|
|
|
Just v -> fail $
|
|
|
|
invalidValue v ++ " for obj " ++ show objectId ++ "used as /Length"
|
|
|
|
getSize (Just v) = fail $ invalidValue (Direct v) ++ " for /Length"
|
|
|
|
|
2019-05-15 15:03:55 +02:00
|
|
|
streamObject :: SParser Object
|
2019-05-24 10:48:09 +02:00
|
|
|
streamObject = do
|
2019-05-15 15:03:55 +02:00
|
|
|
header <- dictionary <* blank
|
2019-05-17 16:14:06 +02:00
|
|
|
size <- getSize (Map.lookup (Name "Length") header)
|
2019-05-24 10:48:09 +02:00
|
|
|
streamContent <- stream (truncate size)
|
2019-05-15 15:03:55 +02:00
|
|
|
return $ Stream {header, streamContent}
|
2019-05-14 18:42:11 +02:00
|
|
|
where
|
2019-05-24 10:48:09 +02:00
|
|
|
stream size = line "stream" *> block size <* blank <* line "endstream"
|
2019-05-15 15:03:55 +02:00
|
|
|
|
|
|
|
object :: SParser Object
|
|
|
|
object = streamObject <|> Direct <$> directObject
|
|
|
|
|
|
|
|
indirectObjCoordinates :: SParser IndirectObjCoordinates
|
|
|
|
indirectObjCoordinates = do
|
2019-09-20 22:39:14 +02:00
|
|
|
objectId <- ObjectId <$> integer
|
2019-05-15 15:03:55 +02:00
|
|
|
coordinates <- IndirectObjCoordinates objectId <$> integer
|
|
|
|
objectValue <- line "obj" *> object <* blank <* line "endobj"
|
|
|
|
addObject objectId objectValue
|
|
|
|
return coordinates
|
2019-05-14 18:42:11 +02:00
|
|
|
|
|
|
|
occurrence :: SParser Occurrence
|
2019-05-31 15:06:20 +02:00
|
|
|
occurrence =
|
|
|
|
Comment <$> comment <|> Indirect <$> indirectObjCoordinates <?> "comment or object"
|
2019-05-14 18:42:11 +02:00
|
|
|
|
2020-02-17 15:29:59 +01:00
|
|
|
populate :: ByteString -> InputStructure -> Layer
|
2019-05-17 16:14:06 +02:00
|
|
|
populate input structure =
|
|
|
|
let bodyInput = BS.drop (startOffset structure) input in
|
2019-09-24 18:32:23 +02:00
|
|
|
case evalParser recurseOnOccurrences initialState bodyInput of
|
2020-02-17 15:29:59 +01:00
|
|
|
Left _ -> Layer {occurrences = [], objects = Map.empty, docStructure}
|
2019-05-16 11:01:50 +02:00
|
|
|
Right finalState ->
|
2019-05-17 16:14:06 +02:00
|
|
|
let Flow {occurrencesStack, tmpObjects} = flow finalState in
|
2020-02-17 15:29:59 +01:00
|
|
|
Layer {
|
2019-05-18 09:01:13 +02:00
|
|
|
occurrences = reverse occurrencesStack, objects = tmpObjects, docStructure
|
|
|
|
}
|
2019-05-14 18:42:11 +02:00
|
|
|
where
|
2019-05-18 09:01:13 +02:00
|
|
|
docStructure = inputStructure structure
|
2019-09-20 22:39:14 +02:00
|
|
|
xreferences = xRef docStructure
|
2019-05-17 16:14:06 +02:00
|
|
|
initialState = UserState {
|
2019-05-18 09:01:13 +02:00
|
|
|
input, xreferences, flow = Flow {
|
|
|
|
occurrencesStack = [], tmpObjects = Map.empty
|
|
|
|
}
|
2019-05-17 16:14:06 +02:00
|
|
|
}
|
2019-05-14 18:42:11 +02:00
|
|
|
|
|
|
|
recurseOnOccurrences :: SParser UserState
|
|
|
|
recurseOnOccurrences =
|
2019-05-24 10:48:09 +02:00
|
|
|
(occurrence >>= pushOccurrence >> recurseOnOccurrences) <|> get
|