2019-05-16 17:04:45 +02:00
|
|
|
{-# LANGUAGE OverloadedStrings #-}
|
2019-05-13 18:22:05 +02:00
|
|
|
{-# LANGUAGE NamedFieldPuns #-}
|
2019-05-16 17:04:45 +02:00
|
|
|
{-# LANGUAGE FlexibleInstances #-}
|
2019-05-13 18:22:05 +02:00
|
|
|
module PDF.Object (
|
|
|
|
Content(..)
|
2019-09-23 18:00:47 +02:00
|
|
|
, Dictionary
|
2019-05-13 18:22:05 +02:00
|
|
|
, DirectObject(..)
|
2019-05-17 16:14:06 +02:00
|
|
|
, Flow(..)
|
2019-09-22 01:34:17 +02:00
|
|
|
, IndexedObjects
|
2019-05-14 18:42:11 +02:00
|
|
|
, IndirectObjCoordinates(..)
|
2019-05-18 09:01:13 +02:00
|
|
|
, InputStructure(..)
|
2019-05-17 16:14:06 +02:00
|
|
|
, Name(..)
|
|
|
|
, Number(..)
|
2019-05-13 18:22:05 +02:00
|
|
|
, Object(..)
|
|
|
|
, Occurrence(..)
|
2019-09-25 18:42:34 +02:00
|
|
|
, StringObject(..)
|
2019-05-17 16:14:06 +02:00
|
|
|
, Structure(..)
|
2019-05-13 18:22:05 +02:00
|
|
|
, XRefEntry(..)
|
2019-05-14 18:42:11 +02:00
|
|
|
, XRefSection
|
2019-09-25 18:42:34 +02:00
|
|
|
, array
|
2019-05-15 15:03:55 +02:00
|
|
|
, blank
|
2019-05-14 18:42:11 +02:00
|
|
|
, dictionary
|
|
|
|
, directObject
|
2019-05-16 17:04:45 +02:00
|
|
|
, eofMarker
|
2019-05-14 18:42:11 +02:00
|
|
|
, integer
|
|
|
|
, line
|
2019-05-16 17:04:45 +02:00
|
|
|
, magicNumber
|
2019-09-23 23:19:27 +02:00
|
|
|
, name
|
2019-09-25 18:42:34 +02:00
|
|
|
, number
|
2019-09-23 23:19:27 +02:00
|
|
|
, regular
|
2019-09-25 18:42:34 +02:00
|
|
|
, stringObject
|
2019-05-17 16:14:06 +02:00
|
|
|
, structure
|
2019-10-04 18:46:07 +02:00
|
|
|
, toByteString
|
2019-05-13 18:22:05 +02:00
|
|
|
) where
|
|
|
|
|
2019-09-24 18:36:17 +02:00
|
|
|
import Control.Applicative ((<|>), many)
|
|
|
|
import Data.Attoparsec.ByteString.Char8 (choice, count, option, sepBy)
|
2019-09-27 18:16:12 +02:00
|
|
|
import Data.ByteString (ByteString)
|
2019-10-03 14:43:56 +02:00
|
|
|
import qualified Data.ByteString as BS (concat)
|
2019-09-27 18:16:12 +02:00
|
|
|
import qualified Data.ByteString.Char8 as Char8 (cons, pack, singleton, unpack)
|
2019-10-04 18:46:07 +02:00
|
|
|
import Data.ByteString.Char8.Util (B16Int(..), b16ToBytes, unescape)
|
2019-05-18 09:01:13 +02:00
|
|
|
import Data.Map (Map, (!), mapWithKey)
|
2019-09-20 22:39:14 +02:00
|
|
|
import qualified Data.Map as Map (
|
|
|
|
delete, empty, fromList, lookup, minViewWithKey, toList, union
|
|
|
|
)
|
2019-05-16 17:04:45 +02:00
|
|
|
import qualified PDF.EOL as EOL (charset, parser)
|
2020-02-11 17:35:35 +01:00
|
|
|
import qualified PDF.Output as Output (line, string)
|
2019-05-17 16:14:06 +02:00
|
|
|
import PDF.Output (
|
2019-09-20 22:39:14 +02:00
|
|
|
OBuilder, ObjectId(..), Offset(..), Output(..), Resource(..)
|
|
|
|
, byteString, getObjectId, getOffset, getOffsets, join, newLine
|
|
|
|
, saveOffset
|
2019-05-17 16:14:06 +02:00
|
|
|
)
|
2019-09-24 18:36:17 +02:00
|
|
|
import PDF.Parser (MonadParser(..), Parser, (<?>), octDigit, oneOf)
|
2019-05-16 17:04:45 +02:00
|
|
|
import Text.Printf (printf)
|
2019-05-14 18:42:11 +02:00
|
|
|
|
2019-09-25 18:42:34 +02:00
|
|
|
line :: MonadParser m => String -> m ()
|
2019-11-27 18:05:47 +01:00
|
|
|
line l = (string (Char8.pack l) *> blank *> return ()) <?> printf "line «%s»" l
|
2019-05-17 16:14:06 +02:00
|
|
|
|
2019-05-24 10:48:09 +02:00
|
|
|
magicNumber :: ByteString
|
2019-05-17 16:14:06 +02:00
|
|
|
magicNumber = "%PDF-"
|
|
|
|
|
|
|
|
eofMarker :: ByteString
|
|
|
|
eofMarker = "%%EOF"
|
|
|
|
|
|
|
|
whiteSpaceCharset :: String
|
|
|
|
whiteSpaceCharset = "\0\t\12 "
|
|
|
|
|
2019-09-24 18:36:17 +02:00
|
|
|
blank :: MonadParser m => m ()
|
|
|
|
blank = takeAll (`elem` (EOL.charset ++ whiteSpaceCharset)) *> pure ()
|
2019-05-17 16:14:06 +02:00
|
|
|
|
|
|
|
delimiterCharset :: String
|
|
|
|
delimiterCharset = "()<>[]{}/%"
|
|
|
|
|
2019-05-24 10:48:09 +02:00
|
|
|
regular :: Char -> Bool
|
|
|
|
regular = not . (`elem` (EOL.charset ++ whiteSpaceCharset ++ delimiterCharset))
|
2019-05-17 16:14:06 +02:00
|
|
|
|
2019-09-25 18:42:34 +02:00
|
|
|
integer :: (Read a, Num a, MonadParser m) => m a
|
2019-09-27 18:16:12 +02:00
|
|
|
integer = read . Char8.unpack <$> decNumber <* blank <?> "decimal integer"
|
2019-05-13 18:22:05 +02:00
|
|
|
|
2019-05-17 16:14:06 +02:00
|
|
|
-------------------------------------
|
|
|
|
-- OBJECTS
|
|
|
|
-------------------------------------
|
|
|
|
|
2019-09-20 22:39:14 +02:00
|
|
|
type IndexedObjects = Map ObjectId Object
|
|
|
|
|
2019-05-17 16:14:06 +02:00
|
|
|
--
|
|
|
|
-- Boolean
|
|
|
|
--
|
2019-09-25 18:42:34 +02:00
|
|
|
boolean :: MonadParser m => m Bool
|
2019-05-24 10:48:09 +02:00
|
|
|
boolean =
|
|
|
|
(string "true" *> return True) <|> (string "false" *> return False) <?> "boolean"
|
2019-05-17 16:14:06 +02:00
|
|
|
|
|
|
|
--
|
|
|
|
-- Number
|
|
|
|
--
|
2019-09-22 01:34:17 +02:00
|
|
|
newtype Number = Number Double deriving Show
|
2019-05-17 16:14:06 +02:00
|
|
|
|
|
|
|
instance Output Number where
|
|
|
|
output (Number f) = Output.string $
|
|
|
|
case properFraction f of
|
|
|
|
(n, 0) -> printf "%d" (n :: Int)
|
|
|
|
_ -> printf "%f" f
|
|
|
|
|
2019-09-25 18:42:34 +02:00
|
|
|
number :: MonadParser m => m Number
|
2019-09-27 18:16:12 +02:00
|
|
|
number = Number . read . Char8.unpack <$>
|
|
|
|
(mappend <$> sign <*> (integerPart <|> Char8.cons '0' <$> floatPart))
|
2019-05-24 10:48:09 +02:00
|
|
|
<?> "number"
|
2019-05-17 16:14:06 +02:00
|
|
|
where
|
|
|
|
sign = string "-" <|> option "" (char '+' >> return "")
|
2019-05-24 10:48:09 +02:00
|
|
|
integerPart = mappend <$> decNumber <*> option "" floatPart
|
2019-09-27 18:16:12 +02:00
|
|
|
floatPart = Char8.cons <$> char '.' <*> (option "0" $ decNumber)
|
2019-05-17 16:14:06 +02:00
|
|
|
|
|
|
|
--
|
|
|
|
-- StringObject
|
|
|
|
--
|
2019-10-04 18:46:07 +02:00
|
|
|
data StringObject = Literal ByteString | Hexadecimal B16Int deriving Show
|
2019-05-17 16:14:06 +02:00
|
|
|
|
|
|
|
instance Output StringObject where
|
2019-09-30 14:13:12 +02:00
|
|
|
output (Literal s) = Output.string (printf "(%s)" (Char8.unpack s))
|
2019-10-04 18:46:07 +02:00
|
|
|
output (Hexadecimal (B16Int n)) = Output.string (printf "<%s>" (Char8.unpack n))
|
2019-05-17 16:14:06 +02:00
|
|
|
|
2019-09-25 18:42:34 +02:00
|
|
|
stringObject :: MonadParser m => m StringObject
|
2019-05-17 16:14:06 +02:00
|
|
|
stringObject =
|
2019-09-30 14:13:12 +02:00
|
|
|
Literal <$> (char '(' *> (BS.concat <$> literalString) <* char ')')
|
|
|
|
<|> Hexadecimal <$> (char '<' *> hexNumber <* char '>')
|
2019-05-24 10:48:09 +02:00
|
|
|
<?> "string object (literal or hexadecimal)"
|
2019-05-17 16:14:06 +02:00
|
|
|
where
|
2019-05-24 10:48:09 +02:00
|
|
|
literalString = many literalStringBlock
|
|
|
|
literalStringBlock = takeAll1 normalChar <|> matchingParenthesis <|> escapedChar
|
|
|
|
normalChar = not . (`elem` ("\\()" :: String))
|
2019-05-17 16:14:06 +02:00
|
|
|
matchingParenthesis =
|
2019-09-27 18:16:12 +02:00
|
|
|
mappend <$> (Char8.cons <$> char '(' <*> literalStringBlock) <*> string ")"
|
2019-05-24 10:48:09 +02:00
|
|
|
escapedChar =
|
2019-10-04 18:46:07 +02:00
|
|
|
Char8.cons <$> char '\\' <*> (Char8.singleton <$> oneOf "nrtbf()\\\n" <|> octalCode)
|
2019-09-27 18:16:12 +02:00
|
|
|
octalCode = choice $ (\n -> Char8.pack <$> count n octDigit) <$> [1..3]
|
|
|
|
|
2019-10-04 18:46:07 +02:00
|
|
|
toByteString :: StringObject -> ByteString
|
|
|
|
toByteString (Hexadecimal h) = b16ToBytes h
|
|
|
|
toByteString (Literal s) = unescape s
|
|
|
|
|
2019-05-17 16:14:06 +02:00
|
|
|
--
|
|
|
|
-- Name
|
|
|
|
--
|
|
|
|
newtype Name = Name String deriving (Eq, Ord, Show)
|
|
|
|
|
|
|
|
instance Output Name where
|
2019-09-20 22:39:14 +02:00
|
|
|
output (Name n) = Output.string ('/':n)
|
2019-05-17 16:14:06 +02:00
|
|
|
|
2019-09-24 18:36:17 +02:00
|
|
|
name :: MonadParser m => m Name
|
2019-09-27 18:16:12 +02:00
|
|
|
name = Name . Char8.unpack <$> (char '/' *> takeAll regular) <?> "name"
|
2019-05-17 16:14:06 +02:00
|
|
|
|
|
|
|
--
|
|
|
|
-- Array
|
|
|
|
--
|
2019-09-25 18:42:34 +02:00
|
|
|
array :: MonadParser m => m [DirectObject]
|
2019-05-24 10:48:09 +02:00
|
|
|
array =
|
|
|
|
char '[' *> blank *> directObject `sepBy` blank <* blank <* char ']' <?> "array"
|
2019-05-17 16:14:06 +02:00
|
|
|
|
|
|
|
--
|
|
|
|
-- Dictionary
|
|
|
|
--
|
|
|
|
type Dictionary = Map Name DirectObject
|
|
|
|
|
|
|
|
instance Output Dictionary where
|
2020-02-11 08:29:08 +01:00
|
|
|
output aDictionary =
|
2019-05-16 17:04:45 +02:00
|
|
|
"<<" `mappend` keyValues `mappend` ">>"
|
|
|
|
where
|
2020-02-11 08:29:08 +01:00
|
|
|
keyValues = join " " $ outputKeyVal <$> Map.toList aDictionary
|
2019-05-17 16:14:06 +02:00
|
|
|
outputKeyVal :: (Name, DirectObject) -> OBuilder
|
2020-02-11 17:35:35 +01:00
|
|
|
outputKeyVal (key, val) = mconcat [output key, " ", output val]
|
2019-05-16 17:04:45 +02:00
|
|
|
|
2019-09-25 18:42:34 +02:00
|
|
|
dictionary :: MonadParser m => m Dictionary
|
2019-05-17 16:14:06 +02:00
|
|
|
dictionary =
|
2019-05-24 10:48:09 +02:00
|
|
|
string "<<" *> blank *> keyValPairs <* string ">>" <?> "dictionary"
|
2019-05-17 16:14:06 +02:00
|
|
|
where
|
|
|
|
keyVal = (,) <$> name <* blank <*> directObject
|
2019-05-24 10:48:09 +02:00
|
|
|
keyValPairs = Map.fromList <$> keyVal `sepBy` blank <* blank
|
2019-05-13 18:22:05 +02:00
|
|
|
|
2019-05-17 16:14:06 +02:00
|
|
|
--
|
|
|
|
-- Null
|
|
|
|
--
|
2019-09-25 18:42:34 +02:00
|
|
|
nullObject :: MonadParser m => m ()
|
2019-05-24 10:48:09 +02:00
|
|
|
nullObject = string "null" *> return () <?> "null object"
|
2019-05-16 17:04:45 +02:00
|
|
|
|
2019-05-17 16:14:06 +02:00
|
|
|
--
|
|
|
|
-- Reference
|
|
|
|
--
|
|
|
|
data IndirectObjCoordinates = IndirectObjCoordinates {
|
2019-09-20 22:39:14 +02:00
|
|
|
objectId :: ObjectId
|
2019-05-17 16:14:06 +02:00
|
|
|
, versionNumber :: Int
|
|
|
|
} deriving Show
|
|
|
|
|
2019-09-25 18:42:34 +02:00
|
|
|
reference :: MonadParser m => m IndirectObjCoordinates
|
2019-09-20 22:39:14 +02:00
|
|
|
reference = IndirectObjCoordinates
|
|
|
|
<$> (fmap ObjectId integer) <*> integer <* char 'R' <?> "reference to an object"
|
2019-05-17 16:14:06 +02:00
|
|
|
|
|
|
|
--
|
|
|
|
-- DirectObject
|
|
|
|
--
|
2019-05-13 18:22:05 +02:00
|
|
|
data DirectObject =
|
|
|
|
Boolean Bool
|
2019-05-17 16:14:06 +02:00
|
|
|
| NumberObject Number
|
|
|
|
| StringObject StringObject
|
|
|
|
| NameObject Name
|
2019-05-13 18:22:05 +02:00
|
|
|
| Array [DirectObject]
|
|
|
|
| Dictionary Dictionary
|
|
|
|
| Null
|
2019-05-14 18:42:11 +02:00
|
|
|
| Reference IndirectObjCoordinates
|
2019-05-13 18:22:05 +02:00
|
|
|
deriving Show
|
|
|
|
|
2019-05-16 17:04:45 +02:00
|
|
|
instance Output DirectObject where
|
|
|
|
output (Boolean b) = output b
|
2019-05-17 16:14:06 +02:00
|
|
|
output (NumberObject n) = output n
|
|
|
|
output (StringObject s) = output s
|
|
|
|
output (NameObject n) = output n
|
2020-02-11 17:35:35 +01:00
|
|
|
output (Array a) = mconcat ["[", join " " a, "]"]
|
2019-05-16 17:04:45 +02:00
|
|
|
output (Dictionary d) = output d
|
|
|
|
output (Null) = "null"
|
|
|
|
output (Reference (IndirectObjCoordinates {objectId, versionNumber})) =
|
2019-09-20 22:39:14 +02:00
|
|
|
Output.string (printf "%d %d R" (getObjectId objectId) versionNumber)
|
2019-05-16 17:04:45 +02:00
|
|
|
|
2019-09-25 18:42:34 +02:00
|
|
|
directObject :: MonadParser m => m DirectObject
|
2020-02-12 17:34:27 +01:00
|
|
|
directObject = (peek >>= dispatch) <?> "direct object"
|
|
|
|
where
|
|
|
|
dispatch 't' = Boolean <$> boolean
|
|
|
|
dispatch 'f' = Boolean <$> boolean
|
|
|
|
dispatch '(' = StringObject <$> stringObject
|
|
|
|
dispatch '<' = StringObject <$> stringObject <|> Dictionary <$> dictionary
|
|
|
|
dispatch '/' = NameObject <$> name
|
|
|
|
dispatch '[' = Array <$> array
|
|
|
|
dispatch 'n' = nullObject *> return Null
|
|
|
|
dispatch _ =
|
|
|
|
Reference <$> reference {- defined before Number because Number is a prefix of it -}
|
|
|
|
<|> NumberObject <$> number
|
2019-05-17 16:14:06 +02:00
|
|
|
|
|
|
|
--
|
|
|
|
-- Object
|
|
|
|
--
|
2019-05-13 18:22:05 +02:00
|
|
|
data Object =
|
|
|
|
Direct DirectObject
|
|
|
|
| Stream {
|
|
|
|
header :: Dictionary
|
|
|
|
, streamContent :: ByteString
|
|
|
|
}
|
|
|
|
deriving Show
|
|
|
|
|
2019-05-16 17:04:45 +02:00
|
|
|
instance Output Object where
|
|
|
|
output (Direct d) = output d
|
2020-02-11 17:35:35 +01:00
|
|
|
output (Stream {header, streamContent}) = mconcat [
|
2019-09-20 22:39:14 +02:00
|
|
|
output header, newLine
|
|
|
|
, Output.line "stream"
|
|
|
|
, byteString streamContent
|
|
|
|
, "endstream"
|
|
|
|
]
|
2019-05-16 17:04:45 +02:00
|
|
|
|
2019-05-17 16:14:06 +02:00
|
|
|
--
|
|
|
|
-- Occurrence
|
|
|
|
--
|
2019-05-14 18:42:11 +02:00
|
|
|
data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show
|
2019-05-13 18:22:05 +02:00
|
|
|
|
2019-09-20 22:39:14 +02:00
|
|
|
outputOccurrence :: IndexedObjects -> Occurrence -> OBuilder
|
|
|
|
outputOccurrence _ (Comment c) = Output.line c
|
2019-05-16 22:41:14 +02:00
|
|
|
outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) =
|
2020-02-11 17:35:35 +01:00
|
|
|
saveOffset (Object objectId) >> mconcat [
|
2019-09-20 22:39:14 +02:00
|
|
|
Output.line (printf "%d %d obj" (getObjectId objectId) versionNumber)
|
|
|
|
, output (objects ! objectId), newLine
|
|
|
|
, Output.line "endobj"
|
|
|
|
]
|
2019-05-16 22:41:14 +02:00
|
|
|
|
2019-05-17 16:14:06 +02:00
|
|
|
-------------------------------------
|
|
|
|
-- XREF TABLE
|
|
|
|
-------------------------------------
|
|
|
|
|
|
|
|
--
|
|
|
|
-- XRefEntry
|
|
|
|
--
|
2019-05-13 18:22:05 +02:00
|
|
|
data XRefEntry = InUse {
|
2019-09-20 22:39:14 +02:00
|
|
|
offset :: Offset
|
2019-05-13 18:22:05 +02:00
|
|
|
, generation :: Int
|
|
|
|
} | Free {
|
2019-09-20 22:39:14 +02:00
|
|
|
nextFree :: ObjectId
|
2019-05-13 18:22:05 +02:00
|
|
|
, generation :: Int
|
|
|
|
} deriving Show
|
|
|
|
|
2019-05-16 17:04:45 +02:00
|
|
|
instance Output XRefEntry where
|
|
|
|
output (InUse {offset, generation}) =
|
2019-09-20 22:39:14 +02:00
|
|
|
Output.line (printf "%010d %05d n " (getOffset offset) generation)
|
2019-05-16 17:04:45 +02:00
|
|
|
output (Free {nextFree, generation}) =
|
2019-09-20 22:39:14 +02:00
|
|
|
Output.line (printf "%010d %05d f " (getObjectId nextFree) generation)
|
2019-05-16 17:04:45 +02:00
|
|
|
|
2019-05-17 16:14:06 +02:00
|
|
|
entry :: Parser u XRefEntry
|
|
|
|
entry = do
|
|
|
|
(big, small) <- (,) <$> integer <*> integer
|
2019-05-24 10:48:09 +02:00
|
|
|
(inUse big small <|> free big small <?> "XRef entry") <* blank
|
2019-05-17 16:14:06 +02:00
|
|
|
where
|
2019-05-24 10:48:09 +02:00
|
|
|
inUse :: Int -> Int -> Parser u XRefEntry
|
2019-09-20 22:39:14 +02:00
|
|
|
inUse big generation =
|
|
|
|
char 'n' *> return (InUse {offset = Offset big, generation})
|
2019-05-24 10:48:09 +02:00
|
|
|
free :: Int -> Int -> Parser u XRefEntry
|
2019-09-20 22:39:14 +02:00
|
|
|
free big generation =
|
|
|
|
char 'f' *> return (Free {nextFree = ObjectId big, generation})
|
2019-05-17 16:14:06 +02:00
|
|
|
|
|
|
|
--
|
|
|
|
-- XRefSubSection
|
|
|
|
--
|
2019-05-13 18:22:05 +02:00
|
|
|
data XRefSubSection = XRefSubSection {
|
2019-09-20 22:39:14 +02:00
|
|
|
firstObjectId :: ObjectId
|
|
|
|
, entries :: [XRefEntry]
|
2019-05-13 18:22:05 +02:00
|
|
|
} deriving Show
|
|
|
|
|
2019-05-16 17:04:45 +02:00
|
|
|
instance Output XRefSubSection where
|
2019-09-20 22:39:14 +02:00
|
|
|
output (XRefSubSection {firstObjectId, entries}) =
|
|
|
|
Output.line (printf "%d %d" (getObjectId firstObjectId) (length entries))
|
|
|
|
`mappend` output entries
|
|
|
|
|
|
|
|
xRefSubSection :: Parser u XRefSubSection
|
|
|
|
xRefSubSection = do
|
|
|
|
(firstId, entriesNumber) <- (,) <$> integer <*> integer <?> "XRef subsection"
|
|
|
|
entries <- count entriesNumber entry
|
|
|
|
return $ XRefSubSection {firstObjectId = ObjectId firstId, entries}
|
2019-05-16 17:04:45 +02:00
|
|
|
|
2019-09-20 22:39:14 +02:00
|
|
|
type XRefSection = Map ObjectId XRefEntry
|
2019-05-17 16:14:06 +02:00
|
|
|
|
2019-09-20 22:39:14 +02:00
|
|
|
instance Output XRefSection where
|
|
|
|
output = output . sections
|
|
|
|
where
|
|
|
|
sections tmp =
|
|
|
|
case Map.minViewWithKey tmp of
|
|
|
|
Nothing -> []
|
|
|
|
Just ((objectId@(ObjectId value), firstEntry), rest) ->
|
|
|
|
let (subSection, sndRest) = section objectId [firstEntry] (value + 1) rest in
|
|
|
|
subSection : sections sndRest
|
|
|
|
section firstObjectId stack nextValue tmp =
|
|
|
|
let nextId = ObjectId nextValue in
|
|
|
|
case Map.lookup nextId tmp of
|
|
|
|
Nothing -> (XRefSubSection {firstObjectId, entries = reverse stack}, tmp)
|
|
|
|
Just nextEntry ->
|
|
|
|
section firstObjectId (nextEntry:stack) (nextValue + 1) (Map.delete nextId tmp)
|
|
|
|
|
|
|
|
|
|
|
|
xRefSection :: Parser u XRefSection
|
|
|
|
xRefSection = foldr addSubsection Map.empty <$>
|
2019-09-22 01:37:28 +02:00
|
|
|
(line "xref" *> xRefSubSection `sepBy` many EOL.parser)
|
2019-09-20 22:39:14 +02:00
|
|
|
where
|
|
|
|
addSubsection (XRefSubSection {firstObjectId, entries}) =
|
|
|
|
Map.union . Map.fromList $ zip ([firstObjectId..]) entries
|
2019-05-14 18:42:11 +02:00
|
|
|
|
2019-05-17 16:14:06 +02:00
|
|
|
--
|
|
|
|
-- Structure
|
|
|
|
--
|
2019-05-18 09:01:13 +02:00
|
|
|
data InputStructure = InputStructure {
|
2019-05-24 10:48:09 +02:00
|
|
|
startOffset :: Int
|
2019-05-18 09:01:13 +02:00
|
|
|
, inputStructure :: Structure
|
|
|
|
}
|
|
|
|
|
|
|
|
data Structure = Structure {
|
2019-09-20 22:39:14 +02:00
|
|
|
xRef :: XRefSection
|
2019-05-18 09:01:13 +02:00
|
|
|
, trailer :: Dictionary
|
2019-05-13 18:22:05 +02:00
|
|
|
} deriving Show
|
|
|
|
|
2019-05-17 16:14:06 +02:00
|
|
|
structure :: Parser u Structure
|
|
|
|
structure =
|
2019-05-18 09:01:13 +02:00
|
|
|
Structure
|
2019-09-20 22:39:14 +02:00
|
|
|
<$> xRefSection
|
2019-05-31 15:08:54 +02:00
|
|
|
<*> (string "trailer" *> blank *> dictionary <* EOL.parser)
|
2019-05-17 16:14:06 +02:00
|
|
|
|
2019-09-20 22:39:14 +02:00
|
|
|
updateXrefs :: XRefSection -> Map Resource Offset -> (XRefSection, Offset)
|
|
|
|
updateXrefs xRef offsets = (mapWithKey updateEntry xRef, offsets ! StartXRef)
|
2019-05-18 09:01:13 +02:00
|
|
|
where
|
2019-09-20 22:39:14 +02:00
|
|
|
updateEntry objectId e@(InUse {}) = e {offset = offsets ! (Object objectId)}
|
|
|
|
updateEntry _ e = e
|
2019-05-17 16:14:06 +02:00
|
|
|
|
|
|
|
--
|
|
|
|
-- Flow
|
|
|
|
--
|
|
|
|
data Flow = Flow {
|
|
|
|
occurrencesStack :: [Occurrence]
|
2019-09-20 22:39:14 +02:00
|
|
|
, tmpObjects :: IndexedObjects
|
2019-05-17 16:14:06 +02:00
|
|
|
} deriving Show
|
2019-05-13 18:22:05 +02:00
|
|
|
|
2019-05-17 16:14:06 +02:00
|
|
|
--
|
|
|
|
-- Content
|
|
|
|
--
|
|
|
|
data Content = Content {
|
|
|
|
occurrences :: [Occurrence]
|
2019-09-20 22:39:14 +02:00
|
|
|
, objects :: IndexedObjects
|
2019-05-18 09:01:13 +02:00
|
|
|
, docStructure :: Structure
|
2019-05-17 16:14:06 +02:00
|
|
|
} deriving Show
|
2019-05-13 18:22:05 +02:00
|
|
|
|
2019-09-20 22:39:14 +02:00
|
|
|
outputBody :: ([Occurrence], IndexedObjects) -> OBuilder
|
2019-05-17 16:14:06 +02:00
|
|
|
outputBody (occurrences, objects) =
|
|
|
|
output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef
|
2019-05-13 18:22:05 +02:00
|
|
|
|
2019-05-17 16:14:06 +02:00
|
|
|
instance Output Content where
|
2019-05-18 09:01:13 +02:00
|
|
|
output (Content {occurrences, objects, docStructure}) =
|
2019-09-20 22:39:14 +02:00
|
|
|
fmap (updateXrefs xRef) <$> getOffsets (outputBody (occurrences, objects))
|
2020-02-11 17:35:35 +01:00
|
|
|
>>= \(body, (xref, startXRef)) -> mconcat [
|
2019-09-20 22:39:14 +02:00
|
|
|
body
|
|
|
|
, Output.line "xref"
|
|
|
|
, output xref
|
|
|
|
, Output.line "trailer"
|
|
|
|
, output trailer, newLine
|
|
|
|
, Output.line "startxref"
|
|
|
|
, Output.line (printf "%d" (getOffset startXRef))
|
|
|
|
, byteString eofMarker
|
|
|
|
]
|
2019-05-18 09:01:13 +02:00
|
|
|
where
|
2019-09-20 22:39:14 +02:00
|
|
|
Structure {xRef, trailer} = docStructure
|