2019-05-13 18:22:05 +02:00
|
|
|
|
{-# LANGUAGE NamedFieldPuns #-}
|
|
|
|
|
module PDF.Object (
|
|
|
|
|
Content(..)
|
|
|
|
|
, DirectObject(..)
|
|
|
|
|
, EOLStyle(..)
|
2019-05-14 18:42:11 +02:00
|
|
|
|
, IndirectObjCoordinates(..)
|
2019-05-13 18:22:05 +02:00
|
|
|
|
, Object(..)
|
|
|
|
|
, Occurrence(..)
|
2019-05-15 09:04:17 +02:00
|
|
|
|
, Parser
|
2019-05-13 18:22:05 +02:00
|
|
|
|
, XRefEntry(..)
|
2019-05-14 18:42:11 +02:00
|
|
|
|
, XRefSection
|
2019-05-13 18:22:05 +02:00
|
|
|
|
, XRefSubSection(..)
|
|
|
|
|
, content
|
2019-05-14 18:42:11 +02:00
|
|
|
|
, dictionary
|
|
|
|
|
, directObject
|
2019-05-13 18:22:05 +02:00
|
|
|
|
, eol
|
|
|
|
|
, eolCharset
|
2019-05-14 18:42:11 +02:00
|
|
|
|
, integer
|
|
|
|
|
, line
|
2019-05-13 18:22:05 +02:00
|
|
|
|
) where
|
|
|
|
|
|
|
|
|
|
import Data.ByteString.Lazy.Char8 (ByteString)
|
|
|
|
|
import Data.Int (Int64)
|
|
|
|
|
import Data.Map (Map)
|
2019-05-14 18:42:11 +02:00
|
|
|
|
import qualified Data.Map as Map (empty, fromList)
|
2019-05-13 18:22:05 +02:00
|
|
|
|
import Text.Parsec
|
2019-05-14 18:42:11 +02:00
|
|
|
|
--import Text.Parsec.ByteString.Lazy (Parser)
|
|
|
|
|
|
|
|
|
|
-- | Parsec parser reading a lazy 'ByteString' and carrying a user state of
-- type @u@.
type Parser u = Parsec ByteString u
|
2019-05-13 18:22:05 +02:00
|
|
|
|
|
|
|
|
|
-- | The end-of-line markers distinguished by 'eol': a lone carriage return
-- ('CR'), a lone line feed ('LF'), or a carriage return followed by a line
-- feed ('CRLF').
data EOLStyle = CR | LF | CRLF
|
|
|
|
|
|
|
|
|
|
-- | A dictionary object: a mapping from names to the direct objects bound to
-- them.
type Dictionary = Map String DirectObject
|
|
|
|
|
|
|
|
|
|
-- | A string object, written either as a literal between parentheses
-- ('Literal') or as hexadecimal digits between angle brackets
-- ('Hexadecimal').
data StringObj = Literal String | Hexadecimal String deriving Show
|
|
|
|
|
|
|
|
|
|
-- | Every kind of object that can be written inline, i.e. anything
-- 'directObject' is able to produce.
data DirectObject
  = Boolean Bool
    -- ^ @true@ or @false@
  | Number Float
    -- ^ an integer or real number, always stored as a 'Float'
  | String StringObj
    -- ^ a literal or hexadecimal string
  | Name String
    -- ^ a name, stored without its leading slash
  | Array [DirectObject]
    -- ^ a bracketed sequence of direct objects
  | Dictionary Dictionary
    -- ^ a @<< ... >>@ dictionary
  | Null
    -- ^ the @null@ keyword
  | Reference IndirectObjCoordinates
    -- ^ a reference to an indirect object (two integers followed by @R@)
  deriving Show
|
|
|
|
|
|
|
|
|
|
-- | A top-level object: either a plain direct object or a stream made of a
-- header dictionary and its raw content.
data Object
  = Direct DirectObject
  | Stream
      { header :: Dictionary
        -- ^ dictionary attached to the stream
      , streamContent :: ByteString
        -- ^ the stream's bytes
      }
  deriving Show
|
|
|
|
|
|
2019-05-14 18:42:11 +02:00
|
|
|
|
-- | The pair of numbers designating an indirect object.
data IndirectObjCoordinates = IndirectObjCoordinates
  { objectId :: Int
    -- ^ first number of the pair
  , versionNumber :: Int
    -- ^ second number of the pair
  }
  deriving Show
|
|
|
|
|
|
|
|
|
|
-- | One item of a document body: either a comment or the coordinates of an
-- indirect object.
data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show
|
2019-05-13 18:22:05 +02:00
|
|
|
|
|
|
|
|
|
-- | One entry of a cross-reference subsection, as built by the @n@ / @f@
-- marker that ends the entry.
data XRefEntry
  = InUse
      { offset :: Int64
        -- ^ first number of an @n@ entry
      , generation :: Int
        -- ^ second number of the entry
      }
  | Free
      { nextFree :: Int64
        -- ^ first number of an @f@ entry
      , generation :: Int
        -- ^ second number of the entry
      }
  deriving Show
|
|
|
|
|
|
|
|
|
|
-- | A cross-reference subsection: its two header numbers and the entries that
-- follow, keyed by consecutive object ids starting at 'firstObjectId'.
data XRefSubSection = XRefSubSection
  { firstObjectId :: Int
    -- ^ id given to the first entry
  , entriesNumber :: Int
    -- ^ number of entries announced by the header
  , entries :: Map Int XRefEntry
    -- ^ the entries, indexed by object id
  }
  deriving Show
|
|
|
|
|
|
2019-05-14 18:42:11 +02:00
|
|
|
|
-- | A cross-reference section, i.e. the list of its subsections.
type XRefSection = [XRefSubSection]
|
|
|
|
|
|
2019-05-13 18:22:05 +02:00
|
|
|
|
-- | Everything gathered about a document: its body, its objects, and the
-- cross-reference section, trailer and @startxref@ value read by 'content'.
data Content = Content
  { body :: [Occurrence]
    -- ^ occurrences found in the body
  , objects :: Map Int Object
    -- ^ objects indexed by an 'Int' key
  , xrefSection :: XRefSection
    -- ^ the cross-reference section
  , trailer :: Dictionary
    -- ^ the trailer dictionary
  , startXrefPosition :: Int64
    -- ^ the number following the @startxref@ keyword
  }
  deriving Show
|
|
|
|
|
|
|
|
|
|
-- | The characters that can take part in an end-of-line marker: carriage
-- return and line feed.
eolCharset :: String
eolCharset = ['\r', '\n']
|
|
|
|
|
|
2019-05-14 18:42:11 +02:00
|
|
|
|
-- | Parse one end-of-line marker and report which style was met.
--
-- The two-character form is attempted first, under 'try', so that a lone
-- carriage return is still recognised when no line feed follows it.
eol :: Parser u EOLStyle
eol = crlf <|> cr <|> lf
  where
    crlf = CRLF <$ try (string "\r\n")
    cr = CR <$ string "\r"
    lf = LF <$ string "\n"
|
|
|
|
|
|
2019-05-14 18:42:11 +02:00
|
|
|
|
-- | Parse the exact text @l@ immediately followed by an end-of-line marker,
-- discarding both.
line :: String -> Parser u ()
line l = () <$ (string l *> eol)
|
|
|
|
|
|
|
|
|
|
-- | White-space characters other than the end-of-line ones: NUL, horizontal
-- tab, form feed and space.
whiteSpaceCharset :: String
whiteSpaceCharset = ['\NUL', '\t', '\FF', ' ']
|
|
|
|
|
|
2019-05-14 18:42:11 +02:00
|
|
|
|
-- | Consume exactly one white-space item: either a single character from
-- 'whiteSpaceCharset' or one end-of-line marker.
whiteSpace :: Parser u ()
whiteSpace = plainSpace <|> lineBreak
  where
    plainSpace = () <$ oneOf whiteSpaceCharset
    lineBreak = () <$ eol
|
|
|
|
|
|
2019-05-14 18:42:11 +02:00
|
|
|
|
-- | Skip any amount of white space, possibly none.
blank :: Parser u ()
blank = () <$ many whiteSpace
|
|
|
|
|
|
|
|
|
|
-- | Delimiter characters: the four bracket pairs, the slash and the percent
-- sign.
delimiterCharset :: String
delimiterCharset = concat ["()", "<>", "[]", "{}", "/%"]
|
|
|
|
|
|
|
|
|
|
{-
|
2019-05-14 18:42:11 +02:00
|
|
|
|
delimiter :: Parser u Char
|
2019-05-13 18:22:05 +02:00
|
|
|
|
delimiter = oneOf delimiterCharset
|
|
|
|
|
-}
|
|
|
|
|
|
2019-05-14 18:42:11 +02:00
|
|
|
|
-- | Parse one regular character, i.e. any character that is neither an
-- end-of-line character, nor white space, nor a delimiter.
regular :: Parser u Char
regular = noneOf excluded
  where
    excluded = concat [eolCharset, whiteSpaceCharset, delimiterCharset]
|
|
|
|
|
|
2019-05-14 18:42:11 +02:00
|
|
|
|
-- | Parse an unsigned run of decimal digits followed by exactly one
-- white-space item (which is consumed), and convert the digits with 'read'.
--
-- 'read' cannot fail on syntax here because 'many1' 'digit' only yields
-- non-empty strings of digits.
integer :: (Read a, Num a) => Parser u a
integer = do
  digits <- many1 digit
  whiteSpace
  return (read digits)
|
|
|
|
|
|
2019-05-14 18:42:11 +02:00
|
|
|
|
-- | Parse any direct object.
--
-- Alternatives are tried in order, each under 'try' so that a failed attempt
-- gives back the input it consumed.
directObject :: Parser u DirectObject
directObject =
  choice
    [ Boolean <$> try boolean
    , -- a reference starts like a number, so it has to be attempted first
      Reference <$> try reference
    , Number <$> try number
    , String <$> try stringObj
    , Name <$> try name
    , Array <$> try array
    , Dictionary <$> try dictionary
    , Null <$ try nullObject
    ]
|
|
|
|
|
|
2019-05-14 18:42:11 +02:00
|
|
|
|
-- | Parse the keyword @true@ or @false@.
boolean :: Parser u Bool
boolean = (True <$ string "true") <|> (False <$ string "false")
|
|
|
|
|
|
2019-05-14 18:42:11 +02:00
|
|
|
|
-- | Parse a number with an optional sign, an optional integer part and an
-- optional fractional part (at least one of the last two being present).
--
-- The text is normalised before being handed to 'read': a leading @+@ is
-- dropped, a missing integer part becomes @0@ and a dot with no digits after
-- it is completed with @0@.
number :: Parser u Float
number = do
  prefix <- sign
  digits <- integerPart <|> (('0' :) <$> floatPart)
  return (read (prefix ++ digits))
  where
    -- "-" is kept as is; "+" (or no sign at all) yields the empty string
    sign = string "-" <|> option "" ("" <$ char '+')
    integerPart = (++) <$> many1 digit <*> option "" floatPart
    floatPart = (:) <$> char '.' <*> option "0" (many1 digit)
|
|
|
|
|
|
2019-05-14 18:42:11 +02:00
|
|
|
|
-- | Parse a string object, either literal (@(...)@) or hexadecimal (@<...>@).
--
-- The text of a literal string is kept raw: escape sequences retain their
-- backslash and balanced nested parentheses are kept in the result.
stringObj :: Parser u StringObj
stringObj =
      Literal <$> (char '(' *> literalString <* char ')')
  <|> Hexadecimal <$> (char '<' *> many hexDigit <* char '>')
  where
    -- Content found between a pair of matching parentheses.
    literalString = concat <$> many literalStringBlock
    -- Each block must consume input ('many1'), otherwise 'many' would be
    -- applied to a parser accepting the empty string, which Parsec rejects at
    -- run time. Both parentheses are excluded from the plain run so that
    -- nesting and the closing parenthesis are left to the dedicated parsers.
    literalStringBlock =
      many1 (noneOf "\\()") <|> matchingParenthesis <|> escapedChar
    -- A balanced, possibly nested, pair of parentheses kept verbatim.
    matchingParenthesis =
      (\open inner close -> open : inner ++ [close])
        <$> char '(' <*> literalString <*> char ')'
    escapedChar =
      (:) <$> char '\\' <*> (((: []) <$> oneOf "nrtbf()\\") <|> octalCode)
    -- Longest match first, so that up to three octal digits form one code.
    octalCode = choice [try (count n octDigit) | n <- [3, 2, 1]]
|
|
|
|
|
|
2019-05-14 18:42:11 +02:00
|
|
|
|
-- | Parse a name: a slash followed by any number of regular characters. The
-- slash is not part of the result.
name :: Parser u String
name = do
  _ <- char '/'
  many regular
|
|
|
|
|
|
2019-05-14 18:42:11 +02:00
|
|
|
|
-- | Parse an array: direct objects enclosed in square brackets, each one
-- followed by optional white space.
array :: Parser u [DirectObject]
array = between opening closing elements
  where
    opening = char '[' *> blank
    closing = char ']'
    elements = directObject `endBy` blank
|
|
|
|
|
|
2019-05-14 18:42:11 +02:00
|
|
|
|
-- | Parse a dictionary: name / direct-object pairs enclosed in @<<@ and @>>@.
--
-- The whole parser runs under 'try', so a failure gives back all the input it
-- consumed.
dictionary :: Parser u Dictionary
dictionary = try (between opening closing keyValPairs)
  where
    opening = string "<<" *> blank
    closing = string ">>"
    keyValPairs = Map.fromList <$> (keyVal `endBy` blank)
    keyVal = do
      key <- name
      blank
      value <- directObject
      return (key, value)
|
|
|
|
|
|
2019-05-14 18:42:11 +02:00
|
|
|
|
-- | Parse the keyword @null@.
nullObject :: Parser u ()
nullObject = () <$ string "null"
|
|
|
|
|
|
2019-05-14 18:42:11 +02:00
|
|
|
|
-- | Parse a reference to an indirect object: two integers followed by the
-- letter @R@.
reference :: Parser u IndirectObjCoordinates
reference = do
  identifier <- integer
  version <- integer
  _ <- char 'R'
  return (IndirectObjCoordinates identifier version)
|
2019-05-13 18:22:05 +02:00
|
|
|
|
|
2019-05-14 18:42:11 +02:00
|
|
|
|
-- | Parse one cross-reference entry: two integers, then @n@ (entry in use) or
-- @f@ (free entry), then any trailing white space.
entry :: Parser u XRefEntry
entry = do
  position <- integer
  generationNumber <- integer
  -- the marker letter decides which constructor receives the two numbers
  build <- (InUse <$ char 'n') <|> (Free <$ char 'f')
  blank
  return (build position generationNumber)
|
|
|
|
|
|
2019-05-14 18:42:11 +02:00
|
|
|
|
-- | Parse a cross-reference subsection: a header made of two integers (id of
-- the first object, number of entries) followed by that many entries, which
-- are indexed by consecutive ids starting at the first one.
xrefSubSection :: Parser u XRefSubSection
xrefSubSection = do
  firstObjectId <- integer
  entriesNumber <- integer
  parsedEntries <- count entriesNumber entry
  let entries = Map.fromList (zip [firstObjectId ..] parsedEntries)
  return XRefSubSection {firstObjectId, entriesNumber, entries}
|
2019-05-13 18:22:05 +02:00
|
|
|
|
|
2019-05-14 18:42:11 +02:00
|
|
|
|
-- | Parse the end of a document: the @xref@ section, the @trailer@ dictionary
-- and the @startxref@ value.
--
-- The 'body' and 'objects' fields of the result are left empty.
--
-- Subsections are read with 'many' rather than separated by 'eol': every
-- entry already consumes the white space following it, end-of-line included,
-- so no separator is ever left between two subsections and a table made of
-- several subsections could not be parsed otherwise.
content :: Parser u Content
content =
  Content [] Map.empty
    <$> (line "xref" *> many (xrefSubSection <* blank))
    <*> (line "trailer" *> dictionary <* eol)
    <*> (line "startxref" *> integer)
|