{-# LANGUAGE NamedFieldPuns #-} module PDF.Object ( Content(..) , DirectObject(..) , EOLStyle(..) , IndirectObjCoordinates(..) , Object(..) , Occurrence(..) , XRefEntry(..) , XRefSection , XRefSubSection(..) , content , dictionary , directObject , eol , eolCharset , integer , line ) where import Data.ByteString.Lazy.Char8 (ByteString) import Data.Int (Int64) import Data.Map (Map) import qualified Data.Map as Map (empty, fromList) import Text.Parsec --import Text.Parsec.ByteString.Lazy (Parser) type Parser u = Parsec ByteString u data EOLStyle = CR | LF | CRLF type Dictionary = Map String DirectObject data StringObj = Literal String | Hexadecimal String deriving Show data DirectObject = Boolean Bool | Number Float | String StringObj | Name String | Array [DirectObject] | Dictionary Dictionary | Null | Reference IndirectObjCoordinates deriving Show data Object = Direct DirectObject | Stream { header :: Dictionary , streamContent :: ByteString } deriving Show data IndirectObjCoordinates = IndirectObjCoordinates { objectId :: Int , versionNumber :: Int } deriving Show data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show data XRefEntry = InUse { offset :: Int64 , generation :: Int } | Free { nextFree :: Int64 , generation :: Int } deriving Show data XRefSubSection = XRefSubSection { firstObjectId :: Int , entriesNumber :: Int , entries :: Map Int XRefEntry } deriving Show type XRefSection = [XRefSubSection] data Content = Content { body :: [Occurrence] , objects :: Map Int Object , xrefSection :: XRefSection , trailer :: Dictionary , startXrefPosition :: Int64 } deriving Show eolCharset :: String eolCharset = "\r\n" eol :: Parser u EOLStyle eol = try (string "\r\n" >> return CRLF) <|> (string "\r" >> return CR) <|> (string "\n" >> return LF) line :: String -> Parser u () line l = string l *> eol *> return () whiteSpaceCharset :: String whiteSpaceCharset = "\0\t\12 " whiteSpace :: Parser u () whiteSpace = oneOf whiteSpaceCharset *> return () <|> eol *> return () blank :: Parser u () blank = skipMany whiteSpace delimiterCharset :: String delimiterCharset = "()<>[]{}/%" {- delimiter :: Parser u Char delimiter = oneOf delimiterCharset -} regular :: Parser u Char regular = noneOf $ eolCharset ++ whiteSpaceCharset ++ delimiterCharset integer :: (Read a, Num a) => Parser u a integer = read <$> many1 digit <* whiteSpace directObject :: Parser u DirectObject directObject = Boolean <$> try boolean <|> Reference <$> try reference {- defined before Number because Number is a prefix of it -} <|> Number <$> try number <|> String <$> try stringObj <|> Name <$> try name <|> Array <$> try array <|> Dictionary <$> try dictionary <|> const Null <$> try nullObject boolean :: Parser u Bool boolean = (string "true" *> return True) <|> (string "false" *> return False) number :: Parser u Float number = read <$> (mappend <$> sign <*> (integerPart <|> ('0':) <$> floatPart)) where sign = string "-" <|> option "" (char '+' >> return "") integerPart = mappend <$> many1 digit <*> option "" floatPart floatPart = (:) <$> char '.' <*> (option "0" $ many1 digit) stringObj :: Parser u StringObj stringObj = Literal <$> (char '(' *> (concat <$> many literalStringBlock) <* char ')') <|> Hexadecimal <$> (char '<' *> many hexDigit <* char '>') where literalStringBlock = many (noneOf "\\(") <|> matchingParenthesis <|> escapedChar matchingParenthesis = (++) <$> ((:) <$> char '(' <*> literalStringBlock) <*> string ")" escapedChar = (:) <$> char '\\' <*> ((:[]) <$> oneOf "nrtbf()\\" <|> octalCode) octalCode = choice $ (\n -> count n octDigit) <$> [1..3] name :: Parser u String name = char '/' *> many regular array :: Parser u [DirectObject] array = char '[' *> blank *> directObject `endBy` blank <* char ']' dictionary :: Parser u Dictionary dictionary = try (string "<<" *> blank *> keyValPairs <* string ">>") where keyVal = (,) <$> name <* blank <*> directObject keyValPairs = Map.fromList <$> keyVal `endBy` blank nullObject :: Parser u () nullObject = string "null" *> return () reference :: Parser u IndirectObjCoordinates reference = IndirectObjCoordinates <$> integer <*> integer <* char 'R' entry :: Parser u XRefEntry entry = do (big, small) <- (,) <$> integer <*> integer (inUse big small <|> free big small) <* blank where inUse :: Int64 -> Int -> Parser u XRefEntry inUse offset generation = char 'n' *> return (InUse {offset, generation}) free :: Int64 -> Int -> Parser u XRefEntry free nextFree generation = char 'f' *> return (Free {nextFree, generation}) xrefSubSection :: Parser u XRefSubSection xrefSubSection = do (firstObjectId, entriesNumber) <- (,) <$> integer <*> integer entries <- Map.fromList . zip [firstObjectId..] <$> count entriesNumber entry return $ XRefSubSection {firstObjectId, entriesNumber, entries} content :: Parser u Content content = Content [] Map.empty <$> (line "xref" *> xrefSubSection `sepBy` eol) <*> (line "trailer" *> dictionary <* eol) <*> (line "startxref" *> integer)