{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE FlexibleInstances #-} module PDF.Object ( Content(..) , DirectObject(..) , IndirectObjCoordinates(..) , Object(..) , Occurrence(..) , Parser , XRefEntry(..) , XRefSection , XRefSubSection(..) , blank , content , dictionary , directObject , eofMarker , integer , line , magicNumber ) where import Data.ByteString.Lazy.Char8 (ByteString) import Data.Int (Int64) import Data.Map (Map, (!)) import qualified Data.Map as Map (empty, elems, fromList, toList) import qualified PDF.EOL as EOL (charset, parser) import qualified PDF.Output as Output (string) import PDF.Output (OBuilder, Output(..), byteString, join, newLine, nextLine) import Text.Parsec import Text.Printf (printf) type Parser u = Parsec ByteString u type Dictionary = Map String DirectObject instance Output (Map String DirectObject) where output dict = "<<" `mappend` keyValues `mappend` ">>" where keyValues = join " " $ outputKeyVal <$> Map.toList dict outputKeyVal (key, val) = Output.string (printf "/%s " key) `mappend` output val data StringObj = Literal String | Hexadecimal String deriving Show instance Output StringObj where output (Literal s) = Output.string (printf "(%s)" s) output (Hexadecimal s) = Output.string (printf "<%s>" s) data DirectObject = Boolean Bool | Number Float | String StringObj | Name String | Array [DirectObject] | Dictionary Dictionary | Null | Reference IndirectObjCoordinates deriving Show outputFloat :: Float -> OBuilder outputFloat f = Output.string $ case properFraction f of (n, 0) -> printf "%d" (n :: Int) _ -> printf "%f" f instance Output DirectObject where output (Boolean b) = output b output (Number n) = outputFloat n output (String s) = output s output (Name n) = "/" `mappend` Output.string n output (Array a) = "[" `mappend` join " " a `mappend` "]" output (Dictionary d) = output d output (Null) = "null" output (Reference (IndirectObjCoordinates {objectId, versionNumber})) = Output.string (printf "%d %d R" objectId versionNumber) data Object = Direct DirectObject | Stream { header :: Dictionary , streamContent :: ByteString } deriving Show instance Output Object where output (Direct d) = output d output (Stream {header, streamContent}) = output header `nextLine` "stream" `nextLine` byteString streamContent `mappend` "endstream" data IndirectObjCoordinates = IndirectObjCoordinates { objectId :: Int , versionNumber :: Int } deriving Show data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show outputOccurrence :: Map Int Object -> Occurrence -> OBuilder outputOccurrence _ (Comment c) = Output.string (printf "%%%s" c) `mappend` newLine outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) = Output.string (printf "%d %d obj" objectId versionNumber) `nextLine` output (objects ! objectId) `nextLine` "endobj" `mappend` newLine data XRefEntry = InUse { offset :: Int64 , generation :: Int } | Free { nextFree :: Int64 , generation :: Int } deriving Show instance Output XRefEntry where output (InUse {offset, generation}) = Output.string (printf "%010d %05d n " offset generation) `mappend` newLine output (Free {nextFree, generation}) = Output.string (printf "%010d %05d f " nextFree generation) `mappend` newLine data XRefSubSection = XRefSubSection { firstObjectId :: Int , entriesNumber :: Int , entries :: Map Int XRefEntry } deriving Show instance Output XRefSubSection where output (XRefSubSection {firstObjectId, entriesNumber, entries}) = Output.string (printf "%d %d" firstObjectId entriesNumber) `nextLine` output (Map.elems entries) type XRefSection = [XRefSubSection] data Content = Content { startOffset :: Int64 , body :: [Occurrence] , objects :: Map Int Object , xrefSection :: XRefSection , trailer :: Dictionary , startXrefPosition :: Int64 } deriving Show instance Output Content where output (Content {body, objects, trailer, xrefSection, startXrefPosition}) = output (outputOccurrence objects <$> body) `mappend` output xrefSection `mappend` "trailer" `nextLine` output trailer `nextLine` "startxref" `nextLine` Output.string (printf "%d" startXrefPosition) `nextLine` byteString eofMarker line :: String -> Parser u () line l = string l *> EOL.parser *> return () magicNumber :: String magicNumber = "%PDF-" eofMarker :: ByteString eofMarker = "%%EOF" whiteSpaceCharset :: String whiteSpaceCharset = "\0\t\12 " whiteSpace :: Parser u () whiteSpace = oneOf whiteSpaceCharset *> return () <|> EOL.parser *> return () blank :: Parser u () blank = skipMany whiteSpace delimiterCharset :: String delimiterCharset = "()<>[]{}/%" regular :: Parser u Char regular = noneOf $ EOL.charset ++ whiteSpaceCharset ++ delimiterCharset integer :: (Read a, Num a) => Parser u a integer = read <$> many1 digit <* whiteSpace directObject :: Parser u DirectObject directObject = Boolean <$> try boolean <|> Reference <$> try reference {- defined before Number because Number is a prefix of it -} <|> Number <$> try number <|> String <$> try stringObj <|> Name <$> try name <|> Array <$> try array <|> Dictionary <$> try dictionary <|> const Null <$> try nullObject boolean :: Parser u Bool boolean = (string "true" *> return True) <|> (string "false" *> return False) number :: Parser u Float number = read <$> (mappend <$> sign <*> (integerPart <|> ('0':) <$> floatPart)) where sign = string "-" <|> option "" (char '+' >> return "") integerPart = mappend <$> many1 digit <*> option "" floatPart floatPart = (:) <$> char '.' <*> (option "0" $ many1 digit) stringObj :: Parser u StringObj stringObj = Literal <$> (char '(' *> (concat <$> many literalStringBlock) <* char ')') <|> Hexadecimal <$> (char '<' *> many hexDigit <* char '>') where literalStringBlock = many1 (noneOf "\\()") <|> matchingParenthesis <|> escapedChar matchingParenthesis = (++) <$> ((:) <$> char '(' <*> literalStringBlock) <*> string ")" escapedChar = (:) <$> char '\\' <*> ((:[]) <$> oneOf "nrtbf()\\" <|> octalCode) octalCode = choice $ (\n -> count n octDigit) <$> [1..3] name :: Parser u String name = char '/' *> many regular array :: Parser u [DirectObject] array = char '[' *> blank *> directObject `endBy` blank <* char ']' dictionary :: Parser u Dictionary dictionary = try (string "<<" *> blank *> keyValPairs <* string ">>") where keyVal = (,) <$> name <* blank <*> directObject keyValPairs = Map.fromList <$> keyVal `endBy` blank nullObject :: Parser u () nullObject = string "null" *> return () reference :: Parser u IndirectObjCoordinates reference = IndirectObjCoordinates <$> integer <*> integer <* char 'R' entry :: Parser u XRefEntry entry = do (big, small) <- (,) <$> integer <*> integer (inUse big small <|> free big small) <* blank where inUse :: Int64 -> Int -> Parser u XRefEntry inUse offset generation = char 'n' *> return (InUse {offset, generation}) free :: Int64 -> Int -> Parser u XRefEntry free nextFree generation = char 'f' *> return (Free {nextFree, generation}) xrefSubSection :: Parser u XRefSubSection xrefSubSection = do (firstObjectId, entriesNumber) <- (,) <$> integer <*> integer entries <- Map.fromList . zip [firstObjectId..] <$> count entriesNumber entry return $ XRefSubSection {firstObjectId, entriesNumber, entries} content :: Parser u Content content = Content 0 [] Map.empty <$> (line "xref" *> xrefSubSection `sepBy` EOL.parser) <*> (line "trailer" *> dictionary <* EOL.parser) <*> (line "startxref" *> integer)