{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE FlexibleInstances #-} module PDF.Object ( Content(..) , DirectObject(..) , Flow(..) , IndirectObjCoordinates(..) , InputStructure(..) , Name(..) , Number(..) , Object(..) , Occurrence(..) , Parser , Structure(..) , XRefEntry(..) , XRefSection , XRefSubSection(..) , blank , dictionary , directObject , eofMarker , integer , line , magicNumber , structure ) where import Data.ByteString.Lazy.Char8 (ByteString) import Data.Int (Int64) import Data.Map (Map, (!), mapWithKey) import qualified Data.Map as Map (elems, fromList, toList) import qualified PDF.EOL as EOL (charset, parser) import qualified PDF.Output as Output (string) import PDF.Output ( OBuilder, Offset(..), Output(..) , byteString, getOffsets, join, newLine, nextLine, saveOffset ) import Text.Parsec import Text.Printf (printf) type Parser u = Parsec ByteString u line :: String -> Parser u () line l = string l *> EOL.parser *> return () magicNumber :: String magicNumber = "%PDF-" eofMarker :: ByteString eofMarker = "%%EOF" whiteSpaceCharset :: String whiteSpaceCharset = "\0\t\12 " whiteSpace :: Parser u () whiteSpace = oneOf whiteSpaceCharset *> return () <|> EOL.parser *> return () blank :: Parser u () blank = skipMany whiteSpace delimiterCharset :: String delimiterCharset = "()<>[]{}/%" regular :: Parser u Char regular = noneOf $ EOL.charset ++ whiteSpaceCharset ++ delimiterCharset integer :: (Read a, Num a) => Parser u a integer = read <$> many1 digit <* whiteSpace ------------------------------------- -- OBJECTS ------------------------------------- -- -- Boolean -- boolean :: Parser u Bool boolean = (string "true" *> return True) <|> (string "false" *> return False) -- -- Number -- newtype Number = Number Float deriving Show instance Output Number where output (Number f) = Output.string $ case properFraction f of (n, 0) -> printf "%d" (n :: Int) _ -> printf "%f" f number :: Parser u Number number = Number . read <$> (mappend <$> sign <*> (integerPart <|> ('0':) <$> floatPart)) where sign = string "-" <|> option "" (char '+' >> return "") integerPart = mappend <$> many1 digit <*> option "" floatPart floatPart = (:) <$> char '.' <*> (option "0" $ many1 digit) -- -- StringObject -- data StringObject = Literal String | Hexadecimal String deriving Show instance Output StringObject where output (Literal s) = Output.string (printf "(%s)" s) output (Hexadecimal s) = Output.string (printf "<%s>" s) stringObject :: Parser u StringObject stringObject = Literal <$> (char '(' *> (concat <$> many literalStringBlock) <* char ')') <|> Hexadecimal <$> (char '<' *> many hexDigit <* char '>') where literalStringBlock = many1 (noneOf "\\()") <|> matchingParenthesis <|> escapedChar matchingParenthesis = (++) <$> ((:) <$> char '(' <*> literalStringBlock) <*> string ")" escapedChar = (:) <$> char '\\' <*> ((:[]) <$> oneOf "nrtbf()\\" <|> octalCode) octalCode = choice $ (\n -> count n octDigit) <$> [1..3] -- -- Name -- newtype Name = Name String deriving (Eq, Ord, Show) instance Output Name where output (Name n) = "/" `mappend` Output.string n name :: Parser u Name name = Name <$> (char '/' *> many regular) -- -- Array -- array :: Parser u [DirectObject] array = char '[' *> blank *> directObject `endBy` blank <* char ']' -- -- Dictionary -- type Dictionary = Map Name DirectObject instance Output Dictionary where output dict = "<<" `mappend` keyValues `mappend` ">>" where keyValues = join " " $ outputKeyVal <$> Map.toList dict outputKeyVal :: (Name, DirectObject) -> OBuilder outputKeyVal (key, val) = output key `mappend` " " `mappend` output val dictionary :: Parser u Dictionary dictionary = try (string "<<" *> blank *> keyValPairs <* string ">>") where keyVal = (,) <$> name <* blank <*> directObject keyValPairs = Map.fromList <$> keyVal `endBy` blank -- -- Null -- nullObject :: Parser u () nullObject = string "null" *> return () -- -- Reference -- data IndirectObjCoordinates = IndirectObjCoordinates { objectId :: Int , versionNumber :: Int } deriving Show reference :: Parser u IndirectObjCoordinates reference = IndirectObjCoordinates <$> integer <*> integer <* char 'R' -- -- DirectObject -- data DirectObject = Boolean Bool | NumberObject Number | StringObject StringObject | NameObject Name | Array [DirectObject] | Dictionary Dictionary | Null | Reference IndirectObjCoordinates deriving Show instance Output DirectObject where output (Boolean b) = output b output (NumberObject n) = output n output (StringObject s) = output s output (NameObject n) = output n output (Array a) = "[" `mappend` join " " a `mappend` "]" output (Dictionary d) = output d output (Null) = "null" output (Reference (IndirectObjCoordinates {objectId, versionNumber})) = Output.string (printf "%d %d R" objectId versionNumber) directObject :: Parser u DirectObject directObject = Boolean <$> try boolean <|> Reference <$> try reference {- defined before Number because Number is a prefix of it -} <|> NumberObject <$> try number <|> StringObject <$> try stringObject <|> NameObject <$> try name <|> Array <$> try array <|> Dictionary <$> try dictionary <|> const Null <$> try nullObject -- -- Object -- data Object = Direct DirectObject | Stream { header :: Dictionary , streamContent :: ByteString } deriving Show instance Output Object where output (Direct d) = output d output (Stream {header, streamContent}) = output header `nextLine` "stream" `nextLine` byteString streamContent `mappend` "endstream" -- -- Occurrence -- data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show outputOccurrence :: Map Int Object -> Occurrence -> OBuilder outputOccurrence _ (Comment c) = Output.string (printf "%%%s" c) `mappend` newLine outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) = saveOffset (ObjectId objectId) >> Output.string (printf "%d %d obj" objectId versionNumber) `nextLine` output (objects ! objectId) `nextLine` "endobj" `mappend` newLine ------------------------------------- -- XREF TABLE ------------------------------------- -- -- XRefEntry -- data XRefEntry = InUse { offset :: Int64 , generation :: Int } | Free { nextFree :: Int64 , generation :: Int } deriving Show instance Output XRefEntry where output (InUse {offset, generation}) = Output.string (printf "%010d %05d n " offset generation) `mappend` newLine output (Free {nextFree, generation}) = Output.string (printf "%010d %05d f " nextFree generation) `mappend` newLine entry :: Parser u XRefEntry entry = do (big, small) <- (,) <$> integer <*> integer (inUse big small <|> free big small) <* blank where inUse :: Int64 -> Int -> Parser u XRefEntry inUse offset generation = char 'n' *> return (InUse {offset, generation}) free :: Int64 -> Int -> Parser u XRefEntry free nextFree generation = char 'f' *> return (Free {nextFree, generation}) -- -- XRefSubSection -- data XRefSubSection = XRefSubSection { firstObjectId :: Int , entriesNumber :: Int , entries :: Map Int XRefEntry } deriving Show instance Output XRefSubSection where output (XRefSubSection {firstObjectId, entriesNumber, entries}) = Output.string (printf "%d %d" firstObjectId entriesNumber) `nextLine` output (Map.elems entries) xrefSubSection :: Parser u XRefSubSection xrefSubSection = do (firstObjectId, entriesNumber) <- (,) <$> integer <*> integer entries <- Map.fromList . zip [firstObjectId..] <$> count entriesNumber entry return $ XRefSubSection {firstObjectId, entriesNumber, entries} type XRefSection = [XRefSubSection] -- -- Structure -- data InputStructure = InputStructure { startOffset :: Int64 , inputStructure :: Structure } data Structure = Structure { xrefSection :: XRefSection , trailer :: Dictionary } deriving Show structure :: Parser u Structure structure = Structure <$> (line "xref" *> xrefSubSection `sepBy` EOL.parser) <*> (line "trailer" *> dictionary <* EOL.parser) updateXrefs :: XRefSection -> Map Offset Int64 -> (XRefSection, Int64) updateXrefs xrefSection offsets = ( updateSubSection <$> xrefSection , offsets ! StartXRef ) where updateSubSection subSection@(XRefSubSection {firstObjectId, entries}) = subSection {entries = mapWithKey (updateEntry firstObjectId) entries} updateEntry firstObjectId index e@(InUse {}) = e {offset = offsets ! (ObjectId $ firstObjectId + index)} updateEntry _ _ e = e -- -- Flow -- data Flow = Flow { occurrencesStack :: [Occurrence] , tmpObjects :: Map Int Object } deriving Show -- -- Content -- data Content = Content { occurrences :: [Occurrence] , objects :: Map Int Object , docStructure :: Structure } deriving Show outputBody :: ([Occurrence], Map Int Object) -> OBuilder outputBody (occurrences, objects) = output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef instance Output Content where output (Content {occurrences, objects, docStructure}) = fmap (updateXrefs xrefSection) <$> getOffsets (outputBody (occurrences, objects)) >>= \(body, (xref, startXRef)) -> body `mappend` "xref" `nextLine` output xref `mappend` "trailer" `nextLine` output trailer `nextLine` "startxref" `nextLine` (Output.string (printf "%d" startXRef)) `nextLine` byteString eofMarker where Structure {xrefSection, trailer} = docStructure