{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE FlexibleInstances #-} module PDF.Object ( Content(..) , DirectObject(..) , Flow(..) , IndirectObjCoordinates(..) , InputStructure(..) , Name(..) , Number(..) , Object(..) , Occurrence(..) , Structure(..) , XRefEntry(..) , XRefSection , blank , dictionary , directObject , eofMarker , integer , line , magicNumber , structure ) where import Control.Applicative ((<|>)) import Data.ByteString.Char8 (ByteString) import qualified Data.ByteString.Char8 as BS ( concat, cons, pack, singleton, unpack ) import Data.Map (Map, (!), mapWithKey) import qualified Data.Map as Map ( delete, empty, fromList, lookup, minViewWithKey, toList, union ) import qualified PDF.EOL as EOL (charset, parser) import qualified PDF.Output as Output (concat, line, string) import PDF.Output ( OBuilder, ObjectId(..), Offset(..), Output(..), Resource(..) , byteString, getObjectId, getOffset, getOffsets, join, newLine , saveOffset ) import PDF.Parser ( Parser, () , char, choice, count, decNumber, hexNumber, many, octDigit, oneOf, option , sepBy, string, takeAll, takeAll1 ) import Text.Printf (printf) line :: String -> Parser u () line l = (string (BS.pack l) *> EOL.parser *> return ()) printf "line «%s»" l magicNumber :: ByteString magicNumber = "%PDF-" eofMarker :: ByteString eofMarker = "%%EOF" whiteSpaceCharset :: String whiteSpaceCharset = "\0\t\12 " blank :: Parser u () blank = takeAll (`elem` (EOL.charset ++ whiteSpaceCharset)) *> return () delimiterCharset :: String delimiterCharset = "()<>[]{}/%" regular :: Char -> Bool regular = not . (`elem` (EOL.charset ++ whiteSpaceCharset ++ delimiterCharset)) integer :: (Read a, Num a) => Parser u a integer = read . BS.unpack <$> decNumber <* blank "decimal integer" ------------------------------------- -- OBJECTS ------------------------------------- type IndexedObjects = Map ObjectId Object -- -- Boolean -- boolean :: Parser u Bool boolean = (string "true" *> return True) <|> (string "false" *> return False) "boolean" -- -- Number -- newtype Number = Number Float deriving Show instance Output Number where output (Number f) = Output.string $ case properFraction f of (n, 0) -> printf "%d" (n :: Int) _ -> printf "%f" f number :: Parser u Number number = Number . read . BS.unpack <$> (mappend <$> sign <*> (integerPart <|> BS.cons '0' <$> floatPart)) "number" where sign = string "-" <|> option "" (char '+' >> return "") integerPart = mappend <$> decNumber <*> option "" floatPart floatPart = BS.cons <$> char '.' <*> (option "0" $ decNumber) -- -- StringObject -- data StringObject = Literal String | Hexadecimal String deriving Show instance Output StringObject where output (Literal s) = Output.string (printf "(%s)" s) output (Hexadecimal s) = Output.string (printf "<%s>" s) stringObject :: Parser u StringObject stringObject = Literal . BS.unpack <$> (char '(' *> (BS.concat <$> literalString) <* char ')') <|> Hexadecimal . BS.unpack <$> (char '<' *> hexNumber <* char '>') "string object (literal or hexadecimal)" where literalString = many literalStringBlock literalStringBlock = takeAll1 normalChar <|> matchingParenthesis <|> escapedChar normalChar = not . (`elem` ("\\()" :: String)) matchingParenthesis = mappend <$> (BS.cons <$> char '(' <*> literalStringBlock) <*> string ")" escapedChar = BS.cons <$> char '\\' <*> (BS.singleton <$> oneOf "nrtbf()\\" <|> octalCode) octalCode = choice $ (\n -> BS.pack <$> count n octDigit) <$> [1..3] -- -- Name -- newtype Name = Name String deriving (Eq, Ord, Show) instance Output Name where output (Name n) = Output.string ('/':n) name :: Parser u Name name = Name . BS.unpack <$> (char '/' *> takeAll regular) "name" -- -- Array -- array :: Parser u [DirectObject] array = char '[' *> blank *> directObject `sepBy` blank <* blank <* char ']' "array" -- -- Dictionary -- type Dictionary = Map Name DirectObject instance Output Dictionary where output dict = "<<" `mappend` keyValues `mappend` ">>" where keyValues = join " " $ outputKeyVal <$> Map.toList dict outputKeyVal :: (Name, DirectObject) -> OBuilder outputKeyVal (key, val) = Output.concat [output key, " ", output val] dictionary :: Parser u Dictionary dictionary = string "<<" *> blank *> keyValPairs <* string ">>" "dictionary" where keyVal = (,) <$> name <* blank <*> directObject keyValPairs = Map.fromList <$> keyVal `sepBy` blank <* blank -- -- Null -- nullObject :: Parser u () nullObject = string "null" *> return () "null object" -- -- Reference -- data IndirectObjCoordinates = IndirectObjCoordinates { objectId :: ObjectId , versionNumber :: Int } deriving Show reference :: Parser u IndirectObjCoordinates reference = IndirectObjCoordinates <$> (fmap ObjectId integer) <*> integer <* char 'R' "reference to an object" -- -- DirectObject -- data DirectObject = Boolean Bool | NumberObject Number | StringObject StringObject | NameObject Name | Array [DirectObject] | Dictionary Dictionary | Null | Reference IndirectObjCoordinates deriving Show instance Output DirectObject where output (Boolean b) = output b output (NumberObject n) = output n output (StringObject s) = output s output (NameObject n) = output n output (Array a) = Output.concat ["[", join " " a, "]"] output (Dictionary d) = output d output (Null) = "null" output (Reference (IndirectObjCoordinates {objectId, versionNumber})) = Output.string (printf "%d %d R" (getObjectId objectId) versionNumber) directObject :: Parser u DirectObject directObject = Boolean <$> boolean <|> Reference <$> reference {- defined before Number because Number is a prefix of it -} <|> NumberObject <$> number <|> StringObject <$> stringObject <|> NameObject <$> name <|> Array <$> array <|> Dictionary <$> dictionary <|> const Null <$> nullObject "direct object" -- -- Object -- data Object = Direct DirectObject | Stream { header :: Dictionary , streamContent :: ByteString } deriving Show instance Output Object where output (Direct d) = output d output (Stream {header, streamContent}) = Output.concat [ output header, newLine , Output.line "stream" , byteString streamContent , "endstream" ] -- -- Occurrence -- data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show outputOccurrence :: IndexedObjects -> Occurrence -> OBuilder outputOccurrence _ (Comment c) = Output.line c outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) = saveOffset (Object objectId) >> Output.concat [ Output.line (printf "%d %d obj" (getObjectId objectId) versionNumber) , output (objects ! objectId), newLine , Output.line "endobj" ] ------------------------------------- -- XREF TABLE ------------------------------------- -- -- XRefEntry -- data XRefEntry = InUse { offset :: Offset , generation :: Int } | Free { nextFree :: ObjectId , generation :: Int } deriving Show instance Output XRefEntry where output (InUse {offset, generation}) = Output.line (printf "%010d %05d n " (getOffset offset) generation) output (Free {nextFree, generation}) = Output.line (printf "%010d %05d f " (getObjectId nextFree) generation) entry :: Parser u XRefEntry entry = do (big, small) <- (,) <$> integer <*> integer (inUse big small <|> free big small "XRef entry") <* blank where inUse :: Int -> Int -> Parser u XRefEntry inUse big generation = char 'n' *> return (InUse {offset = Offset big, generation}) free :: Int -> Int -> Parser u XRefEntry free big generation = char 'f' *> return (Free {nextFree = ObjectId big, generation}) -- -- XRefSubSection -- data XRefSubSection = XRefSubSection { firstObjectId :: ObjectId , entries :: [XRefEntry] } deriving Show instance Output XRefSubSection where output (XRefSubSection {firstObjectId, entries}) = Output.line (printf "%d %d" (getObjectId firstObjectId) (length entries)) `mappend` output entries xRefSubSection :: Parser u XRefSubSection xRefSubSection = do (firstId, entriesNumber) <- (,) <$> integer <*> integer "XRef subsection" entries <- count entriesNumber entry return $ XRefSubSection {firstObjectId = ObjectId firstId, entries} type XRefSection = Map ObjectId XRefEntry instance Output XRefSection where output = output . sections where sections tmp = case Map.minViewWithKey tmp of Nothing -> [] Just ((objectId@(ObjectId value), firstEntry), rest) -> let (subSection, sndRest) = section objectId [firstEntry] (value + 1) rest in subSection : sections sndRest section firstObjectId stack nextValue tmp = let nextId = ObjectId nextValue in case Map.lookup nextId tmp of Nothing -> (XRefSubSection {firstObjectId, entries = reverse stack}, tmp) Just nextEntry -> section firstObjectId (nextEntry:stack) (nextValue + 1) (Map.delete nextId tmp) xRefSection :: Parser u XRefSection xRefSection = foldr addSubsection Map.empty <$> (line "xref" *> xRefSubSection `sepBy` EOL.parser) where addSubsection (XRefSubSection {firstObjectId, entries}) = Map.union . Map.fromList $ zip ([firstObjectId..]) entries -- -- Structure -- data InputStructure = InputStructure { startOffset :: Int , inputStructure :: Structure } data Structure = Structure { xRef :: XRefSection , trailer :: Dictionary } deriving Show structure :: Parser u Structure structure = Structure <$> xRefSection <*> (string "trailer" *> blank *> dictionary <* EOL.parser) updateXrefs :: XRefSection -> Map Resource Offset -> (XRefSection, Offset) updateXrefs xRef offsets = (mapWithKey updateEntry xRef, offsets ! StartXRef) where updateEntry objectId e@(InUse {}) = e {offset = offsets ! (Object objectId)} updateEntry _ e = e -- -- Flow -- data Flow = Flow { occurrencesStack :: [Occurrence] , tmpObjects :: IndexedObjects } deriving Show -- -- Content -- data Content = Content { occurrences :: [Occurrence] , objects :: IndexedObjects , docStructure :: Structure } deriving Show outputBody :: ([Occurrence], IndexedObjects) -> OBuilder outputBody (occurrences, objects) = output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef instance Output Content where output (Content {occurrences, objects, docStructure}) = fmap (updateXrefs xRef) <$> getOffsets (outputBody (occurrences, objects)) >>= \(body, (xref, startXRef)) -> Output.concat [ body , Output.line "xref" , output xref , Output.line "trailer" , output trailer, newLine , Output.line "startxref" , Output.line (printf "%d" (getOffset startXRef)) , byteString eofMarker ] where Structure {xRef, trailer} = docStructure