{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE FlexibleInstances #-} module PDF.Object ( Content(..) , Dictionary , DirectObject(..) , Flow(..) , IndexedObjects , IndirectObjCoordinates(..) , InputStructure(..) , Name(..) , Number(..) , Object(..) , Occurrence(..) , StringObject(..) , Structure(..) , XRefEntry(..) , XRefSection , array , blank , dictionary , directObject , eofMarker , integer , line , magicNumber , name , number , regular , stringObject , structure , toByteString ) where import Control.Applicative ((<|>), many) import Data.Attoparsec.ByteString.Char8 (choice, count, option, sepBy) import Data.ByteString (ByteString) import qualified Data.ByteString as BS (concat) import qualified Data.ByteString.Char8 as Char8 (cons, pack, singleton, unpack) import Data.ByteString.Char8.Util (B16Int(..), b16ToBytes, unescape) import Data.Map (Map, (!), mapWithKey) import qualified Data.Map as Map ( delete, empty, fromList, lookup, minViewWithKey, toList, union ) import qualified PDF.EOL as EOL (charset, parser) import qualified PDF.Output as Output (line, string) import PDF.Output ( OBuilder, ObjectId(..), Offset(..), Output(..), Resource(..) , byteString, getObjectId, getOffset, getOffsets, join, newLine , saveOffset ) import PDF.Parser (MonadParser(..), Parser, (), octDigit, oneOf) import Text.Printf (printf) line :: MonadParser m => String -> m () line l = (string (Char8.pack l) *> blank *> return ()) printf "line «%s»" l magicNumber :: ByteString magicNumber = "%PDF-" eofMarker :: ByteString eofMarker = "%%EOF" whiteSpaceCharset :: String whiteSpaceCharset = "\0\t\12 " blank :: MonadParser m => m () blank = takeAll (`elem` (EOL.charset ++ whiteSpaceCharset)) *> pure () delimiterCharset :: String delimiterCharset = "()<>[]{}/%" regular :: Char -> Bool regular = not . (`elem` (EOL.charset ++ whiteSpaceCharset ++ delimiterCharset)) integer :: (Read a, Num a, MonadParser m) => m a integer = read . Char8.unpack <$> decNumber <* blank "decimal integer" ------------------------------------- -- OBJECTS ------------------------------------- type IndexedObjects = Map ObjectId Object -- -- Boolean -- boolean :: MonadParser m => m Bool boolean = (string "true" *> return True) <|> (string "false" *> return False) "boolean" -- -- Number -- newtype Number = Number Double deriving (Eq, Show) instance Output Number where output (Number f) = Output.string $ case properFraction f of (n, 0) -> printf "%d" (n :: Int) _ -> printf "%f" f number :: MonadParser m => m Number number = Number . read . Char8.unpack <$> (mappend <$> sign <*> (integerPart <|> Char8.cons '0' <$> floatPart)) "number" where sign = string "-" <|> option "" (char '+' >> return "") integerPart = mappend <$> decNumber <*> option "" floatPart floatPart = Char8.cons <$> char '.' <*> (option "0" $ decNumber) -- -- StringObject -- data StringObject = Literal ByteString | Hexadecimal B16Int deriving (Eq, Show) instance Output StringObject where output (Literal s) = Output.string (printf "(%s)" (Char8.unpack s)) output (Hexadecimal (B16Int n)) = Output.string (printf "<%s>" (Char8.unpack n)) stringObject :: MonadParser m => m StringObject stringObject = Literal <$> (char '(' *> (BS.concat <$> literalString) <* char ')') <|> Hexadecimal <$> (char '<' *> hexNumber <* char '>') "string object (literal or hexadecimal)" where literalString = many literalStringBlock literalStringBlock = takeAll1 normalChar <|> matchingParenthesis <|> escapedChar normalChar = not . (`elem` ("\\()" :: String)) matchingParenthesis = mappend <$> (Char8.cons <$> char '(' <*> literalStringBlock) <*> string ")" escapedChar = Char8.cons <$> char '\\' <*> (Char8.singleton <$> oneOf "nrtbf()\\\n" <|> octalCode) octalCode = choice $ (\n -> Char8.pack <$> count n octDigit) <$> [1..3] toByteString :: StringObject -> ByteString toByteString (Hexadecimal h) = b16ToBytes h toByteString (Literal s) = unescape s -- -- Name -- newtype Name = Name String deriving (Eq, Ord, Show) instance Output Name where output (Name n) = Output.string ('/':n) name :: MonadParser m => m Name name = Name . Char8.unpack <$> (char '/' *> takeAll regular) "name" -- -- Array -- array :: MonadParser m => m [DirectObject] array = char '[' *> blank *> directObject `sepBy` blank <* blank <* char ']' "array" -- -- Dictionary -- type Dictionary = Map Name DirectObject instance Output Dictionary where output aDictionary = "<<" `mappend` keyValues `mappend` ">>" where keyValues = join " " $ outputKeyVal <$> Map.toList aDictionary outputKeyVal :: (Name, DirectObject) -> OBuilder outputKeyVal (key, val) = mconcat [output key, " ", output val] dictionary :: MonadParser m => m Dictionary dictionary = string "<<" *> blank *> keyValPairs <* string ">>" "dictionary" where keyVal = (,) <$> name <* blank <*> directObject keyValPairs = Map.fromList <$> keyVal `sepBy` blank <* blank -- -- Null -- nullObject :: MonadParser m => m () nullObject = string "null" *> return () "null object" -- -- Reference -- data IndirectObjCoordinates = IndirectObjCoordinates { objectId :: ObjectId , versionNumber :: Int } deriving Show reference :: MonadParser m => m IndirectObjCoordinates reference = IndirectObjCoordinates <$> (fmap ObjectId integer) <*> integer <* char 'R' "reference to an object" -- -- DirectObject -- data DirectObject = Boolean Bool | NumberObject Number | StringObject StringObject | NameObject Name | Array [DirectObject] | Dictionary Dictionary | Null | Reference IndirectObjCoordinates deriving Show instance Output DirectObject where output (Boolean b) = output b output (NumberObject n) = output n output (StringObject s) = output s output (NameObject n) = output n output (Array a) = mconcat ["[", join " " a, "]"] output (Dictionary d) = output d output (Null) = "null" output (Reference (IndirectObjCoordinates {objectId, versionNumber})) = Output.string (printf "%d %d R" (getObjectId objectId) versionNumber) directObject :: MonadParser m => m DirectObject directObject = (peek >>= dispatch) "direct object" where dispatch 't' = Boolean <$> boolean dispatch 'f' = Boolean <$> boolean dispatch '(' = StringObject <$> stringObject dispatch '<' = StringObject <$> stringObject <|> Dictionary <$> dictionary dispatch '/' = NameObject <$> name dispatch '[' = Array <$> array dispatch 'n' = nullObject *> return Null dispatch _ = Reference <$> reference {- defined before Number because Number is a prefix of it -} <|> NumberObject <$> number -- -- Object -- data Object = Direct DirectObject | Stream { header :: Dictionary , streamContent :: ByteString } deriving Show instance Output Object where output (Direct d) = output d output (Stream {header, streamContent}) = mconcat [ output header, newLine , Output.line "stream" , byteString streamContent , "endstream" ] -- -- Occurrence -- data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show outputOccurrence :: IndexedObjects -> Occurrence -> OBuilder outputOccurrence _ (Comment c) = Output.line c outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) = saveOffset (Object objectId) >> mconcat [ Output.line (printf "%d %d obj" (getObjectId objectId) versionNumber) , output (objects ! objectId), newLine , Output.line "endobj" ] ------------------------------------- -- XREF TABLE ------------------------------------- -- -- XRefEntry -- data XRefEntry = InUse { offset :: Offset , generation :: Int } | Free { nextFree :: ObjectId , generation :: Int } deriving Show instance Output XRefEntry where output (InUse {offset, generation}) = Output.line (printf "%010d %05d n " (getOffset offset) generation) output (Free {nextFree, generation}) = Output.line (printf "%010d %05d f " (getObjectId nextFree) generation) entry :: Parser u XRefEntry entry = do (big, small) <- (,) <$> integer <*> integer (inUse big small <|> free big small "XRef entry") <* blank where inUse :: Int -> Int -> Parser u XRefEntry inUse big generation = char 'n' *> return (InUse {offset = Offset big, generation}) free :: Int -> Int -> Parser u XRefEntry free big generation = char 'f' *> return (Free {nextFree = ObjectId big, generation}) -- -- XRefSubSection -- data XRefSubSection = XRefSubSection { firstObjectId :: ObjectId , entries :: [XRefEntry] } deriving Show instance Output XRefSubSection where output (XRefSubSection {firstObjectId, entries}) = Output.line (printf "%d %d" (getObjectId firstObjectId) (length entries)) `mappend` output entries xRefSubSection :: Parser u XRefSubSection xRefSubSection = do (firstId, entriesNumber) <- (,) <$> integer <*> integer "XRef subsection" entries <- count entriesNumber entry return $ XRefSubSection {firstObjectId = ObjectId firstId, entries} type XRefSection = Map ObjectId XRefEntry instance Output XRefSection where output = output . sections where sections tmp = case Map.minViewWithKey tmp of Nothing -> [] Just ((objectId@(ObjectId value), firstEntry), rest) -> let (subSection, sndRest) = section objectId [firstEntry] (value + 1) rest in subSection : sections sndRest section firstObjectId stack nextValue tmp = let nextId = ObjectId nextValue in case Map.lookup nextId tmp of Nothing -> (XRefSubSection {firstObjectId, entries = reverse stack}, tmp) Just nextEntry -> section firstObjectId (nextEntry:stack) (nextValue + 1) (Map.delete nextId tmp) xRefSection :: Parser u XRefSection xRefSection = foldr addSubsection Map.empty <$> (line "xref" *> xRefSubSection `sepBy` many EOL.parser) where addSubsection (XRefSubSection {firstObjectId, entries}) = Map.union . Map.fromList $ zip ([firstObjectId..]) entries -- -- Structure -- data InputStructure = InputStructure { startOffset :: Int , inputStructure :: Structure } data Structure = Structure { xRef :: XRefSection , trailer :: Dictionary } deriving Show structure :: Parser u Structure structure = Structure <$> xRefSection <*> (string "trailer" *> blank *> dictionary <* EOL.parser) updateXrefs :: XRefSection -> Map Resource Offset -> (XRefSection, Offset) updateXrefs xRef offsets = (mapWithKey updateEntry xRef, offsets ! StartXRef) where updateEntry objectId e@(InUse {}) = e {offset = offsets ! (Object objectId)} updateEntry _ e = e -- -- Flow -- data Flow = Flow { occurrencesStack :: [Occurrence] , tmpObjects :: IndexedObjects } deriving Show -- -- Content -- data Content = Content { occurrences :: [Occurrence] , objects :: IndexedObjects , docStructure :: Structure } deriving Show outputBody :: ([Occurrence], IndexedObjects) -> OBuilder outputBody (occurrences, objects) = output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef instance Output Content where output (Content {occurrences, objects, docStructure}) = fmap (updateXrefs xRef) <$> getOffsets (outputBody (occurrences, objects)) >>= \(body, (xref, startXRef)) -> mconcat [ body , Output.line "xref" , output xref , Output.line "trailer" , output trailer, newLine , Output.line "startxref" , Output.line (printf "%d" (getOffset startXRef)) , byteString eofMarker ] where Structure {xRef, trailer} = docStructure