{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE FlexibleInstances #-} module PDF.Object ( Dictionary , DirectObject(..) , Flow(..) , IndexedObjects , IndirectObjCoordinates(..) , InputStructure(..) , Name(..) , Number(..) , Object(..) , Occurrence(..) , StringObject(..) , Structure(..) , XRefEntry(..) , XRefSection , array , blank , dictionary , directObject , eofMarker , integer , line , magicNumber , name , number , outputBody , regular , stringObject , structure , toByteString ) where import Control.Applicative ((<|>), many) import Data.Attoparsec.ByteString.Char8 (choice, count, option, sepBy) import Data.ByteString (ByteString) import qualified Data.ByteString as BS (concat) import qualified Data.ByteString.Char8 as Char8 ( cons, length, pack, singleton, snoc, unpack ) import Data.ByteString.Char8.Util (B16Int(..), b16ToBytes, unescape) import Data.Map (Map, (!)) import qualified Data.Map as Map ( delete, empty, fromList, lookup, minViewWithKey, toList, union ) import qualified Data.Set as Set (fromList, member) import qualified PDF.EOL as EOL (charset, parser) import qualified PDF.Output as Output (line, string) import PDF.Output ( OBuilder, ObjectId(..), Offset(..), Output(..), Resource(..) , byteString, getObjectId, getOffset, join, newLine, saveOffset ) import PDF.Parser (MonadParser(..), Parser, (), octDigit, oneOf) import Text.Printf (printf) line :: MonadParser m => String -> m () line l = (string (Char8.pack l) *> blank *> return ()) printf "line «%s»" l magicNumber :: ByteString magicNumber = "%PDF-" eofMarker :: ByteString eofMarker = "%%EOF" whiteSpaceCharset :: String whiteSpaceCharset = "\0\t\12 " blank :: MonadParser m => m () blank = takeAll (`elem` (EOL.charset ++ whiteSpaceCharset)) *> pure () delimiterCharset :: String delimiterCharset = "()<>[]{}/%" regular :: Char -> Bool regular = not . (`elem` (EOL.charset ++ whiteSpaceCharset ++ delimiterCharset)) integer :: MonadParser m => m Int integer = decNumber <* blank "decimal integer" ------------------------------------- -- OBJECTS ------------------------------------- type IndexedObjects = Map ObjectId Object -- -- Boolean -- boolean :: MonadParser m => m Bool boolean = (string "true" *> return True) <|> (string "false" *> return False) "boolean" -- -- Number -- newtype Number = Number Double deriving (Eq, Show) instance Output Number where output (Number f) = Output.string $ case properFraction f of (n, 0) -> printf "%d" (n :: Int) _ -> printf "%f" f number :: MonadParser m => m Number number = Number <$> (sign <*> value) "number" where sign = (string "-" *> return negate) <|> option id (char '+' >> return id) value = floatNumber <|> (char '.' *> afterPoint) afterPoint = read . ("0." ++) . Char8.unpack <$> takeAll (`Set.member` digits) digits = Set.fromList ['0' .. '9'] -- -- StringObject -- data StringObject = Literal ByteString | Hexadecimal B16Int deriving (Eq, Show) instance Output StringObject where output (Literal s) = Output.string (printf "(%s)" (Char8.unpack s)) output (Hexadecimal (B16Int n)) = Output.string (printf "<%s>" (Char8.unpack n)) stringObject :: MonadParser m => m StringObject stringObject = Literal <$> (char '(' *> (BS.concat <$> literalString) <* char ')') <|> Hexadecimal . roundBytes <$> (char '<' *> hexNumber <* char '>') "string object (literal or hexadecimal)" where literalString = many literalStringBlock literalStringBlock = takeAll1 normalChar <|> matchingParenthesis <|> escapedChar normalChar = not . (`elem` ("\\()" :: String)) matchingParenthesis = mappend <$> (Char8.cons <$> char '(' <*> literalStringBlock) <*> string ")" escapedChar = Char8.cons <$> char '\\' <*> (Char8.singleton <$> oneOf "nrtbf()\\\n" <|> octalCode) octalCode = choice $ (\n -> Char8.pack <$> count n octDigit) <$> [1..3] roundBytes (B16Int bs) | Char8.length bs `mod` 2 == 1 = B16Int (bs `Char8.snoc` '0') | otherwise = B16Int bs toByteString :: StringObject -> ByteString toByteString (Hexadecimal h) = b16ToBytes h toByteString (Literal s) = unescape s -- -- Name -- newtype Name = Name String deriving (Eq, Ord, Show) instance Output Name where output (Name n) = Output.string ('/':n) name :: MonadParser m => m Name name = Name . Char8.unpack <$> (char '/' *> takeAll regular) "name" -- -- Array -- array :: MonadParser m => m [DirectObject] array = char '[' *> blank *> directObject `sepBy` blank <* blank <* char ']' "array" -- -- Dictionary -- type Dictionary = Map Name DirectObject instance Output Dictionary where output aDictionary = "<<" `mappend` keyValues `mappend` ">>" where keyValues = join " " $ outputKeyVal <$> Map.toList aDictionary outputKeyVal :: (Name, DirectObject) -> OBuilder outputKeyVal (key, val) = mconcat [output key, " ", output val] dictionary :: MonadParser m => m Dictionary dictionary = string "<<" *> blank *> keyValPairs <* string ">>" "dictionary" where keyVal = (,) <$> name <* blank <*> directObject keyValPairs = Map.fromList <$> keyVal `sepBy` blank <* blank -- -- Null -- nullObject :: MonadParser m => m () nullObject = string "null" *> return () "null object" -- -- Reference -- data IndirectObjCoordinates = IndirectObjCoordinates { objectId :: ObjectId , versionNumber :: Int } deriving Show reference :: MonadParser m => m IndirectObjCoordinates reference = IndirectObjCoordinates <$> (fmap ObjectId integer) <*> integer <* char 'R' "reference to an object" -- -- DirectObject -- data DirectObject = Boolean Bool | NumberObject Number | StringObject StringObject | NameObject Name | Array [DirectObject] | Dictionary Dictionary | Null | Reference IndirectObjCoordinates deriving Show instance Output DirectObject where output (Boolean b) = output b output (NumberObject n) = output n output (StringObject s) = output s output (NameObject n) = output n output (Array a) = mconcat ["[", join " " a, "]"] output (Dictionary d) = output d output (Null) = "null" output (Reference (IndirectObjCoordinates {objectId, versionNumber})) = Output.string (printf "%d %d R" (getObjectId objectId) versionNumber) directObject :: MonadParser m => m DirectObject directObject = (peek >>= dispatch) "direct object" where dispatch 't' = Boolean <$> boolean dispatch 'f' = Boolean <$> boolean dispatch '(' = StringObject <$> stringObject dispatch '<' = StringObject <$> stringObject <|> Dictionary <$> dictionary dispatch '/' = NameObject <$> name dispatch '[' = Array <$> array dispatch 'n' = nullObject *> return Null dispatch _ = Reference <$> reference {- defined before Number because Number is a prefix of it -} <|> NumberObject <$> number -- -- Object -- data Object = Direct DirectObject | Stream { header :: Dictionary , streamContent :: ByteString } deriving Show instance Output Object where output (Direct d) = output d output (Stream {header, streamContent}) = mconcat [ output header, newLine , Output.line "stream" , byteString streamContent , "endstream" ] -- -- Occurrence -- data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show outputOccurrence :: IndexedObjects -> Occurrence -> OBuilder outputOccurrence _ (Comment c) = Output.line c outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) = saveOffset (Object objectId) >> mconcat [ Output.line (printf "%d %d obj" (getObjectId objectId) versionNumber) , output (objects ! objectId), newLine , Output.line "endobj" ] outputBody :: ([Occurrence], IndexedObjects) -> OBuilder outputBody (occurrences, objects) = output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef ------------------------------------- -- XREF TABLE ------------------------------------- -- -- XRefEntry -- data XRefEntry = InUse { offset :: Offset , generation :: Int } | Free { nextFree :: ObjectId , generation :: Int } deriving Show instance Output XRefEntry where output (InUse {offset, generation}) = Output.line (printf "%010d %05d n " (getOffset offset) generation) output (Free {nextFree, generation}) = Output.line (printf "%010d %05d f " (getObjectId nextFree) generation) entry :: Parser u XRefEntry entry = do (big, small) <- (,) <$> integer <*> integer (inUse big small <|> free big small "XRef entry") <* blank where inUse :: Int -> Int -> Parser u XRefEntry inUse big generation = char 'n' *> return (InUse {offset = Offset big, generation}) free :: Int -> Int -> Parser u XRefEntry free big generation = char 'f' *> return (Free {nextFree = ObjectId big, generation}) -- -- XRefSubSection -- data XRefSubSection = XRefSubSection { firstObjectId :: ObjectId , entries :: [XRefEntry] } deriving Show instance Output XRefSubSection where output (XRefSubSection {firstObjectId, entries}) = Output.line (printf "%d %d" (getObjectId firstObjectId) (length entries)) `mappend` output entries xRefSubSection :: Parser u XRefSubSection xRefSubSection = do (firstId, entriesNumber) <- (,) <$> integer <*> integer "XRef subsection" entries <- count entriesNumber entry return $ XRefSubSection {firstObjectId = ObjectId firstId, entries} type XRefSection = Map ObjectId XRefEntry instance Output XRefSection where output = output . sections where sections tmp = case Map.minViewWithKey tmp of Nothing -> [] Just ((objectId@(ObjectId value), firstEntry), rest) -> let (subSection, sndRest) = section objectId [firstEntry] (value + 1) rest in subSection : sections sndRest section firstObjectId stack nextValue tmp = let nextId = ObjectId nextValue in case Map.lookup nextId tmp of Nothing -> (XRefSubSection {firstObjectId, entries = reverse stack}, tmp) Just nextEntry -> section firstObjectId (nextEntry:stack) (nextValue + 1) (Map.delete nextId tmp) xRefSection :: Parser u XRefSection xRefSection = foldr addSubsection Map.empty <$> (line "xref" *> xRefSubSection `sepBy` many EOL.parser) where addSubsection (XRefSubSection {firstObjectId, entries}) = Map.union . Map.fromList $ zip ([firstObjectId..]) entries -- -- Structure -- data InputStructure = InputStructure { startOffset :: Int , inputStructure :: Structure } data Structure = Structure { xRef :: XRefSection , trailer :: Dictionary } deriving Show structure :: Parser u Structure structure = Structure <$> xRefSection <*> (string "trailer" *> blank *> dictionary <* EOL.parser) -- -- Flow -- data Flow = Flow { occurrencesStack :: [Occurrence] , tmpObjects :: IndexedObjects } deriving Show