{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE FlexibleInstances #-} module PDF.Object ( Dictionary , DirectObject(..) , Flow(..) , IndirectObjCoordinates(..) , InputStructure(..) , Name(..) , Number(..) , Object(..) , Occurrence(..) , StringObject(..) , Structure(..) , XRefEntry(..) , XRefSection , array , blank , dictionary , directObject , eofMarker , integer , line , magicNumber , name , number , object , outputBody , regular , stringObject , structure , toByteString ) where import Control.Applicative ((<|>), many) import Control.Monad.Reader (asks) import Data.Attoparsec.ByteString.Char8 (choice, count, option, sepBy) import Data.ByteString (ByteString) import qualified Data.ByteString as BS (concat) import qualified Data.ByteString.Char8 as Char8 ( cons, length, pack, singleton, snoc, unpack ) import Data.ByteString.Char8.Util (B16Int(..), b16ToBytes, unescape) import Data.Id (Id(..), IdMap, Indexed) import qualified Data.Id as Id ( at, delete, empty, fromList, lookup, minViewWithKey, union ) import Data.Map (Map) import qualified Data.Map as Map (fromList, toList) import qualified Data.Set as Set (fromList, member) import qualified PDF.EOL as EOL (Style(..), charset, parser) import qualified PDF.Output as Output (line, string) import PDF.Output ( OBuilder, OContext(..), Offset(..), Output(..), Resource(..), byteString , getOffset, join, newLine, saveOffset ) import PDF.Parser (MonadParser(..), Parser, (), octDigit, oneOf) import Text.Printf (printf) line :: MonadParser m => String -> m () line l = (string (Char8.pack l) *> blank *> return ()) printf "line «%s»" l magicNumber :: ByteString magicNumber = "%PDF-" eofMarker :: ByteString eofMarker = "%%EOF" whiteSpaceCharset :: String whiteSpaceCharset = "\0\t\12 " blank :: MonadParser m => m () blank = takeAll (`elem` (EOL.charset ++ whiteSpaceCharset)) *> pure () delimiterCharset :: String delimiterCharset = "()<>[]{}/%" regular :: Char -> Bool regular = not . (`elem` (EOL.charset ++ whiteSpaceCharset ++ delimiterCharset)) integer :: MonadParser m => m Int integer = decNumber <* blank "decimal integer" ------------------------------------- -- OBJECTS ------------------------------------- -- -- Boolean -- boolean :: MonadParser m => m Bool boolean = (string "true" *> return True) <|> (string "false" *> return False) "boolean" -- -- Number -- newtype Number = Number Double deriving (Eq, Show) instance Output Number where output (Number f) = Output.string $ case properFraction f of (n, 0) -> printf "%d" (n :: Int) _ -> printf "%f" f number :: MonadParser m => m Number number = Number <$> (sign <*> value) "number" where sign = (string "-" *> return negate) <|> option id (char '+' >> return id) value = floatNumber <|> (char '.' *> afterPoint) afterPoint = read . ("0." ++) . Char8.unpack <$> takeAll (`Set.member` digits) digits = Set.fromList ['0' .. '9'] -- -- StringObject -- data StringObject = Literal ByteString | Hexadecimal B16Int deriving (Eq, Show) instance Output StringObject where output (Literal s) = Output.string (printf "(%s)" (Char8.unpack s)) output (Hexadecimal (B16Int n)) = Output.string (printf "<%s>" (Char8.unpack n)) stringObject :: MonadParser m => m StringObject stringObject = Literal <$> (char '(' *> (BS.concat <$> literalString) <* char ')') <|> Hexadecimal . roundBytes <$> (char '<' *> hexNumber <* char '>') "string object (literal or hexadecimal)" where literalString = many literalStringBlock literalStringBlock = takeAll1 normalChar <|> matchingParenthesis <|> escapedChar normalChar = not . (`elem` ("\\()" :: String)) matchingParenthesis = mappend <$> (Char8.cons <$> char '(' <*> literalStringBlock) <*> string ")" escapedChar = Char8.cons <$> char '\\' <*> (Char8.singleton <$> oneOf "nrtbf()\\\n" <|> octalCode) octalCode = choice $ (\n -> Char8.pack <$> count n octDigit) <$> [1..3] roundBytes (B16Int bs) | Char8.length bs `mod` 2 == 1 = B16Int (bs `Char8.snoc` '0') | otherwise = B16Int bs toByteString :: StringObject -> ByteString toByteString (Hexadecimal h) = b16ToBytes h toByteString (Literal s) = unescape s -- -- Name -- newtype Name = Name String deriving (Eq, Ord, Show) instance Output Name where output (Name n) = Output.string ('/':n) name :: MonadParser m => m Name name = Name . Char8.unpack <$> (char '/' *> takeAll regular) "name" -- -- Array -- array :: MonadParser m => m [DirectObject] array = char '[' *> blank *> directObject `sepBy` blank <* blank <* char ']' "array" -- -- Dictionary -- type Dictionary = Map Name DirectObject instance Output Dictionary where output aDictionary = "<<" `mappend` keyValues `mappend` ">>" where keyValues = join " " $ outputKeyVal <$> Map.toList aDictionary outputKeyVal :: (Name, DirectObject) -> OBuilder outputKeyVal (key, val) = mconcat [output key, " ", output val] dictionary :: MonadParser m => m Dictionary dictionary = string "<<" *> blank *> keyValPairs <* string ">>" "dictionary" where keyVal = (,) <$> name <* blank <*> directObject keyValPairs = Map.fromList <$> keyVal `sepBy` blank <* blank -- -- Null -- nullObject :: MonadParser m => m () nullObject = string "null" *> return () "null object" -- -- Reference -- data IndirectObjCoordinates = IndirectObjCoordinates { objectId :: {-# UNPACK #-} !(Id Object) , versionNumber :: {-# UNPACK #-} !Int } deriving Show reference :: MonadParser m => m IndirectObjCoordinates reference = IndirectObjCoordinates <$> (fmap Id integer) <*> integer <* char 'R' "reference to an object" -- -- DirectObject -- data DirectObject = Boolean !Bool | NumberObject !Number | StringObject !StringObject | NameObject !Name | Array ![DirectObject] | Dictionary !Dictionary | Null | Reference !IndirectObjCoordinates deriving Show instance Output DirectObject where output (Boolean b) = output b output (NumberObject n) = output n output (StringObject s) = output s output (NameObject n) = output n output (Array a) = mconcat ["[", join " " a, "]"] output (Dictionary d) = output d output (Null) = "null" output (Reference (IndirectObjCoordinates {objectId, versionNumber})) = Output.string (printf "%d %d R" (getId objectId) versionNumber) directObject :: MonadParser m => m DirectObject directObject = (peek >>= dispatch) "direct object" where dispatch 't' = Boolean <$> boolean dispatch 'f' = Boolean <$> boolean dispatch '(' = StringObject <$> stringObject dispatch '<' = StringObject <$> stringObject <|> Dictionary <$> dictionary dispatch '/' = NameObject <$> name dispatch '[' = Array <$> array dispatch 'n' = nullObject *> return Null dispatch _ = Reference <$> reference {- defined before Number because Number is a prefix of it -} <|> NumberObject <$> number -- -- Object -- data Object = Direct DirectObject | Stream { header :: Dictionary , streamContent :: ByteString } deriving Show instance Output Object where output (Direct d) = output d output (Stream {header, streamContent}) = mconcat [ output header, newLine , Output.line "stream" , byteString streamContent , "endstream" ] object :: Int -> Id Object object = Id -- -- Occurrence -- data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show outputOccurrence :: (Indexed Object) -> Occurrence -> OBuilder outputOccurrence _ (Comment c) = Output.line c outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) = saveOffset (ObjectId $ getId objectId) >> mconcat [ Output.line (printf "%d %d obj" (getId objectId) versionNumber) , output (objects `Id.at` objectId), newLine , Output.line "endobj" ] outputBody :: ([Occurrence], (Indexed Object)) -> OBuilder outputBody (occurrences, objects) = output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef ------------------------------------- -- XREF TABLE ------------------------------------- -- -- XRefEntry -- data XRefEntry = InUse { offset :: Offset , generation :: Int } | Free { nextFree :: (Id Object) , generation :: Int } deriving Show instance Output XRefEntry where output xRefEntry = Output.string (build xRefEntry) `mappend` endXRefEntryLine where build (InUse {offset, generation}) = printf "%010d %05d n" (getOffset offset) generation build (Free {nextFree, generation}) = printf "%010d %05d f" (getId nextFree) generation endXRefEntryLine = OContext (asks padEOLToTwoBytes) >>= Output.line padEOLToTwoBytes EOL.CRLF = ("" :: String) padEOLToTwoBytes _ = " " entry :: Parser u XRefEntry entry = do (big, small) <- (,) <$> integer <*> integer (inUse big small <|> free big small "XRef entry") <* blank where inUse :: Int -> Int -> Parser u XRefEntry inUse big generation = char 'n' *> return (InUse {offset = Offset big, generation}) free :: Int -> Int -> Parser u XRefEntry free big generation = char 'f' *> return (Free {nextFree = Id big, generation}) -- -- XRefSubSection -- data XRefSubSection = XRefSubSection { firstObjectId :: (Id Object) , entries :: [XRefEntry] } deriving Show instance Output XRefSubSection where output (XRefSubSection {firstObjectId, entries}) = Output.line (printf "%d %d" (getId firstObjectId) (length entries)) `mappend` output entries xRefSubSection :: Parser u XRefSubSection xRefSubSection = do (firstId, entriesNumber) <- (,) <$> integer <*> integer "XRef subsection" entries <- count entriesNumber entry return $ XRefSubSection {firstObjectId = Id firstId, entries} type XRefSection = IdMap Object XRefEntry instance Output XRefSection where output = output . sections where sections tmp = case Id.minViewWithKey tmp of Nothing -> [] Just ((objectId@(Id value), firstEntry), rest) -> let (subSection, sndRest) = section objectId [firstEntry] (value + 1) rest in subSection : sections sndRest section firstObjectId stack nextValue tmp = let nextId = (Id nextValue :: Id Object) in case Id.lookup nextId tmp of Nothing -> (XRefSubSection {firstObjectId, entries = reverse stack}, tmp) Just nextEntry -> section firstObjectId (nextEntry:stack) (nextValue + 1) (Id.delete nextId tmp) xRefSection :: Parser u XRefSection xRefSection = foldr addSubsection Id.empty <$> (line "xref" *> xRefSubSection `sepBy` many EOL.parser) where addSubsection (XRefSubSection {firstObjectId, entries}) = Id.union . Id.fromList $ zip ([firstObjectId..]) entries -- -- Structure -- data InputStructure = InputStructure { startOffset :: Int , inputStructure :: Structure } data Structure = Structure { xRef :: XRefSection , trailer :: Dictionary } deriving Show structure :: Parser u Structure structure = Structure <$> xRefSection <*> (string "trailer" *> blank *> dictionary <* EOL.parser) -- -- Flow -- data Flow = Flow { occurrencesStack :: [Occurrence] , tmpObjects :: (Indexed Object) } deriving Show