Hufflepdf/src/PDF/Object.hs

390 lines
11 KiB
Haskell

{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE FlexibleInstances #-}
module PDF.Object (
Dictionary
, DirectObject(..)
, Flow(..)
, IndirectObjCoordinates(..)
, InputStructure(..)
, Name(..)
, Number(..)
, Object(..)
, Occurrence(..)
, StringObject(..)
, Structure(..)
, XRefEntry(..)
, XRefSection
, array
, blank
, dictionary
, directObject
, eofMarker
, integer
, line
, magicNumber
, name
, number
, object
, outputBody
, regular
, stringObject
, structure
, toByteString
) where
import Control.Applicative ((<|>), many)
import Control.Monad.Reader (asks)
import Data.Attoparsec.ByteString.Char8 (choice, count, option, sepBy)
import Data.ByteString (ByteString)
import qualified Data.ByteString as BS (concat)
import qualified Data.ByteString.Char8 as Char8 (
cons, length, pack, singleton, snoc, unpack
)
import Data.ByteString.Char8.Util (B16Int(..), b16ToBytes, unescape)
import Data.Id (Id(..), IdMap, Indexed)
import qualified Data.Id as Id (
at, delete, empty, fromList, lookup, minViewWithKey, union
)
import Data.Map (Map)
import qualified Data.Map as Map (fromList, toList)
import qualified Data.Set as Set (fromList, member)
import qualified PDF.EOL as EOL (Style(..), charset, parser)
import qualified PDF.Output as Output (line, string)
import PDF.Output (
OBuilder, OContext(..), Offset(..), Output(..), Resource(..), byteString
, getOffset, join, newLine, saveOffset
)
import PDF.Parser (MonadParser(..), Parser, (<?>), octDigit, oneOf)
import Text.Printf (printf)
line :: MonadParser m => String -> m ()
line l = (string (Char8.pack l) *> blank *> return ()) <?> printf "line «%s»" l
magicNumber :: ByteString
magicNumber = "%PDF-"
eofMarker :: ByteString
eofMarker = "%%EOF"
whiteSpaceCharset :: String
whiteSpaceCharset = "\0\t\12 "
blank :: MonadParser m => m ()
blank = takeAll (`elem` (EOL.charset ++ whiteSpaceCharset)) *> pure ()
delimiterCharset :: String
delimiterCharset = "()<>[]{}/%"
regular :: Char -> Bool
regular = not . (`elem` (EOL.charset ++ whiteSpaceCharset ++ delimiterCharset))
integer :: MonadParser m => m Int
integer = decNumber <* blank <?> "decimal integer"
-------------------------------------
-- OBJECTS
-------------------------------------
--
-- Boolean
--
boolean :: MonadParser m => m Bool
boolean =
(string "true" *> return True) <|> (string "false" *> return False) <?> "boolean"
--
-- Number
--
newtype Number = Number Double deriving (Eq, Show)
instance Output Number where
output (Number f) = Output.string $
case properFraction f of
(n, 0) -> printf "%d" (n :: Int)
_ -> printf "%f" f
number :: MonadParser m => m Number
number = Number <$> (sign <*> value) <?> "number"
where
sign = (string "-" *> return negate) <|> option id (char '+' >> return id)
value = floatNumber <|> (char '.' *> afterPoint)
afterPoint = read . ("0." ++) . Char8.unpack <$> takeAll (`Set.member` digits)
digits = Set.fromList ['0' .. '9']
--
-- StringObject
--
data StringObject = Literal ByteString | Hexadecimal B16Int deriving (Eq, Show)
instance Output StringObject where
output (Literal s) = Output.string (printf "(%s)" (Char8.unpack s))
output (Hexadecimal (B16Int n)) = Output.string (printf "<%s>" (Char8.unpack n))
stringObject :: MonadParser m => m StringObject
stringObject =
Literal <$> (char '(' *> (BS.concat <$> literalString) <* char ')')
<|> Hexadecimal . roundBytes <$> (char '<' *> hexNumber <* char '>')
<?> "string object (literal or hexadecimal)"
where
literalString = many literalStringBlock
literalStringBlock = takeAll1 normalChar <|> matchingParenthesis <|> escapedChar
normalChar = not . (`elem` ("\\()" :: String))
matchingParenthesis =
mappend <$> (Char8.cons <$> char '(' <*> literalStringBlock) <*> string ")"
escapedChar =
Char8.cons <$> char '\\' <*> (Char8.singleton <$> oneOf "nrtbf()\\\n" <|> octalCode)
octalCode = choice $ (\n -> Char8.pack <$> count n octDigit) <$> [1..3]
roundBytes (B16Int bs)
| Char8.length bs `mod` 2 == 1 = B16Int (bs `Char8.snoc` '0')
| otherwise = B16Int bs
toByteString :: StringObject -> ByteString
toByteString (Hexadecimal h) = b16ToBytes h
toByteString (Literal s) = unescape s
--
-- Name
--
newtype Name = Name String deriving (Eq, Ord, Show)
instance Output Name where
output (Name n) = Output.string ('/':n)
name :: MonadParser m => m Name
name = Name . Char8.unpack <$> (char '/' *> takeAll regular) <?> "name"
--
-- Array
--
array :: MonadParser m => m [DirectObject]
array =
char '[' *> blank *> directObject `sepBy` blank <* blank <* char ']' <?> "array"
--
-- Dictionary
--
type Dictionary = Map Name DirectObject
instance Output Dictionary where
output aDictionary =
"<<" `mappend` keyValues `mappend` ">>"
where
keyValues = join " " $ outputKeyVal <$> Map.toList aDictionary
outputKeyVal :: (Name, DirectObject) -> OBuilder
outputKeyVal (key, val) = mconcat [output key, " ", output val]
dictionary :: MonadParser m => m Dictionary
dictionary =
string "<<" *> blank *> keyValPairs <* string ">>" <?> "dictionary"
where
keyVal = (,) <$> name <* blank <*> directObject
keyValPairs = Map.fromList <$> keyVal `sepBy` blank <* blank
--
-- Null
--
nullObject :: MonadParser m => m ()
nullObject = string "null" *> return () <?> "null object"
--
-- Reference
--
data IndirectObjCoordinates = IndirectObjCoordinates {
objectId :: {-# UNPACK #-} !(Id Object)
, versionNumber :: {-# UNPACK #-} !Int
} deriving Show
reference :: MonadParser m => m IndirectObjCoordinates
reference = IndirectObjCoordinates
<$> (fmap Id integer) <*> integer <* char 'R' <?> "reference to an object"
--
-- DirectObject
--
data DirectObject =
Boolean !Bool
| NumberObject !Number
| StringObject !StringObject
| NameObject !Name
| Array ![DirectObject]
| Dictionary !Dictionary
| Null
| Reference !IndirectObjCoordinates
deriving Show
instance Output DirectObject where
output (Boolean b) = output b
output (NumberObject n) = output n
output (StringObject s) = output s
output (NameObject n) = output n
output (Array a) = mconcat ["[", join " " a, "]"]
output (Dictionary d) = output d
output (Null) = "null"
output (Reference (IndirectObjCoordinates {objectId, versionNumber})) =
Output.string (printf "%d %d R" (getId objectId) versionNumber)
directObject :: MonadParser m => m DirectObject
directObject = (peek >>= dispatch) <?> "direct object"
where
dispatch 't' = Boolean <$> boolean
dispatch 'f' = Boolean <$> boolean
dispatch '(' = StringObject <$> stringObject
dispatch '<' = StringObject <$> stringObject <|> Dictionary <$> dictionary
dispatch '/' = NameObject <$> name
dispatch '[' = Array <$> array
dispatch 'n' = nullObject *> return Null
dispatch _ =
Reference <$> reference {- defined before Number because Number is a prefix of it -}
<|> NumberObject <$> number
--
-- Object
--
data Object =
Direct DirectObject
| Stream {
header :: Dictionary
, streamContent :: ByteString
}
deriving Show
instance Output Object where
output (Direct d) = output d
output (Stream {header, streamContent}) = mconcat [
output header, newLine
, Output.line "stream"
, byteString streamContent
, "endstream"
]
object :: Int -> Id Object
object = Id
--
-- Occurrence
--
data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show
outputOccurrence :: (Indexed Object) -> Occurrence -> OBuilder
outputOccurrence _ (Comment c) = Output.line c
outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) =
saveOffset (ObjectId $ getId objectId) >> mconcat [
Output.line (printf "%d %d obj" (getId objectId) versionNumber)
, output (objects `Id.at` objectId), newLine
, Output.line "endobj"
]
outputBody :: ([Occurrence], (Indexed Object)) -> OBuilder
outputBody (occurrences, objects) =
output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef
-------------------------------------
-- XREF TABLE
-------------------------------------
--
-- XRefEntry
--
data XRefEntry = InUse {
offset :: Offset
, generation :: Int
} | Free {
nextFree :: (Id Object)
, generation :: Int
} deriving Show
instance Output XRefEntry where
output xRefEntry = Output.string (build xRefEntry) `mappend` endXRefEntryLine
where
build (InUse {offset, generation}) =
printf "%010d %05d n" (getOffset offset) generation
build (Free {nextFree, generation}) =
printf "%010d %05d f" (getId nextFree) generation
endXRefEntryLine = OContext (asks padEOLToTwoBytes) >>= Output.line
padEOLToTwoBytes EOL.CRLF = ("" :: String)
padEOLToTwoBytes _ = " "
entry :: Parser u XRefEntry
entry = do
(big, small) <- (,) <$> integer <*> integer
(inUse big small <|> free big small <?> "XRef entry") <* blank
where
inUse :: Int -> Int -> Parser u XRefEntry
inUse big generation =
char 'n' *> return (InUse {offset = Offset big, generation})
free :: Int -> Int -> Parser u XRefEntry
free big generation =
char 'f' *> return (Free {nextFree = Id big, generation})
--
-- XRefSubSection
--
data XRefSubSection = XRefSubSection {
firstObjectId :: (Id Object)
, entries :: [XRefEntry]
} deriving Show
instance Output XRefSubSection where
output (XRefSubSection {firstObjectId, entries}) =
Output.line (printf "%d %d" (getId firstObjectId) (length entries))
`mappend` output entries
xRefSubSection :: Parser u XRefSubSection
xRefSubSection = do
(firstId, entriesNumber) <- (,) <$> integer <*> integer <?> "XRef subsection"
entries <- count entriesNumber entry
return $ XRefSubSection {firstObjectId = Id firstId, entries}
type XRefSection = IdMap Object XRefEntry
instance Output XRefSection where
output = output . sections
where
sections tmp =
case Id.minViewWithKey tmp of
Nothing -> []
Just ((objectId@(Id value), firstEntry), rest) ->
let (subSection, sndRest) = section objectId [firstEntry] (value + 1) rest in
subSection : sections sndRest
section firstObjectId stack nextValue tmp =
let nextId = (Id nextValue :: Id Object) in
case Id.lookup nextId tmp of
Nothing -> (XRefSubSection {firstObjectId, entries = reverse stack}, tmp)
Just nextEntry ->
section firstObjectId (nextEntry:stack) (nextValue + 1) (Id.delete nextId tmp)
xRefSection :: Parser u XRefSection
xRefSection = foldr addSubsection Id.empty <$>
(line "xref" *> xRefSubSection `sepBy` many EOL.parser)
where
addSubsection (XRefSubSection {firstObjectId, entries}) =
Id.union . Id.fromList $ zip ([firstObjectId..]) entries
--
-- Structure
--
data InputStructure = InputStructure {
startOffset :: Int
, inputStructure :: Structure
}
data Structure = Structure {
xRef :: XRefSection
, trailer :: Dictionary
} deriving Show
structure :: Parser u Structure
structure =
Structure
<$> xRefSection
<*> (string "trailer" *> blank *> dictionary <* EOL.parser)
--
-- Flow
--
data Flow = Flow {
occurrencesStack :: [Occurrence]
, tmpObjects :: (Indexed Object)
} deriving Show