Hufflepdf/src/PDF/Object.hs

398 lines
11 KiB
Haskell

{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE FlexibleInstances #-}
module PDF.Object (
Content(..)
, DirectObject(..)
, Flow(..)
, IndexedObjects
, IndirectObjCoordinates(..)
, InputStructure(..)
, Name(..)
, Number(..)
, Object(..)
, Occurrence(..)
, Structure(..)
, XRefEntry(..)
, XRefSection
, blank
, dictionary
, directObject
, eofMarker
, integer
, line
, magicNumber
, structure
) where
import Control.Applicative ((<|>))
import Data.ByteString.Char8 (ByteString)
import qualified Data.ByteString.Char8 as BS (
concat, cons, pack, singleton, unpack
)
import Data.Map (Map, (!), mapWithKey)
import qualified Data.Map as Map (
delete, empty, fromList, lookup, minViewWithKey, toList, union
)
import qualified PDF.EOL as EOL (charset, parser)
import qualified PDF.Output as Output (concat, line, string)
import PDF.Output (
OBuilder, ObjectId(..), Offset(..), Output(..), Resource(..)
, byteString, getObjectId, getOffset, getOffsets, join, newLine
, saveOffset
)
import PDF.Parser (
Parser, (<?>)
, char, choice, count, decNumber, hexNumber, many, octDigit, oneOf, option
, sepBy, string, takeAll, takeAll1
)
import Text.Printf (printf)
line :: String -> Parser u ()
line l = (string (BS.pack l) *> blank *> return ()) <?> printf "line «%s»" l
magicNumber :: ByteString
magicNumber = "%PDF-"
eofMarker :: ByteString
eofMarker = "%%EOF"
whiteSpaceCharset :: String
whiteSpaceCharset = "\0\t\12 "
blank :: Parser u ()
blank = takeAll (`elem` (EOL.charset ++ whiteSpaceCharset)) *> return ()
delimiterCharset :: String
delimiterCharset = "()<>[]{}/%"
regular :: Char -> Bool
regular = not . (`elem` (EOL.charset ++ whiteSpaceCharset ++ delimiterCharset))
integer :: (Read a, Num a) => Parser u a
integer = read . BS.unpack <$> decNumber <* blank <?> "decimal integer"
-------------------------------------
-- OBJECTS
-------------------------------------
type IndexedObjects = Map ObjectId Object
--
-- Boolean
--
boolean :: Parser u Bool
boolean =
(string "true" *> return True) <|> (string "false" *> return False) <?> "boolean"
--
-- Number
--
newtype Number = Number Double deriving Show
instance Output Number where
output (Number f) = Output.string $
case properFraction f of
(n, 0) -> printf "%d" (n :: Int)
_ -> printf "%f" f
number :: Parser u Number
number = Number . read . BS.unpack <$>
(mappend <$> sign <*> (integerPart <|> BS.cons '0' <$> floatPart))
<?> "number"
where
sign = string "-" <|> option "" (char '+' >> return "")
integerPart = mappend <$> decNumber <*> option "" floatPart
floatPart = BS.cons <$> char '.' <*> (option "0" $ decNumber)
--
-- StringObject
--
data StringObject = Literal String | Hexadecimal String deriving Show
instance Output StringObject where
output (Literal s) = Output.string (printf "(%s)" s)
output (Hexadecimal s) = Output.string (printf "<%s>" s)
stringObject :: Parser u StringObject
stringObject =
Literal . BS.unpack <$> (char '(' *> (BS.concat <$> literalString) <* char ')')
<|> Hexadecimal . BS.unpack <$> (char '<' *> hexNumber <* char '>')
<?> "string object (literal or hexadecimal)"
where
literalString = many literalStringBlock
literalStringBlock = takeAll1 normalChar <|> matchingParenthesis <|> escapedChar
normalChar = not . (`elem` ("\\()" :: String))
matchingParenthesis =
mappend <$> (BS.cons <$> char '(' <*> literalStringBlock) <*> string ")"
escapedChar =
BS.cons <$> char '\\' <*> (BS.singleton <$> oneOf "nrtbf()\\" <|> octalCode)
octalCode = choice $ (\n -> BS.pack <$> count n octDigit) <$> [1..3]
--
-- Name
--
newtype Name = Name String deriving (Eq, Ord, Show)
instance Output Name where
output (Name n) = Output.string ('/':n)
name :: Parser u Name
name = Name . BS.unpack <$> (char '/' *> takeAll regular) <?> "name"
--
-- Array
--
array :: Parser u [DirectObject]
array =
char '[' *> blank *> directObject `sepBy` blank <* blank <* char ']' <?> "array"
--
-- Dictionary
--
type Dictionary = Map Name DirectObject
instance Output Dictionary where
output dict =
"<<" `mappend` keyValues `mappend` ">>"
where
keyValues = join " " $ outputKeyVal <$> Map.toList dict
outputKeyVal :: (Name, DirectObject) -> OBuilder
outputKeyVal (key, val) = Output.concat [output key, " ", output val]
dictionary :: Parser u Dictionary
dictionary =
string "<<" *> blank *> keyValPairs <* string ">>" <?> "dictionary"
where
keyVal = (,) <$> name <* blank <*> directObject
keyValPairs = Map.fromList <$> keyVal `sepBy` blank <* blank
--
-- Null
--
nullObject :: Parser u ()
nullObject = string "null" *> return () <?> "null object"
--
-- Reference
--
data IndirectObjCoordinates = IndirectObjCoordinates {
objectId :: ObjectId
, versionNumber :: Int
} deriving Show
reference :: Parser u IndirectObjCoordinates
reference = IndirectObjCoordinates
<$> (fmap ObjectId integer) <*> integer <* char 'R' <?> "reference to an object"
--
-- DirectObject
--
data DirectObject =
Boolean Bool
| NumberObject Number
| StringObject StringObject
| NameObject Name
| Array [DirectObject]
| Dictionary Dictionary
| Null
| Reference IndirectObjCoordinates
deriving Show
instance Output DirectObject where
output (Boolean b) = output b
output (NumberObject n) = output n
output (StringObject s) = output s
output (NameObject n) = output n
output (Array a) = Output.concat ["[", join " " a, "]"]
output (Dictionary d) = output d
output (Null) = "null"
output (Reference (IndirectObjCoordinates {objectId, versionNumber})) =
Output.string (printf "%d %d R" (getObjectId objectId) versionNumber)
directObject :: Parser u DirectObject
directObject =
Boolean <$> boolean
<|> Reference <$> reference {- defined before Number because Number is a prefix of it -}
<|> NumberObject <$> number
<|> StringObject <$> stringObject
<|> NameObject <$> name
<|> Array <$> array
<|> Dictionary <$> dictionary
<|> const Null <$> nullObject
<?> "direct object"
--
-- Object
--
data Object =
Direct DirectObject
| Stream {
header :: Dictionary
, streamContent :: ByteString
}
deriving Show
instance Output Object where
output (Direct d) = output d
output (Stream {header, streamContent}) = Output.concat [
output header, newLine
, Output.line "stream"
, byteString streamContent
, "endstream"
]
--
-- Occurrence
--
data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show
outputOccurrence :: IndexedObjects -> Occurrence -> OBuilder
outputOccurrence _ (Comment c) = Output.line c
outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) =
saveOffset (Object objectId) >> Output.concat [
Output.line (printf "%d %d obj" (getObjectId objectId) versionNumber)
, output (objects ! objectId), newLine
, Output.line "endobj"
]
-------------------------------------
-- XREF TABLE
-------------------------------------
--
-- XRefEntry
--
data XRefEntry = InUse {
offset :: Offset
, generation :: Int
} | Free {
nextFree :: ObjectId
, generation :: Int
} deriving Show
instance Output XRefEntry where
output (InUse {offset, generation}) =
Output.line (printf "%010d %05d n " (getOffset offset) generation)
output (Free {nextFree, generation}) =
Output.line (printf "%010d %05d f " (getObjectId nextFree) generation)
entry :: Parser u XRefEntry
entry = do
(big, small) <- (,) <$> integer <*> integer
(inUse big small <|> free big small <?> "XRef entry") <* blank
where
inUse :: Int -> Int -> Parser u XRefEntry
inUse big generation =
char 'n' *> return (InUse {offset = Offset big, generation})
free :: Int -> Int -> Parser u XRefEntry
free big generation =
char 'f' *> return (Free {nextFree = ObjectId big, generation})
--
-- XRefSubSection
--
data XRefSubSection = XRefSubSection {
firstObjectId :: ObjectId
, entries :: [XRefEntry]
} deriving Show
instance Output XRefSubSection where
output (XRefSubSection {firstObjectId, entries}) =
Output.line (printf "%d %d" (getObjectId firstObjectId) (length entries))
`mappend` output entries
xRefSubSection :: Parser u XRefSubSection
xRefSubSection = do
(firstId, entriesNumber) <- (,) <$> integer <*> integer <?> "XRef subsection"
entries <- count entriesNumber entry
return $ XRefSubSection {firstObjectId = ObjectId firstId, entries}
type XRefSection = Map ObjectId XRefEntry
instance Output XRefSection where
output = output . sections
where
sections tmp =
case Map.minViewWithKey tmp of
Nothing -> []
Just ((objectId@(ObjectId value), firstEntry), rest) ->
let (subSection, sndRest) = section objectId [firstEntry] (value + 1) rest in
subSection : sections sndRest
section firstObjectId stack nextValue tmp =
let nextId = ObjectId nextValue in
case Map.lookup nextId tmp of
Nothing -> (XRefSubSection {firstObjectId, entries = reverse stack}, tmp)
Just nextEntry ->
section firstObjectId (nextEntry:stack) (nextValue + 1) (Map.delete nextId tmp)
xRefSection :: Parser u XRefSection
xRefSection = foldr addSubsection Map.empty <$>
(line "xref" *> xRefSubSection `sepBy` many EOL.parser)
where
addSubsection (XRefSubSection {firstObjectId, entries}) =
Map.union . Map.fromList $ zip ([firstObjectId..]) entries
--
-- Structure
--
data InputStructure = InputStructure {
startOffset :: Int
, inputStructure :: Structure
}
data Structure = Structure {
xRef :: XRefSection
, trailer :: Dictionary
} deriving Show
structure :: Parser u Structure
structure =
Structure
<$> xRefSection
<*> (string "trailer" *> blank *> dictionary <* EOL.parser)
updateXrefs :: XRefSection -> Map Resource Offset -> (XRefSection, Offset)
updateXrefs xRef offsets = (mapWithKey updateEntry xRef, offsets ! StartXRef)
where
updateEntry objectId e@(InUse {}) = e {offset = offsets ! (Object objectId)}
updateEntry _ e = e
--
-- Flow
--
data Flow = Flow {
occurrencesStack :: [Occurrence]
, tmpObjects :: IndexedObjects
} deriving Show
--
-- Content
--
data Content = Content {
occurrences :: [Occurrence]
, objects :: IndexedObjects
, docStructure :: Structure
} deriving Show
outputBody :: ([Occurrence], IndexedObjects) -> OBuilder
outputBody (occurrences, objects) =
output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef
instance Output Content where
output (Content {occurrences, objects, docStructure}) =
fmap (updateXrefs xRef) <$> getOffsets (outputBody (occurrences, objects))
>>= \(body, (xref, startXRef)) -> Output.concat [
body
, Output.line "xref"
, output xref
, Output.line "trailer"
, output trailer, newLine
, Output.line "startxref"
, Output.line (printf "%d" (getOffset startXRef))
, byteString eofMarker
]
where
Structure {xRef, trailer} = docStructure