Hufflepdf/src/PDF/Object.hs

371 lines
10 KiB
Haskell

{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE FlexibleInstances #-}
module PDF.Object (
Content(..)
, DirectObject(..)
, Flow(..)
, IndirectObjCoordinates(..)
, InputStructure(..)
, Name(..)
, Number(..)
, Object(..)
, Occurrence(..)
, Structure(..)
, XRefEntry(..)
, XRefSection
, XRefSubSection(..)
, blank
, dictionary
, directObject
, eofMarker
, integer
, line
, magicNumber
, structure
) where
import Control.Applicative ((<|>))
import Data.ByteString.Char8 (ByteString)
import qualified Data.ByteString.Char8 as BS (
concat, cons, pack, singleton, unpack
)
import Data.Map (Map, (!), mapWithKey)
import qualified Data.Map as Map (elems, fromList, toList)
import qualified PDF.EOL as EOL (charset, parser)
import qualified PDF.Output as Output (string)
import PDF.Output (
OBuilder, Offset(..), Output(..)
, byteString, getOffsets, join, newLine, nextLine, saveOffset
)
import PDF.Parser (
Parser, (<?>)
, char, choice, count, decNumber, hexNumber, many, octDigit, oneOf, option
, sepBy, string, takeAll, takeAll1
)
import Text.Printf (printf)
line :: String -> Parser u ()
line l = string (BS.pack l) *> EOL.parser *> return () <?> printf "line «%s»" l
magicNumber :: ByteString
magicNumber = "%PDF-"
eofMarker :: ByteString
eofMarker = "%%EOF"
whiteSpaceCharset :: String
whiteSpaceCharset = "\0\t\12 "
blank :: Parser u ()
blank = takeAll (`elem` (EOL.charset ++ whiteSpaceCharset)) *> return ()
delimiterCharset :: String
delimiterCharset = "()<>[]{}/%"
regular :: Char -> Bool
regular = not . (`elem` (EOL.charset ++ whiteSpaceCharset ++ delimiterCharset))
integer :: (Read a, Num a) => Parser u a
integer = read . BS.unpack <$> decNumber <* blank <?> "decimal integer"
-------------------------------------
-- OBJECTS
-------------------------------------
--
-- Boolean
--
boolean :: Parser u Bool
boolean =
(string "true" *> return True) <|> (string "false" *> return False) <?> "boolean"
--
-- Number
--
newtype Number = Number Float deriving Show
instance Output Number where
output (Number f) = Output.string $
case properFraction f of
(n, 0) -> printf "%d" (n :: Int)
_ -> printf "%f" f
number :: Parser u Number
number = Number . read . BS.unpack <$>
(mappend <$> sign <*> (integerPart <|> BS.cons '0' <$> floatPart))
<?> "number"
where
sign = string "-" <|> option "" (char '+' >> return "")
integerPart = mappend <$> decNumber <*> option "" floatPart
floatPart = BS.cons <$> char '.' <*> (option "0" $ decNumber)
--
-- StringObject
--
data StringObject = Literal String | Hexadecimal String deriving Show
instance Output StringObject where
output (Literal s) = Output.string (printf "(%s)" s)
output (Hexadecimal s) = Output.string (printf "<%s>" s)
stringObject :: Parser u StringObject
stringObject =
Literal . BS.unpack <$> (char '(' *> (BS.concat <$> literalString) <* char ')')
<|> Hexadecimal . BS.unpack <$> (char '<' *> hexNumber <* char '>')
<?> "string object (literal or hexadecimal)"
where
literalString = many literalStringBlock
literalStringBlock = takeAll1 normalChar <|> matchingParenthesis <|> escapedChar
normalChar = not . (`elem` ("\\()" :: String))
matchingParenthesis =
mappend <$> (BS.cons <$> char '(' <*> literalStringBlock) <*> string ")"
escapedChar =
BS.cons <$> char '\\' <*> (BS.singleton <$> oneOf "nrtbf()\\" <|> octalCode)
octalCode = choice $ (\n -> BS.pack <$> count n octDigit) <$> [1..3]
--
-- Name
--
newtype Name = Name String deriving (Eq, Ord, Show)
instance Output Name where
output (Name n) = "/" `mappend` Output.string n
name :: Parser u Name
name = Name . BS.unpack <$> (char '/' *> takeAll regular) <?> "name"
--
-- Array
--
array :: Parser u [DirectObject]
array =
char '[' *> blank *> directObject `sepBy` blank <* blank <* char ']' <?> "array"
--
-- Dictionary
--
type Dictionary = Map Name DirectObject
instance Output Dictionary where
output dict =
"<<" `mappend` keyValues `mappend` ">>"
where
keyValues = join " " $ outputKeyVal <$> Map.toList dict
outputKeyVal :: (Name, DirectObject) -> OBuilder
outputKeyVal (key, val) = output key `mappend` " " `mappend` output val
dictionary :: Parser u Dictionary
dictionary =
string "<<" *> blank *> keyValPairs <* string ">>" <?> "dictionary"
where
keyVal = (,) <$> name <* blank <*> directObject
keyValPairs = Map.fromList <$> keyVal `sepBy` blank <* blank
--
-- Null
--
nullObject :: Parser u ()
nullObject = string "null" *> return () <?> "null object"
--
-- Reference
--
data IndirectObjCoordinates = IndirectObjCoordinates {
objectId :: Int
, versionNumber :: Int
} deriving Show
reference :: Parser u IndirectObjCoordinates
reference = IndirectObjCoordinates <$> integer <*> integer <* char 'R' <?> "reference to an object"
--
-- DirectObject
--
data DirectObject =
Boolean Bool
| NumberObject Number
| StringObject StringObject
| NameObject Name
| Array [DirectObject]
| Dictionary Dictionary
| Null
| Reference IndirectObjCoordinates
deriving Show
instance Output DirectObject where
output (Boolean b) = output b
output (NumberObject n) = output n
output (StringObject s) = output s
output (NameObject n) = output n
output (Array a) = "[" `mappend` join " " a `mappend` "]"
output (Dictionary d) = output d
output (Null) = "null"
output (Reference (IndirectObjCoordinates {objectId, versionNumber})) =
Output.string (printf "%d %d R" objectId versionNumber)
directObject :: Parser u DirectObject
directObject =
Boolean <$> boolean
<|> Reference <$> reference {- defined before Number because Number is a prefix of it -}
<|> NumberObject <$> number
<|> StringObject <$> stringObject
<|> NameObject <$> name
<|> Array <$> array
<|> Dictionary <$> dictionary
<|> const Null <$> nullObject
<?> "direct object"
--
-- Object
--
data Object =
Direct DirectObject
| Stream {
header :: Dictionary
, streamContent :: ByteString
}
deriving Show
instance Output Object where
output (Direct d) = output d
output (Stream {header, streamContent}) =
output header
`nextLine` "stream"
`nextLine` byteString streamContent
`mappend` "endstream"
--
-- Occurrence
--
data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show
outputOccurrence :: Map Int Object -> Occurrence -> OBuilder
outputOccurrence _ (Comment c) =
Output.string (printf "%%%s" c) `mappend` newLine
outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) =
saveOffset (ObjectId objectId)
>> Output.string (printf "%d %d obj" objectId versionNumber)
`nextLine` output (objects ! objectId)
`nextLine` "endobj" `mappend` newLine
-------------------------------------
-- XREF TABLE
-------------------------------------
--
-- XRefEntry
--
data XRefEntry = InUse {
offset :: Int
, generation :: Int
} | Free {
nextFree :: Int
, generation :: Int
} deriving Show
instance Output XRefEntry where
output (InUse {offset, generation}) =
Output.string (printf "%010d %05d n " offset generation) `mappend` newLine
output (Free {nextFree, generation}) =
Output.string (printf "%010d %05d f " nextFree generation) `mappend` newLine
entry :: Parser u XRefEntry
entry = do
(big, small) <- (,) <$> integer <*> integer
(inUse big small <|> free big small <?> "XRef entry") <* blank
where
inUse :: Int -> Int -> Parser u XRefEntry
inUse offset generation = char 'n' *> return (InUse {offset, generation})
free :: Int -> Int -> Parser u XRefEntry
free nextFree generation = char 'f' *> return (Free {nextFree, generation})
--
-- XRefSubSection
--
data XRefSubSection = XRefSubSection {
firstObjectId :: Int
, entriesNumber :: Int
, entries :: Map Int XRefEntry
} deriving Show
instance Output XRefSubSection where
output (XRefSubSection {firstObjectId, entriesNumber, entries}) =
Output.string (printf "%d %d" firstObjectId entriesNumber)
`nextLine` output (Map.elems entries)
xrefSubSection :: Parser u XRefSubSection
xrefSubSection = do
(firstObjectId, entriesNumber) <- (,) <$> integer <*> integer <?> "XRef subsection"
entries <- Map.fromList . zip [firstObjectId..] <$> count entriesNumber entry
return $ XRefSubSection {firstObjectId, entriesNumber, entries}
type XRefSection = [XRefSubSection]
--
-- Structure
--
data InputStructure = InputStructure {
startOffset :: Int
, inputStructure :: Structure
}
data Structure = Structure {
xrefSection :: XRefSection
, trailer :: Dictionary
} deriving Show
structure :: Parser u Structure
structure =
Structure
<$> (line "xref" *> xrefSubSection `sepBy` EOL.parser)
<*> (line "trailer" *> dictionary <* EOL.parser)
updateXrefs :: XRefSection -> Map Offset Int -> (XRefSection, Int)
updateXrefs xrefSection offsets = (
updateSubSection <$> xrefSection
, offsets ! StartXRef
)
where
updateSubSection subSection@(XRefSubSection {firstObjectId, entries}) =
subSection {entries = mapWithKey (updateEntry firstObjectId) entries}
updateEntry firstObjectId index e@(InUse {}) =
e {offset = offsets ! (ObjectId $ firstObjectId + index)}
updateEntry _ _ e = e
--
-- Flow
--
data Flow = Flow {
occurrencesStack :: [Occurrence]
, tmpObjects :: Map Int Object
} deriving Show
--
-- Content
--
data Content = Content {
occurrences :: [Occurrence]
, objects :: Map Int Object
, docStructure :: Structure
} deriving Show
outputBody :: ([Occurrence], Map Int Object) -> OBuilder
outputBody (occurrences, objects) =
output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef
instance Output Content where
output (Content {occurrences, objects, docStructure}) =
fmap (updateXrefs xrefSection) <$> getOffsets (outputBody (occurrences, objects))
>>= \(body, (xref, startXRef)) ->
body
`mappend` "xref"
`nextLine` output xref
`mappend` "trailer"
`nextLine` output trailer
`nextLine` "startxref"
`nextLine` (Output.string (printf "%d" startXRef))
`nextLine` byteString eofMarker
where
Structure {xrefSection, trailer} = docStructure