Hufflepdf/src/PDF/Object.hs

355 lines
9.2 KiB
Haskell
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module PDF.Object (
Content(..)
, DirectObject(..)
, Flow(..)
, IndirectObjCoordinates(..)
, Name(..)
, Number(..)
, Object(..)
, Occurrence(..)
, Parser
, Structure(..)
, XRefEntry(..)
, XRefSection
, XRefSubSection(..)
, blank
, dictionary
, directObject
, eofMarker
, integer
, line
, magicNumber
, structure
) where
import Data.ByteString.Lazy.Char8 (ByteString)
import Data.Int (Int64)
import Data.Map (Map, (!))
import qualified Data.Map as Map (elems, fromList, toList)
import qualified PDF.EOL as EOL (charset, parser)
import qualified PDF.Output as Output (string)
import PDF.Output (
OBuilder, Offset(..), Output(..)
, byteString, getOffsets, join, newLine, nextLine, saveOffset
)
import Text.Parsec
import Text.Printf (printf)
type Parser u = Parsec ByteString u
line :: String -> Parser u ()
line l = string l *> EOL.parser *> return ()
magicNumber :: String
magicNumber = "%PDF-"
eofMarker :: ByteString
eofMarker = "%%EOF"
whiteSpaceCharset :: String
whiteSpaceCharset = "\0\t\12 "
whiteSpace :: Parser u ()
whiteSpace = oneOf whiteSpaceCharset *> return () <|> EOL.parser *> return ()
blank :: Parser u ()
blank = skipMany whiteSpace
delimiterCharset :: String
delimiterCharset = "()<>[]{}/%"
regular :: Parser u Char
regular = noneOf $ EOL.charset ++ whiteSpaceCharset ++ delimiterCharset
integer :: (Read a, Num a) => Parser u a
integer = read <$> many1 digit <* whiteSpace
-------------------------------------
-- OBJECTS
-------------------------------------
--
-- Boolean
--
boolean :: Parser u Bool
boolean = (string "true" *> return True) <|> (string "false" *> return False)
--
-- Number
--
newtype Number = Number Float deriving Show
instance Output Number where
output (Number f) = Output.string $
case properFraction f of
(n, 0) -> printf "%d" (n :: Int)
_ -> printf "%f" f
number :: Parser u Number
number = Number . read <$>
(mappend <$> sign <*> (integerPart <|> ('0':) <$> floatPart))
where
sign = string "-" <|> option "" (char '+' >> return "")
integerPart = mappend <$> many1 digit <*> option "" floatPart
floatPart = (:) <$> char '.' <*> (option "0" $ many1 digit)
--
-- StringObject
--
data StringObject = Literal String | Hexadecimal String deriving Show
instance Output StringObject where
output (Literal s) = Output.string (printf "(%s)" s)
output (Hexadecimal s) = Output.string (printf "<%s>" s)
stringObject :: Parser u StringObject
stringObject =
Literal <$> (char '(' *> (concat <$> many literalStringBlock) <* char ')')
<|> Hexadecimal <$> (char '<' *> many hexDigit <* char '>')
where
literalStringBlock = many1 (noneOf "\\()") <|> matchingParenthesis <|> escapedChar
matchingParenthesis =
(++) <$> ((:) <$> char '(' <*> literalStringBlock) <*> string ")"
escapedChar = (:) <$> char '\\' <*> ((:[]) <$> oneOf "nrtbf()\\" <|> octalCode)
octalCode = choice $ (\n -> count n octDigit) <$> [1..3]
--
-- Name
--
newtype Name = Name String deriving (Eq, Ord, Show)
instance Output Name where
output (Name n) = "/" `mappend` Output.string n
name :: Parser u Name
name = Name <$> (char '/' *> many regular)
--
-- Array
--
array :: Parser u [DirectObject]
array = char '[' *> blank *> directObject `endBy` blank <* char ']'
--
-- Dictionary
--
type Dictionary = Map Name DirectObject
instance Output Dictionary where
output dict =
"<<" `mappend` keyValues `mappend` ">>"
where
keyValues = join " " $ outputKeyVal <$> Map.toList dict
outputKeyVal :: (Name, DirectObject) -> OBuilder
outputKeyVal (key, val) = output key `mappend` " " `mappend` output val
dictionary :: Parser u Dictionary
dictionary =
try (string "<<" *> blank *> keyValPairs <* string ">>")
where
keyVal = (,) <$> name <* blank <*> directObject
keyValPairs = Map.fromList <$> keyVal `endBy` blank
--
-- Null
--
nullObject :: Parser u ()
nullObject = string "null" *> return ()
--
-- Reference
--
data IndirectObjCoordinates = IndirectObjCoordinates {
objectId :: Int
, versionNumber :: Int
} deriving Show
reference :: Parser u IndirectObjCoordinates
reference = IndirectObjCoordinates <$> integer <*> integer <* char 'R'
--
-- DirectObject
--
data DirectObject =
Boolean Bool
| NumberObject Number
| StringObject StringObject
| NameObject Name
| Array [DirectObject]
| Dictionary Dictionary
| Null
| Reference IndirectObjCoordinates
deriving Show
instance Output DirectObject where
output (Boolean b) = output b
output (NumberObject n) = output n
output (StringObject s) = output s
output (NameObject n) = output n
output (Array a) = "[" `mappend` join " " a `mappend` "]"
output (Dictionary d) = output d
output (Null) = "null"
output (Reference (IndirectObjCoordinates {objectId, versionNumber})) =
Output.string (printf "%d %d R" objectId versionNumber)
directObject :: Parser u DirectObject
directObject =
Boolean <$> try boolean
<|> Reference <$> try reference {- defined before Number because Number is a prefix of it -}
<|> NumberObject <$> try number
<|> StringObject <$> try stringObject
<|> NameObject <$> try name
<|> Array <$> try array
<|> Dictionary <$> try dictionary
<|> const Null <$> try nullObject
--
-- Object
--
data Object =
Direct DirectObject
| Stream {
header :: Dictionary
, streamContent :: ByteString
}
deriving Show
instance Output Object where
output (Direct d) = output d
output (Stream {header, streamContent}) =
output header
`nextLine` "stream"
`nextLine` byteString streamContent
`mappend` "endstream"
--
-- Occurrence
--
data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show
outputOccurrence :: Map Int Object -> Occurrence -> OBuilder
outputOccurrence _ (Comment c) =
Output.string (printf "%%%s" c) `mappend` newLine
outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) =
saveOffset (ObjectId objectId)
>> Output.string (printf "%d %d obj" objectId versionNumber)
`nextLine` output (objects ! objectId)
`nextLine` "endobj" `mappend` newLine
-------------------------------------
-- XREF TABLE
-------------------------------------
--
-- XRefEntry
--
data XRefEntry = InUse {
offset :: Int64
, generation :: Int
} | Free {
nextFree :: Int64
, generation :: Int
} deriving Show
instance Output XRefEntry where
output (InUse {offset, generation}) =
Output.string (printf "%010d %05d n " offset generation) `mappend` newLine
output (Free {nextFree, generation}) =
Output.string (printf "%010d %05d f " nextFree generation) `mappend` newLine
entry :: Parser u XRefEntry
entry = do
(big, small) <- (,) <$> integer <*> integer
(inUse big small <|> free big small) <* blank
where
inUse :: Int64 -> Int -> Parser u XRefEntry
inUse offset generation = char 'n' *> return (InUse {offset, generation})
free :: Int64 -> Int -> Parser u XRefEntry
free nextFree generation = char 'f' *> return (Free {nextFree, generation})
--
-- XRefSubSection
--
data XRefSubSection = XRefSubSection {
firstObjectId :: Int
, entriesNumber :: Int
, entries :: Map Int XRefEntry
} deriving Show
instance Output XRefSubSection where
output (XRefSubSection {firstObjectId, entriesNumber, entries}) =
Output.string (printf "%d %d" firstObjectId entriesNumber)
`nextLine` output (Map.elems entries)
xrefSubSection :: Parser u XRefSubSection
xrefSubSection = do
(firstObjectId, entriesNumber) <- (,) <$> integer <*> integer
entries <- Map.fromList . zip [firstObjectId..] <$> count entriesNumber entry
return $ XRefSubSection {firstObjectId, entriesNumber, entries}
type XRefSection = [XRefSubSection]
--
-- Structure
--
data Structure = Structure {
startOffset :: Int64
, xrefSection :: XRefSection
, inputTrailer :: Dictionary
--, startXRef :: Int64
} deriving Show
structure :: Parser u Structure
structure =
Structure 0
<$> (line "xref" *> xrefSubSection `sepBy` EOL.parser)
<*> (line "trailer" *> dictionary <* EOL.parser)
-- <*> (line "startxref" *> integer)
xrefFromOffsets :: Map Offset Int64 -> (XRefSection, Int64)
xrefFromOffsets offsets = (
[]
, offsets ! StartXRef
)
--
-- Flow
--
data Flow = Flow {
occurrencesStack :: [Occurrence]
, tmpObjects :: Map Int Object
} deriving Show
--
-- Content
--
data Content = Content {
occurrences :: [Occurrence]
, objects :: Map Int Object
, trailer :: Dictionary
} deriving Show
outputBody :: ([Occurrence], Map Int Object) -> OBuilder
outputBody (occurrences, objects) =
output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef
instance Output Content where
output (Content {occurrences, objects, trailer}) =
fmap xrefFromOffsets <$> getOffsets (outputBody (occurrences, objects))
>>= \(body, (xref, startXRef)) ->
body
`mappend` "xref"
`nextLine` "trailer"
--`nextLine` output xrefSection
--`mappend` "trailer"
`nextLine` output trailer
`nextLine` "startxref"
`nextLine` (Output.string (printf "%d" startXRef))
`nextLine` byteString eofMarker