Hufflepdf/src/PDF/Object.hs

191 lines
5.1 KiB
Haskell
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{-# LANGUAGE NamedFieldPuns #-}
module PDF.Object (
Content(..)
, DirectObject(..)
, EOLStyle(..)
, IndirectObjCoordinates(..)
, Object(..)
, Occurrence(..)
, Parser
, XRefEntry(..)
, XRefSection
, XRefSubSection(..)
, blank
, content
, dictionary
, directObject
, eol
, eolCharset
, integer
, line
) where
import Data.ByteString.Lazy.Char8 (ByteString)
import Data.Int (Int64)
import Data.Map (Map)
import qualified Data.Map as Map (empty, fromList)
import Text.Parsec
type Parser u = Parsec ByteString u
data EOLStyle = CR | LF | CRLF
type Dictionary = Map String DirectObject
data StringObj = Literal String | Hexadecimal String deriving Show
data DirectObject =
Boolean Bool
| Number Float
| String StringObj
| Name String
| Array [DirectObject]
| Dictionary Dictionary
| Null
| Reference IndirectObjCoordinates
deriving Show
data Object =
Direct DirectObject
| Stream {
header :: Dictionary
, streamContent :: ByteString
}
deriving Show
data IndirectObjCoordinates = IndirectObjCoordinates {
objectId :: Int
, versionNumber :: Int
} deriving Show
data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show
data XRefEntry = InUse {
offset :: Int64
, generation :: Int
} | Free {
nextFree :: Int64
, generation :: Int
} deriving Show
data XRefSubSection = XRefSubSection {
firstObjectId :: Int
, entriesNumber :: Int
, entries :: Map Int XRefEntry
} deriving Show
type XRefSection = [XRefSubSection]
data Content = Content {
startOffset :: Int64
, body :: [Occurrence]
, objects :: Map Int Object
, xrefSection :: XRefSection
, trailer :: Dictionary
, startXrefPosition :: Int64
} deriving Show
eolCharset :: String
eolCharset = "\r\n"
eol :: Parser u EOLStyle
eol =
try (string "\r\n" >> return CRLF)
<|> (string "\r" >> return CR)
<|> (string "\n" >> return LF)
line :: String -> Parser u ()
line l = string l *> eol *> return ()
whiteSpaceCharset :: String
whiteSpaceCharset = "\0\t\12 "
whiteSpace :: Parser u ()
whiteSpace = oneOf whiteSpaceCharset *> return () <|> eol *> return ()
blank :: Parser u ()
blank = skipMany whiteSpace
delimiterCharset :: String
delimiterCharset = "()<>[]{}/%"
regular :: Parser u Char
regular = noneOf $ eolCharset ++ whiteSpaceCharset ++ delimiterCharset
integer :: (Read a, Num a) => Parser u a
integer = read <$> many1 digit <* whiteSpace
directObject :: Parser u DirectObject
directObject =
Boolean <$> try boolean
<|> Reference <$> try reference {- defined before Number because Number is a prefix of it -}
<|> Number <$> try number
<|> String <$> try stringObj
<|> Name <$> try name
<|> Array <$> try array
<|> Dictionary <$> try dictionary
<|> const Null <$> try nullObject
boolean :: Parser u Bool
boolean = (string "true" *> return True) <|> (string "false" *> return False)
number :: Parser u Float
number = read <$> (mappend <$> sign <*> (integerPart <|> ('0':) <$> floatPart))
where
sign = string "-" <|> option "" (char '+' >> return "")
integerPart = mappend <$> many1 digit <*> option "" floatPart
floatPart = (:) <$> char '.' <*> (option "0" $ many1 digit)
stringObj :: Parser u StringObj
stringObj =
Literal <$> (char '(' *> (concat <$> many literalStringBlock) <* char ')')
<|> Hexadecimal <$> (char '<' *> many hexDigit <* char '>')
where
literalStringBlock = many1 (noneOf "\\()") <|> matchingParenthesis <|> escapedChar
matchingParenthesis =
(++) <$> ((:) <$> char '(' <*> literalStringBlock) <*> string ")"
escapedChar = (:) <$> char '\\' <*> ((:[]) <$> oneOf "nrtbf()\\" <|> octalCode)
octalCode = choice $ (\n -> count n octDigit) <$> [1..3]
name :: Parser u String
name = char '/' *> many regular
array :: Parser u [DirectObject]
array = char '[' *> blank *> directObject `endBy` blank <* char ']'
dictionary :: Parser u Dictionary
dictionary =
try (string "<<" *> blank *> keyValPairs <* string ">>")
where
keyVal = (,) <$> name <* blank <*> directObject
keyValPairs = Map.fromList <$> keyVal `endBy` blank
nullObject :: Parser u ()
nullObject = string "null" *> return ()
reference :: Parser u IndirectObjCoordinates
reference = IndirectObjCoordinates <$> integer <*> integer <* char 'R'
entry :: Parser u XRefEntry
entry = do
(big, small) <- (,) <$> integer <*> integer
(inUse big small <|> free big small) <* blank
where
inUse :: Int64 -> Int -> Parser u XRefEntry
inUse offset generation = char 'n' *> return (InUse {offset, generation})
free :: Int64 -> Int -> Parser u XRefEntry
free nextFree generation = char 'f' *> return (Free {nextFree, generation})
xrefSubSection :: Parser u XRefSubSection
xrefSubSection = do
(firstObjectId, entriesNumber) <- (,) <$> integer <*> integer
entries <- Map.fromList . zip [firstObjectId..] <$> count entriesNumber entry
return $ XRefSubSection {firstObjectId, entriesNumber, entries}
content :: Parser u Content
content =
Content 0 [] Map.empty
<$> (line "xref" *> xrefSubSection `sepBy` eol)
<*> (line "trailer" *> dictionary <* eol)
<*> (line "startxref" *> integer)