Hufflepdf/src/PDF/Object.hs

195 lines
5.2 KiB
Haskell
Raw Normal View History

{-# LANGUAGE NamedFieldPuns #-}
module PDF.Object (
Content(..)
, DirectObject(..)
, EOLStyle(..)
, IndirectObjCoordinates(..)
, Object(..)
, Occurrence(..)
, Parser
, XRefEntry(..)
, XRefSection
, XRefSubSection(..)
, content
, dictionary
, directObject
, eol
, eolCharset
, integer
, line
) where
import Data.ByteString.Lazy.Char8 (ByteString)
import Data.Int (Int64)
import Data.Map (Map)
import qualified Data.Map as Map (empty, fromList)
import Text.Parsec
--import Text.Parsec.ByteString.Lazy (Parser)
type Parser u = Parsec ByteString u
data EOLStyle = CR | LF | CRLF
type Dictionary = Map String DirectObject
data StringObj = Literal String | Hexadecimal String deriving Show
data DirectObject =
Boolean Bool
| Number Float
| String StringObj
| Name String
| Array [DirectObject]
| Dictionary Dictionary
| Null
| Reference IndirectObjCoordinates
deriving Show
data Object =
Direct DirectObject
| Stream {
header :: Dictionary
, streamContent :: ByteString
}
deriving Show
data IndirectObjCoordinates = IndirectObjCoordinates {
objectId :: Int
, versionNumber :: Int
} deriving Show
data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show
data XRefEntry = InUse {
offset :: Int64
, generation :: Int
} | Free {
nextFree :: Int64
, generation :: Int
} deriving Show
data XRefSubSection = XRefSubSection {
firstObjectId :: Int
, entriesNumber :: Int
, entries :: Map Int XRefEntry
} deriving Show
type XRefSection = [XRefSubSection]
data Content = Content {
body :: [Occurrence]
, objects :: Map Int Object
, xrefSection :: XRefSection
, trailer :: Dictionary
, startXrefPosition :: Int64
} deriving Show
eolCharset :: String
eolCharset = "\r\n"
eol :: Parser u EOLStyle
eol =
try (string "\r\n" >> return CRLF)
<|> (string "\r" >> return CR)
<|> (string "\n" >> return LF)
line :: String -> Parser u ()
line l = string l *> eol *> return ()
whiteSpaceCharset :: String
whiteSpaceCharset = "\0\t\12 "
whiteSpace :: Parser u ()
whiteSpace = oneOf whiteSpaceCharset *> return () <|> eol *> return ()
blank :: Parser u ()
blank = skipMany whiteSpace
delimiterCharset :: String
delimiterCharset = "()<>[]{}/%"
{-
delimiter :: Parser u Char
delimiter = oneOf delimiterCharset
-}
regular :: Parser u Char
regular = noneOf $ eolCharset ++ whiteSpaceCharset ++ delimiterCharset
integer :: (Read a, Num a) => Parser u a
integer = read <$> many1 digit <* whiteSpace
directObject :: Parser u DirectObject
directObject =
Boolean <$> try boolean
<|> Reference <$> try reference {- defined before Number because Number is a prefix of it -}
<|> Number <$> try number
<|> String <$> try stringObj
<|> Name <$> try name
<|> Array <$> try array
<|> Dictionary <$> try dictionary
<|> const Null <$> try nullObject
boolean :: Parser u Bool
boolean = (string "true" *> return True) <|> (string "false" *> return False)
number :: Parser u Float
number = read <$> (mappend <$> sign <*> (integerPart <|> ('0':) <$> floatPart))
where
sign = string "-" <|> option "" (char '+' >> return "")
integerPart = mappend <$> many1 digit <*> option "" floatPart
floatPart = (:) <$> char '.' <*> (option "0" $ many1 digit)
stringObj :: Parser u StringObj
stringObj =
Literal <$> (char '(' *> (concat <$> many literalStringBlock) <* char ')')
<|> Hexadecimal <$> (char '<' *> many hexDigit <* char '>')
where
literalStringBlock = many (noneOf "\\(") <|> matchingParenthesis <|> escapedChar
matchingParenthesis =
(++) <$> ((:) <$> char '(' <*> literalStringBlock) <*> string ")"
escapedChar = (:) <$> char '\\' <*> ((:[]) <$> oneOf "nrtbf()\\" <|> octalCode)
octalCode = choice $ (\n -> count n octDigit) <$> [1..3]
name :: Parser u String
name = char '/' *> many regular
array :: Parser u [DirectObject]
array = char '[' *> blank *> directObject `endBy` blank <* char ']'
dictionary :: Parser u Dictionary
dictionary =
try (string "<<" *> blank *> keyValPairs <* string ">>")
where
keyVal = (,) <$> name <* blank <*> directObject
keyValPairs = Map.fromList <$> keyVal `endBy` blank
nullObject :: Parser u ()
nullObject = string "null" *> return ()
reference :: Parser u IndirectObjCoordinates
reference = IndirectObjCoordinates <$> integer <*> integer <* char 'R'
entry :: Parser u XRefEntry
entry = do
(big, small) <- (,) <$> integer <*> integer
(inUse big small <|> free big small) <* blank
where
inUse :: Int64 -> Int -> Parser u XRefEntry
inUse offset generation = char 'n' *> return (InUse {offset, generation})
free :: Int64 -> Int -> Parser u XRefEntry
free nextFree generation = char 'f' *> return (Free {nextFree, generation})
xrefSubSection :: Parser u XRefSubSection
xrefSubSection = do
(firstObjectId, entriesNumber) <- (,) <$> integer <*> integer
entries <- Map.fromList . zip [firstObjectId..] <$> count entriesNumber entry
return $ XRefSubSection {firstObjectId, entriesNumber, entries}
content :: Parser u Content
content =
Content [] Map.empty
<$> (line "xref" *> xrefSubSection `sepBy` eol)
<*> (line "trailer" *> dictionary <* eol)
<*> (line "startxref" *> integer)