Hufflepdf/src/PDF/Object.hs

364 lines
9.6 KiB
Haskell
Raw Normal View History

{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE FlexibleInstances #-}
module PDF.Object (
Content(..)
, DirectObject(..)
, Flow(..)
, IndirectObjCoordinates(..)
2019-05-18 09:01:13 +02:00
, InputStructure(..)
, Name(..)
, Number(..)
, Object(..)
, Occurrence(..)
, Parser
, Structure(..)
, XRefEntry(..)
, XRefSection
, XRefSubSection(..)
, blank
, dictionary
, directObject
, eofMarker
, integer
, line
, magicNumber
, structure
) where
import Data.ByteString.Lazy.Char8 (ByteString)
import Data.Int (Int64)
2019-05-18 09:01:13 +02:00
import Data.Map (Map, (!), mapWithKey)
import qualified Data.Map as Map (elems, fromList, toList)
import qualified PDF.EOL as EOL (charset, parser)
import qualified PDF.Output as Output (string)
import PDF.Output (
OBuilder, Offset(..), Output(..)
, byteString, getOffsets, join, newLine, nextLine, saveOffset
)
import Text.Parsec
import Text.Printf (printf)
type Parser u = Parsec ByteString u
line :: String -> Parser u ()
line l = string l *> EOL.parser *> return ()
magicNumber :: String
magicNumber = "%PDF-"
eofMarker :: ByteString
eofMarker = "%%EOF"
whiteSpaceCharset :: String
whiteSpaceCharset = "\0\t\12 "
whiteSpace :: Parser u ()
whiteSpace = oneOf whiteSpaceCharset *> return () <|> EOL.parser *> return ()
blank :: Parser u ()
blank = skipMany whiteSpace
delimiterCharset :: String
delimiterCharset = "()<>[]{}/%"
regular :: Parser u Char
regular = noneOf $ EOL.charset ++ whiteSpaceCharset ++ delimiterCharset
integer :: (Read a, Num a) => Parser u a
integer = read <$> many1 digit <* whiteSpace
-------------------------------------
-- OBJECTS
-------------------------------------
--
-- Boolean
--
boolean :: Parser u Bool
boolean = (string "true" *> return True) <|> (string "false" *> return False)
--
-- Number
--
newtype Number = Number Float deriving Show
instance Output Number where
output (Number f) = Output.string $
case properFraction f of
(n, 0) -> printf "%d" (n :: Int)
_ -> printf "%f" f
number :: Parser u Number
number = Number . read <$>
(mappend <$> sign <*> (integerPart <|> ('0':) <$> floatPart))
where
sign = string "-" <|> option "" (char '+' >> return "")
integerPart = mappend <$> many1 digit <*> option "" floatPart
floatPart = (:) <$> char '.' <*> (option "0" $ many1 digit)
--
-- StringObject
--
data StringObject = Literal String | Hexadecimal String deriving Show
instance Output StringObject where
output (Literal s) = Output.string (printf "(%s)" s)
output (Hexadecimal s) = Output.string (printf "<%s>" s)
stringObject :: Parser u StringObject
stringObject =
Literal <$> (char '(' *> (concat <$> many literalStringBlock) <* char ')')
<|> Hexadecimal <$> (char '<' *> many hexDigit <* char '>')
where
literalStringBlock = many1 (noneOf "\\()") <|> matchingParenthesis <|> escapedChar
matchingParenthesis =
(++) <$> ((:) <$> char '(' <*> literalStringBlock) <*> string ")"
escapedChar = (:) <$> char '\\' <*> ((:[]) <$> oneOf "nrtbf()\\" <|> octalCode)
octalCode = choice $ (\n -> count n octDigit) <$> [1..3]
--
-- Name
--
newtype Name = Name String deriving (Eq, Ord, Show)
instance Output Name where
output (Name n) = "/" `mappend` Output.string n
name :: Parser u Name
name = Name <$> (char '/' *> many regular)
--
-- Array
--
array :: Parser u [DirectObject]
array = char '[' *> blank *> directObject `endBy` blank <* char ']'
--
-- Dictionary
--
type Dictionary = Map Name DirectObject
instance Output Dictionary where
output dict =
"<<" `mappend` keyValues `mappend` ">>"
where
keyValues = join " " $ outputKeyVal <$> Map.toList dict
outputKeyVal :: (Name, DirectObject) -> OBuilder
outputKeyVal (key, val) = output key `mappend` " " `mappend` output val
dictionary :: Parser u Dictionary
dictionary =
try (string "<<" *> blank *> keyValPairs <* string ">>")
where
keyVal = (,) <$> name <* blank <*> directObject
keyValPairs = Map.fromList <$> keyVal `endBy` blank
--
-- Null
--
nullObject :: Parser u ()
nullObject = string "null" *> return ()
--
-- Reference
--
data IndirectObjCoordinates = IndirectObjCoordinates {
objectId :: Int
, versionNumber :: Int
} deriving Show
reference :: Parser u IndirectObjCoordinates
reference = IndirectObjCoordinates <$> integer <*> integer <* char 'R'
--
-- DirectObject
--
data DirectObject =
Boolean Bool
| NumberObject Number
| StringObject StringObject
| NameObject Name
| Array [DirectObject]
| Dictionary Dictionary
| Null
| Reference IndirectObjCoordinates
deriving Show
instance Output DirectObject where
output (Boolean b) = output b
output (NumberObject n) = output n
output (StringObject s) = output s
output (NameObject n) = output n
2019-05-16 22:41:14 +02:00
output (Array a) = "[" `mappend` join " " a `mappend` "]"
output (Dictionary d) = output d
output (Null) = "null"
output (Reference (IndirectObjCoordinates {objectId, versionNumber})) =
Output.string (printf "%d %d R" objectId versionNumber)
directObject :: Parser u DirectObject
directObject =
Boolean <$> try boolean
<|> Reference <$> try reference {- defined before Number because Number is a prefix of it -}
<|> NumberObject <$> try number
<|> StringObject <$> try stringObject
<|> NameObject <$> try name
<|> Array <$> try array
<|> Dictionary <$> try dictionary
<|> const Null <$> try nullObject
--
-- Object
--
data Object =
Direct DirectObject
| Stream {
header :: Dictionary
, streamContent :: ByteString
}
deriving Show
instance Output Object where
output (Direct d) = output d
output (Stream {header, streamContent}) =
output header
`nextLine` "stream"
`nextLine` byteString streamContent
2019-05-16 22:41:14 +02:00
`mappend` "endstream"
--
-- Occurrence
--
data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show
2019-05-16 22:41:14 +02:00
outputOccurrence :: Map Int Object -> Occurrence -> OBuilder
outputOccurrence _ (Comment c) =
Output.string (printf "%%%s" c) `mappend` newLine
outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) =
saveOffset (ObjectId objectId)
>> Output.string (printf "%d %d obj" objectId versionNumber)
2019-05-16 22:41:14 +02:00
`nextLine` output (objects ! objectId)
`nextLine` "endobj" `mappend` newLine
-------------------------------------
-- XREF TABLE
-------------------------------------
--
-- XRefEntry
--
data XRefEntry = InUse {
offset :: Int64
, generation :: Int
} | Free {
nextFree :: Int64
, generation :: Int
} deriving Show
instance Output XRefEntry where
output (InUse {offset, generation}) =
Output.string (printf "%010d %05d n " offset generation) `mappend` newLine
output (Free {nextFree, generation}) =
Output.string (printf "%010d %05d f " nextFree generation) `mappend` newLine
entry :: Parser u XRefEntry
entry = do
(big, small) <- (,) <$> integer <*> integer
(inUse big small <|> free big small) <* blank
where
inUse :: Int64 -> Int -> Parser u XRefEntry
inUse offset generation = char 'n' *> return (InUse {offset, generation})
free :: Int64 -> Int -> Parser u XRefEntry
free nextFree generation = char 'f' *> return (Free {nextFree, generation})
--
-- XRefSubSection
--
data XRefSubSection = XRefSubSection {
firstObjectId :: Int
, entriesNumber :: Int
, entries :: Map Int XRefEntry
} deriving Show
instance Output XRefSubSection where
output (XRefSubSection {firstObjectId, entriesNumber, entries}) =
Output.string (printf "%d %d" firstObjectId entriesNumber)
2019-05-16 22:41:14 +02:00
`nextLine` output (Map.elems entries)
xrefSubSection :: Parser u XRefSubSection
xrefSubSection = do
(firstObjectId, entriesNumber) <- (,) <$> integer <*> integer
entries <- Map.fromList . zip [firstObjectId..] <$> count entriesNumber entry
return $ XRefSubSection {firstObjectId, entriesNumber, entries}
type XRefSection = [XRefSubSection]
--
-- Structure
--
2019-05-18 09:01:13 +02:00
data InputStructure = InputStructure {
startOffset :: Int64
2019-05-18 09:01:13 +02:00
, inputStructure :: Structure
}
data Structure = Structure {
xrefSection :: XRefSection
, trailer :: Dictionary
} deriving Show
structure :: Parser u Structure
structure =
2019-05-18 09:01:13 +02:00
Structure
<$> (line "xref" *> xrefSubSection `sepBy` EOL.parser)
<*> (line "trailer" *> dictionary <* EOL.parser)
2019-05-18 09:01:13 +02:00
updateXrefs :: XRefSection -> Map Offset Int64 -> (XRefSection, Int64)
updateXrefs xrefSection offsets = (
updateSubSection <$> xrefSection
, offsets ! StartXRef
)
2019-05-18 09:01:13 +02:00
where
updateSubSection subSection@(XRefSubSection {firstObjectId, entries}) =
subSection {entries = mapWithKey (updateEntry firstObjectId) entries}
updateEntry firstObjectId index e@(InUse {}) =
e {offset = offsets ! (ObjectId $ firstObjectId + index)}
updateEntry _ _ e = e
--
-- Flow
--
data Flow = Flow {
occurrencesStack :: [Occurrence]
, tmpObjects :: Map Int Object
} deriving Show
--
-- Content
--
data Content = Content {
occurrences :: [Occurrence]
, objects :: Map Int Object
2019-05-18 09:01:13 +02:00
, docStructure :: Structure
} deriving Show
outputBody :: ([Occurrence], Map Int Object) -> OBuilder
outputBody (occurrences, objects) =
output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef
instance Output Content where
2019-05-18 09:01:13 +02:00
output (Content {occurrences, objects, docStructure}) =
fmap (updateXrefs xrefSection) <$> getOffsets (outputBody (occurrences, objects))
>>= \(body, (xref, startXRef)) ->
body
`mappend` "xref"
2019-05-18 09:01:13 +02:00
`nextLine` output xref
`mappend` "trailer"
`nextLine` output trailer
`nextLine` "startxref"
`nextLine` (Output.string (printf "%d" startXRef))
`nextLine` byteString eofMarker
2019-05-18 09:01:13 +02:00
where
Structure {xrefSection, trailer} = docStructure