355 lines
9.2 KiB
Haskell
355 lines
9.2 KiB
Haskell
{-# LANGUAGE OverloadedStrings #-}
|
||
{-# LANGUAGE NamedFieldPuns #-}
|
||
{-# LANGUAGE FlexibleInstances #-}
|
||
{-# LANGUAGE MultiParamTypeClasses #-}
|
||
module PDF.Object (
|
||
Content(..)
|
||
, DirectObject(..)
|
||
, Flow(..)
|
||
, IndirectObjCoordinates(..)
|
||
, Name(..)
|
||
, Number(..)
|
||
, Object(..)
|
||
, Occurrence(..)
|
||
, Parser
|
||
, Structure(..)
|
||
, XRefEntry(..)
|
||
, XRefSection
|
||
, XRefSubSection(..)
|
||
, blank
|
||
, dictionary
|
||
, directObject
|
||
, eofMarker
|
||
, integer
|
||
, line
|
||
, magicNumber
|
||
, structure
|
||
) where
|
||
|
||
import Data.ByteString.Lazy.Char8 (ByteString)
|
||
import Data.Int (Int64)
|
||
import Data.Map (Map, (!))
|
||
import qualified Data.Map as Map (elems, fromList, toList)
|
||
import qualified PDF.EOL as EOL (charset, parser)
|
||
import qualified PDF.Output as Output (string)
|
||
import PDF.Output (
|
||
OBuilder, Offset(..), Output(..)
|
||
, byteString, getOffsets, join, newLine, nextLine, saveOffset
|
||
)
|
||
import Text.Parsec
|
||
import Text.Printf (printf)
|
||
|
||
type Parser u = Parsec ByteString u
|
||
|
||
line :: String -> Parser u ()
|
||
line l = string l *> EOL.parser *> return ()
|
||
|
||
magicNumber :: String
|
||
magicNumber = "%PDF-"
|
||
|
||
eofMarker :: ByteString
|
||
eofMarker = "%%EOF"
|
||
|
||
whiteSpaceCharset :: String
|
||
whiteSpaceCharset = "\0\t\12 "
|
||
|
||
whiteSpace :: Parser u ()
|
||
whiteSpace = oneOf whiteSpaceCharset *> return () <|> EOL.parser *> return ()
|
||
|
||
blank :: Parser u ()
|
||
blank = skipMany whiteSpace
|
||
|
||
delimiterCharset :: String
|
||
delimiterCharset = "()<>[]{}/%"
|
||
|
||
regular :: Parser u Char
|
||
regular = noneOf $ EOL.charset ++ whiteSpaceCharset ++ delimiterCharset
|
||
|
||
integer :: (Read a, Num a) => Parser u a
|
||
integer = read <$> many1 digit <* whiteSpace
|
||
|
||
-------------------------------------
|
||
-- OBJECTS
|
||
-------------------------------------
|
||
|
||
--
|
||
-- Boolean
|
||
--
|
||
boolean :: Parser u Bool
|
||
boolean = (string "true" *> return True) <|> (string "false" *> return False)
|
||
|
||
--
|
||
-- Number
|
||
--
|
||
newtype Number = Number Float deriving Show
|
||
|
||
instance Output Number where
|
||
output (Number f) = Output.string $
|
||
case properFraction f of
|
||
(n, 0) -> printf "%d" (n :: Int)
|
||
_ -> printf "%f" f
|
||
|
||
number :: Parser u Number
|
||
number = Number . read <$>
|
||
(mappend <$> sign <*> (integerPart <|> ('0':) <$> floatPart))
|
||
where
|
||
sign = string "-" <|> option "" (char '+' >> return "")
|
||
integerPart = mappend <$> many1 digit <*> option "" floatPart
|
||
floatPart = (:) <$> char '.' <*> (option "0" $ many1 digit)
|
||
|
||
--
|
||
-- StringObject
|
||
--
|
||
data StringObject = Literal String | Hexadecimal String deriving Show
|
||
|
||
instance Output StringObject where
|
||
output (Literal s) = Output.string (printf "(%s)" s)
|
||
output (Hexadecimal s) = Output.string (printf "<%s>" s)
|
||
|
||
stringObject :: Parser u StringObject
|
||
stringObject =
|
||
Literal <$> (char '(' *> (concat <$> many literalStringBlock) <* char ')')
|
||
<|> Hexadecimal <$> (char '<' *> many hexDigit <* char '>')
|
||
where
|
||
literalStringBlock = many1 (noneOf "\\()") <|> matchingParenthesis <|> escapedChar
|
||
matchingParenthesis =
|
||
(++) <$> ((:) <$> char '(' <*> literalStringBlock) <*> string ")"
|
||
escapedChar = (:) <$> char '\\' <*> ((:[]) <$> oneOf "nrtbf()\\" <|> octalCode)
|
||
octalCode = choice $ (\n -> count n octDigit) <$> [1..3]
|
||
|
||
--
|
||
-- Name
|
||
--
|
||
newtype Name = Name String deriving (Eq, Ord, Show)
|
||
|
||
instance Output Name where
|
||
output (Name n) = "/" `mappend` Output.string n
|
||
|
||
name :: Parser u Name
|
||
name = Name <$> (char '/' *> many regular)
|
||
|
||
--
|
||
-- Array
|
||
--
|
||
array :: Parser u [DirectObject]
|
||
array = char '[' *> blank *> directObject `endBy` blank <* char ']'
|
||
|
||
--
|
||
-- Dictionary
|
||
--
|
||
type Dictionary = Map Name DirectObject
|
||
|
||
instance Output Dictionary where
|
||
output dict =
|
||
"<<" `mappend` keyValues `mappend` ">>"
|
||
where
|
||
keyValues = join " " $ outputKeyVal <$> Map.toList dict
|
||
outputKeyVal :: (Name, DirectObject) -> OBuilder
|
||
outputKeyVal (key, val) = output key `mappend` " " `mappend` output val
|
||
|
||
dictionary :: Parser u Dictionary
|
||
dictionary =
|
||
try (string "<<" *> blank *> keyValPairs <* string ">>")
|
||
where
|
||
keyVal = (,) <$> name <* blank <*> directObject
|
||
keyValPairs = Map.fromList <$> keyVal `endBy` blank
|
||
|
||
--
|
||
-- Null
|
||
--
|
||
nullObject :: Parser u ()
|
||
nullObject = string "null" *> return ()
|
||
|
||
--
|
||
-- Reference
|
||
--
|
||
data IndirectObjCoordinates = IndirectObjCoordinates {
|
||
objectId :: Int
|
||
, versionNumber :: Int
|
||
} deriving Show
|
||
|
||
reference :: Parser u IndirectObjCoordinates
|
||
reference = IndirectObjCoordinates <$> integer <*> integer <* char 'R'
|
||
|
||
--
|
||
-- DirectObject
|
||
--
|
||
data DirectObject =
|
||
Boolean Bool
|
||
| NumberObject Number
|
||
| StringObject StringObject
|
||
| NameObject Name
|
||
| Array [DirectObject]
|
||
| Dictionary Dictionary
|
||
| Null
|
||
| Reference IndirectObjCoordinates
|
||
deriving Show
|
||
|
||
instance Output DirectObject where
|
||
output (Boolean b) = output b
|
||
output (NumberObject n) = output n
|
||
output (StringObject s) = output s
|
||
output (NameObject n) = output n
|
||
output (Array a) = "[" `mappend` join " " a `mappend` "]"
|
||
output (Dictionary d) = output d
|
||
output (Null) = "null"
|
||
output (Reference (IndirectObjCoordinates {objectId, versionNumber})) =
|
||
Output.string (printf "%d %d R" objectId versionNumber)
|
||
|
||
directObject :: Parser u DirectObject
|
||
directObject =
|
||
Boolean <$> try boolean
|
||
<|> Reference <$> try reference {- defined before Number because Number is a prefix of it -}
|
||
<|> NumberObject <$> try number
|
||
<|> StringObject <$> try stringObject
|
||
<|> NameObject <$> try name
|
||
<|> Array <$> try array
|
||
<|> Dictionary <$> try dictionary
|
||
<|> const Null <$> try nullObject
|
||
|
||
--
|
||
-- Object
|
||
--
|
||
data Object =
|
||
Direct DirectObject
|
||
| Stream {
|
||
header :: Dictionary
|
||
, streamContent :: ByteString
|
||
}
|
||
deriving Show
|
||
|
||
instance Output Object where
|
||
output (Direct d) = output d
|
||
output (Stream {header, streamContent}) =
|
||
output header
|
||
`nextLine` "stream"
|
||
`nextLine` byteString streamContent
|
||
`mappend` "endstream"
|
||
|
||
|
||
--
|
||
-- Occurrence
|
||
--
|
||
data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show
|
||
|
||
outputOccurrence :: Map Int Object -> Occurrence -> OBuilder
|
||
outputOccurrence _ (Comment c) =
|
||
Output.string (printf "%%%s" c) `mappend` newLine
|
||
outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) =
|
||
saveOffset (ObjectId objectId)
|
||
>> Output.string (printf "%d %d obj" objectId versionNumber)
|
||
`nextLine` output (objects ! objectId)
|
||
`nextLine` "endobj" `mappend` newLine
|
||
|
||
-------------------------------------
|
||
-- XREF TABLE
|
||
-------------------------------------
|
||
|
||
--
|
||
-- XRefEntry
|
||
--
|
||
data XRefEntry = InUse {
|
||
offset :: Int64
|
||
, generation :: Int
|
||
} | Free {
|
||
nextFree :: Int64
|
||
, generation :: Int
|
||
} deriving Show
|
||
|
||
instance Output XRefEntry where
|
||
output (InUse {offset, generation}) =
|
||
Output.string (printf "%010d %05d n " offset generation) `mappend` newLine
|
||
output (Free {nextFree, generation}) =
|
||
Output.string (printf "%010d %05d f " nextFree generation) `mappend` newLine
|
||
|
||
entry :: Parser u XRefEntry
|
||
entry = do
|
||
(big, small) <- (,) <$> integer <*> integer
|
||
(inUse big small <|> free big small) <* blank
|
||
where
|
||
inUse :: Int64 -> Int -> Parser u XRefEntry
|
||
inUse offset generation = char 'n' *> return (InUse {offset, generation})
|
||
free :: Int64 -> Int -> Parser u XRefEntry
|
||
free nextFree generation = char 'f' *> return (Free {nextFree, generation})
|
||
|
||
|
||
--
|
||
-- XRefSubSection
|
||
--
|
||
data XRefSubSection = XRefSubSection {
|
||
firstObjectId :: Int
|
||
, entriesNumber :: Int
|
||
, entries :: Map Int XRefEntry
|
||
} deriving Show
|
||
|
||
instance Output XRefSubSection where
|
||
output (XRefSubSection {firstObjectId, entriesNumber, entries}) =
|
||
Output.string (printf "%d %d" firstObjectId entriesNumber)
|
||
`nextLine` output (Map.elems entries)
|
||
|
||
xrefSubSection :: Parser u XRefSubSection
|
||
xrefSubSection = do
|
||
(firstObjectId, entriesNumber) <- (,) <$> integer <*> integer
|
||
entries <- Map.fromList . zip [firstObjectId..] <$> count entriesNumber entry
|
||
return $ XRefSubSection {firstObjectId, entriesNumber, entries}
|
||
|
||
type XRefSection = [XRefSubSection]
|
||
|
||
--
|
||
-- Structure
|
||
--
|
||
data Structure = Structure {
|
||
startOffset :: Int64
|
||
, xrefSection :: XRefSection
|
||
, inputTrailer :: Dictionary
|
||
--, startXRef :: Int64
|
||
} deriving Show
|
||
|
||
structure :: Parser u Structure
|
||
structure =
|
||
Structure 0
|
||
<$> (line "xref" *> xrefSubSection `sepBy` EOL.parser)
|
||
<*> (line "trailer" *> dictionary <* EOL.parser)
|
||
-- <*> (line "startxref" *> integer)
|
||
|
||
xrefFromOffsets :: Map Offset Int64 -> (XRefSection, Int64)
|
||
xrefFromOffsets offsets = (
|
||
[]
|
||
, offsets ! StartXRef
|
||
)
|
||
|
||
--
|
||
-- Flow
|
||
--
|
||
data Flow = Flow {
|
||
occurrencesStack :: [Occurrence]
|
||
, tmpObjects :: Map Int Object
|
||
} deriving Show
|
||
|
||
--
|
||
-- Content
|
||
--
|
||
data Content = Content {
|
||
occurrences :: [Occurrence]
|
||
, objects :: Map Int Object
|
||
, trailer :: Dictionary
|
||
} deriving Show
|
||
|
||
outputBody :: ([Occurrence], Map Int Object) -> OBuilder
|
||
outputBody (occurrences, objects) =
|
||
output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef
|
||
|
||
instance Output Content where
|
||
output (Content {occurrences, objects, trailer}) =
|
||
fmap xrefFromOffsets <$> getOffsets (outputBody (occurrences, objects))
|
||
>>= \(body, (xref, startXRef)) ->
|
||
body
|
||
`mappend` "xref"
|
||
`nextLine` "trailer"
|
||
--`nextLine` output xrefSection
|
||
--`mappend` "trailer"
|
||
`nextLine` output trailer
|
||
`nextLine` "startxref"
|
||
`nextLine` (Output.string (printf "%d" startXRef))
|
||
`nextLine` byteString eofMarker
|