Hufflepdf/src/PDF/Object.hs

382 lines
11 KiB
Haskell
Raw Normal View History

{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE FlexibleInstances #-}
module PDF.Object (
Dictionary
, DirectObject(..)
, Flow(..)
, IndexedObjects
, IndirectObjCoordinates(..)
2019-05-18 09:01:13 +02:00
, InputStructure(..)
, Name(..)
, Number(..)
, Object(..)
, Occurrence(..)
, StringObject(..)
, Structure(..)
, XRefEntry(..)
, XRefSection
, array
, blank
, dictionary
, directObject
, eofMarker
, integer
, line
, magicNumber
, name
, number
, outputBody
, regular
, stringObject
, structure
, toByteString
) where
import Control.Applicative ((<|>), many)
import Data.Attoparsec.ByteString.Char8 (choice, count, option, sepBy)
import Data.ByteString (ByteString)
import qualified Data.ByteString as BS (concat)
import qualified Data.ByteString.Char8 as Char8 (
cons, length, pack, singleton, snoc, unpack
)
import Data.ByteString.Char8.Util (B16Int(..), b16ToBytes, unescape)
import Data.Map (Map, (!))
import qualified Data.Map as Map (
delete, empty, fromList, lookup, minViewWithKey, toList, union
)
import qualified Data.Set as Set (fromList, member)
import qualified PDF.EOL as EOL (charset, parser)
import qualified PDF.Output as Output (line, string)
import PDF.Output (
OBuilder, ObjectId(..), Offset(..), Output(..), Resource(..)
, byteString, getObjectId, getOffset, join, newLine, saveOffset
)
import PDF.Parser (MonadParser(..), Parser, (<?>), octDigit, oneOf)
import Text.Printf (printf)
line :: MonadParser m => String -> m ()
2019-11-27 18:05:47 +01:00
line l = (string (Char8.pack l) *> blank *> return ()) <?> printf "line «%s»" l
2019-05-24 10:48:09 +02:00
magicNumber :: ByteString
magicNumber = "%PDF-"
eofMarker :: ByteString
eofMarker = "%%EOF"
whiteSpaceCharset :: String
whiteSpaceCharset = "\0\t\12 "
blank :: MonadParser m => m ()
blank = takeAll (`elem` (EOL.charset ++ whiteSpaceCharset)) *> pure ()
delimiterCharset :: String
delimiterCharset = "()<>[]{}/%"
2019-05-24 10:48:09 +02:00
regular :: Char -> Bool
regular = not . (`elem` (EOL.charset ++ whiteSpaceCharset ++ delimiterCharset))
integer :: MonadParser m => m Int
integer = decNumber <* blank <?> "decimal integer"
-------------------------------------
-- OBJECTS
-------------------------------------
type IndexedObjects = Map ObjectId Object
--
-- Boolean
--
boolean :: MonadParser m => m Bool
2019-05-24 10:48:09 +02:00
boolean =
(string "true" *> return True) <|> (string "false" *> return False) <?> "boolean"
--
-- Number
--
newtype Number = Number Double deriving (Eq, Show)
instance Output Number where
output (Number f) = Output.string $
case properFraction f of
(n, 0) -> printf "%d" (n :: Int)
_ -> printf "%f" f
number :: MonadParser m => m Number
number = Number <$> (sign <*> value) <?> "number"
where
sign = (string "-" *> return negate) <|> option id (char '+' >> return id)
value = floatNumber <|> (char '.' *> afterPoint)
afterPoint = read . ("0." ++) . Char8.unpack <$> takeAll (`Set.member` digits)
digits = Set.fromList ['0' .. '9']
--
-- StringObject
--
data StringObject = Literal ByteString | Hexadecimal B16Int deriving (Eq, Show)
instance Output StringObject where
output (Literal s) = Output.string (printf "(%s)" (Char8.unpack s))
output (Hexadecimal (B16Int n)) = Output.string (printf "<%s>" (Char8.unpack n))
stringObject :: MonadParser m => m StringObject
stringObject =
Literal <$> (char '(' *> (BS.concat <$> literalString) <* char ')')
<|> Hexadecimal . roundBytes <$> (char '<' *> hexNumber <* char '>')
2019-05-24 10:48:09 +02:00
<?> "string object (literal or hexadecimal)"
where
2019-05-24 10:48:09 +02:00
literalString = many literalStringBlock
literalStringBlock = takeAll1 normalChar <|> matchingParenthesis <|> escapedChar
normalChar = not . (`elem` ("\\()" :: String))
matchingParenthesis =
mappend <$> (Char8.cons <$> char '(' <*> literalStringBlock) <*> string ")"
2019-05-24 10:48:09 +02:00
escapedChar =
Char8.cons <$> char '\\' <*> (Char8.singleton <$> oneOf "nrtbf()\\\n" <|> octalCode)
octalCode = choice $ (\n -> Char8.pack <$> count n octDigit) <$> [1..3]
roundBytes (B16Int bs)
| Char8.length bs `mod` 2 == 1 = B16Int (bs `Char8.snoc` '0')
| otherwise = B16Int bs
toByteString :: StringObject -> ByteString
toByteString (Hexadecimal h) = b16ToBytes h
toByteString (Literal s) = unescape s
--
-- Name
--
newtype Name = Name String deriving (Eq, Ord, Show)
instance Output Name where
output (Name n) = Output.string ('/':n)
name :: MonadParser m => m Name
name = Name . Char8.unpack <$> (char '/' *> takeAll regular) <?> "name"
--
-- Array
--
array :: MonadParser m => m [DirectObject]
2019-05-24 10:48:09 +02:00
array =
char '[' *> blank *> directObject `sepBy` blank <* blank <* char ']' <?> "array"
--
-- Dictionary
--
type Dictionary = Map Name DirectObject
instance Output Dictionary where
output aDictionary =
"<<" `mappend` keyValues `mappend` ">>"
where
keyValues = join " " $ outputKeyVal <$> Map.toList aDictionary
outputKeyVal :: (Name, DirectObject) -> OBuilder
outputKeyVal (key, val) = mconcat [output key, " ", output val]
dictionary :: MonadParser m => m Dictionary
dictionary =
2019-05-24 10:48:09 +02:00
string "<<" *> blank *> keyValPairs <* string ">>" <?> "dictionary"
where
keyVal = (,) <$> name <* blank <*> directObject
2019-05-24 10:48:09 +02:00
keyValPairs = Map.fromList <$> keyVal `sepBy` blank <* blank
--
-- Null
--
nullObject :: MonadParser m => m ()
2019-05-24 10:48:09 +02:00
nullObject = string "null" *> return () <?> "null object"
--
-- Reference
--
data IndirectObjCoordinates = IndirectObjCoordinates {
objectId :: ObjectId
, versionNumber :: Int
} deriving Show
reference :: MonadParser m => m IndirectObjCoordinates
reference = IndirectObjCoordinates
<$> (fmap ObjectId integer) <*> integer <* char 'R' <?> "reference to an object"
--
-- DirectObject
--
data DirectObject =
Boolean Bool
| NumberObject Number
| StringObject StringObject
| NameObject Name
| Array [DirectObject]
| Dictionary Dictionary
| Null
| Reference IndirectObjCoordinates
deriving Show
instance Output DirectObject where
output (Boolean b) = output b
output (NumberObject n) = output n
output (StringObject s) = output s
output (NameObject n) = output n
output (Array a) = mconcat ["[", join " " a, "]"]
output (Dictionary d) = output d
output (Null) = "null"
output (Reference (IndirectObjCoordinates {objectId, versionNumber})) =
Output.string (printf "%d %d R" (getObjectId objectId) versionNumber)
directObject :: MonadParser m => m DirectObject
directObject = (peek >>= dispatch) <?> "direct object"
where
dispatch 't' = Boolean <$> boolean
dispatch 'f' = Boolean <$> boolean
dispatch '(' = StringObject <$> stringObject
dispatch '<' = StringObject <$> stringObject <|> Dictionary <$> dictionary
dispatch '/' = NameObject <$> name
dispatch '[' = Array <$> array
dispatch 'n' = nullObject *> return Null
dispatch _ =
Reference <$> reference {- defined before Number because Number is a prefix of it -}
<|> NumberObject <$> number
--
-- Object
--
data Object =
Direct DirectObject
| Stream {
header :: Dictionary
, streamContent :: ByteString
}
deriving Show
instance Output Object where
output (Direct d) = output d
output (Stream {header, streamContent}) = mconcat [
output header, newLine
, Output.line "stream"
, byteString streamContent
, "endstream"
]
--
-- Occurrence
--
data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show
outputOccurrence :: IndexedObjects -> Occurrence -> OBuilder
outputOccurrence _ (Comment c) = Output.line c
2019-05-16 22:41:14 +02:00
outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) =
saveOffset (Object objectId) >> mconcat [
Output.line (printf "%d %d obj" (getObjectId objectId) versionNumber)
, output (objects ! objectId), newLine
, Output.line "endobj"
]
2019-05-16 22:41:14 +02:00
outputBody :: ([Occurrence], IndexedObjects) -> OBuilder
outputBody (occurrences, objects) =
output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef
-------------------------------------
-- XREF TABLE
-------------------------------------
--
-- XRefEntry
--
data XRefEntry = InUse {
offset :: Offset
, generation :: Int
} | Free {
nextFree :: ObjectId
, generation :: Int
} deriving Show
instance Output XRefEntry where
output (InUse {offset, generation}) =
Output.line (printf "%010d %05d n " (getOffset offset) generation)
output (Free {nextFree, generation}) =
Output.line (printf "%010d %05d f " (getObjectId nextFree) generation)
entry :: Parser u XRefEntry
entry = do
(big, small) <- (,) <$> integer <*> integer
2019-05-24 10:48:09 +02:00
(inUse big small <|> free big small <?> "XRef entry") <* blank
where
2019-05-24 10:48:09 +02:00
inUse :: Int -> Int -> Parser u XRefEntry
inUse big generation =
char 'n' *> return (InUse {offset = Offset big, generation})
2019-05-24 10:48:09 +02:00
free :: Int -> Int -> Parser u XRefEntry
free big generation =
char 'f' *> return (Free {nextFree = ObjectId big, generation})
--
-- XRefSubSection
--
data XRefSubSection = XRefSubSection {
firstObjectId :: ObjectId
, entries :: [XRefEntry]
} deriving Show
instance Output XRefSubSection where
output (XRefSubSection {firstObjectId, entries}) =
Output.line (printf "%d %d" (getObjectId firstObjectId) (length entries))
`mappend` output entries
xRefSubSection :: Parser u XRefSubSection
xRefSubSection = do
(firstId, entriesNumber) <- (,) <$> integer <*> integer <?> "XRef subsection"
entries <- count entriesNumber entry
return $ XRefSubSection {firstObjectId = ObjectId firstId, entries}
type XRefSection = Map ObjectId XRefEntry
instance Output XRefSection where
output = output . sections
where
sections tmp =
case Map.minViewWithKey tmp of
Nothing -> []
Just ((objectId@(ObjectId value), firstEntry), rest) ->
let (subSection, sndRest) = section objectId [firstEntry] (value + 1) rest in
subSection : sections sndRest
section firstObjectId stack nextValue tmp =
let nextId = ObjectId nextValue in
case Map.lookup nextId tmp of
Nothing -> (XRefSubSection {firstObjectId, entries = reverse stack}, tmp)
Just nextEntry ->
section firstObjectId (nextEntry:stack) (nextValue + 1) (Map.delete nextId tmp)
xRefSection :: Parser u XRefSection
xRefSection = foldr addSubsection Map.empty <$>
(line "xref" *> xRefSubSection `sepBy` many EOL.parser)
where
addSubsection (XRefSubSection {firstObjectId, entries}) =
Map.union . Map.fromList $ zip ([firstObjectId..]) entries
--
-- Structure
--
2019-05-18 09:01:13 +02:00
data InputStructure = InputStructure {
2019-05-24 10:48:09 +02:00
startOffset :: Int
2019-05-18 09:01:13 +02:00
, inputStructure :: Structure
}
data Structure = Structure {
xRef :: XRefSection
2019-05-18 09:01:13 +02:00
, trailer :: Dictionary
} deriving Show
structure :: Parser u Structure
structure =
2019-05-18 09:01:13 +02:00
Structure
<$> xRefSection
<*> (string "trailer" *> blank *> dictionary <* EOL.parser)
--
-- Flow
--
data Flow = Flow {
occurrencesStack :: [Occurrence]
, tmpObjects :: IndexedObjects
} deriving Show