Simplify XRef structure, clarify integer types and remove nextLine

This commit is contained in:
Tissevert 2019-09-20 22:39:14 +02:00
parent dd79cb3fc7
commit 699f830a45
4 changed files with 148 additions and 116 deletions

View File

@ -19,21 +19,21 @@ import PDF.Object (
, Structure(..)
, eofMarker, magicNumber, structure
)
import qualified PDF.Output as Output (render, string)
import PDF.Output (Output(..), nextLine)
import qualified PDF.Output as Output (render, line)
import PDF.Output (Output(..))
import PDF.Parser (Parser, runParser, string, takeAll)
import Text.Printf (printf)
data Document = Document {
pdfVersion :: String
, eolStyle :: EOL.Style
, contents :: [Content]
, updates :: [Content]
} deriving Show
instance Output Document where
output (Document {pdfVersion, contents}) =
Output.string (printf "%%PDF-%s" pdfVersion)
`nextLine` output contents
output (Document {pdfVersion, updates}) =
Output.line (printf "%%PDF-%s" pdfVersion)
`mappend` output updates
render :: Document -> Lazy.ByteString
render document@(Document {eolStyle}) = Output.render eolStyle document
@ -99,5 +99,5 @@ parseDocument input = do
(pdfVersion, eolStyle) <- runParser ((,) <$> version <*> EOL.parser) () input
startXref <- readStartXref eolStyle input
structuresRead <- readStructures startXref input
let contents = populate input <$> structuresRead
return $ Document {pdfVersion, eolStyle, contents}
let updates = populate input <$> structuresRead
return $ Document {pdfVersion, eolStyle, updates}

View File

@ -14,9 +14,10 @@ import qualified PDF.EOL as EOL (charset, parser)
import PDF.Object (
Content(..), DirectObject(..), Flow(..), IndirectObjCoordinates(..)
, InputStructure(..), Name(..), Number(..), Object(..), Occurrence(..)
, Structure(..), XRefEntry(..), XRefSection, XRefSubSection(..)
, Structure(..), XRefEntry(..), XRefSection
, blank, dictionary, directObject, integer, line
)
import PDF.Output (ObjectId(..), Offset(..))
import PDF.Parser (Parser, (<?>), block, char, on, option, runParser, takeAll)
data UserState = UserState {
@ -30,7 +31,7 @@ type SParser = Parser UserState
modifyFlow :: (Flow -> Flow) -> SParser ()
modifyFlow f = modify $ \state -> state {flow = f $ flow state}
addObject :: Int -> Object -> SParser ()
addObject :: ObjectId -> Object -> SParser ()
addObject objectId newObject = modifyFlow $ \flow -> flow {
tmpObjects = Map.insert objectId newObject $ tmpObjects flow
}
@ -45,29 +46,20 @@ comment = BS.unpack <$> (option "" afterPercent <* EOL.parser)
where
afterPercent = BS.cons <$> char '%' <*> takeAll (not . (`elem` EOL.charset))
lookupOffset :: Int -> XRefSection -> Maybe Int
lookupOffset _ [] = Nothing
lookupOffset objectId (xrefSubSection:others) =
let XRefSubSection {firstObjectId, entriesNumber, entries} = xrefSubSection in
let index = objectId - firstObjectId in
if index >= 0 && index < entriesNumber
then
case Map.lookup index entries of
Just (InUse {offset}) -> Just offset
_ -> Nothing
else lookupOffset objectId others
getOffset :: Int -> SParser Int
getOffset objectId = do
lookupOffset :: ObjectId -> SParser Offset
lookupOffset objectId = do
table <- gets xreferences
case lookupOffset objectId table of
case Map.lookup objectId table >>= entryOffset of
Nothing -> fail $
"obj " ++ show objectId ++ " is referenced but missing in XRef table"
Just offset -> return offset
where
entryOffset (InUse {offset}) = Just offset
entryOffset _ = Nothing
loadNumber :: Int -> SParser Float
loadNumber :: ObjectId -> SParser Float
loadNumber objectId = do
offset <- getOffset objectId
offset <- getOffset <$> lookupOffset objectId
objectStart <- BS.drop offset <$> gets input
indirectObjCoordinates `on` (objectStart :: ByteString) >> return ()
objectValue <- (!objectId) . tmpObjects <$> gets flow
@ -104,7 +96,7 @@ object = streamObject <|> Direct <$> directObject
indirectObjCoordinates :: SParser IndirectObjCoordinates
indirectObjCoordinates = do
objectId <- integer
objectId <- ObjectId <$> integer
coordinates <- IndirectObjCoordinates objectId <$> integer
objectValue <- line "obj" *> object <* blank <* line "endobj"
addObject objectId objectValue
@ -126,7 +118,7 @@ populate input structure =
}
where
docStructure = inputStructure structure
xreferences = xrefSection docStructure
xreferences = xRef docStructure
initialState = UserState {
input, xreferences, flow = Flow {
occurrencesStack = [], tmpObjects = Map.empty

View File

@ -14,7 +14,6 @@ module PDF.Object (
, Structure(..)
, XRefEntry(..)
, XRefSection
, XRefSubSection(..)
, blank
, dictionary
, directObject
@ -31,12 +30,15 @@ import qualified Data.ByteString.Char8 as BS (
concat, cons, pack, singleton, unpack
)
import Data.Map (Map, (!), mapWithKey)
import qualified Data.Map as Map (elems, fromList, toList)
import qualified Data.Map as Map (
delete, empty, fromList, lookup, minViewWithKey, toList, union
)
import qualified PDF.EOL as EOL (charset, parser)
import qualified PDF.Output as Output (string)
import qualified PDF.Output as Output (concat, line, string)
import PDF.Output (
OBuilder, Offset(..), Output(..)
, byteString, getOffsets, join, newLine, nextLine, saveOffset
OBuilder, ObjectId(..), Offset(..), Output(..), Resource(..)
, byteString, getObjectId, getOffset, getOffsets, join, newLine
, saveOffset
)
import PDF.Parser (
Parser, (<?>)
@ -73,6 +75,8 @@ integer = read . BS.unpack <$> decNumber <* blank <?> "decimal integer"
-- OBJECTS
-------------------------------------
type IndexedObjects = Map ObjectId Object
--
-- Boolean
--
@ -130,7 +134,7 @@ stringObject =
newtype Name = Name String deriving (Eq, Ord, Show)
instance Output Name where
output (Name n) = "/" `mappend` Output.string n
output (Name n) = Output.string ('/':n)
name :: Parser u Name
name = Name . BS.unpack <$> (char '/' *> takeAll regular) <?> "name"
@ -153,7 +157,7 @@ instance Output Dictionary where
where
keyValues = join " " $ outputKeyVal <$> Map.toList dict
outputKeyVal :: (Name, DirectObject) -> OBuilder
outputKeyVal (key, val) = output key `mappend` " " `mappend` output val
outputKeyVal (key, val) = Output.concat [output key, " ", output val]
dictionary :: Parser u Dictionary
dictionary =
@ -172,12 +176,13 @@ nullObject = string "null" *> return () <?> "null object"
-- Reference
--
data IndirectObjCoordinates = IndirectObjCoordinates {
objectId :: Int
objectId :: ObjectId
, versionNumber :: Int
} deriving Show
reference :: Parser u IndirectObjCoordinates
reference = IndirectObjCoordinates <$> integer <*> integer <* char 'R' <?> "reference to an object"
reference = IndirectObjCoordinates
<$> (fmap ObjectId integer) <*> integer <* char 'R' <?> "reference to an object"
--
-- DirectObject
@ -198,11 +203,11 @@ instance Output DirectObject where
output (NumberObject n) = output n
output (StringObject s) = output s
output (NameObject n) = output n
output (Array a) = "[" `mappend` join " " a `mappend` "]"
output (Array a) = Output.concat ["[", join " " a, "]"]
output (Dictionary d) = output d
output (Null) = "null"
output (Reference (IndirectObjCoordinates {objectId, versionNumber})) =
Output.string (printf "%d %d R" objectId versionNumber)
Output.string (printf "%d %d R" (getObjectId objectId) versionNumber)
directObject :: Parser u DirectObject
directObject =
@ -229,25 +234,26 @@ data Object =
instance Output Object where
output (Direct d) = output d
output (Stream {header, streamContent}) =
output header
`nextLine` "stream"
`nextLine` byteString streamContent
`mappend` "endstream"
output (Stream {header, streamContent}) = Output.concat [
output header, newLine
, Output.line "stream"
, byteString streamContent
, "endstream"
]
--
-- Occurrence
--
data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show
outputOccurrence :: Map Int Object -> Occurrence -> OBuilder
outputOccurrence _ (Comment c) =
Output.string c `mappend` newLine
outputOccurrence :: IndexedObjects -> Occurrence -> OBuilder
outputOccurrence _ (Comment c) = Output.line c
outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) =
saveOffset (ObjectId objectId)
>> Output.string (printf "%d %d obj" objectId versionNumber)
`nextLine` output (objects ! objectId)
`nextLine` "endobj" `mappend` newLine
saveOffset (Object objectId) >> Output.concat [
Output.line (printf "%d %d obj" (getObjectId objectId) versionNumber)
, output (objects ! objectId), newLine
, Output.line "endobj"
]
-------------------------------------
-- XREF TABLE
@ -257,18 +263,18 @@ outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNum
-- XRefEntry
--
data XRefEntry = InUse {
offset :: Int
offset :: Offset
, generation :: Int
} | Free {
nextFree :: Int
nextFree :: ObjectId
, generation :: Int
} deriving Show
instance Output XRefEntry where
output (InUse {offset, generation}) =
Output.string (printf "%010d %05d n " offset generation) `mappend` newLine
Output.line (printf "%010d %05d n " (getOffset offset) generation)
output (Free {nextFree, generation}) =
Output.string (printf "%010d %05d f " nextFree generation) `mappend` newLine
Output.line (printf "%010d %05d f " (getObjectId nextFree) generation)
entry :: Parser u XRefEntry
entry = do
@ -276,31 +282,56 @@ entry = do
(inUse big small <|> free big small <?> "XRef entry") <* blank
where
inUse :: Int -> Int -> Parser u XRefEntry
inUse offset generation = char 'n' *> return (InUse {offset, generation})
inUse big generation =
char 'n' *> return (InUse {offset = Offset big, generation})
free :: Int -> Int -> Parser u XRefEntry
free nextFree generation = char 'f' *> return (Free {nextFree, generation})
free big generation =
char 'f' *> return (Free {nextFree = ObjectId big, generation})
--
-- XRefSubSection
--
data XRefSubSection = XRefSubSection {
firstObjectId :: Int
, entriesNumber :: Int
, entries :: Map Int XRefEntry
firstObjectId :: ObjectId
, entries :: [XRefEntry]
} deriving Show
instance Output XRefSubSection where
output (XRefSubSection {firstObjectId, entriesNumber, entries}) =
Output.string (printf "%d %d" firstObjectId entriesNumber)
`nextLine` output (Map.elems entries)
output (XRefSubSection {firstObjectId, entries}) =
Output.line (printf "%d %d" (getObjectId firstObjectId) (length entries))
`mappend` output entries
xrefSubSection :: Parser u XRefSubSection
xrefSubSection = do
(firstObjectId, entriesNumber) <- (,) <$> integer <*> integer <?> "XRef subsection"
entries <- Map.fromList . zip [firstObjectId..] <$> count entriesNumber entry
return $ XRefSubSection {firstObjectId, entriesNumber, entries}
xRefSubSection :: Parser u XRefSubSection
xRefSubSection = do
(firstId, entriesNumber) <- (,) <$> integer <*> integer <?> "XRef subsection"
entries <- count entriesNumber entry
return $ XRefSubSection {firstObjectId = ObjectId firstId, entries}
type XRefSection = [XRefSubSection]
type XRefSection = Map ObjectId XRefEntry
instance Output XRefSection where
output = output . sections
where
sections tmp =
case Map.minViewWithKey tmp of
Nothing -> []
Just ((objectId@(ObjectId value), firstEntry), rest) ->
let (subSection, sndRest) = section objectId [firstEntry] (value + 1) rest in
subSection : sections sndRest
section firstObjectId stack nextValue tmp =
let nextId = ObjectId nextValue in
case Map.lookup nextId tmp of
Nothing -> (XRefSubSection {firstObjectId, entries = reverse stack}, tmp)
Just nextEntry ->
section firstObjectId (nextEntry:stack) (nextValue + 1) (Map.delete nextId tmp)
xRefSection :: Parser u XRefSection
xRefSection = foldr addSubsection Map.empty <$>
(line "xref" *> xRefSubSection `sepBy` EOL.parser)
where
addSubsection (XRefSubSection {firstObjectId, entries}) =
Map.union . Map.fromList $ zip ([firstObjectId..]) entries
--
-- Structure
@ -311,34 +342,28 @@ data InputStructure = InputStructure {
}
data Structure = Structure {
xrefSection :: XRefSection
xRef :: XRefSection
, trailer :: Dictionary
} deriving Show
structure :: Parser u Structure
structure =
Structure
<$> (line "xref" *> xrefSubSection `sepBy` EOL.parser)
<$> xRefSection
<*> (string "trailer" *> blank *> dictionary <* EOL.parser)
updateXrefs :: XRefSection -> Map Offset Int -> (XRefSection, Int)
updateXrefs xrefSection offsets = (
updateSubSection <$> xrefSection
, offsets ! StartXRef
)
updateXrefs :: XRefSection -> Map Resource Offset -> (XRefSection, Offset)
updateXrefs xRef offsets = (mapWithKey updateEntry xRef, offsets ! StartXRef)
where
updateSubSection subSection@(XRefSubSection {firstObjectId, entries}) =
subSection {entries = mapWithKey (updateEntry firstObjectId) entries}
updateEntry firstObjectId index e@(InUse {}) =
e {offset = offsets ! (ObjectId $ firstObjectId + index)}
updateEntry _ _ e = e
updateEntry objectId e@(InUse {}) = e {offset = offsets ! (Object objectId)}
updateEntry _ e = e
--
-- Flow
--
data Flow = Flow {
occurrencesStack :: [Occurrence]
, tmpObjects :: Map Int Object
, tmpObjects :: IndexedObjects
} deriving Show
--
@ -346,25 +371,26 @@ data Flow = Flow {
--
data Content = Content {
occurrences :: [Occurrence]
, objects :: Map Int Object
, objects :: IndexedObjects
, docStructure :: Structure
} deriving Show
outputBody :: ([Occurrence], Map Int Object) -> OBuilder
outputBody :: ([Occurrence], IndexedObjects) -> OBuilder
outputBody (occurrences, objects) =
output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef
instance Output Content where
output (Content {occurrences, objects, docStructure}) =
fmap (updateXrefs xrefSection) <$> getOffsets (outputBody (occurrences, objects))
>>= \(body, (xref, startXRef)) ->
body
`mappend` "xref"
`nextLine` output xref
`mappend` "trailer"
`nextLine` output trailer
`nextLine` "startxref"
`nextLine` (Output.string (printf "%d" startXRef))
`nextLine` byteString eofMarker
fmap (updateXrefs xRef) <$> getOffsets (outputBody (occurrences, objects))
>>= \(body, (xref, startXRef)) -> Output.concat [
body
, Output.line "xref"
, output xref
, Output.line "trailer"
, output trailer, newLine
, Output.line "startxref"
, Output.line (printf "%d" (getOffset startXRef))
, byteString eofMarker
]
where
Structure {xrefSection, trailer} = docStructure
Structure {xRef, trailer} = docStructure

View File

@ -2,17 +2,21 @@
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
module PDF.Output (
OBuilder
, ObjectId(..)
, OContext(..)
, Offset(..)
, Output(..)
, Resource(..)
, byteString
, char
, concat
, getOffsets
, join
, line
, newLine
, nextLine
, saveOffset
, string
, render
@ -28,10 +32,14 @@ import qualified Data.Map as Map (singleton)
import Data.String (IsString(..))
import Control.Monad.RWS (RWS, runRWS, ask, get, listen, modify, tell)
import qualified PDF.EOL as EOL (Style(..))
import Prelude hiding (concat)
data Offset = StartXRef | ObjectId Int deriving (Eq, Ord)
newtype ObjectId = ObjectId {getObjectId :: Int} deriving (Enum, Eq, Ord, Show)
newtype Offset = Offset {getOffset :: Int} deriving (Show)
newtype OContext a = OContext (RWS EOL.Style (Map Offset Int) Int a)
data Resource = StartXRef | Object ObjectId deriving (Eq, Ord)
newtype OContext a = OContext (RWS EOL.Style (Map Resource Offset) Offset a)
type OBuilder = OContext Builder
instance Functor OContext where
@ -44,20 +52,23 @@ instance Applicative OContext where
instance Monad OContext where
(>>=) (OContext a) f = OContext (a >>= (\x -> let OContext y = f x in y))
saveOffset :: Offset -> OContext ()
saveOffset offset = OContext $
get >>= tell . Map.singleton offset
saveOffset :: Resource -> OContext ()
saveOffset resource = OContext $
get >>= tell . Map.singleton resource
lift :: (a -> Builder) -> a -> OBuilder
lift f a = return (f a)
getOffsets :: OBuilder -> OContext (OBuilder, Map Offset Int)
getOffsets :: OBuilder -> OContext (OBuilder, Map Resource Offset)
getOffsets (OContext builder) =
OContext (listen builder >>= \(a, w) -> return (return a, w))
append :: OBuilder -> OBuilder -> OBuilder
append (OContext a) (OContext b) = OContext (mappend <$> a <*> b)
concat :: [OBuilder] -> OBuilder
concat = foldl mappend mempty
#if MIN_VERSION_base(4,11,0)
instance Semigroup OBuilder where
(<>) = append
@ -83,7 +94,7 @@ instance Output Bool where
output True = string "true"
instance Output a => Output [a] where
output = foldl mappend mempty . fmap output
output = concat . fmap output
join :: Output a => String -> [a] -> OBuilder
join _ [] = mempty
@ -91,27 +102,30 @@ join _ [a] = output a
join separator (a:as) =
output a `mappend` string separator `mappend` (join separator as)
newLine :: OBuilder
newLine = OContext $ buildEOL =<< ask
where
buildEOL EOL.CR = return (char8 '\r') <* modify (+1)
buildEOL EOL.LF = return (char8 '\n') <* modify (+1)
buildEOL EOL.CRLF = return (string8 "\r\n") <* modify (+2)
offset :: (Int -> Int) -> OContext ()
offset f = OContext $ modify $ \(Offset o) -> Offset (f o)
nextLine :: OBuilder -> OBuilder -> OBuilder
nextLine a b = a `mappend` newLine `mappend` b
newLine :: OBuilder
newLine = buildEOL =<< OContext ask
where
buildEOL EOL.CR = return (char8 '\r') <* offset (+1)
buildEOL EOL.LF = return (char8 '\n') <* offset (+1)
buildEOL EOL.CRLF = return (string8 "\r\n") <* offset (+2)
char :: Char -> OBuilder
char c = lift char8 c <* OContext (modify (+1))
char c = lift char8 c <* offset (+1)
string :: String -> OBuilder
string s = lift string8 s <* OContext (modify (+ toEnum (length s)))
string s = lift string8 s <* offset (+ toEnum (length s))
line :: String -> OBuilder
line l = string l `mappend` newLine
byteString :: ByteString -> OBuilder
byteString bs = lift B.byteString bs <* OContext (modify (+ BS.length bs))
byteString bs = lift B.byteString bs <* offset (+ BS.length bs)
render :: Output a => EOL.Style -> a -> Lazy.ByteString
render eolStyle a =
let OContext builder = output a in
let (outputByteString, _, _) = runRWS builder eolStyle 0 in
let (outputByteString, _, _) = runRWS builder eolStyle (Offset 0) in
toLazyByteString outputByteString