From 699f830a456b029f05a42db057ef66f8b5d4fee7 Mon Sep 17 00:00:00 2001 From: Tissevert Date: Fri, 20 Sep 2019 22:39:14 +0200 Subject: [PATCH] Simplify XRef structure, clarify integer types and remove nextLine --- src/PDF.hs | 16 ++--- src/PDF/Body.hs | 34 ++++------ src/PDF/Object.hs | 160 +++++++++++++++++++++++++++------------------- src/PDF/Output.hs | 54 ++++++++++------ 4 files changed, 148 insertions(+), 116 deletions(-) diff --git a/src/PDF.hs b/src/PDF.hs index 6c3dffd..755365f 100644 --- a/src/PDF.hs +++ b/src/PDF.hs @@ -19,21 +19,21 @@ import PDF.Object ( , Structure(..) , eofMarker, magicNumber, structure ) -import qualified PDF.Output as Output (render, string) -import PDF.Output (Output(..), nextLine) +import qualified PDF.Output as Output (render, line) +import PDF.Output (Output(..)) import PDF.Parser (Parser, runParser, string, takeAll) import Text.Printf (printf) data Document = Document { pdfVersion :: String , eolStyle :: EOL.Style - , contents :: [Content] + , updates :: [Content] } deriving Show instance Output Document where - output (Document {pdfVersion, contents}) = - Output.string (printf "%%PDF-%s" pdfVersion) - `nextLine` output contents + output (Document {pdfVersion, updates}) = + Output.line (printf "%%PDF-%s" pdfVersion) + `mappend` output updates render :: Document -> Lazy.ByteString render document@(Document {eolStyle}) = Output.render eolStyle document @@ -99,5 +99,5 @@ parseDocument input = do (pdfVersion, eolStyle) <- runParser ((,) <$> version <*> EOL.parser) () input startXref <- readStartXref eolStyle input structuresRead <- readStructures startXref input - let contents = populate input <$> structuresRead - return $ Document {pdfVersion, eolStyle, contents} + let updates = populate input <$> structuresRead + return $ Document {pdfVersion, eolStyle, updates} diff --git a/src/PDF/Body.hs b/src/PDF/Body.hs index f9ad4fd..2f07bba 100644 --- a/src/PDF/Body.hs +++ b/src/PDF/Body.hs @@ -14,9 +14,10 @@ import qualified PDF.EOL as EOL (charset, parser) import PDF.Object ( Content(..), DirectObject(..), Flow(..), IndirectObjCoordinates(..) , InputStructure(..), Name(..), Number(..), Object(..), Occurrence(..) - , Structure(..), XRefEntry(..), XRefSection, XRefSubSection(..) + , Structure(..), XRefEntry(..), XRefSection , blank, dictionary, directObject, integer, line ) +import PDF.Output (ObjectId(..), Offset(..)) import PDF.Parser (Parser, (), block, char, on, option, runParser, takeAll) data UserState = UserState { @@ -30,7 +31,7 @@ type SParser = Parser UserState modifyFlow :: (Flow -> Flow) -> SParser () modifyFlow f = modify $ \state -> state {flow = f $ flow state} -addObject :: Int -> Object -> SParser () +addObject :: ObjectId -> Object -> SParser () addObject objectId newObject = modifyFlow $ \flow -> flow { tmpObjects = Map.insert objectId newObject $ tmpObjects flow } @@ -45,29 +46,20 @@ comment = BS.unpack <$> (option "" afterPercent <* EOL.parser) where afterPercent = BS.cons <$> char '%' <*> takeAll (not . (`elem` EOL.charset)) -lookupOffset :: Int -> XRefSection -> Maybe Int -lookupOffset _ [] = Nothing -lookupOffset objectId (xrefSubSection:others) = - let XRefSubSection {firstObjectId, entriesNumber, entries} = xrefSubSection in - let index = objectId - firstObjectId in - if index >= 0 && index < entriesNumber - then - case Map.lookup index entries of - Just (InUse {offset}) -> Just offset - _ -> Nothing - else lookupOffset objectId others - -getOffset :: Int -> SParser Int -getOffset objectId = do +lookupOffset :: ObjectId -> SParser Offset +lookupOffset objectId = do table <- gets xreferences - case lookupOffset objectId table of + case Map.lookup objectId table >>= entryOffset of Nothing -> fail $ "obj " ++ show objectId ++ " is referenced but missing in XRef table" Just offset -> return offset + where + entryOffset (InUse {offset}) = Just offset + entryOffset _ = Nothing -loadNumber :: Int -> SParser Float +loadNumber :: ObjectId -> SParser Float loadNumber objectId = do - offset <- getOffset objectId + offset <- getOffset <$> lookupOffset objectId objectStart <- BS.drop offset <$> gets input indirectObjCoordinates `on` (objectStart :: ByteString) >> return () objectValue <- (!objectId) . tmpObjects <$> gets flow @@ -104,7 +96,7 @@ object = streamObject <|> Direct <$> directObject indirectObjCoordinates :: SParser IndirectObjCoordinates indirectObjCoordinates = do - objectId <- integer + objectId <- ObjectId <$> integer coordinates <- IndirectObjCoordinates objectId <$> integer objectValue <- line "obj" *> object <* blank <* line "endobj" addObject objectId objectValue @@ -126,7 +118,7 @@ populate input structure = } where docStructure = inputStructure structure - xreferences = xrefSection docStructure + xreferences = xRef docStructure initialState = UserState { input, xreferences, flow = Flow { occurrencesStack = [], tmpObjects = Map.empty diff --git a/src/PDF/Object.hs b/src/PDF/Object.hs index 947ff75..caba73b 100644 --- a/src/PDF/Object.hs +++ b/src/PDF/Object.hs @@ -14,7 +14,6 @@ module PDF.Object ( , Structure(..) , XRefEntry(..) , XRefSection - , XRefSubSection(..) , blank , dictionary , directObject @@ -31,12 +30,15 @@ import qualified Data.ByteString.Char8 as BS ( concat, cons, pack, singleton, unpack ) import Data.Map (Map, (!), mapWithKey) -import qualified Data.Map as Map (elems, fromList, toList) +import qualified Data.Map as Map ( + delete, empty, fromList, lookup, minViewWithKey, toList, union + ) import qualified PDF.EOL as EOL (charset, parser) -import qualified PDF.Output as Output (string) +import qualified PDF.Output as Output (concat, line, string) import PDF.Output ( - OBuilder, Offset(..), Output(..) - , byteString, getOffsets, join, newLine, nextLine, saveOffset + OBuilder, ObjectId(..), Offset(..), Output(..), Resource(..) + , byteString, getObjectId, getOffset, getOffsets, join, newLine + , saveOffset ) import PDF.Parser ( Parser, () @@ -73,6 +75,8 @@ integer = read . BS.unpack <$> decNumber <* blank "decimal integer" -- OBJECTS ------------------------------------- +type IndexedObjects = Map ObjectId Object + -- -- Boolean -- @@ -130,7 +134,7 @@ stringObject = newtype Name = Name String deriving (Eq, Ord, Show) instance Output Name where - output (Name n) = "/" `mappend` Output.string n + output (Name n) = Output.string ('/':n) name :: Parser u Name name = Name . BS.unpack <$> (char '/' *> takeAll regular) "name" @@ -153,7 +157,7 @@ instance Output Dictionary where where keyValues = join " " $ outputKeyVal <$> Map.toList dict outputKeyVal :: (Name, DirectObject) -> OBuilder - outputKeyVal (key, val) = output key `mappend` " " `mappend` output val + outputKeyVal (key, val) = Output.concat [output key, " ", output val] dictionary :: Parser u Dictionary dictionary = @@ -172,12 +176,13 @@ nullObject = string "null" *> return () "null object" -- Reference -- data IndirectObjCoordinates = IndirectObjCoordinates { - objectId :: Int + objectId :: ObjectId , versionNumber :: Int } deriving Show reference :: Parser u IndirectObjCoordinates -reference = IndirectObjCoordinates <$> integer <*> integer <* char 'R' "reference to an object" +reference = IndirectObjCoordinates + <$> (fmap ObjectId integer) <*> integer <* char 'R' "reference to an object" -- -- DirectObject @@ -198,11 +203,11 @@ instance Output DirectObject where output (NumberObject n) = output n output (StringObject s) = output s output (NameObject n) = output n - output (Array a) = "[" `mappend` join " " a `mappend` "]" + output (Array a) = Output.concat ["[", join " " a, "]"] output (Dictionary d) = output d output (Null) = "null" output (Reference (IndirectObjCoordinates {objectId, versionNumber})) = - Output.string (printf "%d %d R" objectId versionNumber) + Output.string (printf "%d %d R" (getObjectId objectId) versionNumber) directObject :: Parser u DirectObject directObject = @@ -229,25 +234,26 @@ data Object = instance Output Object where output (Direct d) = output d - output (Stream {header, streamContent}) = - output header - `nextLine` "stream" - `nextLine` byteString streamContent - `mappend` "endstream" + output (Stream {header, streamContent}) = Output.concat [ + output header, newLine + , Output.line "stream" + , byteString streamContent + , "endstream" + ] -- -- Occurrence -- data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show -outputOccurrence :: Map Int Object -> Occurrence -> OBuilder -outputOccurrence _ (Comment c) = - Output.string c `mappend` newLine +outputOccurrence :: IndexedObjects -> Occurrence -> OBuilder +outputOccurrence _ (Comment c) = Output.line c outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) = - saveOffset (ObjectId objectId) - >> Output.string (printf "%d %d obj" objectId versionNumber) - `nextLine` output (objects ! objectId) - `nextLine` "endobj" `mappend` newLine + saveOffset (Object objectId) >> Output.concat [ + Output.line (printf "%d %d obj" (getObjectId objectId) versionNumber) + , output (objects ! objectId), newLine + , Output.line "endobj" + ] ------------------------------------- -- XREF TABLE @@ -257,18 +263,18 @@ outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNum -- XRefEntry -- data XRefEntry = InUse { - offset :: Int + offset :: Offset , generation :: Int } | Free { - nextFree :: Int + nextFree :: ObjectId , generation :: Int } deriving Show instance Output XRefEntry where output (InUse {offset, generation}) = - Output.string (printf "%010d %05d n " offset generation) `mappend` newLine + Output.line (printf "%010d %05d n " (getOffset offset) generation) output (Free {nextFree, generation}) = - Output.string (printf "%010d %05d f " nextFree generation) `mappend` newLine + Output.line (printf "%010d %05d f " (getObjectId nextFree) generation) entry :: Parser u XRefEntry entry = do @@ -276,31 +282,56 @@ entry = do (inUse big small <|> free big small "XRef entry") <* blank where inUse :: Int -> Int -> Parser u XRefEntry - inUse offset generation = char 'n' *> return (InUse {offset, generation}) + inUse big generation = + char 'n' *> return (InUse {offset = Offset big, generation}) free :: Int -> Int -> Parser u XRefEntry - free nextFree generation = char 'f' *> return (Free {nextFree, generation}) + free big generation = + char 'f' *> return (Free {nextFree = ObjectId big, generation}) -- -- XRefSubSection -- data XRefSubSection = XRefSubSection { - firstObjectId :: Int - , entriesNumber :: Int - , entries :: Map Int XRefEntry + firstObjectId :: ObjectId + , entries :: [XRefEntry] } deriving Show instance Output XRefSubSection where - output (XRefSubSection {firstObjectId, entriesNumber, entries}) = - Output.string (printf "%d %d" firstObjectId entriesNumber) - `nextLine` output (Map.elems entries) + output (XRefSubSection {firstObjectId, entries}) = + Output.line (printf "%d %d" (getObjectId firstObjectId) (length entries)) + `mappend` output entries -xrefSubSection :: Parser u XRefSubSection -xrefSubSection = do - (firstObjectId, entriesNumber) <- (,) <$> integer <*> integer "XRef subsection" - entries <- Map.fromList . zip [firstObjectId..] <$> count entriesNumber entry - return $ XRefSubSection {firstObjectId, entriesNumber, entries} +xRefSubSection :: Parser u XRefSubSection +xRefSubSection = do + (firstId, entriesNumber) <- (,) <$> integer <*> integer "XRef subsection" + entries <- count entriesNumber entry + return $ XRefSubSection {firstObjectId = ObjectId firstId, entries} -type XRefSection = [XRefSubSection] +type XRefSection = Map ObjectId XRefEntry + +instance Output XRefSection where + output = output . sections + where + sections tmp = + case Map.minViewWithKey tmp of + Nothing -> [] + Just ((objectId@(ObjectId value), firstEntry), rest) -> + let (subSection, sndRest) = section objectId [firstEntry] (value + 1) rest in + subSection : sections sndRest + section firstObjectId stack nextValue tmp = + let nextId = ObjectId nextValue in + case Map.lookup nextId tmp of + Nothing -> (XRefSubSection {firstObjectId, entries = reverse stack}, tmp) + Just nextEntry -> + section firstObjectId (nextEntry:stack) (nextValue + 1) (Map.delete nextId tmp) + + +xRefSection :: Parser u XRefSection +xRefSection = foldr addSubsection Map.empty <$> + (line "xref" *> xRefSubSection `sepBy` EOL.parser) + where + addSubsection (XRefSubSection {firstObjectId, entries}) = + Map.union . Map.fromList $ zip ([firstObjectId..]) entries -- -- Structure @@ -311,34 +342,28 @@ data InputStructure = InputStructure { } data Structure = Structure { - xrefSection :: XRefSection + xRef :: XRefSection , trailer :: Dictionary } deriving Show structure :: Parser u Structure structure = Structure - <$> (line "xref" *> xrefSubSection `sepBy` EOL.parser) + <$> xRefSection <*> (string "trailer" *> blank *> dictionary <* EOL.parser) -updateXrefs :: XRefSection -> Map Offset Int -> (XRefSection, Int) -updateXrefs xrefSection offsets = ( - updateSubSection <$> xrefSection - , offsets ! StartXRef - ) +updateXrefs :: XRefSection -> Map Resource Offset -> (XRefSection, Offset) +updateXrefs xRef offsets = (mapWithKey updateEntry xRef, offsets ! StartXRef) where - updateSubSection subSection@(XRefSubSection {firstObjectId, entries}) = - subSection {entries = mapWithKey (updateEntry firstObjectId) entries} - updateEntry firstObjectId index e@(InUse {}) = - e {offset = offsets ! (ObjectId $ firstObjectId + index)} - updateEntry _ _ e = e + updateEntry objectId e@(InUse {}) = e {offset = offsets ! (Object objectId)} + updateEntry _ e = e -- -- Flow -- data Flow = Flow { occurrencesStack :: [Occurrence] - , tmpObjects :: Map Int Object + , tmpObjects :: IndexedObjects } deriving Show -- @@ -346,25 +371,26 @@ data Flow = Flow { -- data Content = Content { occurrences :: [Occurrence] - , objects :: Map Int Object + , objects :: IndexedObjects , docStructure :: Structure } deriving Show -outputBody :: ([Occurrence], Map Int Object) -> OBuilder +outputBody :: ([Occurrence], IndexedObjects) -> OBuilder outputBody (occurrences, objects) = output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef instance Output Content where output (Content {occurrences, objects, docStructure}) = - fmap (updateXrefs xrefSection) <$> getOffsets (outputBody (occurrences, objects)) - >>= \(body, (xref, startXRef)) -> - body - `mappend` "xref" - `nextLine` output xref - `mappend` "trailer" - `nextLine` output trailer - `nextLine` "startxref" - `nextLine` (Output.string (printf "%d" startXRef)) - `nextLine` byteString eofMarker + fmap (updateXrefs xRef) <$> getOffsets (outputBody (occurrences, objects)) + >>= \(body, (xref, startXRef)) -> Output.concat [ + body + , Output.line "xref" + , output xref + , Output.line "trailer" + , output trailer, newLine + , Output.line "startxref" + , Output.line (printf "%d" (getOffset startXRef)) + , byteString eofMarker + ] where - Structure {xrefSection, trailer} = docStructure + Structure {xRef, trailer} = docStructure diff --git a/src/PDF/Output.hs b/src/PDF/Output.hs index 27a77f1..82cbe99 100644 --- a/src/PDF/Output.hs +++ b/src/PDF/Output.hs @@ -2,17 +2,21 @@ {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE AllowAmbiguousTypes #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} module PDF.Output ( OBuilder + , ObjectId(..) , OContext(..) , Offset(..) , Output(..) + , Resource(..) , byteString , char + , concat , getOffsets , join + , line , newLine - , nextLine , saveOffset , string , render @@ -28,10 +32,14 @@ import qualified Data.Map as Map (singleton) import Data.String (IsString(..)) import Control.Monad.RWS (RWS, runRWS, ask, get, listen, modify, tell) import qualified PDF.EOL as EOL (Style(..)) +import Prelude hiding (concat) -data Offset = StartXRef | ObjectId Int deriving (Eq, Ord) +newtype ObjectId = ObjectId {getObjectId :: Int} deriving (Enum, Eq, Ord, Show) +newtype Offset = Offset {getOffset :: Int} deriving (Show) -newtype OContext a = OContext (RWS EOL.Style (Map Offset Int) Int a) +data Resource = StartXRef | Object ObjectId deriving (Eq, Ord) + +newtype OContext a = OContext (RWS EOL.Style (Map Resource Offset) Offset a) type OBuilder = OContext Builder instance Functor OContext where @@ -44,20 +52,23 @@ instance Applicative OContext where instance Monad OContext where (>>=) (OContext a) f = OContext (a >>= (\x -> let OContext y = f x in y)) -saveOffset :: Offset -> OContext () -saveOffset offset = OContext $ - get >>= tell . Map.singleton offset +saveOffset :: Resource -> OContext () +saveOffset resource = OContext $ + get >>= tell . Map.singleton resource lift :: (a -> Builder) -> a -> OBuilder lift f a = return (f a) -getOffsets :: OBuilder -> OContext (OBuilder, Map Offset Int) +getOffsets :: OBuilder -> OContext (OBuilder, Map Resource Offset) getOffsets (OContext builder) = OContext (listen builder >>= \(a, w) -> return (return a, w)) append :: OBuilder -> OBuilder -> OBuilder append (OContext a) (OContext b) = OContext (mappend <$> a <*> b) +concat :: [OBuilder] -> OBuilder +concat = foldl mappend mempty + #if MIN_VERSION_base(4,11,0) instance Semigroup OBuilder where (<>) = append @@ -83,7 +94,7 @@ instance Output Bool where output True = string "true" instance Output a => Output [a] where - output = foldl mappend mempty . fmap output + output = concat . fmap output join :: Output a => String -> [a] -> OBuilder join _ [] = mempty @@ -91,27 +102,30 @@ join _ [a] = output a join separator (a:as) = output a `mappend` string separator `mappend` (join separator as) -newLine :: OBuilder -newLine = OContext $ buildEOL =<< ask - where - buildEOL EOL.CR = return (char8 '\r') <* modify (+1) - buildEOL EOL.LF = return (char8 '\n') <* modify (+1) - buildEOL EOL.CRLF = return (string8 "\r\n") <* modify (+2) +offset :: (Int -> Int) -> OContext () +offset f = OContext $ modify $ \(Offset o) -> Offset (f o) -nextLine :: OBuilder -> OBuilder -> OBuilder -nextLine a b = a `mappend` newLine `mappend` b +newLine :: OBuilder +newLine = buildEOL =<< OContext ask + where + buildEOL EOL.CR = return (char8 '\r') <* offset (+1) + buildEOL EOL.LF = return (char8 '\n') <* offset (+1) + buildEOL EOL.CRLF = return (string8 "\r\n") <* offset (+2) char :: Char -> OBuilder -char c = lift char8 c <* OContext (modify (+1)) +char c = lift char8 c <* offset (+1) string :: String -> OBuilder -string s = lift string8 s <* OContext (modify (+ toEnum (length s))) +string s = lift string8 s <* offset (+ toEnum (length s)) + +line :: String -> OBuilder +line l = string l `mappend` newLine byteString :: ByteString -> OBuilder -byteString bs = lift B.byteString bs <* OContext (modify (+ BS.length bs)) +byteString bs = lift B.byteString bs <* offset (+ BS.length bs) render :: Output a => EOL.Style -> a -> Lazy.ByteString render eolStyle a = let OContext builder = output a in - let (outputByteString, _, _) = runRWS builder eolStyle 0 in + let (outputByteString, _, _) = runRWS builder eolStyle (Offset 0) in toLazyByteString outputByteString