Improve getObj example to catch no-existing ObjectId and default to listing existing ObjectIds when none is provided

Fix a bug preventing Hufflepdf from reading objects with a ' ' after the obj keyword
Release 0.2.0.0
2019-11-29 11:53:08 +01:00 · 2019-11-27 18:01:19 +01:00 · 2019-10-14 10:16:14 +02:00 · 2019-09-22 01:40:39 +02:00 · 2019-09-22 01:37:28 +02:00 · 2019-09-22 01:34:17 +02:00
9 changed files with 293 additions and 120 deletions
--- a/ChangeLog.md
+++ b/ChangeLog.md
@ -1,5 +1,13 @@
 # Revision history for Hufflepdf
 ## 0.2.0.1  -- 2019-11-27
 * Fix bug discovered while running Hufflepdf on a PDF output from pdftk : magic keywords like `obj`, `stream` or `xref` can have spaces after them before the EOL
 ## 0.2.0.0  -- 2019-10-14
 * Implement PDF's multilayer update mechanism
 ## 0.1.1.1  -- 2019-05-31
 * Fix a bug preventing files with the «trailer» keyword on the same line as the dictionary that follows it to be parsed
--- a/Hufflepdf.cabal
+++ b/Hufflepdf.cabal
@ -2,7 +2,7 @@
 -- documentation, see http://haskell.org/cabal/users-guide/
 name:                Hufflepdf
-version:             0.1.1.1
+version:             0.2.0.1
 synopsis:            A PDF parser
 -- description:         
 license:             BSD3
@ -20,6 +20,7 @@ library
                     , PDF.EOL
                     , PDF.Object
                     , PDF.Output
                     , PDF.Update
  other-modules:       Data.ByteString.Char8.Util
                     , PDF.Body
                     , PDF.Parser
@ -32,3 +33,21 @@ library
  hs-source-dirs:      src
  ghc-options:         -Wall
  default-language:    Haskell2010
 executable             equivalent
  main-is:             examples/equivalent.hs
  build-depends:       base
                     , bytestring
                     , Hufflepdf
  ghc-options:         -Wall
  default-language:    Haskell2010
 executable             getObj
  main-is:             examples/getObj.hs
  build-depends:       base
                     , bytestring
                     , containers
                     , Hufflepdf
                     , zlib
  ghc-options:         -Wall
  default-language:    Haskell2010
--- a/examples/equivalent.hs
+++ b/examples/equivalent.hs
@ -0,0 +1,13 @@
 import qualified Data.ByteString.Char8 as BS (readFile)
 import qualified Data.ByteString.Lazy.Char8 as Lazy (writeFile)
 import PDF (parseDocument, render)
 import System.Environment (getArgs)
 import System.IO (hPutStrLn, stderr)
 main :: IO ()
 main = do
  [inputFile, outputFile] <- getArgs
  result <- parseDocument <$> BS.readFile inputFile
  case result of
    Left parseError -> hPutStrLn stderr parseError
    Right doc -> Lazy.writeFile outputFile $ render doc
--- a/examples/getObj.hs
+++ b/examples/getObj.hs
@ -0,0 +1,53 @@
 {-# LANGUAGE NamedFieldPuns #-}
 import Codec.Compression.Zlib (decompress)
 import Data.ByteString.Lazy.Char8 (ByteString)
 import qualified Data.ByteString.Char8 as BS (readFile)
 import qualified Data.ByteString.Lazy.Char8 as Lazy (fromStrict, putStr, toStrict)
 import Data.Map ((!?))
 import qualified Data.Map as Map (keys, lookup)
 import PDF (Document(..), parseDocument)
 import qualified PDF.EOL as EOL (Style)
 import PDF.Object (Content(..), DirectObject(..), Object(..), Name(..))
 import PDF.Output (ObjectId(..))
 import qualified PDF.Output as Output (render)
 import PDF.Update (unify)
 import System.Environment (getArgs, getProgName)
 import System.Exit (die)
 import Text.Printf (printf)
 display :: EOL.Style -> Object -> ByteString
 display eolStyle d@(Direct _) = Output.render eolStyle d
 display eolStyle s@(Stream {header, streamContent}) = Output.render eolStyle $
  case Map.lookup (Name "Filter") header of
    Just (NameObject (Name "FlateDecode")) -> Stream {
          header
        , streamContent = Lazy.toStrict . decompress $ Lazy.fromStrict streamContent
      }
    _ -> s
 extractObject :: ObjectId -> Document -> Either String ByteString
 extractObject objectId (Document {eolStyle, updates}) =
  case objects content !? objectId of
    Nothing -> Left $ "No object has ID " ++ show (getObjectId objectId)
    Just o -> Right $ display eolStyle o
  where
    content = unify updates
 listObjectIds :: Document -> Either String [String]
 listObjectIds =
  Right . prependTitle . toString . Map.keys . objects . unify . updates
  where
    toString = fmap (show . getObjectId)
    prependTitle = ("ObjectIds defined in this PDF:":)
 main :: IO ()
 main = do
  (inputFile, getData) <- parse =<< getArgs
  input <- BS.readFile inputFile
  either die id $ (parseDocument input >>= getData)
  where
    parse [inputFile] = return (inputFile, fmap (mapM_ putStrLn) . listObjectIds)
    parse [inputFile, objectId] = return
      (inputFile, fmap Lazy.putStr . extractObject (ObjectId (read objectId)))
    parse _ = die . printf "Syntax: %s inputFile [OBJECT_ID]\n" =<< getProgName
--- a/src/PDF.hs
+++ b/src/PDF.hs
@ -19,21 +19,21 @@ import PDF.Object (
    , Structure(..)
    , eofMarker, magicNumber, structure
  )
-import qualified PDF.Output as Output (render, string)
+import qualified PDF.Output as Output (render, line)
-import PDF.Output (Output(..), nextLine)
+import PDF.Output (Output(..))
 import PDF.Parser (Parser, runParser, string, takeAll)
 import Text.Printf (printf)
 data Document = Document {
      pdfVersion :: String
    , eolStyle :: EOL.Style
-    , contents :: [Content]
+    , updates :: [Content]
  } deriving Show
 instance Output Document where
-  output (Document {pdfVersion, contents}) =
+  output (Document {pdfVersion, updates}) =
-      Output.string (printf "%%PDF-%s" pdfVersion)
+      Output.line (printf "%%PDF-%s" pdfVersion)
-    `nextLine` output contents
+    `mappend` output updates
 render :: Document -> Lazy.ByteString
 render document@(Document {eolStyle}) = Output.render eolStyle document
@ -99,5 +99,5 @@ parseDocument input = do
  (pdfVersion, eolStyle) <- runParser ((,) <$> version <*> EOL.parser) () input
  startXref <- readStartXref eolStyle input
  structuresRead <- readStructures startXref input
-  let contents = populate input <$> structuresRead
+  let updates = populate input <$> structuresRead
-  return $ Document {pdfVersion, eolStyle, contents}
+  return $ Document {pdfVersion, eolStyle, updates}
--- a/src/PDF/Body.hs
+++ b/src/PDF/Body.hs
@ -14,9 +14,10 @@ import qualified PDF.EOL as EOL (charset, parser)
 import PDF.Object (
      Content(..), DirectObject(..), Flow(..), IndirectObjCoordinates(..)
    , InputStructure(..), Name(..), Number(..), Object(..), Occurrence(..)
-    , Structure(..), XRefEntry(..), XRefSection, XRefSubSection(..)
+    , Structure(..), XRefEntry(..), XRefSection
    , blank, dictionary, directObject, integer, line
  )
 import PDF.Output (ObjectId(..), Offset(..))
 import PDF.Parser (Parser, (<?>), block, char, on, option, runParser, takeAll)
 data UserState = UserState {
@ -30,7 +31,7 @@ type SParser = Parser UserState
 modifyFlow :: (Flow -> Flow) -> SParser ()
 modifyFlow f = modify $ \state -> state {flow = f $ flow state}
-addObject :: Int -> Object -> SParser ()
+addObject :: ObjectId -> Object -> SParser ()
 addObject objectId newObject = modifyFlow $ \flow -> flow {
    tmpObjects = Map.insert objectId newObject $ tmpObjects flow
  }
@ -45,29 +46,20 @@ comment = BS.unpack <$> (option "" afterPercent <* EOL.parser)
  where
    afterPercent = BS.cons <$> char '%' <*> takeAll (not . (`elem` EOL.charset))
-lookupOffset :: Int -> XRefSection -> Maybe Int
+lookupOffset :: ObjectId -> SParser Offset
-lookupOffset _ [] = Nothing
+lookupOffset objectId = do
 lookupOffset objectId (xrefSubSection:others) =
  let XRefSubSection {firstObjectId, entriesNumber, entries} = xrefSubSection in
  let index = objectId - firstObjectId in
  if index >= 0 && index < entriesNumber
  then
    case Map.lookup index entries of
      Just (InUse {offset}) -> Just offset
      _ -> Nothing
  else lookupOffset objectId others
 getOffset :: Int -> SParser Int
 getOffset objectId = do
  table <- gets xreferences
-  case lookupOffset objectId table of
+  case Map.lookup objectId table >>= entryOffset of
    Nothing -> fail $
      "obj " ++ show objectId ++ " is referenced but missing in XRef table"
    Just offset -> return offset
  where
    entryOffset (InUse {offset}) = Just offset
    entryOffset _ = Nothing
-loadNumber :: Int -> SParser Float
+loadNumber :: ObjectId -> SParser Double
 loadNumber objectId = do
-  offset <- getOffset objectId
+  offset <- getOffset <$> lookupOffset objectId
  objectStart <- BS.drop offset <$> gets input
  indirectObjCoordinates `on` (objectStart :: ByteString) >> return ()
  objectValue <- (!objectId) . tmpObjects <$> gets flow
@ -78,7 +70,7 @@ loadNumber objectId = do
 invalidValue :: Object -> String
 invalidValue v = "Invalid value " ++ show v
-getSize :: Maybe DirectObject -> SParser Float
+getSize :: Maybe DirectObject -> SParser Double
 getSize Nothing = fail "Missing '/Length' key on stream"
 getSize (Just (NumberObject (Number size))) = return size
 getSize (Just (Reference (IndirectObjCoordinates {objectId}))) = do
@ -104,7 +96,7 @@ object = streamObject <|> Direct <$> directObject
 indirectObjCoordinates :: SParser IndirectObjCoordinates
 indirectObjCoordinates = do
-  objectId <- integer
+  objectId <- ObjectId <$> integer
  coordinates <- IndirectObjCoordinates objectId <$> integer
  objectValue <- line "obj" *> object <* blank <* line "endobj"
  addObject objectId objectValue
@ -126,7 +118,7 @@ populate input structure =
        }
  where
    docStructure = inputStructure structure
-    xreferences = xrefSection docStructure
+    xreferences = xRef docStructure
    initialState = UserState {
        input, xreferences, flow = Flow {
            occurrencesStack = [], tmpObjects = Map.empty
--- a/src/PDF/Object.hs
+++ b/src/PDF/Object.hs
@ -5,6 +5,7 @@ module PDF.Object (
      Content(..)
    , DirectObject(..)
    , Flow(..)
    , IndexedObjects
    , IndirectObjCoordinates(..)
    , InputStructure(..)
    , Name(..)
@ -14,7 +15,6 @@ module PDF.Object (
    , Structure(..)
    , XRefEntry(..)
    , XRefSection
    , XRefSubSection(..)
    , blank
    , dictionary
    , directObject
@ -31,12 +31,15 @@ import qualified Data.ByteString.Char8 as BS (
    concat, cons, pack, singleton, unpack
  )
 import Data.Map (Map, (!), mapWithKey)
-import qualified Data.Map as Map (elems, fromList, toList)
+import qualified Data.Map as Map (
    delete, empty, fromList, lookup, minViewWithKey, toList, union
  )
 import qualified PDF.EOL as EOL (charset, parser)
-import qualified PDF.Output as Output (string)
+import qualified PDF.Output as Output (concat, line, string)
 import PDF.Output (
-      OBuilder, Offset(..), Output(..)
+      OBuilder, ObjectId(..), Offset(..), Output(..), Resource(..)
-    , byteString, getOffsets, join, newLine, nextLine, saveOffset
+    , byteString, getObjectId, getOffset, getOffsets, join, newLine
    , saveOffset
  )
 import PDF.Parser (
      Parser, (<?>)
@ -46,7 +49,7 @@ import PDF.Parser (
 import Text.Printf (printf)
 line :: String -> Parser u ()
-line l = (string (BS.pack l) *> EOL.parser *> return ()) <?> printf "line «%s»" l
+line l = (string (BS.pack l) *> blank *> return ()) <?> printf "line «%s»" l
 magicNumber :: ByteString
 magicNumber = "%PDF-"
@ -73,6 +76,8 @@ integer = read . BS.unpack <$> decNumber <* blank <?> "decimal integer"
 --          OBJECTS
 -------------------------------------
 type IndexedObjects = Map ObjectId Object
 --
 -- Boolean
 --
@ -83,7 +88,7 @@ boolean =
 --
 -- Number
 --
-newtype Number = Number Float deriving Show
+newtype Number = Number Double deriving Show
 instance Output Number where
  output (Number f) = Output.string $
@ -130,7 +135,7 @@ stringObject =
 newtype Name = Name String deriving (Eq, Ord, Show)
 instance Output Name where
-  output (Name n) = "/" `mappend` Output.string n
+  output (Name n) = Output.string ('/':n)
 name :: Parser u Name
 name = Name . BS.unpack <$> (char '/' *> takeAll regular) <?> "name"
@ -153,7 +158,7 @@ instance Output Dictionary where
    where
      keyValues = join " " $ outputKeyVal <$> Map.toList dict
      outputKeyVal :: (Name, DirectObject) -> OBuilder
-      outputKeyVal (key, val) = output key `mappend` " " `mappend` output val
+      outputKeyVal (key, val) = Output.concat [output key, " ", output val]
 dictionary :: Parser u Dictionary
 dictionary =
@ -172,12 +177,13 @@ nullObject = string "null" *> return () <?> "null object"
 -- Reference
 --
 data IndirectObjCoordinates = IndirectObjCoordinates {
-      objectId :: Int
+      objectId :: ObjectId
    , versionNumber :: Int
  } deriving Show
 reference :: Parser u IndirectObjCoordinates
-reference = IndirectObjCoordinates <$> integer <*> integer <* char 'R' <?> "reference to an object"
+reference = IndirectObjCoordinates
  <$> (fmap ObjectId integer) <*> integer <* char 'R' <?> "reference to an object"
 --
 -- DirectObject
@ -198,11 +204,11 @@ instance Output DirectObject where
  output (NumberObject n) = output n
  output (StringObject s) = output s
  output (NameObject n) = output n
-  output (Array a) = "[" `mappend` join " " a `mappend` "]"
+  output (Array a) = Output.concat ["[", join " " a, "]"]
  output (Dictionary d) = output d
  output (Null) = "null"
  output (Reference (IndirectObjCoordinates {objectId, versionNumber})) =
-    Output.string (printf "%d %d R" objectId versionNumber)
+    Output.string (printf "%d %d R" (getObjectId objectId) versionNumber)
 directObject :: Parser u DirectObject
 directObject =
@ -229,25 +235,26 @@ data Object =
 instance Output Object where
  output (Direct d) = output d
-  output (Stream {header, streamContent}) =
+  output (Stream {header, streamContent}) = Output.concat [
-      output header
+        output header, newLine
-    `nextLine` "stream"
+      , Output.line "stream"
-    `nextLine` byteString streamContent
+      , byteString streamContent
-    `mappend` "endstream"
+      , "endstream"
    ]
 --
 -- Occurrence
 --
 data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show
-outputOccurrence :: Map Int Object -> Occurrence -> OBuilder
+outputOccurrence :: IndexedObjects -> Occurrence -> OBuilder
-outputOccurrence _ (Comment c) =
+outputOccurrence _ (Comment c) = Output.line c
  Output.string c `mappend` newLine
 outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) =
-    saveOffset (ObjectId objectId)
+  saveOffset (Object objectId) >> Output.concat [
-  >> Output.string (printf "%d %d obj" objectId versionNumber)
+        Output.line (printf "%d %d obj" (getObjectId objectId) versionNumber)
-  `nextLine` output (objects ! objectId)
+      , output (objects ! objectId), newLine
-  `nextLine` "endobj" `mappend` newLine
+      , Output.line "endobj"
    ]
 -------------------------------------
 --          XREF TABLE
@ -257,18 +264,18 @@ outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNum
 -- XRefEntry
 --
 data XRefEntry = InUse {
-      offset :: Int
+      offset :: Offset
    , generation :: Int
  } | Free {
-      nextFree :: Int
+      nextFree :: ObjectId
    , generation :: Int
  } deriving Show
 instance Output XRefEntry where
  output (InUse {offset, generation}) =
-      Output.string (printf "%010d %05d n " offset generation) `mappend` newLine
+      Output.line (printf "%010d %05d n " (getOffset offset) generation)
  output (Free {nextFree, generation}) =
-      Output.string (printf "%010d %05d f " nextFree generation) `mappend` newLine
+      Output.line (printf "%010d %05d f " (getObjectId nextFree) generation)
 entry :: Parser u XRefEntry
 entry = do
@ -276,31 +283,56 @@ entry = do
  (inUse big small <|> free big small <?> "XRef entry") <* blank
  where
    inUse :: Int -> Int -> Parser u XRefEntry
-    inUse offset generation = char 'n' *> return (InUse {offset, generation})
+    inUse big generation =
      char 'n' *> return (InUse {offset = Offset big, generation})
    free :: Int -> Int -> Parser u XRefEntry
-    free nextFree generation = char 'f' *> return (Free {nextFree, generation})
+    free big generation =
      char 'f' *> return (Free {nextFree = ObjectId big, generation})
 --
 -- XRefSubSection
 --
 data XRefSubSection = XRefSubSection {
-      firstObjectId :: Int
+      firstObjectId :: ObjectId
-    , entriesNumber :: Int
+    , entries :: [XRefEntry]
    , entries :: Map Int XRefEntry
  } deriving Show
 instance Output XRefSubSection where
-  output (XRefSubSection {firstObjectId, entriesNumber, entries}) =
+  output (XRefSubSection {firstObjectId, entries}) =
-      Output.string (printf "%d %d" firstObjectId entriesNumber)
+      Output.line (printf "%d %d" (getObjectId firstObjectId) (length entries))
-    `nextLine` output (Map.elems entries)
+    `mappend` output entries
-xrefSubSection :: Parser u XRefSubSection
+xRefSubSection :: Parser u XRefSubSection
-xrefSubSection = do
+xRefSubSection = do
-  (firstObjectId, entriesNumber) <- (,) <$> integer <*> integer <?> "XRef subsection"
+  (firstId, entriesNumber) <- (,) <$> integer <*> integer <?> "XRef subsection"
-  entries <- Map.fromList . zip [firstObjectId..] <$> count entriesNumber entry
+  entries <- count entriesNumber entry
-  return $ XRefSubSection {firstObjectId, entriesNumber, entries}
+  return $ XRefSubSection {firstObjectId = ObjectId firstId, entries}
-type XRefSection = [XRefSubSection]
+type XRefSection = Map ObjectId XRefEntry
 instance Output XRefSection where
  output = output . sections
    where
      sections tmp =
        case Map.minViewWithKey tmp of
          Nothing -> []
          Just ((objectId@(ObjectId value), firstEntry), rest) ->
            let (subSection, sndRest) = section objectId [firstEntry] (value + 1) rest in
            subSection : sections sndRest
      section firstObjectId stack nextValue tmp =
        let nextId = ObjectId nextValue in
        case Map.lookup nextId tmp of
          Nothing -> (XRefSubSection {firstObjectId, entries = reverse stack}, tmp)
          Just nextEntry ->
            section firstObjectId (nextEntry:stack) (nextValue + 1) (Map.delete nextId tmp)
 xRefSection :: Parser u XRefSection
 xRefSection = foldr addSubsection Map.empty <$>
  (line "xref" *> xRefSubSection `sepBy` many EOL.parser)
  where
    addSubsection (XRefSubSection {firstObjectId, entries}) =
      Map.union . Map.fromList $ zip ([firstObjectId..]) entries
 --
 -- Structure
@ -311,34 +343,28 @@ data InputStructure = InputStructure {
  }
 data Structure = Structure {
-      xrefSection :: XRefSection
+      xRef :: XRefSection
    , trailer :: Dictionary
  } deriving Show
 structure :: Parser u Structure
 structure =
    Structure
-  <$> (line "xref" *> xrefSubSection `sepBy` EOL.parser)
+  <$> xRefSection
  <*> (string "trailer" *> blank *> dictionary <* EOL.parser)
-updateXrefs :: XRefSection -> Map Offset Int -> (XRefSection, Int)
+updateXrefs :: XRefSection -> Map Resource Offset -> (XRefSection, Offset)
-updateXrefs xrefSection offsets = (
+updateXrefs xRef offsets = (mapWithKey updateEntry xRef, offsets ! StartXRef)
      updateSubSection <$> xrefSection
    , offsets ! StartXRef
  )
  where
-    updateSubSection subSection@(XRefSubSection {firstObjectId, entries}) =
+    updateEntry objectId e@(InUse {}) = e {offset = offsets ! (Object objectId)}
-      subSection {entries = mapWithKey (updateEntry firstObjectId) entries}
+    updateEntry _ e = e
    updateEntry firstObjectId index e@(InUse {}) =
      e {offset = offsets ! (ObjectId $ firstObjectId + index)}
    updateEntry _ _ e = e
 --
 -- Flow
 --
 data Flow = Flow {
      occurrencesStack :: [Occurrence]
-    , tmpObjects :: Map Int Object
+    , tmpObjects :: IndexedObjects
  } deriving Show
 --
@ -346,25 +372,26 @@ data Flow = Flow {
 --
 data Content = Content {
      occurrences :: [Occurrence]
-    , objects :: Map Int Object
+    , objects :: IndexedObjects
    , docStructure :: Structure
  } deriving Show
-outputBody :: ([Occurrence], Map Int Object) -> OBuilder
+outputBody :: ([Occurrence], IndexedObjects) -> OBuilder
 outputBody (occurrences, objects) =
  output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef
 instance Output Content where
  output (Content {occurrences, objects, docStructure}) =
-    fmap (updateXrefs xrefSection) <$> getOffsets (outputBody (occurrences, objects))
+    fmap (updateXrefs xRef) <$> getOffsets (outputBody (occurrences, objects))
-    >>= \(body, (xref, startXRef)) ->
+    >>= \(body, (xref, startXRef)) -> Output.concat [
-        body
+          body
-      `mappend` "xref"
+        , Output.line "xref"
-      `nextLine` output xref
+        , output xref
-      `mappend` "trailer"
+        , Output.line "trailer"
-      `nextLine` output trailer
+        , output trailer, newLine
-      `nextLine` "startxref"
+        , Output.line "startxref"
-      `nextLine` (Output.string (printf "%d" startXRef))
+        , Output.line (printf "%d" (getOffset startXRef))
-      `nextLine` byteString eofMarker
+        , byteString eofMarker
      ]
    where
-      Structure {xrefSection, trailer} = docStructure
+      Structure {xRef, trailer} = docStructure
--- a/src/PDF/Output.hs
+++ b/src/PDF/Output.hs
@ -2,17 +2,21 @@
 {-# LANGUAGE MultiParamTypeClasses #-}
 {-# LANGUAGE FlexibleInstances #-}
 {-# LANGUAGE AllowAmbiguousTypes #-}
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
 module PDF.Output (
      OBuilder
    , ObjectId(..)
    , OContext(..)
    , Offset(..)
    , Output(..)
    , Resource(..)
    , byteString
    , char
    , concat
    , getOffsets
    , join
    , line
    , newLine
    , nextLine
    , saveOffset
    , string
    , render
@ -28,10 +32,14 @@ import qualified Data.Map as Map (singleton)
 import Data.String (IsString(..))
 import Control.Monad.RWS (RWS, runRWS, ask, get, listen, modify, tell)
 import qualified PDF.EOL as EOL (Style(..))
 import Prelude hiding (concat)
-data Offset = StartXRef | ObjectId Int deriving (Eq, Ord)
+newtype ObjectId = ObjectId {getObjectId :: Int} deriving (Enum, Eq, Ord, Show)
 newtype Offset = Offset {getOffset :: Int} deriving (Show)
-newtype OContext a = OContext (RWS EOL.Style (Map Offset Int) Int a)
+data Resource = StartXRef | Object ObjectId deriving (Eq, Ord)
 newtype OContext a = OContext (RWS EOL.Style (Map Resource Offset) Offset a)
 type OBuilder = OContext Builder
 instance Functor OContext where
@ -44,20 +52,23 @@ instance Applicative OContext where
 instance Monad OContext where
  (>>=) (OContext a) f = OContext (a >>= (\x -> let OContext y = f x in y))
-saveOffset :: Offset -> OContext ()
+saveOffset :: Resource -> OContext ()
-saveOffset offset = OContext $
+saveOffset resource = OContext $
-  get >>= tell . Map.singleton offset
+  get >>= tell . Map.singleton resource
 lift :: (a -> Builder) -> a -> OBuilder
 lift f a = return (f a)
-getOffsets :: OBuilder -> OContext (OBuilder, Map Offset Int)
+getOffsets :: OBuilder -> OContext (OBuilder, Map Resource Offset)
 getOffsets (OContext builder) =
  OContext (listen builder >>= \(a, w) -> return (return a, w))
 append :: OBuilder -> OBuilder -> OBuilder
 append (OContext a) (OContext b) = OContext (mappend <$> a <*> b)
 concat :: [OBuilder] -> OBuilder
 concat = foldl mappend mempty
 #if MIN_VERSION_base(4,11,0)
 instance Semigroup OBuilder where
  (<>) = append
@ -83,7 +94,7 @@ instance Output Bool where
  output True = string "true"
 instance Output a => Output [a] where
-  output = foldl mappend mempty . fmap output
+  output = concat . fmap output
 join :: Output a => String -> [a] -> OBuilder
 join _ [] = mempty
@ -91,27 +102,30 @@ join _ [a] = output a
 join separator (a:as) =
  output a `mappend` string separator `mappend` (join separator as)
-newLine :: OBuilder
+offset :: (Int -> Int) -> OContext ()
-newLine = OContext $ buildEOL =<< ask
+offset f = OContext $ modify $ \(Offset o) -> Offset (f o)
  where
    buildEOL EOL.CR = return (char8 '\r') <* modify (+1)
    buildEOL EOL.LF = return (char8 '\n') <* modify (+1)
    buildEOL EOL.CRLF = return (string8 "\r\n") <* modify (+2)
-nextLine :: OBuilder -> OBuilder -> OBuilder
+newLine :: OBuilder
-nextLine a b = a `mappend` newLine `mappend` b
+newLine = buildEOL =<< OContext ask
  where
    buildEOL EOL.CR = return (char8 '\r') <* offset (+1)
    buildEOL EOL.LF = return (char8 '\n') <* offset (+1)
    buildEOL EOL.CRLF = return (string8 "\r\n") <* offset (+2)
 char :: Char -> OBuilder
-char c = lift char8 c <* OContext (modify (+1))
+char c = lift char8 c <* offset (+1)
 string :: String -> OBuilder
-string s = lift string8 s <* OContext (modify (+ toEnum (length s)))
+string s = lift string8 s <* offset (+ toEnum (length s))
 line :: String -> OBuilder
 line l = string l `mappend` newLine
 byteString :: ByteString -> OBuilder
-byteString bs = lift B.byteString bs <* OContext (modify (+ BS.length bs))
+byteString bs = lift B.byteString bs <* offset (+ BS.length bs)
 render :: Output a => EOL.Style -> a -> Lazy.ByteString
 render eolStyle a =
  let OContext builder = output a in
-  let (outputByteString, _, _) = runRWS builder eolStyle 0 in
+  let (outputByteString, _, _) = runRWS builder eolStyle (Offset 0) in
  toLazyByteString outputByteString
--- a/src/PDF/Update.hs
+++ b/src/PDF/Update.hs
@ -0,0 +1,47 @@
 {-# LANGUAGE NamedFieldPuns #-}
 module PDF.Update (
    unify
  ) where
 import Data.Map (member)
 import qualified Data.Map as Map (empty, union)
 import PDF.Object (
      Content(..), IndexedObjects, IndirectObjCoordinates(..), Occurrence(..)
    , Structure(..)
  )
 emptyContent :: Content
 emptyContent = Content {
      docStructure = Structure {xRef = Map.empty, trailer = Map.empty}
    , objects = Map.empty
    , occurrences = []
  }
 unify :: [Content] -> Content
 unify = foldl complete emptyContent
  where
    complete tmpContent older =
      let mergedObjects = Map.union (objects tmpContent) (objects older) in
      Content {
          docStructure =
            unifyDocStructure (docStructure tmpContent) (docStructure older)
        , objects = mergedObjects
        , occurrences = 
            unifyOccurrences mergedObjects (occurrences tmpContent) (occurrences older)
      }
 unifyDocStructure :: Structure -> Structure -> Structure
 unifyDocStructure update original = Structure {
      xRef = Map.union (xRef update) (xRef original)
    , trailer = Map.union (trailer update) (trailer original)
  }
 unifyOccurrences :: IndexedObjects -> [Occurrence] -> [Occurrence] -> [Occurrence]
 unifyOccurrences objects update = foldr addOlder update
  where
    addOlder occurrence@(Comment _) existing = occurrence : existing
    addOlder occurrence@(Indirect indirect) existing =
      if objectId indirect `member` objects
      then occurrence : existing
      else existing
Author	SHA1	Message	Date
Tissevert	7eca875900	Improve getObj example to catch no-existing ObjectId and default to listing existing ObjectIds when none is provided	2019-11-29 11:53:08 +01:00
Tissevert	380c1e439b	Fix a bug preventing Hufflepdf from reading objects with a ' ' after the `obj` keyword	2019-11-27 18:01:19 +01:00
Tissevert	d6994f0813	Release 0.2.0.0	2019-10-14 10:16:14 +02:00
Tissevert	68f90d20e2	Implement PDF's multilayer updates and use it in getObj to display only the current version of the object taken into account instead of the concatenation of all its versions	2019-09-22 01:40:39 +02:00
Tissevert	3a39c75e6a	Stop requiring an empty line between subsections in a xref section	2019-09-22 01:37:28 +02:00
Tissevert	29c5823f34	Fix precision bug caused by using Floats to represent PDF Number values sometimes used to represent a byte offset within a file	2019-09-22 01:34:17 +02:00
Tissevert	9ab010de61	Add to example programs to show how the lib can be used	2019-09-20 22:42:17 +02:00
Tissevert	699f830a45	Simplify XRef structure, clarify integer types and remove nextLine	2019-09-20 22:39:14 +02:00