Compare commits

..

No commits in common. "main" and "v0.1.1.0" have entirely different histories.

9 changed files with 125 additions and 306 deletions

View file

@ -1,19 +1,5 @@
# Revision history for Hufflepdf # Revision history for Hufflepdf
## 0.2.0.1 -- 2019-11-27
* Fix bug discovered while running Hufflepdf on a PDF output from pdftk : magic keywords like `obj`, `stream` or `xref` can have spaces after them before the EOL
## 0.2.0.0 -- 2019-10-14
* Implement PDF's multilayer update mechanism
## 0.1.1.1 -- 2019-05-31
* Fix a bug preventing files with the «trailer» keyword on the same line as the dictionary that follows it to be parsed
* Improve error messages
* Support empty lines as comments
## 0.1.1.0 -- 2019-05-23 ## 0.1.1.0 -- 2019-05-23
* Rewrite the parser using attoparsec instead of parsec (about 1.5x faster) * Rewrite the parser using attoparsec instead of parsec (about 1.5x faster)

View file

@ -2,7 +2,7 @@
-- documentation, see http://haskell.org/cabal/users-guide/ -- documentation, see http://haskell.org/cabal/users-guide/
name: Hufflepdf name: Hufflepdf
version: 0.2.0.1 version: 0.1.0.0
synopsis: A PDF parser synopsis: A PDF parser
-- description: -- description:
license: BSD3 license: BSD3
@ -20,7 +20,6 @@ library
, PDF.EOL , PDF.EOL
, PDF.Object , PDF.Object
, PDF.Output , PDF.Output
, PDF.Update
other-modules: Data.ByteString.Char8.Util other-modules: Data.ByteString.Char8.Util
, PDF.Body , PDF.Body
, PDF.Parser , PDF.Parser
@ -33,21 +32,3 @@ library
hs-source-dirs: src hs-source-dirs: src
ghc-options: -Wall ghc-options: -Wall
default-language: Haskell2010 default-language: Haskell2010
executable equivalent
main-is: examples/equivalent.hs
build-depends: base
, bytestring
, Hufflepdf
ghc-options: -Wall
default-language: Haskell2010
executable getObj
main-is: examples/getObj.hs
build-depends: base
, bytestring
, containers
, Hufflepdf
, zlib
ghc-options: -Wall
default-language: Haskell2010

View file

@ -1,13 +0,0 @@
import qualified Data.ByteString.Char8 as BS (readFile)
import qualified Data.ByteString.Lazy.Char8 as Lazy (writeFile)
import PDF (parseDocument, render)
import System.Environment (getArgs)
import System.IO (hPutStrLn, stderr)
main :: IO ()
main = do
[inputFile, outputFile] <- getArgs
result <- parseDocument <$> BS.readFile inputFile
case result of
Left parseError -> hPutStrLn stderr parseError
Right doc -> Lazy.writeFile outputFile $ render doc

View file

@ -1,53 +0,0 @@
{-# LANGUAGE NamedFieldPuns #-}
import Codec.Compression.Zlib (decompress)
import Data.ByteString.Lazy.Char8 (ByteString)
import qualified Data.ByteString.Char8 as BS (readFile)
import qualified Data.ByteString.Lazy.Char8 as Lazy (fromStrict, putStr, toStrict)
import Data.Map ((!?))
import qualified Data.Map as Map (keys, lookup)
import PDF (Document(..), parseDocument)
import qualified PDF.EOL as EOL (Style)
import PDF.Object (Content(..), DirectObject(..), Object(..), Name(..))
import PDF.Output (ObjectId(..))
import qualified PDF.Output as Output (render)
import PDF.Update (unify)
import System.Environment (getArgs, getProgName)
import System.Exit (die)
import Text.Printf (printf)
display :: EOL.Style -> Object -> ByteString
display eolStyle d@(Direct _) = Output.render eolStyle d
display eolStyle s@(Stream {header, streamContent}) = Output.render eolStyle $
case Map.lookup (Name "Filter") header of
Just (NameObject (Name "FlateDecode")) -> Stream {
header
, streamContent = Lazy.toStrict . decompress $ Lazy.fromStrict streamContent
}
_ -> s
extractObject :: ObjectId -> Document -> Either String ByteString
extractObject objectId (Document {eolStyle, updates}) =
case objects content !? objectId of
Nothing -> Left $ "No object has ID " ++ show (getObjectId objectId)
Just o -> Right $ display eolStyle o
where
content = unify updates
listObjectIds :: Document -> Either String [String]
listObjectIds =
Right . prependTitle . toString . Map.keys . objects . unify . updates
where
toString = fmap (show . getObjectId)
prependTitle = ("ObjectIds defined in this PDF:":)
main :: IO ()
main = do
(inputFile, getData) <- parse =<< getArgs
input <- BS.readFile inputFile
either die id $ (parseDocument input >>= getData)
where
parse [inputFile] = return (inputFile, fmap (mapM_ putStrLn) . listObjectIds)
parse [inputFile, objectId] = return
(inputFile, fmap Lazy.putStr . extractObject (ObjectId (read objectId)))
parse _ = die . printf "Syntax: %s inputFile [OBJECT_ID]\n" =<< getProgName

View file

@ -19,21 +19,21 @@ import PDF.Object (
, Structure(..) , Structure(..)
, eofMarker, magicNumber, structure , eofMarker, magicNumber, structure
) )
import qualified PDF.Output as Output (render, line) import qualified PDF.Output as Output (render, string)
import PDF.Output (Output(..)) import PDF.Output (Output(..), nextLine)
import PDF.Parser (Parser, runParser, string, takeAll) import PDF.Parser (Parser, runParser, string, takeAll)
import Text.Printf (printf) import Text.Printf (printf)
data Document = Document { data Document = Document {
pdfVersion :: String pdfVersion :: String
, eolStyle :: EOL.Style , eolStyle :: EOL.Style
, updates :: [Content] , contents :: [Content]
} deriving Show } deriving Show
instance Output Document where instance Output Document where
output (Document {pdfVersion, updates}) = output (Document {pdfVersion, contents}) =
Output.line (printf "%%PDF-%s" pdfVersion) Output.string (printf "%%PDF-%s" pdfVersion)
`mappend` output updates `nextLine` output contents
render :: Document -> Lazy.ByteString render :: Document -> Lazy.ByteString
render document@(Document {eolStyle}) = Output.render eolStyle document render document@(Document {eolStyle}) = Output.render eolStyle document
@ -99,5 +99,5 @@ parseDocument input = do
(pdfVersion, eolStyle) <- runParser ((,) <$> version <*> EOL.parser) () input (pdfVersion, eolStyle) <- runParser ((,) <$> version <*> EOL.parser) () input
startXref <- readStartXref eolStyle input startXref <- readStartXref eolStyle input
structuresRead <- readStructures startXref input structuresRead <- readStructures startXref input
let updates = populate input <$> structuresRead let contents = populate input <$> structuresRead
return $ Document {pdfVersion, eolStyle, updates} return $ Document {pdfVersion, eolStyle, contents}

View file

@ -7,18 +7,17 @@ module PDF.Body (
import Control.Applicative ((<|>)) import Control.Applicative ((<|>))
import Control.Monad.State (get, gets, modify) import Control.Monad.State (get, gets, modify)
import Data.ByteString.Char8 (ByteString) import Data.ByteString.Char8 (ByteString)
import qualified Data.ByteString.Char8 as BS (cons, drop, unpack) import qualified Data.ByteString.Char8 as BS (drop, unpack)
import Data.Map ((!)) import Data.Map ((!))
import qualified Data.Map as Map (empty, insert, lookup) import qualified Data.Map as Map (empty, insert, lookup)
import qualified PDF.EOL as EOL (charset, parser) import qualified PDF.EOL as EOL (charset, parser)
import PDF.Object ( import PDF.Object (
Content(..), DirectObject(..), Flow(..), IndirectObjCoordinates(..) Content(..), DirectObject(..), Flow(..), IndirectObjCoordinates(..)
, InputStructure(..), Name(..), Number(..), Object(..), Occurrence(..) , InputStructure(..), Name(..), Number(..), Object(..), Occurrence(..)
, Structure(..), XRefEntry(..), XRefSection , Structure(..), XRefEntry(..), XRefSection, XRefSubSection(..)
, blank, dictionary, directObject, integer, line , blank, dictionary, directObject, integer, line
) )
import PDF.Output (ObjectId(..), Offset(..)) import PDF.Parser (Parser, block, char, on, runParser, takeAll)
import PDF.Parser (Parser, (<?>), block, char, on, option, runParser, takeAll)
data UserState = UserState { data UserState = UserState {
input :: ByteString input :: ByteString
@ -31,7 +30,7 @@ type SParser = Parser UserState
modifyFlow :: (Flow -> Flow) -> SParser () modifyFlow :: (Flow -> Flow) -> SParser ()
modifyFlow f = modify $ \state -> state {flow = f $ flow state} modifyFlow f = modify $ \state -> state {flow = f $ flow state}
addObject :: ObjectId -> Object -> SParser () addObject :: Int -> Object -> SParser ()
addObject objectId newObject = modifyFlow $ \flow -> flow { addObject objectId newObject = modifyFlow $ \flow -> flow {
tmpObjects = Map.insert objectId newObject $ tmpObjects flow tmpObjects = Map.insert objectId newObject $ tmpObjects flow
} }
@ -42,24 +41,32 @@ pushOccurrence newOccurrence = modifyFlow $ \flow -> flow {
} }
comment :: Parser u String comment :: Parser u String
comment = BS.unpack <$> (option "" afterPercent <* EOL.parser) comment = BS.unpack <$>
where (char '%' *> takeAll (not . (`elem` EOL.charset)) <* EOL.parser)
afterPercent = BS.cons <$> char '%' <*> takeAll (not . (`elem` EOL.charset))
lookupOffset :: ObjectId -> SParser Offset lookupOffset :: Int -> XRefSection -> Maybe Int
lookupOffset objectId = do lookupOffset _ [] = Nothing
lookupOffset objectId (xrefSubSection:others) =
let XRefSubSection {firstObjectId, entriesNumber, entries} = xrefSubSection in
let index = objectId - firstObjectId in
if index >= 0 && index < entriesNumber
then
case Map.lookup index entries of
Just (InUse {offset}) -> Just offset
_ -> Nothing
else lookupOffset objectId others
getOffset :: Int -> SParser Int
getOffset objectId = do
table <- gets xreferences table <- gets xreferences
case Map.lookup objectId table >>= entryOffset of case lookupOffset objectId table of
Nothing -> fail $ Nothing -> fail $
"obj " ++ show objectId ++ " is referenced but missing in XRef table" "obj " ++ show objectId ++ " is referenced but missing in XRef table"
Just offset -> return offset Just offset -> return offset
where
entryOffset (InUse {offset}) = Just offset
entryOffset _ = Nothing
loadNumber :: ObjectId -> SParser Double loadNumber :: Int -> SParser Float
loadNumber objectId = do loadNumber objectId = do
offset <- getOffset <$> lookupOffset objectId offset <- getOffset objectId
objectStart <- BS.drop offset <$> gets input objectStart <- BS.drop offset <$> gets input
indirectObjCoordinates `on` (objectStart :: ByteString) >> return () indirectObjCoordinates `on` (objectStart :: ByteString) >> return ()
objectValue <- (!objectId) . tmpObjects <$> gets flow objectValue <- (!objectId) . tmpObjects <$> gets flow
@ -70,7 +77,7 @@ loadNumber objectId = do
invalidValue :: Object -> String invalidValue :: Object -> String
invalidValue v = "Invalid value " ++ show v invalidValue v = "Invalid value " ++ show v
getSize :: Maybe DirectObject -> SParser Double getSize :: Maybe DirectObject -> SParser Float
getSize Nothing = fail "Missing '/Length' key on stream" getSize Nothing = fail "Missing '/Length' key on stream"
getSize (Just (NumberObject (Number size))) = return size getSize (Just (NumberObject (Number size))) = return size
getSize (Just (Reference (IndirectObjCoordinates {objectId}))) = do getSize (Just (Reference (IndirectObjCoordinates {objectId}))) = do
@ -96,15 +103,14 @@ object = streamObject <|> Direct <$> directObject
indirectObjCoordinates :: SParser IndirectObjCoordinates indirectObjCoordinates :: SParser IndirectObjCoordinates
indirectObjCoordinates = do indirectObjCoordinates = do
objectId <- ObjectId <$> integer objectId <- integer
coordinates <- IndirectObjCoordinates objectId <$> integer coordinates <- IndirectObjCoordinates objectId <$> integer
objectValue <- line "obj" *> object <* blank <* line "endobj" objectValue <- line "obj" *> object <* blank <* line "endobj"
addObject objectId objectValue addObject objectId objectValue
return coordinates return coordinates
occurrence :: SParser Occurrence occurrence :: SParser Occurrence
occurrence = occurrence = Comment <$> comment <|> Indirect <$> indirectObjCoordinates
Comment <$> comment <|> Indirect <$> indirectObjCoordinates <?> "comment or object"
populate :: ByteString -> InputStructure -> Content populate :: ByteString -> InputStructure -> Content
populate input structure = populate input structure =
@ -118,7 +124,7 @@ populate input structure =
} }
where where
docStructure = inputStructure structure docStructure = inputStructure structure
xreferences = xRef docStructure xreferences = xrefSection docStructure
initialState = UserState { initialState = UserState {
input, xreferences, flow = Flow { input, xreferences, flow = Flow {
occurrencesStack = [], tmpObjects = Map.empty occurrencesStack = [], tmpObjects = Map.empty

View file

@ -5,7 +5,6 @@ module PDF.Object (
Content(..) Content(..)
, DirectObject(..) , DirectObject(..)
, Flow(..) , Flow(..)
, IndexedObjects
, IndirectObjCoordinates(..) , IndirectObjCoordinates(..)
, InputStructure(..) , InputStructure(..)
, Name(..) , Name(..)
@ -15,6 +14,7 @@ module PDF.Object (
, Structure(..) , Structure(..)
, XRefEntry(..) , XRefEntry(..)
, XRefSection , XRefSection
, XRefSubSection(..)
, blank , blank
, dictionary , dictionary
, directObject , directObject
@ -31,15 +31,12 @@ import qualified Data.ByteString.Char8 as BS (
concat, cons, pack, singleton, unpack concat, cons, pack, singleton, unpack
) )
import Data.Map (Map, (!), mapWithKey) import Data.Map (Map, (!), mapWithKey)
import qualified Data.Map as Map ( import qualified Data.Map as Map (elems, fromList, toList)
delete, empty, fromList, lookup, minViewWithKey, toList, union
)
import qualified PDF.EOL as EOL (charset, parser) import qualified PDF.EOL as EOL (charset, parser)
import qualified PDF.Output as Output (concat, line, string) import qualified PDF.Output as Output (string)
import PDF.Output ( import PDF.Output (
OBuilder, ObjectId(..), Offset(..), Output(..), Resource(..) OBuilder, Offset(..), Output(..)
, byteString, getObjectId, getOffset, getOffsets, join, newLine , byteString, getOffsets, join, newLine, nextLine, saveOffset
, saveOffset
) )
import PDF.Parser ( import PDF.Parser (
Parser, (<?>) Parser, (<?>)
@ -49,7 +46,7 @@ import PDF.Parser (
import Text.Printf (printf) import Text.Printf (printf)
line :: String -> Parser u () line :: String -> Parser u ()
line l = (string (BS.pack l) *> blank *> return ()) <?> printf "line «%s»" l line l = string (BS.pack l) *> EOL.parser *> return () <?> printf "line «%s»" l
magicNumber :: ByteString magicNumber :: ByteString
magicNumber = "%PDF-" magicNumber = "%PDF-"
@ -76,8 +73,6 @@ integer = read . BS.unpack <$> decNumber <* blank <?> "decimal integer"
-- OBJECTS -- OBJECTS
------------------------------------- -------------------------------------
type IndexedObjects = Map ObjectId Object
-- --
-- Boolean -- Boolean
-- --
@ -88,7 +83,7 @@ boolean =
-- --
-- Number -- Number
-- --
newtype Number = Number Double deriving Show newtype Number = Number Float deriving Show
instance Output Number where instance Output Number where
output (Number f) = Output.string $ output (Number f) = Output.string $
@ -135,7 +130,7 @@ stringObject =
newtype Name = Name String deriving (Eq, Ord, Show) newtype Name = Name String deriving (Eq, Ord, Show)
instance Output Name where instance Output Name where
output (Name n) = Output.string ('/':n) output (Name n) = "/" `mappend` Output.string n
name :: Parser u Name name :: Parser u Name
name = Name . BS.unpack <$> (char '/' *> takeAll regular) <?> "name" name = Name . BS.unpack <$> (char '/' *> takeAll regular) <?> "name"
@ -158,7 +153,7 @@ instance Output Dictionary where
where where
keyValues = join " " $ outputKeyVal <$> Map.toList dict keyValues = join " " $ outputKeyVal <$> Map.toList dict
outputKeyVal :: (Name, DirectObject) -> OBuilder outputKeyVal :: (Name, DirectObject) -> OBuilder
outputKeyVal (key, val) = Output.concat [output key, " ", output val] outputKeyVal (key, val) = output key `mappend` " " `mappend` output val
dictionary :: Parser u Dictionary dictionary :: Parser u Dictionary
dictionary = dictionary =
@ -177,13 +172,12 @@ nullObject = string "null" *> return () <?> "null object"
-- Reference -- Reference
-- --
data IndirectObjCoordinates = IndirectObjCoordinates { data IndirectObjCoordinates = IndirectObjCoordinates {
objectId :: ObjectId objectId :: Int
, versionNumber :: Int , versionNumber :: Int
} deriving Show } deriving Show
reference :: Parser u IndirectObjCoordinates reference :: Parser u IndirectObjCoordinates
reference = IndirectObjCoordinates reference = IndirectObjCoordinates <$> integer <*> integer <* char 'R' <?> "reference to an object"
<$> (fmap ObjectId integer) <*> integer <* char 'R' <?> "reference to an object"
-- --
-- DirectObject -- DirectObject
@ -204,11 +198,11 @@ instance Output DirectObject where
output (NumberObject n) = output n output (NumberObject n) = output n
output (StringObject s) = output s output (StringObject s) = output s
output (NameObject n) = output n output (NameObject n) = output n
output (Array a) = Output.concat ["[", join " " a, "]"] output (Array a) = "[" `mappend` join " " a `mappend` "]"
output (Dictionary d) = output d output (Dictionary d) = output d
output (Null) = "null" output (Null) = "null"
output (Reference (IndirectObjCoordinates {objectId, versionNumber})) = output (Reference (IndirectObjCoordinates {objectId, versionNumber})) =
Output.string (printf "%d %d R" (getObjectId objectId) versionNumber) Output.string (printf "%d %d R" objectId versionNumber)
directObject :: Parser u DirectObject directObject :: Parser u DirectObject
directObject = directObject =
@ -235,26 +229,25 @@ data Object =
instance Output Object where instance Output Object where
output (Direct d) = output d output (Direct d) = output d
output (Stream {header, streamContent}) = Output.concat [ output (Stream {header, streamContent}) =
output header, newLine output header
, Output.line "stream" `nextLine` "stream"
, byteString streamContent `nextLine` byteString streamContent
, "endstream" `mappend` "endstream"
]
-- --
-- Occurrence -- Occurrence
-- --
data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show
outputOccurrence :: IndexedObjects -> Occurrence -> OBuilder outputOccurrence :: Map Int Object -> Occurrence -> OBuilder
outputOccurrence _ (Comment c) = Output.line c outputOccurrence _ (Comment c) =
Output.string (printf "%%%s" c) `mappend` newLine
outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) = outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) =
saveOffset (Object objectId) >> Output.concat [ saveOffset (ObjectId objectId)
Output.line (printf "%d %d obj" (getObjectId objectId) versionNumber) >> Output.string (printf "%d %d obj" objectId versionNumber)
, output (objects ! objectId), newLine `nextLine` output (objects ! objectId)
, Output.line "endobj" `nextLine` "endobj" `mappend` newLine
]
------------------------------------- -------------------------------------
-- XREF TABLE -- XREF TABLE
@ -264,18 +257,18 @@ outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNum
-- XRefEntry -- XRefEntry
-- --
data XRefEntry = InUse { data XRefEntry = InUse {
offset :: Offset offset :: Int
, generation :: Int , generation :: Int
} | Free { } | Free {
nextFree :: ObjectId nextFree :: Int
, generation :: Int , generation :: Int
} deriving Show } deriving Show
instance Output XRefEntry where instance Output XRefEntry where
output (InUse {offset, generation}) = output (InUse {offset, generation}) =
Output.line (printf "%010d %05d n " (getOffset offset) generation) Output.string (printf "%010d %05d n " offset generation) `mappend` newLine
output (Free {nextFree, generation}) = output (Free {nextFree, generation}) =
Output.line (printf "%010d %05d f " (getObjectId nextFree) generation) Output.string (printf "%010d %05d f " nextFree generation) `mappend` newLine
entry :: Parser u XRefEntry entry :: Parser u XRefEntry
entry = do entry = do
@ -283,56 +276,31 @@ entry = do
(inUse big small <|> free big small <?> "XRef entry") <* blank (inUse big small <|> free big small <?> "XRef entry") <* blank
where where
inUse :: Int -> Int -> Parser u XRefEntry inUse :: Int -> Int -> Parser u XRefEntry
inUse big generation = inUse offset generation = char 'n' *> return (InUse {offset, generation})
char 'n' *> return (InUse {offset = Offset big, generation})
free :: Int -> Int -> Parser u XRefEntry free :: Int -> Int -> Parser u XRefEntry
free big generation = free nextFree generation = char 'f' *> return (Free {nextFree, generation})
char 'f' *> return (Free {nextFree = ObjectId big, generation})
-- --
-- XRefSubSection -- XRefSubSection
-- --
data XRefSubSection = XRefSubSection { data XRefSubSection = XRefSubSection {
firstObjectId :: ObjectId firstObjectId :: Int
, entries :: [XRefEntry] , entriesNumber :: Int
, entries :: Map Int XRefEntry
} deriving Show } deriving Show
instance Output XRefSubSection where instance Output XRefSubSection where
output (XRefSubSection {firstObjectId, entries}) = output (XRefSubSection {firstObjectId, entriesNumber, entries}) =
Output.line (printf "%d %d" (getObjectId firstObjectId) (length entries)) Output.string (printf "%d %d" firstObjectId entriesNumber)
`mappend` output entries `nextLine` output (Map.elems entries)
xRefSubSection :: Parser u XRefSubSection xrefSubSection :: Parser u XRefSubSection
xRefSubSection = do xrefSubSection = do
(firstId, entriesNumber) <- (,) <$> integer <*> integer <?> "XRef subsection" (firstObjectId, entriesNumber) <- (,) <$> integer <*> integer <?> "XRef subsection"
entries <- count entriesNumber entry entries <- Map.fromList . zip [firstObjectId..] <$> count entriesNumber entry
return $ XRefSubSection {firstObjectId = ObjectId firstId, entries} return $ XRefSubSection {firstObjectId, entriesNumber, entries}
type XRefSection = Map ObjectId XRefEntry type XRefSection = [XRefSubSection]
instance Output XRefSection where
output = output . sections
where
sections tmp =
case Map.minViewWithKey tmp of
Nothing -> []
Just ((objectId@(ObjectId value), firstEntry), rest) ->
let (subSection, sndRest) = section objectId [firstEntry] (value + 1) rest in
subSection : sections sndRest
section firstObjectId stack nextValue tmp =
let nextId = ObjectId nextValue in
case Map.lookup nextId tmp of
Nothing -> (XRefSubSection {firstObjectId, entries = reverse stack}, tmp)
Just nextEntry ->
section firstObjectId (nextEntry:stack) (nextValue + 1) (Map.delete nextId tmp)
xRefSection :: Parser u XRefSection
xRefSection = foldr addSubsection Map.empty <$>
(line "xref" *> xRefSubSection `sepBy` many EOL.parser)
where
addSubsection (XRefSubSection {firstObjectId, entries}) =
Map.union . Map.fromList $ zip ([firstObjectId..]) entries
-- --
-- Structure -- Structure
@ -343,28 +311,34 @@ data InputStructure = InputStructure {
} }
data Structure = Structure { data Structure = Structure {
xRef :: XRefSection xrefSection :: XRefSection
, trailer :: Dictionary , trailer :: Dictionary
} deriving Show } deriving Show
structure :: Parser u Structure structure :: Parser u Structure
structure = structure =
Structure Structure
<$> xRefSection <$> (line "xref" *> xrefSubSection `sepBy` EOL.parser)
<*> (string "trailer" *> blank *> dictionary <* EOL.parser) <*> (line "trailer" *> dictionary <* EOL.parser)
updateXrefs :: XRefSection -> Map Resource Offset -> (XRefSection, Offset) updateXrefs :: XRefSection -> Map Offset Int -> (XRefSection, Int)
updateXrefs xRef offsets = (mapWithKey updateEntry xRef, offsets ! StartXRef) updateXrefs xrefSection offsets = (
updateSubSection <$> xrefSection
, offsets ! StartXRef
)
where where
updateEntry objectId e@(InUse {}) = e {offset = offsets ! (Object objectId)} updateSubSection subSection@(XRefSubSection {firstObjectId, entries}) =
updateEntry _ e = e subSection {entries = mapWithKey (updateEntry firstObjectId) entries}
updateEntry firstObjectId index e@(InUse {}) =
e {offset = offsets ! (ObjectId $ firstObjectId + index)}
updateEntry _ _ e = e
-- --
-- Flow -- Flow
-- --
data Flow = Flow { data Flow = Flow {
occurrencesStack :: [Occurrence] occurrencesStack :: [Occurrence]
, tmpObjects :: IndexedObjects , tmpObjects :: Map Int Object
} deriving Show } deriving Show
-- --
@ -372,26 +346,25 @@ data Flow = Flow {
-- --
data Content = Content { data Content = Content {
occurrences :: [Occurrence] occurrences :: [Occurrence]
, objects :: IndexedObjects , objects :: Map Int Object
, docStructure :: Structure , docStructure :: Structure
} deriving Show } deriving Show
outputBody :: ([Occurrence], IndexedObjects) -> OBuilder outputBody :: ([Occurrence], Map Int Object) -> OBuilder
outputBody (occurrences, objects) = outputBody (occurrences, objects) =
output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef
instance Output Content where instance Output Content where
output (Content {occurrences, objects, docStructure}) = output (Content {occurrences, objects, docStructure}) =
fmap (updateXrefs xRef) <$> getOffsets (outputBody (occurrences, objects)) fmap (updateXrefs xrefSection) <$> getOffsets (outputBody (occurrences, objects))
>>= \(body, (xref, startXRef)) -> Output.concat [ >>= \(body, (xref, startXRef)) ->
body body
, Output.line "xref" `mappend` "xref"
, output xref `nextLine` output xref
, Output.line "trailer" `mappend` "trailer"
, output trailer, newLine `nextLine` output trailer
, Output.line "startxref" `nextLine` "startxref"
, Output.line (printf "%d" (getOffset startXRef)) `nextLine` (Output.string (printf "%d" startXRef))
, byteString eofMarker `nextLine` byteString eofMarker
]
where where
Structure {xRef, trailer} = docStructure Structure {xrefSection, trailer} = docStructure

View file

@ -2,21 +2,17 @@
{-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
module PDF.Output ( module PDF.Output (
OBuilder OBuilder
, ObjectId(..)
, OContext(..) , OContext(..)
, Offset(..) , Offset(..)
, Output(..) , Output(..)
, Resource(..)
, byteString , byteString
, char , char
, concat
, getOffsets , getOffsets
, join , join
, line
, newLine , newLine
, nextLine
, saveOffset , saveOffset
, string , string
, render , render
@ -32,14 +28,10 @@ import qualified Data.Map as Map (singleton)
import Data.String (IsString(..)) import Data.String (IsString(..))
import Control.Monad.RWS (RWS, runRWS, ask, get, listen, modify, tell) import Control.Monad.RWS (RWS, runRWS, ask, get, listen, modify, tell)
import qualified PDF.EOL as EOL (Style(..)) import qualified PDF.EOL as EOL (Style(..))
import Prelude hiding (concat)
newtype ObjectId = ObjectId {getObjectId :: Int} deriving (Enum, Eq, Ord, Show) data Offset = StartXRef | ObjectId Int deriving (Eq, Ord)
newtype Offset = Offset {getOffset :: Int} deriving (Show)
data Resource = StartXRef | Object ObjectId deriving (Eq, Ord) newtype OContext a = OContext (RWS EOL.Style (Map Offset Int) Int a)
newtype OContext a = OContext (RWS EOL.Style (Map Resource Offset) Offset a)
type OBuilder = OContext Builder type OBuilder = OContext Builder
instance Functor OContext where instance Functor OContext where
@ -52,23 +44,20 @@ instance Applicative OContext where
instance Monad OContext where instance Monad OContext where
(>>=) (OContext a) f = OContext (a >>= (\x -> let OContext y = f x in y)) (>>=) (OContext a) f = OContext (a >>= (\x -> let OContext y = f x in y))
saveOffset :: Resource -> OContext () saveOffset :: Offset -> OContext ()
saveOffset resource = OContext $ saveOffset offset = OContext $
get >>= tell . Map.singleton resource get >>= tell . Map.singleton offset
lift :: (a -> Builder) -> a -> OBuilder lift :: (a -> Builder) -> a -> OBuilder
lift f a = return (f a) lift f a = return (f a)
getOffsets :: OBuilder -> OContext (OBuilder, Map Resource Offset) getOffsets :: OBuilder -> OContext (OBuilder, Map Offset Int)
getOffsets (OContext builder) = getOffsets (OContext builder) =
OContext (listen builder >>= \(a, w) -> return (return a, w)) OContext (listen builder >>= \(a, w) -> return (return a, w))
append :: OBuilder -> OBuilder -> OBuilder append :: OBuilder -> OBuilder -> OBuilder
append (OContext a) (OContext b) = OContext (mappend <$> a <*> b) append (OContext a) (OContext b) = OContext (mappend <$> a <*> b)
concat :: [OBuilder] -> OBuilder
concat = foldl mappend mempty
#if MIN_VERSION_base(4,11,0) #if MIN_VERSION_base(4,11,0)
instance Semigroup OBuilder where instance Semigroup OBuilder where
(<>) = append (<>) = append
@ -94,7 +83,7 @@ instance Output Bool where
output True = string "true" output True = string "true"
instance Output a => Output [a] where instance Output a => Output [a] where
output = concat . fmap output output = foldl mappend mempty . fmap output
join :: Output a => String -> [a] -> OBuilder join :: Output a => String -> [a] -> OBuilder
join _ [] = mempty join _ [] = mempty
@ -102,30 +91,27 @@ join _ [a] = output a
join separator (a:as) = join separator (a:as) =
output a `mappend` string separator `mappend` (join separator as) output a `mappend` string separator `mappend` (join separator as)
offset :: (Int -> Int) -> OContext ()
offset f = OContext $ modify $ \(Offset o) -> Offset (f o)
newLine :: OBuilder newLine :: OBuilder
newLine = buildEOL =<< OContext ask newLine = OContext $ buildEOL =<< ask
where where
buildEOL EOL.CR = return (char8 '\r') <* offset (+1) buildEOL EOL.CR = return (char8 '\r') <* modify (+1)
buildEOL EOL.LF = return (char8 '\n') <* offset (+1) buildEOL EOL.LF = return (char8 '\n') <* modify (+1)
buildEOL EOL.CRLF = return (string8 "\r\n") <* offset (+2) buildEOL EOL.CRLF = return (string8 "\r\n") <* modify (+2)
nextLine :: OBuilder -> OBuilder -> OBuilder
nextLine a b = a `mappend` newLine `mappend` b
char :: Char -> OBuilder char :: Char -> OBuilder
char c = lift char8 c <* offset (+1) char c = lift char8 c <* OContext (modify (+1))
string :: String -> OBuilder string :: String -> OBuilder
string s = lift string8 s <* offset (+ toEnum (length s)) string s = lift string8 s <* OContext (modify (+ toEnum (length s)))
line :: String -> OBuilder
line l = string l `mappend` newLine
byteString :: ByteString -> OBuilder byteString :: ByteString -> OBuilder
byteString bs = lift B.byteString bs <* offset (+ BS.length bs) byteString bs = lift B.byteString bs <* OContext (modify (+ BS.length bs))
render :: Output a => EOL.Style -> a -> Lazy.ByteString render :: Output a => EOL.Style -> a -> Lazy.ByteString
render eolStyle a = render eolStyle a =
let OContext builder = output a in let OContext builder = output a in
let (outputByteString, _, _) = runRWS builder eolStyle (Offset 0) in let (outputByteString, _, _) = runRWS builder eolStyle 0 in
toLazyByteString outputByteString toLazyByteString outputByteString

View file

@ -1,47 +0,0 @@
{-# LANGUAGE NamedFieldPuns #-}
module PDF.Update (
unify
) where
import Data.Map (member)
import qualified Data.Map as Map (empty, union)
import PDF.Object (
Content(..), IndexedObjects, IndirectObjCoordinates(..), Occurrence(..)
, Structure(..)
)
emptyContent :: Content
emptyContent = Content {
docStructure = Structure {xRef = Map.empty, trailer = Map.empty}
, objects = Map.empty
, occurrences = []
}
unify :: [Content] -> Content
unify = foldl complete emptyContent
where
complete tmpContent older =
let mergedObjects = Map.union (objects tmpContent) (objects older) in
Content {
docStructure =
unifyDocStructure (docStructure tmpContent) (docStructure older)
, objects = mergedObjects
, occurrences =
unifyOccurrences mergedObjects (occurrences tmpContent) (occurrences older)
}
unifyDocStructure :: Structure -> Structure -> Structure
unifyDocStructure update original = Structure {
xRef = Map.union (xRef update) (xRef original)
, trailer = Map.union (trailer update) (trailer original)
}
unifyOccurrences :: IndexedObjects -> [Occurrence] -> [Occurrence] -> [Occurrence]
unifyOccurrences objects update = foldr addOlder update
where
addOlder occurrence@(Comment _) existing = occurrence : existing
addOlder occurrence@(Indirect indirect) existing =
if objectId indirect `member` objects
then occurrence : existing
else existing