Compare commits
No commits in common. "main" and "v0.1.1.1" have entirely different histories.
9 changed files with 119 additions and 292 deletions
|
@ -1,13 +1,5 @@
|
||||||
# Revision history for Hufflepdf
|
# Revision history for Hufflepdf
|
||||||
|
|
||||||
## 0.2.0.1 -- 2019-11-27
|
|
||||||
|
|
||||||
* Fix bug discovered while running Hufflepdf on a PDF output from pdftk : magic keywords like `obj`, `stream` or `xref` can have spaces after them before the EOL
|
|
||||||
|
|
||||||
## 0.2.0.0 -- 2019-10-14
|
|
||||||
|
|
||||||
* Implement PDF's multilayer update mechanism
|
|
||||||
|
|
||||||
## 0.1.1.1 -- 2019-05-31
|
## 0.1.1.1 -- 2019-05-31
|
||||||
|
|
||||||
* Fix a bug preventing files with the «trailer» keyword on the same line as the dictionary that follows it to be parsed
|
* Fix a bug preventing files with the «trailer» keyword on the same line as the dictionary that follows it to be parsed
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
-- documentation, see http://haskell.org/cabal/users-guide/
|
-- documentation, see http://haskell.org/cabal/users-guide/
|
||||||
|
|
||||||
name: Hufflepdf
|
name: Hufflepdf
|
||||||
version: 0.2.0.1
|
version: 0.1.1.1
|
||||||
synopsis: A PDF parser
|
synopsis: A PDF parser
|
||||||
-- description:
|
-- description:
|
||||||
license: BSD3
|
license: BSD3
|
||||||
|
@ -20,7 +20,6 @@ library
|
||||||
, PDF.EOL
|
, PDF.EOL
|
||||||
, PDF.Object
|
, PDF.Object
|
||||||
, PDF.Output
|
, PDF.Output
|
||||||
, PDF.Update
|
|
||||||
other-modules: Data.ByteString.Char8.Util
|
other-modules: Data.ByteString.Char8.Util
|
||||||
, PDF.Body
|
, PDF.Body
|
||||||
, PDF.Parser
|
, PDF.Parser
|
||||||
|
@ -33,21 +32,3 @@ library
|
||||||
hs-source-dirs: src
|
hs-source-dirs: src
|
||||||
ghc-options: -Wall
|
ghc-options: -Wall
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
|
|
||||||
executable equivalent
|
|
||||||
main-is: examples/equivalent.hs
|
|
||||||
build-depends: base
|
|
||||||
, bytestring
|
|
||||||
, Hufflepdf
|
|
||||||
ghc-options: -Wall
|
|
||||||
default-language: Haskell2010
|
|
||||||
|
|
||||||
executable getObj
|
|
||||||
main-is: examples/getObj.hs
|
|
||||||
build-depends: base
|
|
||||||
, bytestring
|
|
||||||
, containers
|
|
||||||
, Hufflepdf
|
|
||||||
, zlib
|
|
||||||
ghc-options: -Wall
|
|
||||||
default-language: Haskell2010
|
|
||||||
|
|
|
@ -1,13 +0,0 @@
|
||||||
import qualified Data.ByteString.Char8 as BS (readFile)
|
|
||||||
import qualified Data.ByteString.Lazy.Char8 as Lazy (writeFile)
|
|
||||||
import PDF (parseDocument, render)
|
|
||||||
import System.Environment (getArgs)
|
|
||||||
import System.IO (hPutStrLn, stderr)
|
|
||||||
|
|
||||||
main :: IO ()
|
|
||||||
main = do
|
|
||||||
[inputFile, outputFile] <- getArgs
|
|
||||||
result <- parseDocument <$> BS.readFile inputFile
|
|
||||||
case result of
|
|
||||||
Left parseError -> hPutStrLn stderr parseError
|
|
||||||
Right doc -> Lazy.writeFile outputFile $ render doc
|
|
|
@ -1,53 +0,0 @@
|
||||||
{-# LANGUAGE NamedFieldPuns #-}
|
|
||||||
|
|
||||||
import Codec.Compression.Zlib (decompress)
|
|
||||||
import Data.ByteString.Lazy.Char8 (ByteString)
|
|
||||||
import qualified Data.ByteString.Char8 as BS (readFile)
|
|
||||||
import qualified Data.ByteString.Lazy.Char8 as Lazy (fromStrict, putStr, toStrict)
|
|
||||||
import Data.Map ((!?))
|
|
||||||
import qualified Data.Map as Map (keys, lookup)
|
|
||||||
import PDF (Document(..), parseDocument)
|
|
||||||
import qualified PDF.EOL as EOL (Style)
|
|
||||||
import PDF.Object (Content(..), DirectObject(..), Object(..), Name(..))
|
|
||||||
import PDF.Output (ObjectId(..))
|
|
||||||
import qualified PDF.Output as Output (render)
|
|
||||||
import PDF.Update (unify)
|
|
||||||
import System.Environment (getArgs, getProgName)
|
|
||||||
import System.Exit (die)
|
|
||||||
import Text.Printf (printf)
|
|
||||||
|
|
||||||
display :: EOL.Style -> Object -> ByteString
|
|
||||||
display eolStyle d@(Direct _) = Output.render eolStyle d
|
|
||||||
display eolStyle s@(Stream {header, streamContent}) = Output.render eolStyle $
|
|
||||||
case Map.lookup (Name "Filter") header of
|
|
||||||
Just (NameObject (Name "FlateDecode")) -> Stream {
|
|
||||||
header
|
|
||||||
, streamContent = Lazy.toStrict . decompress $ Lazy.fromStrict streamContent
|
|
||||||
}
|
|
||||||
_ -> s
|
|
||||||
|
|
||||||
extractObject :: ObjectId -> Document -> Either String ByteString
|
|
||||||
extractObject objectId (Document {eolStyle, updates}) =
|
|
||||||
case objects content !? objectId of
|
|
||||||
Nothing -> Left $ "No object has ID " ++ show (getObjectId objectId)
|
|
||||||
Just o -> Right $ display eolStyle o
|
|
||||||
where
|
|
||||||
content = unify updates
|
|
||||||
|
|
||||||
listObjectIds :: Document -> Either String [String]
|
|
||||||
listObjectIds =
|
|
||||||
Right . prependTitle . toString . Map.keys . objects . unify . updates
|
|
||||||
where
|
|
||||||
toString = fmap (show . getObjectId)
|
|
||||||
prependTitle = ("ObjectIds defined in this PDF:":)
|
|
||||||
|
|
||||||
main :: IO ()
|
|
||||||
main = do
|
|
||||||
(inputFile, getData) <- parse =<< getArgs
|
|
||||||
input <- BS.readFile inputFile
|
|
||||||
either die id $ (parseDocument input >>= getData)
|
|
||||||
where
|
|
||||||
parse [inputFile] = return (inputFile, fmap (mapM_ putStrLn) . listObjectIds)
|
|
||||||
parse [inputFile, objectId] = return
|
|
||||||
(inputFile, fmap Lazy.putStr . extractObject (ObjectId (read objectId)))
|
|
||||||
parse _ = die . printf "Syntax: %s inputFile [OBJECT_ID]\n" =<< getProgName
|
|
16
src/PDF.hs
16
src/PDF.hs
|
@ -19,21 +19,21 @@ import PDF.Object (
|
||||||
, Structure(..)
|
, Structure(..)
|
||||||
, eofMarker, magicNumber, structure
|
, eofMarker, magicNumber, structure
|
||||||
)
|
)
|
||||||
import qualified PDF.Output as Output (render, line)
|
import qualified PDF.Output as Output (render, string)
|
||||||
import PDF.Output (Output(..))
|
import PDF.Output (Output(..), nextLine)
|
||||||
import PDF.Parser (Parser, runParser, string, takeAll)
|
import PDF.Parser (Parser, runParser, string, takeAll)
|
||||||
import Text.Printf (printf)
|
import Text.Printf (printf)
|
||||||
|
|
||||||
data Document = Document {
|
data Document = Document {
|
||||||
pdfVersion :: String
|
pdfVersion :: String
|
||||||
, eolStyle :: EOL.Style
|
, eolStyle :: EOL.Style
|
||||||
, updates :: [Content]
|
, contents :: [Content]
|
||||||
} deriving Show
|
} deriving Show
|
||||||
|
|
||||||
instance Output Document where
|
instance Output Document where
|
||||||
output (Document {pdfVersion, updates}) =
|
output (Document {pdfVersion, contents}) =
|
||||||
Output.line (printf "%%PDF-%s" pdfVersion)
|
Output.string (printf "%%PDF-%s" pdfVersion)
|
||||||
`mappend` output updates
|
`nextLine` output contents
|
||||||
|
|
||||||
render :: Document -> Lazy.ByteString
|
render :: Document -> Lazy.ByteString
|
||||||
render document@(Document {eolStyle}) = Output.render eolStyle document
|
render document@(Document {eolStyle}) = Output.render eolStyle document
|
||||||
|
@ -99,5 +99,5 @@ parseDocument input = do
|
||||||
(pdfVersion, eolStyle) <- runParser ((,) <$> version <*> EOL.parser) () input
|
(pdfVersion, eolStyle) <- runParser ((,) <$> version <*> EOL.parser) () input
|
||||||
startXref <- readStartXref eolStyle input
|
startXref <- readStartXref eolStyle input
|
||||||
structuresRead <- readStructures startXref input
|
structuresRead <- readStructures startXref input
|
||||||
let updates = populate input <$> structuresRead
|
let contents = populate input <$> structuresRead
|
||||||
return $ Document {pdfVersion, eolStyle, updates}
|
return $ Document {pdfVersion, eolStyle, contents}
|
||||||
|
|
|
@ -14,10 +14,9 @@ import qualified PDF.EOL as EOL (charset, parser)
|
||||||
import PDF.Object (
|
import PDF.Object (
|
||||||
Content(..), DirectObject(..), Flow(..), IndirectObjCoordinates(..)
|
Content(..), DirectObject(..), Flow(..), IndirectObjCoordinates(..)
|
||||||
, InputStructure(..), Name(..), Number(..), Object(..), Occurrence(..)
|
, InputStructure(..), Name(..), Number(..), Object(..), Occurrence(..)
|
||||||
, Structure(..), XRefEntry(..), XRefSection
|
, Structure(..), XRefEntry(..), XRefSection, XRefSubSection(..)
|
||||||
, blank, dictionary, directObject, integer, line
|
, blank, dictionary, directObject, integer, line
|
||||||
)
|
)
|
||||||
import PDF.Output (ObjectId(..), Offset(..))
|
|
||||||
import PDF.Parser (Parser, (<?>), block, char, on, option, runParser, takeAll)
|
import PDF.Parser (Parser, (<?>), block, char, on, option, runParser, takeAll)
|
||||||
|
|
||||||
data UserState = UserState {
|
data UserState = UserState {
|
||||||
|
@ -31,7 +30,7 @@ type SParser = Parser UserState
|
||||||
modifyFlow :: (Flow -> Flow) -> SParser ()
|
modifyFlow :: (Flow -> Flow) -> SParser ()
|
||||||
modifyFlow f = modify $ \state -> state {flow = f $ flow state}
|
modifyFlow f = modify $ \state -> state {flow = f $ flow state}
|
||||||
|
|
||||||
addObject :: ObjectId -> Object -> SParser ()
|
addObject :: Int -> Object -> SParser ()
|
||||||
addObject objectId newObject = modifyFlow $ \flow -> flow {
|
addObject objectId newObject = modifyFlow $ \flow -> flow {
|
||||||
tmpObjects = Map.insert objectId newObject $ tmpObjects flow
|
tmpObjects = Map.insert objectId newObject $ tmpObjects flow
|
||||||
}
|
}
|
||||||
|
@ -46,20 +45,29 @@ comment = BS.unpack <$> (option "" afterPercent <* EOL.parser)
|
||||||
where
|
where
|
||||||
afterPercent = BS.cons <$> char '%' <*> takeAll (not . (`elem` EOL.charset))
|
afterPercent = BS.cons <$> char '%' <*> takeAll (not . (`elem` EOL.charset))
|
||||||
|
|
||||||
lookupOffset :: ObjectId -> SParser Offset
|
lookupOffset :: Int -> XRefSection -> Maybe Int
|
||||||
lookupOffset objectId = do
|
lookupOffset _ [] = Nothing
|
||||||
|
lookupOffset objectId (xrefSubSection:others) =
|
||||||
|
let XRefSubSection {firstObjectId, entriesNumber, entries} = xrefSubSection in
|
||||||
|
let index = objectId - firstObjectId in
|
||||||
|
if index >= 0 && index < entriesNumber
|
||||||
|
then
|
||||||
|
case Map.lookup index entries of
|
||||||
|
Just (InUse {offset}) -> Just offset
|
||||||
|
_ -> Nothing
|
||||||
|
else lookupOffset objectId others
|
||||||
|
|
||||||
|
getOffset :: Int -> SParser Int
|
||||||
|
getOffset objectId = do
|
||||||
table <- gets xreferences
|
table <- gets xreferences
|
||||||
case Map.lookup objectId table >>= entryOffset of
|
case lookupOffset objectId table of
|
||||||
Nothing -> fail $
|
Nothing -> fail $
|
||||||
"obj " ++ show objectId ++ " is referenced but missing in XRef table"
|
"obj " ++ show objectId ++ " is referenced but missing in XRef table"
|
||||||
Just offset -> return offset
|
Just offset -> return offset
|
||||||
where
|
|
||||||
entryOffset (InUse {offset}) = Just offset
|
|
||||||
entryOffset _ = Nothing
|
|
||||||
|
|
||||||
loadNumber :: ObjectId -> SParser Double
|
loadNumber :: Int -> SParser Float
|
||||||
loadNumber objectId = do
|
loadNumber objectId = do
|
||||||
offset <- getOffset <$> lookupOffset objectId
|
offset <- getOffset objectId
|
||||||
objectStart <- BS.drop offset <$> gets input
|
objectStart <- BS.drop offset <$> gets input
|
||||||
indirectObjCoordinates `on` (objectStart :: ByteString) >> return ()
|
indirectObjCoordinates `on` (objectStart :: ByteString) >> return ()
|
||||||
objectValue <- (!objectId) . tmpObjects <$> gets flow
|
objectValue <- (!objectId) . tmpObjects <$> gets flow
|
||||||
|
@ -70,7 +78,7 @@ loadNumber objectId = do
|
||||||
invalidValue :: Object -> String
|
invalidValue :: Object -> String
|
||||||
invalidValue v = "Invalid value " ++ show v
|
invalidValue v = "Invalid value " ++ show v
|
||||||
|
|
||||||
getSize :: Maybe DirectObject -> SParser Double
|
getSize :: Maybe DirectObject -> SParser Float
|
||||||
getSize Nothing = fail "Missing '/Length' key on stream"
|
getSize Nothing = fail "Missing '/Length' key on stream"
|
||||||
getSize (Just (NumberObject (Number size))) = return size
|
getSize (Just (NumberObject (Number size))) = return size
|
||||||
getSize (Just (Reference (IndirectObjCoordinates {objectId}))) = do
|
getSize (Just (Reference (IndirectObjCoordinates {objectId}))) = do
|
||||||
|
@ -96,7 +104,7 @@ object = streamObject <|> Direct <$> directObject
|
||||||
|
|
||||||
indirectObjCoordinates :: SParser IndirectObjCoordinates
|
indirectObjCoordinates :: SParser IndirectObjCoordinates
|
||||||
indirectObjCoordinates = do
|
indirectObjCoordinates = do
|
||||||
objectId <- ObjectId <$> integer
|
objectId <- integer
|
||||||
coordinates <- IndirectObjCoordinates objectId <$> integer
|
coordinates <- IndirectObjCoordinates objectId <$> integer
|
||||||
objectValue <- line "obj" *> object <* blank <* line "endobj"
|
objectValue <- line "obj" *> object <* blank <* line "endobj"
|
||||||
addObject objectId objectValue
|
addObject objectId objectValue
|
||||||
|
@ -118,7 +126,7 @@ populate input structure =
|
||||||
}
|
}
|
||||||
where
|
where
|
||||||
docStructure = inputStructure structure
|
docStructure = inputStructure structure
|
||||||
xreferences = xRef docStructure
|
xreferences = xrefSection docStructure
|
||||||
initialState = UserState {
|
initialState = UserState {
|
||||||
input, xreferences, flow = Flow {
|
input, xreferences, flow = Flow {
|
||||||
occurrencesStack = [], tmpObjects = Map.empty
|
occurrencesStack = [], tmpObjects = Map.empty
|
||||||
|
|
|
@ -5,7 +5,6 @@ module PDF.Object (
|
||||||
Content(..)
|
Content(..)
|
||||||
, DirectObject(..)
|
, DirectObject(..)
|
||||||
, Flow(..)
|
, Flow(..)
|
||||||
, IndexedObjects
|
|
||||||
, IndirectObjCoordinates(..)
|
, IndirectObjCoordinates(..)
|
||||||
, InputStructure(..)
|
, InputStructure(..)
|
||||||
, Name(..)
|
, Name(..)
|
||||||
|
@ -15,6 +14,7 @@ module PDF.Object (
|
||||||
, Structure(..)
|
, Structure(..)
|
||||||
, XRefEntry(..)
|
, XRefEntry(..)
|
||||||
, XRefSection
|
, XRefSection
|
||||||
|
, XRefSubSection(..)
|
||||||
, blank
|
, blank
|
||||||
, dictionary
|
, dictionary
|
||||||
, directObject
|
, directObject
|
||||||
|
@ -31,15 +31,12 @@ import qualified Data.ByteString.Char8 as BS (
|
||||||
concat, cons, pack, singleton, unpack
|
concat, cons, pack, singleton, unpack
|
||||||
)
|
)
|
||||||
import Data.Map (Map, (!), mapWithKey)
|
import Data.Map (Map, (!), mapWithKey)
|
||||||
import qualified Data.Map as Map (
|
import qualified Data.Map as Map (elems, fromList, toList)
|
||||||
delete, empty, fromList, lookup, minViewWithKey, toList, union
|
|
||||||
)
|
|
||||||
import qualified PDF.EOL as EOL (charset, parser)
|
import qualified PDF.EOL as EOL (charset, parser)
|
||||||
import qualified PDF.Output as Output (concat, line, string)
|
import qualified PDF.Output as Output (string)
|
||||||
import PDF.Output (
|
import PDF.Output (
|
||||||
OBuilder, ObjectId(..), Offset(..), Output(..), Resource(..)
|
OBuilder, Offset(..), Output(..)
|
||||||
, byteString, getObjectId, getOffset, getOffsets, join, newLine
|
, byteString, getOffsets, join, newLine, nextLine, saveOffset
|
||||||
, saveOffset
|
|
||||||
)
|
)
|
||||||
import PDF.Parser (
|
import PDF.Parser (
|
||||||
Parser, (<?>)
|
Parser, (<?>)
|
||||||
|
@ -49,7 +46,7 @@ import PDF.Parser (
|
||||||
import Text.Printf (printf)
|
import Text.Printf (printf)
|
||||||
|
|
||||||
line :: String -> Parser u ()
|
line :: String -> Parser u ()
|
||||||
line l = (string (BS.pack l) *> blank *> return ()) <?> printf "line «%s»" l
|
line l = (string (BS.pack l) *> EOL.parser *> return ()) <?> printf "line «%s»" l
|
||||||
|
|
||||||
magicNumber :: ByteString
|
magicNumber :: ByteString
|
||||||
magicNumber = "%PDF-"
|
magicNumber = "%PDF-"
|
||||||
|
@ -76,8 +73,6 @@ integer = read . BS.unpack <$> decNumber <* blank <?> "decimal integer"
|
||||||
-- OBJECTS
|
-- OBJECTS
|
||||||
-------------------------------------
|
-------------------------------------
|
||||||
|
|
||||||
type IndexedObjects = Map ObjectId Object
|
|
||||||
|
|
||||||
--
|
--
|
||||||
-- Boolean
|
-- Boolean
|
||||||
--
|
--
|
||||||
|
@ -88,7 +83,7 @@ boolean =
|
||||||
--
|
--
|
||||||
-- Number
|
-- Number
|
||||||
--
|
--
|
||||||
newtype Number = Number Double deriving Show
|
newtype Number = Number Float deriving Show
|
||||||
|
|
||||||
instance Output Number where
|
instance Output Number where
|
||||||
output (Number f) = Output.string $
|
output (Number f) = Output.string $
|
||||||
|
@ -135,7 +130,7 @@ stringObject =
|
||||||
newtype Name = Name String deriving (Eq, Ord, Show)
|
newtype Name = Name String deriving (Eq, Ord, Show)
|
||||||
|
|
||||||
instance Output Name where
|
instance Output Name where
|
||||||
output (Name n) = Output.string ('/':n)
|
output (Name n) = "/" `mappend` Output.string n
|
||||||
|
|
||||||
name :: Parser u Name
|
name :: Parser u Name
|
||||||
name = Name . BS.unpack <$> (char '/' *> takeAll regular) <?> "name"
|
name = Name . BS.unpack <$> (char '/' *> takeAll regular) <?> "name"
|
||||||
|
@ -158,7 +153,7 @@ instance Output Dictionary where
|
||||||
where
|
where
|
||||||
keyValues = join " " $ outputKeyVal <$> Map.toList dict
|
keyValues = join " " $ outputKeyVal <$> Map.toList dict
|
||||||
outputKeyVal :: (Name, DirectObject) -> OBuilder
|
outputKeyVal :: (Name, DirectObject) -> OBuilder
|
||||||
outputKeyVal (key, val) = Output.concat [output key, " ", output val]
|
outputKeyVal (key, val) = output key `mappend` " " `mappend` output val
|
||||||
|
|
||||||
dictionary :: Parser u Dictionary
|
dictionary :: Parser u Dictionary
|
||||||
dictionary =
|
dictionary =
|
||||||
|
@ -177,13 +172,12 @@ nullObject = string "null" *> return () <?> "null object"
|
||||||
-- Reference
|
-- Reference
|
||||||
--
|
--
|
||||||
data IndirectObjCoordinates = IndirectObjCoordinates {
|
data IndirectObjCoordinates = IndirectObjCoordinates {
|
||||||
objectId :: ObjectId
|
objectId :: Int
|
||||||
, versionNumber :: Int
|
, versionNumber :: Int
|
||||||
} deriving Show
|
} deriving Show
|
||||||
|
|
||||||
reference :: Parser u IndirectObjCoordinates
|
reference :: Parser u IndirectObjCoordinates
|
||||||
reference = IndirectObjCoordinates
|
reference = IndirectObjCoordinates <$> integer <*> integer <* char 'R' <?> "reference to an object"
|
||||||
<$> (fmap ObjectId integer) <*> integer <* char 'R' <?> "reference to an object"
|
|
||||||
|
|
||||||
--
|
--
|
||||||
-- DirectObject
|
-- DirectObject
|
||||||
|
@ -204,11 +198,11 @@ instance Output DirectObject where
|
||||||
output (NumberObject n) = output n
|
output (NumberObject n) = output n
|
||||||
output (StringObject s) = output s
|
output (StringObject s) = output s
|
||||||
output (NameObject n) = output n
|
output (NameObject n) = output n
|
||||||
output (Array a) = Output.concat ["[", join " " a, "]"]
|
output (Array a) = "[" `mappend` join " " a `mappend` "]"
|
||||||
output (Dictionary d) = output d
|
output (Dictionary d) = output d
|
||||||
output (Null) = "null"
|
output (Null) = "null"
|
||||||
output (Reference (IndirectObjCoordinates {objectId, versionNumber})) =
|
output (Reference (IndirectObjCoordinates {objectId, versionNumber})) =
|
||||||
Output.string (printf "%d %d R" (getObjectId objectId) versionNumber)
|
Output.string (printf "%d %d R" objectId versionNumber)
|
||||||
|
|
||||||
directObject :: Parser u DirectObject
|
directObject :: Parser u DirectObject
|
||||||
directObject =
|
directObject =
|
||||||
|
@ -235,26 +229,25 @@ data Object =
|
||||||
|
|
||||||
instance Output Object where
|
instance Output Object where
|
||||||
output (Direct d) = output d
|
output (Direct d) = output d
|
||||||
output (Stream {header, streamContent}) = Output.concat [
|
output (Stream {header, streamContent}) =
|
||||||
output header, newLine
|
output header
|
||||||
, Output.line "stream"
|
`nextLine` "stream"
|
||||||
, byteString streamContent
|
`nextLine` byteString streamContent
|
||||||
, "endstream"
|
`mappend` "endstream"
|
||||||
]
|
|
||||||
|
|
||||||
--
|
--
|
||||||
-- Occurrence
|
-- Occurrence
|
||||||
--
|
--
|
||||||
data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show
|
data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show
|
||||||
|
|
||||||
outputOccurrence :: IndexedObjects -> Occurrence -> OBuilder
|
outputOccurrence :: Map Int Object -> Occurrence -> OBuilder
|
||||||
outputOccurrence _ (Comment c) = Output.line c
|
outputOccurrence _ (Comment c) =
|
||||||
|
Output.string c `mappend` newLine
|
||||||
outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) =
|
outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) =
|
||||||
saveOffset (Object objectId) >> Output.concat [
|
saveOffset (ObjectId objectId)
|
||||||
Output.line (printf "%d %d obj" (getObjectId objectId) versionNumber)
|
>> Output.string (printf "%d %d obj" objectId versionNumber)
|
||||||
, output (objects ! objectId), newLine
|
`nextLine` output (objects ! objectId)
|
||||||
, Output.line "endobj"
|
`nextLine` "endobj" `mappend` newLine
|
||||||
]
|
|
||||||
|
|
||||||
-------------------------------------
|
-------------------------------------
|
||||||
-- XREF TABLE
|
-- XREF TABLE
|
||||||
|
@ -264,18 +257,18 @@ outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNum
|
||||||
-- XRefEntry
|
-- XRefEntry
|
||||||
--
|
--
|
||||||
data XRefEntry = InUse {
|
data XRefEntry = InUse {
|
||||||
offset :: Offset
|
offset :: Int
|
||||||
, generation :: Int
|
, generation :: Int
|
||||||
} | Free {
|
} | Free {
|
||||||
nextFree :: ObjectId
|
nextFree :: Int
|
||||||
, generation :: Int
|
, generation :: Int
|
||||||
} deriving Show
|
} deriving Show
|
||||||
|
|
||||||
instance Output XRefEntry where
|
instance Output XRefEntry where
|
||||||
output (InUse {offset, generation}) =
|
output (InUse {offset, generation}) =
|
||||||
Output.line (printf "%010d %05d n " (getOffset offset) generation)
|
Output.string (printf "%010d %05d n " offset generation) `mappend` newLine
|
||||||
output (Free {nextFree, generation}) =
|
output (Free {nextFree, generation}) =
|
||||||
Output.line (printf "%010d %05d f " (getObjectId nextFree) generation)
|
Output.string (printf "%010d %05d f " nextFree generation) `mappend` newLine
|
||||||
|
|
||||||
entry :: Parser u XRefEntry
|
entry :: Parser u XRefEntry
|
||||||
entry = do
|
entry = do
|
||||||
|
@ -283,56 +276,31 @@ entry = do
|
||||||
(inUse big small <|> free big small <?> "XRef entry") <* blank
|
(inUse big small <|> free big small <?> "XRef entry") <* blank
|
||||||
where
|
where
|
||||||
inUse :: Int -> Int -> Parser u XRefEntry
|
inUse :: Int -> Int -> Parser u XRefEntry
|
||||||
inUse big generation =
|
inUse offset generation = char 'n' *> return (InUse {offset, generation})
|
||||||
char 'n' *> return (InUse {offset = Offset big, generation})
|
|
||||||
free :: Int -> Int -> Parser u XRefEntry
|
free :: Int -> Int -> Parser u XRefEntry
|
||||||
free big generation =
|
free nextFree generation = char 'f' *> return (Free {nextFree, generation})
|
||||||
char 'f' *> return (Free {nextFree = ObjectId big, generation})
|
|
||||||
|
|
||||||
--
|
--
|
||||||
-- XRefSubSection
|
-- XRefSubSection
|
||||||
--
|
--
|
||||||
data XRefSubSection = XRefSubSection {
|
data XRefSubSection = XRefSubSection {
|
||||||
firstObjectId :: ObjectId
|
firstObjectId :: Int
|
||||||
, entries :: [XRefEntry]
|
, entriesNumber :: Int
|
||||||
|
, entries :: Map Int XRefEntry
|
||||||
} deriving Show
|
} deriving Show
|
||||||
|
|
||||||
instance Output XRefSubSection where
|
instance Output XRefSubSection where
|
||||||
output (XRefSubSection {firstObjectId, entries}) =
|
output (XRefSubSection {firstObjectId, entriesNumber, entries}) =
|
||||||
Output.line (printf "%d %d" (getObjectId firstObjectId) (length entries))
|
Output.string (printf "%d %d" firstObjectId entriesNumber)
|
||||||
`mappend` output entries
|
`nextLine` output (Map.elems entries)
|
||||||
|
|
||||||
xRefSubSection :: Parser u XRefSubSection
|
xrefSubSection :: Parser u XRefSubSection
|
||||||
xRefSubSection = do
|
xrefSubSection = do
|
||||||
(firstId, entriesNumber) <- (,) <$> integer <*> integer <?> "XRef subsection"
|
(firstObjectId, entriesNumber) <- (,) <$> integer <*> integer <?> "XRef subsection"
|
||||||
entries <- count entriesNumber entry
|
entries <- Map.fromList . zip [firstObjectId..] <$> count entriesNumber entry
|
||||||
return $ XRefSubSection {firstObjectId = ObjectId firstId, entries}
|
return $ XRefSubSection {firstObjectId, entriesNumber, entries}
|
||||||
|
|
||||||
type XRefSection = Map ObjectId XRefEntry
|
type XRefSection = [XRefSubSection]
|
||||||
|
|
||||||
instance Output XRefSection where
|
|
||||||
output = output . sections
|
|
||||||
where
|
|
||||||
sections tmp =
|
|
||||||
case Map.minViewWithKey tmp of
|
|
||||||
Nothing -> []
|
|
||||||
Just ((objectId@(ObjectId value), firstEntry), rest) ->
|
|
||||||
let (subSection, sndRest) = section objectId [firstEntry] (value + 1) rest in
|
|
||||||
subSection : sections sndRest
|
|
||||||
section firstObjectId stack nextValue tmp =
|
|
||||||
let nextId = ObjectId nextValue in
|
|
||||||
case Map.lookup nextId tmp of
|
|
||||||
Nothing -> (XRefSubSection {firstObjectId, entries = reverse stack}, tmp)
|
|
||||||
Just nextEntry ->
|
|
||||||
section firstObjectId (nextEntry:stack) (nextValue + 1) (Map.delete nextId tmp)
|
|
||||||
|
|
||||||
|
|
||||||
xRefSection :: Parser u XRefSection
|
|
||||||
xRefSection = foldr addSubsection Map.empty <$>
|
|
||||||
(line "xref" *> xRefSubSection `sepBy` many EOL.parser)
|
|
||||||
where
|
|
||||||
addSubsection (XRefSubSection {firstObjectId, entries}) =
|
|
||||||
Map.union . Map.fromList $ zip ([firstObjectId..]) entries
|
|
||||||
|
|
||||||
--
|
--
|
||||||
-- Structure
|
-- Structure
|
||||||
|
@ -343,28 +311,34 @@ data InputStructure = InputStructure {
|
||||||
}
|
}
|
||||||
|
|
||||||
data Structure = Structure {
|
data Structure = Structure {
|
||||||
xRef :: XRefSection
|
xrefSection :: XRefSection
|
||||||
, trailer :: Dictionary
|
, trailer :: Dictionary
|
||||||
} deriving Show
|
} deriving Show
|
||||||
|
|
||||||
structure :: Parser u Structure
|
structure :: Parser u Structure
|
||||||
structure =
|
structure =
|
||||||
Structure
|
Structure
|
||||||
<$> xRefSection
|
<$> (line "xref" *> xrefSubSection `sepBy` EOL.parser)
|
||||||
<*> (string "trailer" *> blank *> dictionary <* EOL.parser)
|
<*> (string "trailer" *> blank *> dictionary <* EOL.parser)
|
||||||
|
|
||||||
updateXrefs :: XRefSection -> Map Resource Offset -> (XRefSection, Offset)
|
updateXrefs :: XRefSection -> Map Offset Int -> (XRefSection, Int)
|
||||||
updateXrefs xRef offsets = (mapWithKey updateEntry xRef, offsets ! StartXRef)
|
updateXrefs xrefSection offsets = (
|
||||||
|
updateSubSection <$> xrefSection
|
||||||
|
, offsets ! StartXRef
|
||||||
|
)
|
||||||
where
|
where
|
||||||
updateEntry objectId e@(InUse {}) = e {offset = offsets ! (Object objectId)}
|
updateSubSection subSection@(XRefSubSection {firstObjectId, entries}) =
|
||||||
updateEntry _ e = e
|
subSection {entries = mapWithKey (updateEntry firstObjectId) entries}
|
||||||
|
updateEntry firstObjectId index e@(InUse {}) =
|
||||||
|
e {offset = offsets ! (ObjectId $ firstObjectId + index)}
|
||||||
|
updateEntry _ _ e = e
|
||||||
|
|
||||||
--
|
--
|
||||||
-- Flow
|
-- Flow
|
||||||
--
|
--
|
||||||
data Flow = Flow {
|
data Flow = Flow {
|
||||||
occurrencesStack :: [Occurrence]
|
occurrencesStack :: [Occurrence]
|
||||||
, tmpObjects :: IndexedObjects
|
, tmpObjects :: Map Int Object
|
||||||
} deriving Show
|
} deriving Show
|
||||||
|
|
||||||
--
|
--
|
||||||
|
@ -372,26 +346,25 @@ data Flow = Flow {
|
||||||
--
|
--
|
||||||
data Content = Content {
|
data Content = Content {
|
||||||
occurrences :: [Occurrence]
|
occurrences :: [Occurrence]
|
||||||
, objects :: IndexedObjects
|
, objects :: Map Int Object
|
||||||
, docStructure :: Structure
|
, docStructure :: Structure
|
||||||
} deriving Show
|
} deriving Show
|
||||||
|
|
||||||
outputBody :: ([Occurrence], IndexedObjects) -> OBuilder
|
outputBody :: ([Occurrence], Map Int Object) -> OBuilder
|
||||||
outputBody (occurrences, objects) =
|
outputBody (occurrences, objects) =
|
||||||
output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef
|
output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef
|
||||||
|
|
||||||
instance Output Content where
|
instance Output Content where
|
||||||
output (Content {occurrences, objects, docStructure}) =
|
output (Content {occurrences, objects, docStructure}) =
|
||||||
fmap (updateXrefs xRef) <$> getOffsets (outputBody (occurrences, objects))
|
fmap (updateXrefs xrefSection) <$> getOffsets (outputBody (occurrences, objects))
|
||||||
>>= \(body, (xref, startXRef)) -> Output.concat [
|
>>= \(body, (xref, startXRef)) ->
|
||||||
body
|
body
|
||||||
, Output.line "xref"
|
`mappend` "xref"
|
||||||
, output xref
|
`nextLine` output xref
|
||||||
, Output.line "trailer"
|
`mappend` "trailer"
|
||||||
, output trailer, newLine
|
`nextLine` output trailer
|
||||||
, Output.line "startxref"
|
`nextLine` "startxref"
|
||||||
, Output.line (printf "%d" (getOffset startXRef))
|
`nextLine` (Output.string (printf "%d" startXRef))
|
||||||
, byteString eofMarker
|
`nextLine` byteString eofMarker
|
||||||
]
|
|
||||||
where
|
where
|
||||||
Structure {xRef, trailer} = docStructure
|
Structure {xrefSection, trailer} = docStructure
|
||||||
|
|
|
@ -2,21 +2,17 @@
|
||||||
{-# LANGUAGE MultiParamTypeClasses #-}
|
{-# LANGUAGE MultiParamTypeClasses #-}
|
||||||
{-# LANGUAGE FlexibleInstances #-}
|
{-# LANGUAGE FlexibleInstances #-}
|
||||||
{-# LANGUAGE AllowAmbiguousTypes #-}
|
{-# LANGUAGE AllowAmbiguousTypes #-}
|
||||||
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
|
||||||
module PDF.Output (
|
module PDF.Output (
|
||||||
OBuilder
|
OBuilder
|
||||||
, ObjectId(..)
|
|
||||||
, OContext(..)
|
, OContext(..)
|
||||||
, Offset(..)
|
, Offset(..)
|
||||||
, Output(..)
|
, Output(..)
|
||||||
, Resource(..)
|
|
||||||
, byteString
|
, byteString
|
||||||
, char
|
, char
|
||||||
, concat
|
|
||||||
, getOffsets
|
, getOffsets
|
||||||
, join
|
, join
|
||||||
, line
|
|
||||||
, newLine
|
, newLine
|
||||||
|
, nextLine
|
||||||
, saveOffset
|
, saveOffset
|
||||||
, string
|
, string
|
||||||
, render
|
, render
|
||||||
|
@ -32,14 +28,10 @@ import qualified Data.Map as Map (singleton)
|
||||||
import Data.String (IsString(..))
|
import Data.String (IsString(..))
|
||||||
import Control.Monad.RWS (RWS, runRWS, ask, get, listen, modify, tell)
|
import Control.Monad.RWS (RWS, runRWS, ask, get, listen, modify, tell)
|
||||||
import qualified PDF.EOL as EOL (Style(..))
|
import qualified PDF.EOL as EOL (Style(..))
|
||||||
import Prelude hiding (concat)
|
|
||||||
|
|
||||||
newtype ObjectId = ObjectId {getObjectId :: Int} deriving (Enum, Eq, Ord, Show)
|
data Offset = StartXRef | ObjectId Int deriving (Eq, Ord)
|
||||||
newtype Offset = Offset {getOffset :: Int} deriving (Show)
|
|
||||||
|
|
||||||
data Resource = StartXRef | Object ObjectId deriving (Eq, Ord)
|
newtype OContext a = OContext (RWS EOL.Style (Map Offset Int) Int a)
|
||||||
|
|
||||||
newtype OContext a = OContext (RWS EOL.Style (Map Resource Offset) Offset a)
|
|
||||||
type OBuilder = OContext Builder
|
type OBuilder = OContext Builder
|
||||||
|
|
||||||
instance Functor OContext where
|
instance Functor OContext where
|
||||||
|
@ -52,23 +44,20 @@ instance Applicative OContext where
|
||||||
instance Monad OContext where
|
instance Monad OContext where
|
||||||
(>>=) (OContext a) f = OContext (a >>= (\x -> let OContext y = f x in y))
|
(>>=) (OContext a) f = OContext (a >>= (\x -> let OContext y = f x in y))
|
||||||
|
|
||||||
saveOffset :: Resource -> OContext ()
|
saveOffset :: Offset -> OContext ()
|
||||||
saveOffset resource = OContext $
|
saveOffset offset = OContext $
|
||||||
get >>= tell . Map.singleton resource
|
get >>= tell . Map.singleton offset
|
||||||
|
|
||||||
lift :: (a -> Builder) -> a -> OBuilder
|
lift :: (a -> Builder) -> a -> OBuilder
|
||||||
lift f a = return (f a)
|
lift f a = return (f a)
|
||||||
|
|
||||||
getOffsets :: OBuilder -> OContext (OBuilder, Map Resource Offset)
|
getOffsets :: OBuilder -> OContext (OBuilder, Map Offset Int)
|
||||||
getOffsets (OContext builder) =
|
getOffsets (OContext builder) =
|
||||||
OContext (listen builder >>= \(a, w) -> return (return a, w))
|
OContext (listen builder >>= \(a, w) -> return (return a, w))
|
||||||
|
|
||||||
append :: OBuilder -> OBuilder -> OBuilder
|
append :: OBuilder -> OBuilder -> OBuilder
|
||||||
append (OContext a) (OContext b) = OContext (mappend <$> a <*> b)
|
append (OContext a) (OContext b) = OContext (mappend <$> a <*> b)
|
||||||
|
|
||||||
concat :: [OBuilder] -> OBuilder
|
|
||||||
concat = foldl mappend mempty
|
|
||||||
|
|
||||||
#if MIN_VERSION_base(4,11,0)
|
#if MIN_VERSION_base(4,11,0)
|
||||||
instance Semigroup OBuilder where
|
instance Semigroup OBuilder where
|
||||||
(<>) = append
|
(<>) = append
|
||||||
|
@ -94,7 +83,7 @@ instance Output Bool where
|
||||||
output True = string "true"
|
output True = string "true"
|
||||||
|
|
||||||
instance Output a => Output [a] where
|
instance Output a => Output [a] where
|
||||||
output = concat . fmap output
|
output = foldl mappend mempty . fmap output
|
||||||
|
|
||||||
join :: Output a => String -> [a] -> OBuilder
|
join :: Output a => String -> [a] -> OBuilder
|
||||||
join _ [] = mempty
|
join _ [] = mempty
|
||||||
|
@ -102,30 +91,27 @@ join _ [a] = output a
|
||||||
join separator (a:as) =
|
join separator (a:as) =
|
||||||
output a `mappend` string separator `mappend` (join separator as)
|
output a `mappend` string separator `mappend` (join separator as)
|
||||||
|
|
||||||
offset :: (Int -> Int) -> OContext ()
|
|
||||||
offset f = OContext $ modify $ \(Offset o) -> Offset (f o)
|
|
||||||
|
|
||||||
newLine :: OBuilder
|
newLine :: OBuilder
|
||||||
newLine = buildEOL =<< OContext ask
|
newLine = OContext $ buildEOL =<< ask
|
||||||
where
|
where
|
||||||
buildEOL EOL.CR = return (char8 '\r') <* offset (+1)
|
buildEOL EOL.CR = return (char8 '\r') <* modify (+1)
|
||||||
buildEOL EOL.LF = return (char8 '\n') <* offset (+1)
|
buildEOL EOL.LF = return (char8 '\n') <* modify (+1)
|
||||||
buildEOL EOL.CRLF = return (string8 "\r\n") <* offset (+2)
|
buildEOL EOL.CRLF = return (string8 "\r\n") <* modify (+2)
|
||||||
|
|
||||||
|
nextLine :: OBuilder -> OBuilder -> OBuilder
|
||||||
|
nextLine a b = a `mappend` newLine `mappend` b
|
||||||
|
|
||||||
char :: Char -> OBuilder
|
char :: Char -> OBuilder
|
||||||
char c = lift char8 c <* offset (+1)
|
char c = lift char8 c <* OContext (modify (+1))
|
||||||
|
|
||||||
string :: String -> OBuilder
|
string :: String -> OBuilder
|
||||||
string s = lift string8 s <* offset (+ toEnum (length s))
|
string s = lift string8 s <* OContext (modify (+ toEnum (length s)))
|
||||||
|
|
||||||
line :: String -> OBuilder
|
|
||||||
line l = string l `mappend` newLine
|
|
||||||
|
|
||||||
byteString :: ByteString -> OBuilder
|
byteString :: ByteString -> OBuilder
|
||||||
byteString bs = lift B.byteString bs <* offset (+ BS.length bs)
|
byteString bs = lift B.byteString bs <* OContext (modify (+ BS.length bs))
|
||||||
|
|
||||||
render :: Output a => EOL.Style -> a -> Lazy.ByteString
|
render :: Output a => EOL.Style -> a -> Lazy.ByteString
|
||||||
render eolStyle a =
|
render eolStyle a =
|
||||||
let OContext builder = output a in
|
let OContext builder = output a in
|
||||||
let (outputByteString, _, _) = runRWS builder eolStyle (Offset 0) in
|
let (outputByteString, _, _) = runRWS builder eolStyle 0 in
|
||||||
toLazyByteString outputByteString
|
toLazyByteString outputByteString
|
||||||
|
|
|
@ -1,47 +0,0 @@
|
||||||
{-# LANGUAGE NamedFieldPuns #-}
|
|
||||||
module PDF.Update (
|
|
||||||
unify
|
|
||||||
) where
|
|
||||||
|
|
||||||
import Data.Map (member)
|
|
||||||
import qualified Data.Map as Map (empty, union)
|
|
||||||
import PDF.Object (
|
|
||||||
Content(..), IndexedObjects, IndirectObjCoordinates(..), Occurrence(..)
|
|
||||||
, Structure(..)
|
|
||||||
)
|
|
||||||
|
|
||||||
emptyContent :: Content
|
|
||||||
emptyContent = Content {
|
|
||||||
docStructure = Structure {xRef = Map.empty, trailer = Map.empty}
|
|
||||||
, objects = Map.empty
|
|
||||||
, occurrences = []
|
|
||||||
}
|
|
||||||
|
|
||||||
unify :: [Content] -> Content
|
|
||||||
unify = foldl complete emptyContent
|
|
||||||
where
|
|
||||||
complete tmpContent older =
|
|
||||||
let mergedObjects = Map.union (objects tmpContent) (objects older) in
|
|
||||||
Content {
|
|
||||||
docStructure =
|
|
||||||
unifyDocStructure (docStructure tmpContent) (docStructure older)
|
|
||||||
, objects = mergedObjects
|
|
||||||
, occurrences =
|
|
||||||
unifyOccurrences mergedObjects (occurrences tmpContent) (occurrences older)
|
|
||||||
}
|
|
||||||
|
|
||||||
unifyDocStructure :: Structure -> Structure -> Structure
|
|
||||||
unifyDocStructure update original = Structure {
|
|
||||||
xRef = Map.union (xRef update) (xRef original)
|
|
||||||
, trailer = Map.union (trailer update) (trailer original)
|
|
||||||
}
|
|
||||||
|
|
||||||
unifyOccurrences :: IndexedObjects -> [Occurrence] -> [Occurrence] -> [Occurrence]
|
|
||||||
unifyOccurrences objects update = foldr addOlder update
|
|
||||||
where
|
|
||||||
addOlder occurrence@(Comment _) existing = occurrence : existing
|
|
||||||
addOlder occurrence@(Indirect indirect) existing =
|
|
||||||
if objectId indirect `member` objects
|
|
||||||
then occurrence : existing
|
|
||||||
else existing
|
|
||||||
|
|
Loading…
Reference in a new issue