Compare commits
12 commits
Author | SHA1 | Date | |
---|---|---|---|
7eca875900 | |||
380c1e439b | |||
d6994f0813 | |||
68f90d20e2 | |||
3a39c75e6a | |||
29c5823f34 | |||
9ab010de61 | |||
699f830a45 | |||
dd79cb3fc7 | |||
264b0dc92b | |||
9dac275f68 | |||
85e4eb9273 |
9 changed files with 307 additions and 126 deletions
14
ChangeLog.md
14
ChangeLog.md
|
@ -1,5 +1,19 @@
|
||||||
# Revision history for Hufflepdf
|
# Revision history for Hufflepdf
|
||||||
|
|
||||||
|
## 0.2.0.1 -- 2019-11-27
|
||||||
|
|
||||||
|
* Fix bug discovered while running Hufflepdf on a PDF output from pdftk : magic keywords like `obj`, `stream` or `xref` can have spaces after them before the EOL
|
||||||
|
|
||||||
|
## 0.2.0.0 -- 2019-10-14
|
||||||
|
|
||||||
|
* Implement PDF's multilayer update mechanism
|
||||||
|
|
||||||
|
## 0.1.1.1 -- 2019-05-31
|
||||||
|
|
||||||
|
* Fix a bug preventing files with the «trailer» keyword on the same line as the dictionary that follows it to be parsed
|
||||||
|
* Improve error messages
|
||||||
|
* Support empty lines as comments
|
||||||
|
|
||||||
## 0.1.1.0 -- 2019-05-23
|
## 0.1.1.0 -- 2019-05-23
|
||||||
|
|
||||||
* Rewrite the parser using attoparsec instead of parsec (about 1.5x faster)
|
* Rewrite the parser using attoparsec instead of parsec (about 1.5x faster)
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
-- documentation, see http://haskell.org/cabal/users-guide/
|
-- documentation, see http://haskell.org/cabal/users-guide/
|
||||||
|
|
||||||
name: Hufflepdf
|
name: Hufflepdf
|
||||||
version: 0.1.0.0
|
version: 0.2.0.1
|
||||||
synopsis: A PDF parser
|
synopsis: A PDF parser
|
||||||
-- description:
|
-- description:
|
||||||
license: BSD3
|
license: BSD3
|
||||||
|
@ -20,6 +20,7 @@ library
|
||||||
, PDF.EOL
|
, PDF.EOL
|
||||||
, PDF.Object
|
, PDF.Object
|
||||||
, PDF.Output
|
, PDF.Output
|
||||||
|
, PDF.Update
|
||||||
other-modules: Data.ByteString.Char8.Util
|
other-modules: Data.ByteString.Char8.Util
|
||||||
, PDF.Body
|
, PDF.Body
|
||||||
, PDF.Parser
|
, PDF.Parser
|
||||||
|
@ -32,3 +33,21 @@ library
|
||||||
hs-source-dirs: src
|
hs-source-dirs: src
|
||||||
ghc-options: -Wall
|
ghc-options: -Wall
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
|
|
||||||
|
executable equivalent
|
||||||
|
main-is: examples/equivalent.hs
|
||||||
|
build-depends: base
|
||||||
|
, bytestring
|
||||||
|
, Hufflepdf
|
||||||
|
ghc-options: -Wall
|
||||||
|
default-language: Haskell2010
|
||||||
|
|
||||||
|
executable getObj
|
||||||
|
main-is: examples/getObj.hs
|
||||||
|
build-depends: base
|
||||||
|
, bytestring
|
||||||
|
, containers
|
||||||
|
, Hufflepdf
|
||||||
|
, zlib
|
||||||
|
ghc-options: -Wall
|
||||||
|
default-language: Haskell2010
|
||||||
|
|
13
examples/equivalent.hs
Normal file
13
examples/equivalent.hs
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
import qualified Data.ByteString.Char8 as BS (readFile)
|
||||||
|
import qualified Data.ByteString.Lazy.Char8 as Lazy (writeFile)
|
||||||
|
import PDF (parseDocument, render)
|
||||||
|
import System.Environment (getArgs)
|
||||||
|
import System.IO (hPutStrLn, stderr)
|
||||||
|
|
||||||
|
main :: IO ()
|
||||||
|
main = do
|
||||||
|
[inputFile, outputFile] <- getArgs
|
||||||
|
result <- parseDocument <$> BS.readFile inputFile
|
||||||
|
case result of
|
||||||
|
Left parseError -> hPutStrLn stderr parseError
|
||||||
|
Right doc -> Lazy.writeFile outputFile $ render doc
|
53
examples/getObj.hs
Normal file
53
examples/getObj.hs
Normal file
|
@ -0,0 +1,53 @@
|
||||||
|
{-# LANGUAGE NamedFieldPuns #-}
|
||||||
|
|
||||||
|
import Codec.Compression.Zlib (decompress)
|
||||||
|
import Data.ByteString.Lazy.Char8 (ByteString)
|
||||||
|
import qualified Data.ByteString.Char8 as BS (readFile)
|
||||||
|
import qualified Data.ByteString.Lazy.Char8 as Lazy (fromStrict, putStr, toStrict)
|
||||||
|
import Data.Map ((!?))
|
||||||
|
import qualified Data.Map as Map (keys, lookup)
|
||||||
|
import PDF (Document(..), parseDocument)
|
||||||
|
import qualified PDF.EOL as EOL (Style)
|
||||||
|
import PDF.Object (Content(..), DirectObject(..), Object(..), Name(..))
|
||||||
|
import PDF.Output (ObjectId(..))
|
||||||
|
import qualified PDF.Output as Output (render)
|
||||||
|
import PDF.Update (unify)
|
||||||
|
import System.Environment (getArgs, getProgName)
|
||||||
|
import System.Exit (die)
|
||||||
|
import Text.Printf (printf)
|
||||||
|
|
||||||
|
display :: EOL.Style -> Object -> ByteString
|
||||||
|
display eolStyle d@(Direct _) = Output.render eolStyle d
|
||||||
|
display eolStyle s@(Stream {header, streamContent}) = Output.render eolStyle $
|
||||||
|
case Map.lookup (Name "Filter") header of
|
||||||
|
Just (NameObject (Name "FlateDecode")) -> Stream {
|
||||||
|
header
|
||||||
|
, streamContent = Lazy.toStrict . decompress $ Lazy.fromStrict streamContent
|
||||||
|
}
|
||||||
|
_ -> s
|
||||||
|
|
||||||
|
extractObject :: ObjectId -> Document -> Either String ByteString
|
||||||
|
extractObject objectId (Document {eolStyle, updates}) =
|
||||||
|
case objects content !? objectId of
|
||||||
|
Nothing -> Left $ "No object has ID " ++ show (getObjectId objectId)
|
||||||
|
Just o -> Right $ display eolStyle o
|
||||||
|
where
|
||||||
|
content = unify updates
|
||||||
|
|
||||||
|
listObjectIds :: Document -> Either String [String]
|
||||||
|
listObjectIds =
|
||||||
|
Right . prependTitle . toString . Map.keys . objects . unify . updates
|
||||||
|
where
|
||||||
|
toString = fmap (show . getObjectId)
|
||||||
|
prependTitle = ("ObjectIds defined in this PDF:":)
|
||||||
|
|
||||||
|
main :: IO ()
|
||||||
|
main = do
|
||||||
|
(inputFile, getData) <- parse =<< getArgs
|
||||||
|
input <- BS.readFile inputFile
|
||||||
|
either die id $ (parseDocument input >>= getData)
|
||||||
|
where
|
||||||
|
parse [inputFile] = return (inputFile, fmap (mapM_ putStrLn) . listObjectIds)
|
||||||
|
parse [inputFile, objectId] = return
|
||||||
|
(inputFile, fmap Lazy.putStr . extractObject (ObjectId (read objectId)))
|
||||||
|
parse _ = die . printf "Syntax: %s inputFile [OBJECT_ID]\n" =<< getProgName
|
16
src/PDF.hs
16
src/PDF.hs
|
@ -19,21 +19,21 @@ import PDF.Object (
|
||||||
, Structure(..)
|
, Structure(..)
|
||||||
, eofMarker, magicNumber, structure
|
, eofMarker, magicNumber, structure
|
||||||
)
|
)
|
||||||
import qualified PDF.Output as Output (render, string)
|
import qualified PDF.Output as Output (render, line)
|
||||||
import PDF.Output (Output(..), nextLine)
|
import PDF.Output (Output(..))
|
||||||
import PDF.Parser (Parser, runParser, string, takeAll)
|
import PDF.Parser (Parser, runParser, string, takeAll)
|
||||||
import Text.Printf (printf)
|
import Text.Printf (printf)
|
||||||
|
|
||||||
data Document = Document {
|
data Document = Document {
|
||||||
pdfVersion :: String
|
pdfVersion :: String
|
||||||
, eolStyle :: EOL.Style
|
, eolStyle :: EOL.Style
|
||||||
, contents :: [Content]
|
, updates :: [Content]
|
||||||
} deriving Show
|
} deriving Show
|
||||||
|
|
||||||
instance Output Document where
|
instance Output Document where
|
||||||
output (Document {pdfVersion, contents}) =
|
output (Document {pdfVersion, updates}) =
|
||||||
Output.string (printf "%%PDF-%s" pdfVersion)
|
Output.line (printf "%%PDF-%s" pdfVersion)
|
||||||
`nextLine` output contents
|
`mappend` output updates
|
||||||
|
|
||||||
render :: Document -> Lazy.ByteString
|
render :: Document -> Lazy.ByteString
|
||||||
render document@(Document {eolStyle}) = Output.render eolStyle document
|
render document@(Document {eolStyle}) = Output.render eolStyle document
|
||||||
|
@ -99,5 +99,5 @@ parseDocument input = do
|
||||||
(pdfVersion, eolStyle) <- runParser ((,) <$> version <*> EOL.parser) () input
|
(pdfVersion, eolStyle) <- runParser ((,) <$> version <*> EOL.parser) () input
|
||||||
startXref <- readStartXref eolStyle input
|
startXref <- readStartXref eolStyle input
|
||||||
structuresRead <- readStructures startXref input
|
structuresRead <- readStructures startXref input
|
||||||
let contents = populate input <$> structuresRead
|
let updates = populate input <$> structuresRead
|
||||||
return $ Document {pdfVersion, eolStyle, contents}
|
return $ Document {pdfVersion, eolStyle, updates}
|
||||||
|
|
|
@ -7,17 +7,18 @@ module PDF.Body (
|
||||||
import Control.Applicative ((<|>))
|
import Control.Applicative ((<|>))
|
||||||
import Control.Monad.State (get, gets, modify)
|
import Control.Monad.State (get, gets, modify)
|
||||||
import Data.ByteString.Char8 (ByteString)
|
import Data.ByteString.Char8 (ByteString)
|
||||||
import qualified Data.ByteString.Char8 as BS (drop, unpack)
|
import qualified Data.ByteString.Char8 as BS (cons, drop, unpack)
|
||||||
import Data.Map ((!))
|
import Data.Map ((!))
|
||||||
import qualified Data.Map as Map (empty, insert, lookup)
|
import qualified Data.Map as Map (empty, insert, lookup)
|
||||||
import qualified PDF.EOL as EOL (charset, parser)
|
import qualified PDF.EOL as EOL (charset, parser)
|
||||||
import PDF.Object (
|
import PDF.Object (
|
||||||
Content(..), DirectObject(..), Flow(..), IndirectObjCoordinates(..)
|
Content(..), DirectObject(..), Flow(..), IndirectObjCoordinates(..)
|
||||||
, InputStructure(..), Name(..), Number(..), Object(..), Occurrence(..)
|
, InputStructure(..), Name(..), Number(..), Object(..), Occurrence(..)
|
||||||
, Structure(..), XRefEntry(..), XRefSection, XRefSubSection(..)
|
, Structure(..), XRefEntry(..), XRefSection
|
||||||
, blank, dictionary, directObject, integer, line
|
, blank, dictionary, directObject, integer, line
|
||||||
)
|
)
|
||||||
import PDF.Parser (Parser, block, char, on, runParser, takeAll)
|
import PDF.Output (ObjectId(..), Offset(..))
|
||||||
|
import PDF.Parser (Parser, (<?>), block, char, on, option, runParser, takeAll)
|
||||||
|
|
||||||
data UserState = UserState {
|
data UserState = UserState {
|
||||||
input :: ByteString
|
input :: ByteString
|
||||||
|
@ -30,7 +31,7 @@ type SParser = Parser UserState
|
||||||
modifyFlow :: (Flow -> Flow) -> SParser ()
|
modifyFlow :: (Flow -> Flow) -> SParser ()
|
||||||
modifyFlow f = modify $ \state -> state {flow = f $ flow state}
|
modifyFlow f = modify $ \state -> state {flow = f $ flow state}
|
||||||
|
|
||||||
addObject :: Int -> Object -> SParser ()
|
addObject :: ObjectId -> Object -> SParser ()
|
||||||
addObject objectId newObject = modifyFlow $ \flow -> flow {
|
addObject objectId newObject = modifyFlow $ \flow -> flow {
|
||||||
tmpObjects = Map.insert objectId newObject $ tmpObjects flow
|
tmpObjects = Map.insert objectId newObject $ tmpObjects flow
|
||||||
}
|
}
|
||||||
|
@ -41,32 +42,24 @@ pushOccurrence newOccurrence = modifyFlow $ \flow -> flow {
|
||||||
}
|
}
|
||||||
|
|
||||||
comment :: Parser u String
|
comment :: Parser u String
|
||||||
comment = BS.unpack <$>
|
comment = BS.unpack <$> (option "" afterPercent <* EOL.parser)
|
||||||
(char '%' *> takeAll (not . (`elem` EOL.charset)) <* EOL.parser)
|
where
|
||||||
|
afterPercent = BS.cons <$> char '%' <*> takeAll (not . (`elem` EOL.charset))
|
||||||
|
|
||||||
lookupOffset :: Int -> XRefSection -> Maybe Int
|
lookupOffset :: ObjectId -> SParser Offset
|
||||||
lookupOffset _ [] = Nothing
|
lookupOffset objectId = do
|
||||||
lookupOffset objectId (xrefSubSection:others) =
|
|
||||||
let XRefSubSection {firstObjectId, entriesNumber, entries} = xrefSubSection in
|
|
||||||
let index = objectId - firstObjectId in
|
|
||||||
if index >= 0 && index < entriesNumber
|
|
||||||
then
|
|
||||||
case Map.lookup index entries of
|
|
||||||
Just (InUse {offset}) -> Just offset
|
|
||||||
_ -> Nothing
|
|
||||||
else lookupOffset objectId others
|
|
||||||
|
|
||||||
getOffset :: Int -> SParser Int
|
|
||||||
getOffset objectId = do
|
|
||||||
table <- gets xreferences
|
table <- gets xreferences
|
||||||
case lookupOffset objectId table of
|
case Map.lookup objectId table >>= entryOffset of
|
||||||
Nothing -> fail $
|
Nothing -> fail $
|
||||||
"obj " ++ show objectId ++ " is referenced but missing in XRef table"
|
"obj " ++ show objectId ++ " is referenced but missing in XRef table"
|
||||||
Just offset -> return offset
|
Just offset -> return offset
|
||||||
|
where
|
||||||
|
entryOffset (InUse {offset}) = Just offset
|
||||||
|
entryOffset _ = Nothing
|
||||||
|
|
||||||
loadNumber :: Int -> SParser Float
|
loadNumber :: ObjectId -> SParser Double
|
||||||
loadNumber objectId = do
|
loadNumber objectId = do
|
||||||
offset <- getOffset objectId
|
offset <- getOffset <$> lookupOffset objectId
|
||||||
objectStart <- BS.drop offset <$> gets input
|
objectStart <- BS.drop offset <$> gets input
|
||||||
indirectObjCoordinates `on` (objectStart :: ByteString) >> return ()
|
indirectObjCoordinates `on` (objectStart :: ByteString) >> return ()
|
||||||
objectValue <- (!objectId) . tmpObjects <$> gets flow
|
objectValue <- (!objectId) . tmpObjects <$> gets flow
|
||||||
|
@ -77,7 +70,7 @@ loadNumber objectId = do
|
||||||
invalidValue :: Object -> String
|
invalidValue :: Object -> String
|
||||||
invalidValue v = "Invalid value " ++ show v
|
invalidValue v = "Invalid value " ++ show v
|
||||||
|
|
||||||
getSize :: Maybe DirectObject -> SParser Float
|
getSize :: Maybe DirectObject -> SParser Double
|
||||||
getSize Nothing = fail "Missing '/Length' key on stream"
|
getSize Nothing = fail "Missing '/Length' key on stream"
|
||||||
getSize (Just (NumberObject (Number size))) = return size
|
getSize (Just (NumberObject (Number size))) = return size
|
||||||
getSize (Just (Reference (IndirectObjCoordinates {objectId}))) = do
|
getSize (Just (Reference (IndirectObjCoordinates {objectId}))) = do
|
||||||
|
@ -103,14 +96,15 @@ object = streamObject <|> Direct <$> directObject
|
||||||
|
|
||||||
indirectObjCoordinates :: SParser IndirectObjCoordinates
|
indirectObjCoordinates :: SParser IndirectObjCoordinates
|
||||||
indirectObjCoordinates = do
|
indirectObjCoordinates = do
|
||||||
objectId <- integer
|
objectId <- ObjectId <$> integer
|
||||||
coordinates <- IndirectObjCoordinates objectId <$> integer
|
coordinates <- IndirectObjCoordinates objectId <$> integer
|
||||||
objectValue <- line "obj" *> object <* blank <* line "endobj"
|
objectValue <- line "obj" *> object <* blank <* line "endobj"
|
||||||
addObject objectId objectValue
|
addObject objectId objectValue
|
||||||
return coordinates
|
return coordinates
|
||||||
|
|
||||||
occurrence :: SParser Occurrence
|
occurrence :: SParser Occurrence
|
||||||
occurrence = Comment <$> comment <|> Indirect <$> indirectObjCoordinates
|
occurrence =
|
||||||
|
Comment <$> comment <|> Indirect <$> indirectObjCoordinates <?> "comment or object"
|
||||||
|
|
||||||
populate :: ByteString -> InputStructure -> Content
|
populate :: ByteString -> InputStructure -> Content
|
||||||
populate input structure =
|
populate input structure =
|
||||||
|
@ -124,7 +118,7 @@ populate input structure =
|
||||||
}
|
}
|
||||||
where
|
where
|
||||||
docStructure = inputStructure structure
|
docStructure = inputStructure structure
|
||||||
xreferences = xrefSection docStructure
|
xreferences = xRef docStructure
|
||||||
initialState = UserState {
|
initialState = UserState {
|
||||||
input, xreferences, flow = Flow {
|
input, xreferences, flow = Flow {
|
||||||
occurrencesStack = [], tmpObjects = Map.empty
|
occurrencesStack = [], tmpObjects = Map.empty
|
||||||
|
|
|
@ -5,6 +5,7 @@ module PDF.Object (
|
||||||
Content(..)
|
Content(..)
|
||||||
, DirectObject(..)
|
, DirectObject(..)
|
||||||
, Flow(..)
|
, Flow(..)
|
||||||
|
, IndexedObjects
|
||||||
, IndirectObjCoordinates(..)
|
, IndirectObjCoordinates(..)
|
||||||
, InputStructure(..)
|
, InputStructure(..)
|
||||||
, Name(..)
|
, Name(..)
|
||||||
|
@ -14,7 +15,6 @@ module PDF.Object (
|
||||||
, Structure(..)
|
, Structure(..)
|
||||||
, XRefEntry(..)
|
, XRefEntry(..)
|
||||||
, XRefSection
|
, XRefSection
|
||||||
, XRefSubSection(..)
|
|
||||||
, blank
|
, blank
|
||||||
, dictionary
|
, dictionary
|
||||||
, directObject
|
, directObject
|
||||||
|
@ -31,12 +31,15 @@ import qualified Data.ByteString.Char8 as BS (
|
||||||
concat, cons, pack, singleton, unpack
|
concat, cons, pack, singleton, unpack
|
||||||
)
|
)
|
||||||
import Data.Map (Map, (!), mapWithKey)
|
import Data.Map (Map, (!), mapWithKey)
|
||||||
import qualified Data.Map as Map (elems, fromList, toList)
|
import qualified Data.Map as Map (
|
||||||
|
delete, empty, fromList, lookup, minViewWithKey, toList, union
|
||||||
|
)
|
||||||
import qualified PDF.EOL as EOL (charset, parser)
|
import qualified PDF.EOL as EOL (charset, parser)
|
||||||
import qualified PDF.Output as Output (string)
|
import qualified PDF.Output as Output (concat, line, string)
|
||||||
import PDF.Output (
|
import PDF.Output (
|
||||||
OBuilder, Offset(..), Output(..)
|
OBuilder, ObjectId(..), Offset(..), Output(..), Resource(..)
|
||||||
, byteString, getOffsets, join, newLine, nextLine, saveOffset
|
, byteString, getObjectId, getOffset, getOffsets, join, newLine
|
||||||
|
, saveOffset
|
||||||
)
|
)
|
||||||
import PDF.Parser (
|
import PDF.Parser (
|
||||||
Parser, (<?>)
|
Parser, (<?>)
|
||||||
|
@ -46,7 +49,7 @@ import PDF.Parser (
|
||||||
import Text.Printf (printf)
|
import Text.Printf (printf)
|
||||||
|
|
||||||
line :: String -> Parser u ()
|
line :: String -> Parser u ()
|
||||||
line l = string (BS.pack l) *> EOL.parser *> return () <?> printf "line «%s»" l
|
line l = (string (BS.pack l) *> blank *> return ()) <?> printf "line «%s»" l
|
||||||
|
|
||||||
magicNumber :: ByteString
|
magicNumber :: ByteString
|
||||||
magicNumber = "%PDF-"
|
magicNumber = "%PDF-"
|
||||||
|
@ -73,6 +76,8 @@ integer = read . BS.unpack <$> decNumber <* blank <?> "decimal integer"
|
||||||
-- OBJECTS
|
-- OBJECTS
|
||||||
-------------------------------------
|
-------------------------------------
|
||||||
|
|
||||||
|
type IndexedObjects = Map ObjectId Object
|
||||||
|
|
||||||
--
|
--
|
||||||
-- Boolean
|
-- Boolean
|
||||||
--
|
--
|
||||||
|
@ -83,7 +88,7 @@ boolean =
|
||||||
--
|
--
|
||||||
-- Number
|
-- Number
|
||||||
--
|
--
|
||||||
newtype Number = Number Float deriving Show
|
newtype Number = Number Double deriving Show
|
||||||
|
|
||||||
instance Output Number where
|
instance Output Number where
|
||||||
output (Number f) = Output.string $
|
output (Number f) = Output.string $
|
||||||
|
@ -130,7 +135,7 @@ stringObject =
|
||||||
newtype Name = Name String deriving (Eq, Ord, Show)
|
newtype Name = Name String deriving (Eq, Ord, Show)
|
||||||
|
|
||||||
instance Output Name where
|
instance Output Name where
|
||||||
output (Name n) = "/" `mappend` Output.string n
|
output (Name n) = Output.string ('/':n)
|
||||||
|
|
||||||
name :: Parser u Name
|
name :: Parser u Name
|
||||||
name = Name . BS.unpack <$> (char '/' *> takeAll regular) <?> "name"
|
name = Name . BS.unpack <$> (char '/' *> takeAll regular) <?> "name"
|
||||||
|
@ -153,7 +158,7 @@ instance Output Dictionary where
|
||||||
where
|
where
|
||||||
keyValues = join " " $ outputKeyVal <$> Map.toList dict
|
keyValues = join " " $ outputKeyVal <$> Map.toList dict
|
||||||
outputKeyVal :: (Name, DirectObject) -> OBuilder
|
outputKeyVal :: (Name, DirectObject) -> OBuilder
|
||||||
outputKeyVal (key, val) = output key `mappend` " " `mappend` output val
|
outputKeyVal (key, val) = Output.concat [output key, " ", output val]
|
||||||
|
|
||||||
dictionary :: Parser u Dictionary
|
dictionary :: Parser u Dictionary
|
||||||
dictionary =
|
dictionary =
|
||||||
|
@ -172,12 +177,13 @@ nullObject = string "null" *> return () <?> "null object"
|
||||||
-- Reference
|
-- Reference
|
||||||
--
|
--
|
||||||
data IndirectObjCoordinates = IndirectObjCoordinates {
|
data IndirectObjCoordinates = IndirectObjCoordinates {
|
||||||
objectId :: Int
|
objectId :: ObjectId
|
||||||
, versionNumber :: Int
|
, versionNumber :: Int
|
||||||
} deriving Show
|
} deriving Show
|
||||||
|
|
||||||
reference :: Parser u IndirectObjCoordinates
|
reference :: Parser u IndirectObjCoordinates
|
||||||
reference = IndirectObjCoordinates <$> integer <*> integer <* char 'R' <?> "reference to an object"
|
reference = IndirectObjCoordinates
|
||||||
|
<$> (fmap ObjectId integer) <*> integer <* char 'R' <?> "reference to an object"
|
||||||
|
|
||||||
--
|
--
|
||||||
-- DirectObject
|
-- DirectObject
|
||||||
|
@ -198,11 +204,11 @@ instance Output DirectObject where
|
||||||
output (NumberObject n) = output n
|
output (NumberObject n) = output n
|
||||||
output (StringObject s) = output s
|
output (StringObject s) = output s
|
||||||
output (NameObject n) = output n
|
output (NameObject n) = output n
|
||||||
output (Array a) = "[" `mappend` join " " a `mappend` "]"
|
output (Array a) = Output.concat ["[", join " " a, "]"]
|
||||||
output (Dictionary d) = output d
|
output (Dictionary d) = output d
|
||||||
output (Null) = "null"
|
output (Null) = "null"
|
||||||
output (Reference (IndirectObjCoordinates {objectId, versionNumber})) =
|
output (Reference (IndirectObjCoordinates {objectId, versionNumber})) =
|
||||||
Output.string (printf "%d %d R" objectId versionNumber)
|
Output.string (printf "%d %d R" (getObjectId objectId) versionNumber)
|
||||||
|
|
||||||
directObject :: Parser u DirectObject
|
directObject :: Parser u DirectObject
|
||||||
directObject =
|
directObject =
|
||||||
|
@ -229,25 +235,26 @@ data Object =
|
||||||
|
|
||||||
instance Output Object where
|
instance Output Object where
|
||||||
output (Direct d) = output d
|
output (Direct d) = output d
|
||||||
output (Stream {header, streamContent}) =
|
output (Stream {header, streamContent}) = Output.concat [
|
||||||
output header
|
output header, newLine
|
||||||
`nextLine` "stream"
|
, Output.line "stream"
|
||||||
`nextLine` byteString streamContent
|
, byteString streamContent
|
||||||
`mappend` "endstream"
|
, "endstream"
|
||||||
|
]
|
||||||
|
|
||||||
--
|
--
|
||||||
-- Occurrence
|
-- Occurrence
|
||||||
--
|
--
|
||||||
data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show
|
data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show
|
||||||
|
|
||||||
outputOccurrence :: Map Int Object -> Occurrence -> OBuilder
|
outputOccurrence :: IndexedObjects -> Occurrence -> OBuilder
|
||||||
outputOccurrence _ (Comment c) =
|
outputOccurrence _ (Comment c) = Output.line c
|
||||||
Output.string (printf "%%%s" c) `mappend` newLine
|
|
||||||
outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) =
|
outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) =
|
||||||
saveOffset (ObjectId objectId)
|
saveOffset (Object objectId) >> Output.concat [
|
||||||
>> Output.string (printf "%d %d obj" objectId versionNumber)
|
Output.line (printf "%d %d obj" (getObjectId objectId) versionNumber)
|
||||||
`nextLine` output (objects ! objectId)
|
, output (objects ! objectId), newLine
|
||||||
`nextLine` "endobj" `mappend` newLine
|
, Output.line "endobj"
|
||||||
|
]
|
||||||
|
|
||||||
-------------------------------------
|
-------------------------------------
|
||||||
-- XREF TABLE
|
-- XREF TABLE
|
||||||
|
@ -257,18 +264,18 @@ outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNum
|
||||||
-- XRefEntry
|
-- XRefEntry
|
||||||
--
|
--
|
||||||
data XRefEntry = InUse {
|
data XRefEntry = InUse {
|
||||||
offset :: Int
|
offset :: Offset
|
||||||
, generation :: Int
|
, generation :: Int
|
||||||
} | Free {
|
} | Free {
|
||||||
nextFree :: Int
|
nextFree :: ObjectId
|
||||||
, generation :: Int
|
, generation :: Int
|
||||||
} deriving Show
|
} deriving Show
|
||||||
|
|
||||||
instance Output XRefEntry where
|
instance Output XRefEntry where
|
||||||
output (InUse {offset, generation}) =
|
output (InUse {offset, generation}) =
|
||||||
Output.string (printf "%010d %05d n " offset generation) `mappend` newLine
|
Output.line (printf "%010d %05d n " (getOffset offset) generation)
|
||||||
output (Free {nextFree, generation}) =
|
output (Free {nextFree, generation}) =
|
||||||
Output.string (printf "%010d %05d f " nextFree generation) `mappend` newLine
|
Output.line (printf "%010d %05d f " (getObjectId nextFree) generation)
|
||||||
|
|
||||||
entry :: Parser u XRefEntry
|
entry :: Parser u XRefEntry
|
||||||
entry = do
|
entry = do
|
||||||
|
@ -276,31 +283,56 @@ entry = do
|
||||||
(inUse big small <|> free big small <?> "XRef entry") <* blank
|
(inUse big small <|> free big small <?> "XRef entry") <* blank
|
||||||
where
|
where
|
||||||
inUse :: Int -> Int -> Parser u XRefEntry
|
inUse :: Int -> Int -> Parser u XRefEntry
|
||||||
inUse offset generation = char 'n' *> return (InUse {offset, generation})
|
inUse big generation =
|
||||||
|
char 'n' *> return (InUse {offset = Offset big, generation})
|
||||||
free :: Int -> Int -> Parser u XRefEntry
|
free :: Int -> Int -> Parser u XRefEntry
|
||||||
free nextFree generation = char 'f' *> return (Free {nextFree, generation})
|
free big generation =
|
||||||
|
char 'f' *> return (Free {nextFree = ObjectId big, generation})
|
||||||
|
|
||||||
--
|
--
|
||||||
-- XRefSubSection
|
-- XRefSubSection
|
||||||
--
|
--
|
||||||
data XRefSubSection = XRefSubSection {
|
data XRefSubSection = XRefSubSection {
|
||||||
firstObjectId :: Int
|
firstObjectId :: ObjectId
|
||||||
, entriesNumber :: Int
|
, entries :: [XRefEntry]
|
||||||
, entries :: Map Int XRefEntry
|
|
||||||
} deriving Show
|
} deriving Show
|
||||||
|
|
||||||
instance Output XRefSubSection where
|
instance Output XRefSubSection where
|
||||||
output (XRefSubSection {firstObjectId, entriesNumber, entries}) =
|
output (XRefSubSection {firstObjectId, entries}) =
|
||||||
Output.string (printf "%d %d" firstObjectId entriesNumber)
|
Output.line (printf "%d %d" (getObjectId firstObjectId) (length entries))
|
||||||
`nextLine` output (Map.elems entries)
|
`mappend` output entries
|
||||||
|
|
||||||
xrefSubSection :: Parser u XRefSubSection
|
xRefSubSection :: Parser u XRefSubSection
|
||||||
xrefSubSection = do
|
xRefSubSection = do
|
||||||
(firstObjectId, entriesNumber) <- (,) <$> integer <*> integer <?> "XRef subsection"
|
(firstId, entriesNumber) <- (,) <$> integer <*> integer <?> "XRef subsection"
|
||||||
entries <- Map.fromList . zip [firstObjectId..] <$> count entriesNumber entry
|
entries <- count entriesNumber entry
|
||||||
return $ XRefSubSection {firstObjectId, entriesNumber, entries}
|
return $ XRefSubSection {firstObjectId = ObjectId firstId, entries}
|
||||||
|
|
||||||
type XRefSection = [XRefSubSection]
|
type XRefSection = Map ObjectId XRefEntry
|
||||||
|
|
||||||
|
instance Output XRefSection where
|
||||||
|
output = output . sections
|
||||||
|
where
|
||||||
|
sections tmp =
|
||||||
|
case Map.minViewWithKey tmp of
|
||||||
|
Nothing -> []
|
||||||
|
Just ((objectId@(ObjectId value), firstEntry), rest) ->
|
||||||
|
let (subSection, sndRest) = section objectId [firstEntry] (value + 1) rest in
|
||||||
|
subSection : sections sndRest
|
||||||
|
section firstObjectId stack nextValue tmp =
|
||||||
|
let nextId = ObjectId nextValue in
|
||||||
|
case Map.lookup nextId tmp of
|
||||||
|
Nothing -> (XRefSubSection {firstObjectId, entries = reverse stack}, tmp)
|
||||||
|
Just nextEntry ->
|
||||||
|
section firstObjectId (nextEntry:stack) (nextValue + 1) (Map.delete nextId tmp)
|
||||||
|
|
||||||
|
|
||||||
|
xRefSection :: Parser u XRefSection
|
||||||
|
xRefSection = foldr addSubsection Map.empty <$>
|
||||||
|
(line "xref" *> xRefSubSection `sepBy` many EOL.parser)
|
||||||
|
where
|
||||||
|
addSubsection (XRefSubSection {firstObjectId, entries}) =
|
||||||
|
Map.union . Map.fromList $ zip ([firstObjectId..]) entries
|
||||||
|
|
||||||
--
|
--
|
||||||
-- Structure
|
-- Structure
|
||||||
|
@ -311,34 +343,28 @@ data InputStructure = InputStructure {
|
||||||
}
|
}
|
||||||
|
|
||||||
data Structure = Structure {
|
data Structure = Structure {
|
||||||
xrefSection :: XRefSection
|
xRef :: XRefSection
|
||||||
, trailer :: Dictionary
|
, trailer :: Dictionary
|
||||||
} deriving Show
|
} deriving Show
|
||||||
|
|
||||||
structure :: Parser u Structure
|
structure :: Parser u Structure
|
||||||
structure =
|
structure =
|
||||||
Structure
|
Structure
|
||||||
<$> (line "xref" *> xrefSubSection `sepBy` EOL.parser)
|
<$> xRefSection
|
||||||
<*> (line "trailer" *> dictionary <* EOL.parser)
|
<*> (string "trailer" *> blank *> dictionary <* EOL.parser)
|
||||||
|
|
||||||
updateXrefs :: XRefSection -> Map Offset Int -> (XRefSection, Int)
|
updateXrefs :: XRefSection -> Map Resource Offset -> (XRefSection, Offset)
|
||||||
updateXrefs xrefSection offsets = (
|
updateXrefs xRef offsets = (mapWithKey updateEntry xRef, offsets ! StartXRef)
|
||||||
updateSubSection <$> xrefSection
|
|
||||||
, offsets ! StartXRef
|
|
||||||
)
|
|
||||||
where
|
where
|
||||||
updateSubSection subSection@(XRefSubSection {firstObjectId, entries}) =
|
updateEntry objectId e@(InUse {}) = e {offset = offsets ! (Object objectId)}
|
||||||
subSection {entries = mapWithKey (updateEntry firstObjectId) entries}
|
updateEntry _ e = e
|
||||||
updateEntry firstObjectId index e@(InUse {}) =
|
|
||||||
e {offset = offsets ! (ObjectId $ firstObjectId + index)}
|
|
||||||
updateEntry _ _ e = e
|
|
||||||
|
|
||||||
--
|
--
|
||||||
-- Flow
|
-- Flow
|
||||||
--
|
--
|
||||||
data Flow = Flow {
|
data Flow = Flow {
|
||||||
occurrencesStack :: [Occurrence]
|
occurrencesStack :: [Occurrence]
|
||||||
, tmpObjects :: Map Int Object
|
, tmpObjects :: IndexedObjects
|
||||||
} deriving Show
|
} deriving Show
|
||||||
|
|
||||||
--
|
--
|
||||||
|
@ -346,25 +372,26 @@ data Flow = Flow {
|
||||||
--
|
--
|
||||||
data Content = Content {
|
data Content = Content {
|
||||||
occurrences :: [Occurrence]
|
occurrences :: [Occurrence]
|
||||||
, objects :: Map Int Object
|
, objects :: IndexedObjects
|
||||||
, docStructure :: Structure
|
, docStructure :: Structure
|
||||||
} deriving Show
|
} deriving Show
|
||||||
|
|
||||||
outputBody :: ([Occurrence], Map Int Object) -> OBuilder
|
outputBody :: ([Occurrence], IndexedObjects) -> OBuilder
|
||||||
outputBody (occurrences, objects) =
|
outputBody (occurrences, objects) =
|
||||||
output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef
|
output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef
|
||||||
|
|
||||||
instance Output Content where
|
instance Output Content where
|
||||||
output (Content {occurrences, objects, docStructure}) =
|
output (Content {occurrences, objects, docStructure}) =
|
||||||
fmap (updateXrefs xrefSection) <$> getOffsets (outputBody (occurrences, objects))
|
fmap (updateXrefs xRef) <$> getOffsets (outputBody (occurrences, objects))
|
||||||
>>= \(body, (xref, startXRef)) ->
|
>>= \(body, (xref, startXRef)) -> Output.concat [
|
||||||
body
|
body
|
||||||
`mappend` "xref"
|
, Output.line "xref"
|
||||||
`nextLine` output xref
|
, output xref
|
||||||
`mappend` "trailer"
|
, Output.line "trailer"
|
||||||
`nextLine` output trailer
|
, output trailer, newLine
|
||||||
`nextLine` "startxref"
|
, Output.line "startxref"
|
||||||
`nextLine` (Output.string (printf "%d" startXRef))
|
, Output.line (printf "%d" (getOffset startXRef))
|
||||||
`nextLine` byteString eofMarker
|
, byteString eofMarker
|
||||||
|
]
|
||||||
where
|
where
|
||||||
Structure {xrefSection, trailer} = docStructure
|
Structure {xRef, trailer} = docStructure
|
||||||
|
|
|
@ -2,17 +2,21 @@
|
||||||
{-# LANGUAGE MultiParamTypeClasses #-}
|
{-# LANGUAGE MultiParamTypeClasses #-}
|
||||||
{-# LANGUAGE FlexibleInstances #-}
|
{-# LANGUAGE FlexibleInstances #-}
|
||||||
{-# LANGUAGE AllowAmbiguousTypes #-}
|
{-# LANGUAGE AllowAmbiguousTypes #-}
|
||||||
|
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
||||||
module PDF.Output (
|
module PDF.Output (
|
||||||
OBuilder
|
OBuilder
|
||||||
|
, ObjectId(..)
|
||||||
, OContext(..)
|
, OContext(..)
|
||||||
, Offset(..)
|
, Offset(..)
|
||||||
, Output(..)
|
, Output(..)
|
||||||
|
, Resource(..)
|
||||||
, byteString
|
, byteString
|
||||||
, char
|
, char
|
||||||
|
, concat
|
||||||
, getOffsets
|
, getOffsets
|
||||||
, join
|
, join
|
||||||
|
, line
|
||||||
, newLine
|
, newLine
|
||||||
, nextLine
|
|
||||||
, saveOffset
|
, saveOffset
|
||||||
, string
|
, string
|
||||||
, render
|
, render
|
||||||
|
@ -28,10 +32,14 @@ import qualified Data.Map as Map (singleton)
|
||||||
import Data.String (IsString(..))
|
import Data.String (IsString(..))
|
||||||
import Control.Monad.RWS (RWS, runRWS, ask, get, listen, modify, tell)
|
import Control.Monad.RWS (RWS, runRWS, ask, get, listen, modify, tell)
|
||||||
import qualified PDF.EOL as EOL (Style(..))
|
import qualified PDF.EOL as EOL (Style(..))
|
||||||
|
import Prelude hiding (concat)
|
||||||
|
|
||||||
data Offset = StartXRef | ObjectId Int deriving (Eq, Ord)
|
newtype ObjectId = ObjectId {getObjectId :: Int} deriving (Enum, Eq, Ord, Show)
|
||||||
|
newtype Offset = Offset {getOffset :: Int} deriving (Show)
|
||||||
|
|
||||||
newtype OContext a = OContext (RWS EOL.Style (Map Offset Int) Int a)
|
data Resource = StartXRef | Object ObjectId deriving (Eq, Ord)
|
||||||
|
|
||||||
|
newtype OContext a = OContext (RWS EOL.Style (Map Resource Offset) Offset a)
|
||||||
type OBuilder = OContext Builder
|
type OBuilder = OContext Builder
|
||||||
|
|
||||||
instance Functor OContext where
|
instance Functor OContext where
|
||||||
|
@ -44,20 +52,23 @@ instance Applicative OContext where
|
||||||
instance Monad OContext where
|
instance Monad OContext where
|
||||||
(>>=) (OContext a) f = OContext (a >>= (\x -> let OContext y = f x in y))
|
(>>=) (OContext a) f = OContext (a >>= (\x -> let OContext y = f x in y))
|
||||||
|
|
||||||
saveOffset :: Offset -> OContext ()
|
saveOffset :: Resource -> OContext ()
|
||||||
saveOffset offset = OContext $
|
saveOffset resource = OContext $
|
||||||
get >>= tell . Map.singleton offset
|
get >>= tell . Map.singleton resource
|
||||||
|
|
||||||
lift :: (a -> Builder) -> a -> OBuilder
|
lift :: (a -> Builder) -> a -> OBuilder
|
||||||
lift f a = return (f a)
|
lift f a = return (f a)
|
||||||
|
|
||||||
getOffsets :: OBuilder -> OContext (OBuilder, Map Offset Int)
|
getOffsets :: OBuilder -> OContext (OBuilder, Map Resource Offset)
|
||||||
getOffsets (OContext builder) =
|
getOffsets (OContext builder) =
|
||||||
OContext (listen builder >>= \(a, w) -> return (return a, w))
|
OContext (listen builder >>= \(a, w) -> return (return a, w))
|
||||||
|
|
||||||
append :: OBuilder -> OBuilder -> OBuilder
|
append :: OBuilder -> OBuilder -> OBuilder
|
||||||
append (OContext a) (OContext b) = OContext (mappend <$> a <*> b)
|
append (OContext a) (OContext b) = OContext (mappend <$> a <*> b)
|
||||||
|
|
||||||
|
concat :: [OBuilder] -> OBuilder
|
||||||
|
concat = foldl mappend mempty
|
||||||
|
|
||||||
#if MIN_VERSION_base(4,11,0)
|
#if MIN_VERSION_base(4,11,0)
|
||||||
instance Semigroup OBuilder where
|
instance Semigroup OBuilder where
|
||||||
(<>) = append
|
(<>) = append
|
||||||
|
@ -83,7 +94,7 @@ instance Output Bool where
|
||||||
output True = string "true"
|
output True = string "true"
|
||||||
|
|
||||||
instance Output a => Output [a] where
|
instance Output a => Output [a] where
|
||||||
output = foldl mappend mempty . fmap output
|
output = concat . fmap output
|
||||||
|
|
||||||
join :: Output a => String -> [a] -> OBuilder
|
join :: Output a => String -> [a] -> OBuilder
|
||||||
join _ [] = mempty
|
join _ [] = mempty
|
||||||
|
@ -91,27 +102,30 @@ join _ [a] = output a
|
||||||
join separator (a:as) =
|
join separator (a:as) =
|
||||||
output a `mappend` string separator `mappend` (join separator as)
|
output a `mappend` string separator `mappend` (join separator as)
|
||||||
|
|
||||||
newLine :: OBuilder
|
offset :: (Int -> Int) -> OContext ()
|
||||||
newLine = OContext $ buildEOL =<< ask
|
offset f = OContext $ modify $ \(Offset o) -> Offset (f o)
|
||||||
where
|
|
||||||
buildEOL EOL.CR = return (char8 '\r') <* modify (+1)
|
|
||||||
buildEOL EOL.LF = return (char8 '\n') <* modify (+1)
|
|
||||||
buildEOL EOL.CRLF = return (string8 "\r\n") <* modify (+2)
|
|
||||||
|
|
||||||
nextLine :: OBuilder -> OBuilder -> OBuilder
|
newLine :: OBuilder
|
||||||
nextLine a b = a `mappend` newLine `mappend` b
|
newLine = buildEOL =<< OContext ask
|
||||||
|
where
|
||||||
|
buildEOL EOL.CR = return (char8 '\r') <* offset (+1)
|
||||||
|
buildEOL EOL.LF = return (char8 '\n') <* offset (+1)
|
||||||
|
buildEOL EOL.CRLF = return (string8 "\r\n") <* offset (+2)
|
||||||
|
|
||||||
char :: Char -> OBuilder
|
char :: Char -> OBuilder
|
||||||
char c = lift char8 c <* OContext (modify (+1))
|
char c = lift char8 c <* offset (+1)
|
||||||
|
|
||||||
string :: String -> OBuilder
|
string :: String -> OBuilder
|
||||||
string s = lift string8 s <* OContext (modify (+ toEnum (length s)))
|
string s = lift string8 s <* offset (+ toEnum (length s))
|
||||||
|
|
||||||
|
line :: String -> OBuilder
|
||||||
|
line l = string l `mappend` newLine
|
||||||
|
|
||||||
byteString :: ByteString -> OBuilder
|
byteString :: ByteString -> OBuilder
|
||||||
byteString bs = lift B.byteString bs <* OContext (modify (+ BS.length bs))
|
byteString bs = lift B.byteString bs <* offset (+ BS.length bs)
|
||||||
|
|
||||||
render :: Output a => EOL.Style -> a -> Lazy.ByteString
|
render :: Output a => EOL.Style -> a -> Lazy.ByteString
|
||||||
render eolStyle a =
|
render eolStyle a =
|
||||||
let OContext builder = output a in
|
let OContext builder = output a in
|
||||||
let (outputByteString, _, _) = runRWS builder eolStyle 0 in
|
let (outputByteString, _, _) = runRWS builder eolStyle (Offset 0) in
|
||||||
toLazyByteString outputByteString
|
toLazyByteString outputByteString
|
||||||
|
|
47
src/PDF/Update.hs
Normal file
47
src/PDF/Update.hs
Normal file
|
@ -0,0 +1,47 @@
|
||||||
|
{-# LANGUAGE NamedFieldPuns #-}
|
||||||
|
module PDF.Update (
|
||||||
|
unify
|
||||||
|
) where
|
||||||
|
|
||||||
|
import Data.Map (member)
|
||||||
|
import qualified Data.Map as Map (empty, union)
|
||||||
|
import PDF.Object (
|
||||||
|
Content(..), IndexedObjects, IndirectObjCoordinates(..), Occurrence(..)
|
||||||
|
, Structure(..)
|
||||||
|
)
|
||||||
|
|
||||||
|
emptyContent :: Content
|
||||||
|
emptyContent = Content {
|
||||||
|
docStructure = Structure {xRef = Map.empty, trailer = Map.empty}
|
||||||
|
, objects = Map.empty
|
||||||
|
, occurrences = []
|
||||||
|
}
|
||||||
|
|
||||||
|
unify :: [Content] -> Content
|
||||||
|
unify = foldl complete emptyContent
|
||||||
|
where
|
||||||
|
complete tmpContent older =
|
||||||
|
let mergedObjects = Map.union (objects tmpContent) (objects older) in
|
||||||
|
Content {
|
||||||
|
docStructure =
|
||||||
|
unifyDocStructure (docStructure tmpContent) (docStructure older)
|
||||||
|
, objects = mergedObjects
|
||||||
|
, occurrences =
|
||||||
|
unifyOccurrences mergedObjects (occurrences tmpContent) (occurrences older)
|
||||||
|
}
|
||||||
|
|
||||||
|
unifyDocStructure :: Structure -> Structure -> Structure
|
||||||
|
unifyDocStructure update original = Structure {
|
||||||
|
xRef = Map.union (xRef update) (xRef original)
|
||||||
|
, trailer = Map.union (trailer update) (trailer original)
|
||||||
|
}
|
||||||
|
|
||||||
|
unifyOccurrences :: IndexedObjects -> [Occurrence] -> [Occurrence] -> [Occurrence]
|
||||||
|
unifyOccurrences objects update = foldr addOlder update
|
||||||
|
where
|
||||||
|
addOlder occurrence@(Comment _) existing = occurrence : existing
|
||||||
|
addOlder occurrence@(Indirect indirect) existing =
|
||||||
|
if objectId indirect `member` objects
|
||||||
|
then occurrence : existing
|
||||||
|
else existing
|
||||||
|
|
Loading…
Reference in a new issue