Compare commits
No commits in common. "main" and "v0.1.1.1" have entirely different histories.
9 changed files with 119 additions and 292 deletions
|
@ -1,13 +1,5 @@
|
|||
# Revision history for Hufflepdf
|
||||
|
||||
## 0.2.0.1 -- 2019-11-27
|
||||
|
||||
* Fix bug discovered while running Hufflepdf on a PDF output from pdftk : magic keywords like `obj`, `stream` or `xref` can have spaces after them before the EOL
|
||||
|
||||
## 0.2.0.0 -- 2019-10-14
|
||||
|
||||
* Implement PDF's multilayer update mechanism
|
||||
|
||||
## 0.1.1.1 -- 2019-05-31
|
||||
|
||||
* Fix a bug preventing files with the «trailer» keyword on the same line as the dictionary that follows it to be parsed
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
-- documentation, see http://haskell.org/cabal/users-guide/
|
||||
|
||||
name: Hufflepdf
|
||||
version: 0.2.0.1
|
||||
version: 0.1.1.1
|
||||
synopsis: A PDF parser
|
||||
-- description:
|
||||
license: BSD3
|
||||
|
@ -20,7 +20,6 @@ library
|
|||
, PDF.EOL
|
||||
, PDF.Object
|
||||
, PDF.Output
|
||||
, PDF.Update
|
||||
other-modules: Data.ByteString.Char8.Util
|
||||
, PDF.Body
|
||||
, PDF.Parser
|
||||
|
@ -33,21 +32,3 @@ library
|
|||
hs-source-dirs: src
|
||||
ghc-options: -Wall
|
||||
default-language: Haskell2010
|
||||
|
||||
executable equivalent
|
||||
main-is: examples/equivalent.hs
|
||||
build-depends: base
|
||||
, bytestring
|
||||
, Hufflepdf
|
||||
ghc-options: -Wall
|
||||
default-language: Haskell2010
|
||||
|
||||
executable getObj
|
||||
main-is: examples/getObj.hs
|
||||
build-depends: base
|
||||
, bytestring
|
||||
, containers
|
||||
, Hufflepdf
|
||||
, zlib
|
||||
ghc-options: -Wall
|
||||
default-language: Haskell2010
|
||||
|
|
|
@ -1,13 +0,0 @@
|
|||
import qualified Data.ByteString.Char8 as BS (readFile)
|
||||
import qualified Data.ByteString.Lazy.Char8 as Lazy (writeFile)
|
||||
import PDF (parseDocument, render)
|
||||
import System.Environment (getArgs)
|
||||
import System.IO (hPutStrLn, stderr)
|
||||
|
||||
main :: IO ()
|
||||
main = do
|
||||
[inputFile, outputFile] <- getArgs
|
||||
result <- parseDocument <$> BS.readFile inputFile
|
||||
case result of
|
||||
Left parseError -> hPutStrLn stderr parseError
|
||||
Right doc -> Lazy.writeFile outputFile $ render doc
|
|
@ -1,53 +0,0 @@
|
|||
{-# LANGUAGE NamedFieldPuns #-}
|
||||
|
||||
import Codec.Compression.Zlib (decompress)
|
||||
import Data.ByteString.Lazy.Char8 (ByteString)
|
||||
import qualified Data.ByteString.Char8 as BS (readFile)
|
||||
import qualified Data.ByteString.Lazy.Char8 as Lazy (fromStrict, putStr, toStrict)
|
||||
import Data.Map ((!?))
|
||||
import qualified Data.Map as Map (keys, lookup)
|
||||
import PDF (Document(..), parseDocument)
|
||||
import qualified PDF.EOL as EOL (Style)
|
||||
import PDF.Object (Content(..), DirectObject(..), Object(..), Name(..))
|
||||
import PDF.Output (ObjectId(..))
|
||||
import qualified PDF.Output as Output (render)
|
||||
import PDF.Update (unify)
|
||||
import System.Environment (getArgs, getProgName)
|
||||
import System.Exit (die)
|
||||
import Text.Printf (printf)
|
||||
|
||||
display :: EOL.Style -> Object -> ByteString
|
||||
display eolStyle d@(Direct _) = Output.render eolStyle d
|
||||
display eolStyle s@(Stream {header, streamContent}) = Output.render eolStyle $
|
||||
case Map.lookup (Name "Filter") header of
|
||||
Just (NameObject (Name "FlateDecode")) -> Stream {
|
||||
header
|
||||
, streamContent = Lazy.toStrict . decompress $ Lazy.fromStrict streamContent
|
||||
}
|
||||
_ -> s
|
||||
|
||||
extractObject :: ObjectId -> Document -> Either String ByteString
|
||||
extractObject objectId (Document {eolStyle, updates}) =
|
||||
case objects content !? objectId of
|
||||
Nothing -> Left $ "No object has ID " ++ show (getObjectId objectId)
|
||||
Just o -> Right $ display eolStyle o
|
||||
where
|
||||
content = unify updates
|
||||
|
||||
listObjectIds :: Document -> Either String [String]
|
||||
listObjectIds =
|
||||
Right . prependTitle . toString . Map.keys . objects . unify . updates
|
||||
where
|
||||
toString = fmap (show . getObjectId)
|
||||
prependTitle = ("ObjectIds defined in this PDF:":)
|
||||
|
||||
main :: IO ()
|
||||
main = do
|
||||
(inputFile, getData) <- parse =<< getArgs
|
||||
input <- BS.readFile inputFile
|
||||
either die id $ (parseDocument input >>= getData)
|
||||
where
|
||||
parse [inputFile] = return (inputFile, fmap (mapM_ putStrLn) . listObjectIds)
|
||||
parse [inputFile, objectId] = return
|
||||
(inputFile, fmap Lazy.putStr . extractObject (ObjectId (read objectId)))
|
||||
parse _ = die . printf "Syntax: %s inputFile [OBJECT_ID]\n" =<< getProgName
|
16
src/PDF.hs
16
src/PDF.hs
|
@ -19,21 +19,21 @@ import PDF.Object (
|
|||
, Structure(..)
|
||||
, eofMarker, magicNumber, structure
|
||||
)
|
||||
import qualified PDF.Output as Output (render, line)
|
||||
import PDF.Output (Output(..))
|
||||
import qualified PDF.Output as Output (render, string)
|
||||
import PDF.Output (Output(..), nextLine)
|
||||
import PDF.Parser (Parser, runParser, string, takeAll)
|
||||
import Text.Printf (printf)
|
||||
|
||||
data Document = Document {
|
||||
pdfVersion :: String
|
||||
, eolStyle :: EOL.Style
|
||||
, updates :: [Content]
|
||||
, contents :: [Content]
|
||||
} deriving Show
|
||||
|
||||
instance Output Document where
|
||||
output (Document {pdfVersion, updates}) =
|
||||
Output.line (printf "%%PDF-%s" pdfVersion)
|
||||
`mappend` output updates
|
||||
output (Document {pdfVersion, contents}) =
|
||||
Output.string (printf "%%PDF-%s" pdfVersion)
|
||||
`nextLine` output contents
|
||||
|
||||
render :: Document -> Lazy.ByteString
|
||||
render document@(Document {eolStyle}) = Output.render eolStyle document
|
||||
|
@ -99,5 +99,5 @@ parseDocument input = do
|
|||
(pdfVersion, eolStyle) <- runParser ((,) <$> version <*> EOL.parser) () input
|
||||
startXref <- readStartXref eolStyle input
|
||||
structuresRead <- readStructures startXref input
|
||||
let updates = populate input <$> structuresRead
|
||||
return $ Document {pdfVersion, eolStyle, updates}
|
||||
let contents = populate input <$> structuresRead
|
||||
return $ Document {pdfVersion, eolStyle, contents}
|
||||
|
|
|
@ -14,10 +14,9 @@ import qualified PDF.EOL as EOL (charset, parser)
|
|||
import PDF.Object (
|
||||
Content(..), DirectObject(..), Flow(..), IndirectObjCoordinates(..)
|
||||
, InputStructure(..), Name(..), Number(..), Object(..), Occurrence(..)
|
||||
, Structure(..), XRefEntry(..), XRefSection
|
||||
, Structure(..), XRefEntry(..), XRefSection, XRefSubSection(..)
|
||||
, blank, dictionary, directObject, integer, line
|
||||
)
|
||||
import PDF.Output (ObjectId(..), Offset(..))
|
||||
import PDF.Parser (Parser, (<?>), block, char, on, option, runParser, takeAll)
|
||||
|
||||
data UserState = UserState {
|
||||
|
@ -31,7 +30,7 @@ type SParser = Parser UserState
|
|||
modifyFlow :: (Flow -> Flow) -> SParser ()
|
||||
modifyFlow f = modify $ \state -> state {flow = f $ flow state}
|
||||
|
||||
addObject :: ObjectId -> Object -> SParser ()
|
||||
addObject :: Int -> Object -> SParser ()
|
||||
addObject objectId newObject = modifyFlow $ \flow -> flow {
|
||||
tmpObjects = Map.insert objectId newObject $ tmpObjects flow
|
||||
}
|
||||
|
@ -46,20 +45,29 @@ comment = BS.unpack <$> (option "" afterPercent <* EOL.parser)
|
|||
where
|
||||
afterPercent = BS.cons <$> char '%' <*> takeAll (not . (`elem` EOL.charset))
|
||||
|
||||
lookupOffset :: ObjectId -> SParser Offset
|
||||
lookupOffset objectId = do
|
||||
lookupOffset :: Int -> XRefSection -> Maybe Int
|
||||
lookupOffset _ [] = Nothing
|
||||
lookupOffset objectId (xrefSubSection:others) =
|
||||
let XRefSubSection {firstObjectId, entriesNumber, entries} = xrefSubSection in
|
||||
let index = objectId - firstObjectId in
|
||||
if index >= 0 && index < entriesNumber
|
||||
then
|
||||
case Map.lookup index entries of
|
||||
Just (InUse {offset}) -> Just offset
|
||||
_ -> Nothing
|
||||
else lookupOffset objectId others
|
||||
|
||||
getOffset :: Int -> SParser Int
|
||||
getOffset objectId = do
|
||||
table <- gets xreferences
|
||||
case Map.lookup objectId table >>= entryOffset of
|
||||
case lookupOffset objectId table of
|
||||
Nothing -> fail $
|
||||
"obj " ++ show objectId ++ " is referenced but missing in XRef table"
|
||||
Just offset -> return offset
|
||||
where
|
||||
entryOffset (InUse {offset}) = Just offset
|
||||
entryOffset _ = Nothing
|
||||
|
||||
loadNumber :: ObjectId -> SParser Double
|
||||
loadNumber :: Int -> SParser Float
|
||||
loadNumber objectId = do
|
||||
offset <- getOffset <$> lookupOffset objectId
|
||||
offset <- getOffset objectId
|
||||
objectStart <- BS.drop offset <$> gets input
|
||||
indirectObjCoordinates `on` (objectStart :: ByteString) >> return ()
|
||||
objectValue <- (!objectId) . tmpObjects <$> gets flow
|
||||
|
@ -70,7 +78,7 @@ loadNumber objectId = do
|
|||
invalidValue :: Object -> String
|
||||
invalidValue v = "Invalid value " ++ show v
|
||||
|
||||
getSize :: Maybe DirectObject -> SParser Double
|
||||
getSize :: Maybe DirectObject -> SParser Float
|
||||
getSize Nothing = fail "Missing '/Length' key on stream"
|
||||
getSize (Just (NumberObject (Number size))) = return size
|
||||
getSize (Just (Reference (IndirectObjCoordinates {objectId}))) = do
|
||||
|
@ -96,7 +104,7 @@ object = streamObject <|> Direct <$> directObject
|
|||
|
||||
indirectObjCoordinates :: SParser IndirectObjCoordinates
|
||||
indirectObjCoordinates = do
|
||||
objectId <- ObjectId <$> integer
|
||||
objectId <- integer
|
||||
coordinates <- IndirectObjCoordinates objectId <$> integer
|
||||
objectValue <- line "obj" *> object <* blank <* line "endobj"
|
||||
addObject objectId objectValue
|
||||
|
@ -118,7 +126,7 @@ populate input structure =
|
|||
}
|
||||
where
|
||||
docStructure = inputStructure structure
|
||||
xreferences = xRef docStructure
|
||||
xreferences = xrefSection docStructure
|
||||
initialState = UserState {
|
||||
input, xreferences, flow = Flow {
|
||||
occurrencesStack = [], tmpObjects = Map.empty
|
||||
|
|
|
@ -5,7 +5,6 @@ module PDF.Object (
|
|||
Content(..)
|
||||
, DirectObject(..)
|
||||
, Flow(..)
|
||||
, IndexedObjects
|
||||
, IndirectObjCoordinates(..)
|
||||
, InputStructure(..)
|
||||
, Name(..)
|
||||
|
@ -15,6 +14,7 @@ module PDF.Object (
|
|||
, Structure(..)
|
||||
, XRefEntry(..)
|
||||
, XRefSection
|
||||
, XRefSubSection(..)
|
||||
, blank
|
||||
, dictionary
|
||||
, directObject
|
||||
|
@ -31,15 +31,12 @@ import qualified Data.ByteString.Char8 as BS (
|
|||
concat, cons, pack, singleton, unpack
|
||||
)
|
||||
import Data.Map (Map, (!), mapWithKey)
|
||||
import qualified Data.Map as Map (
|
||||
delete, empty, fromList, lookup, minViewWithKey, toList, union
|
||||
)
|
||||
import qualified Data.Map as Map (elems, fromList, toList)
|
||||
import qualified PDF.EOL as EOL (charset, parser)
|
||||
import qualified PDF.Output as Output (concat, line, string)
|
||||
import qualified PDF.Output as Output (string)
|
||||
import PDF.Output (
|
||||
OBuilder, ObjectId(..), Offset(..), Output(..), Resource(..)
|
||||
, byteString, getObjectId, getOffset, getOffsets, join, newLine
|
||||
, saveOffset
|
||||
OBuilder, Offset(..), Output(..)
|
||||
, byteString, getOffsets, join, newLine, nextLine, saveOffset
|
||||
)
|
||||
import PDF.Parser (
|
||||
Parser, (<?>)
|
||||
|
@ -49,7 +46,7 @@ import PDF.Parser (
|
|||
import Text.Printf (printf)
|
||||
|
||||
line :: String -> Parser u ()
|
||||
line l = (string (BS.pack l) *> blank *> return ()) <?> printf "line «%s»" l
|
||||
line l = (string (BS.pack l) *> EOL.parser *> return ()) <?> printf "line «%s»" l
|
||||
|
||||
magicNumber :: ByteString
|
||||
magicNumber = "%PDF-"
|
||||
|
@ -76,8 +73,6 @@ integer = read . BS.unpack <$> decNumber <* blank <?> "decimal integer"
|
|||
-- OBJECTS
|
||||
-------------------------------------
|
||||
|
||||
type IndexedObjects = Map ObjectId Object
|
||||
|
||||
--
|
||||
-- Boolean
|
||||
--
|
||||
|
@ -88,7 +83,7 @@ boolean =
|
|||
--
|
||||
-- Number
|
||||
--
|
||||
newtype Number = Number Double deriving Show
|
||||
newtype Number = Number Float deriving Show
|
||||
|
||||
instance Output Number where
|
||||
output (Number f) = Output.string $
|
||||
|
@ -135,7 +130,7 @@ stringObject =
|
|||
newtype Name = Name String deriving (Eq, Ord, Show)
|
||||
|
||||
instance Output Name where
|
||||
output (Name n) = Output.string ('/':n)
|
||||
output (Name n) = "/" `mappend` Output.string n
|
||||
|
||||
name :: Parser u Name
|
||||
name = Name . BS.unpack <$> (char '/' *> takeAll regular) <?> "name"
|
||||
|
@ -158,7 +153,7 @@ instance Output Dictionary where
|
|||
where
|
||||
keyValues = join " " $ outputKeyVal <$> Map.toList dict
|
||||
outputKeyVal :: (Name, DirectObject) -> OBuilder
|
||||
outputKeyVal (key, val) = Output.concat [output key, " ", output val]
|
||||
outputKeyVal (key, val) = output key `mappend` " " `mappend` output val
|
||||
|
||||
dictionary :: Parser u Dictionary
|
||||
dictionary =
|
||||
|
@ -177,13 +172,12 @@ nullObject = string "null" *> return () <?> "null object"
|
|||
-- Reference
|
||||
--
|
||||
data IndirectObjCoordinates = IndirectObjCoordinates {
|
||||
objectId :: ObjectId
|
||||
objectId :: Int
|
||||
, versionNumber :: Int
|
||||
} deriving Show
|
||||
|
||||
reference :: Parser u IndirectObjCoordinates
|
||||
reference = IndirectObjCoordinates
|
||||
<$> (fmap ObjectId integer) <*> integer <* char 'R' <?> "reference to an object"
|
||||
reference = IndirectObjCoordinates <$> integer <*> integer <* char 'R' <?> "reference to an object"
|
||||
|
||||
--
|
||||
-- DirectObject
|
||||
|
@ -204,11 +198,11 @@ instance Output DirectObject where
|
|||
output (NumberObject n) = output n
|
||||
output (StringObject s) = output s
|
||||
output (NameObject n) = output n
|
||||
output (Array a) = Output.concat ["[", join " " a, "]"]
|
||||
output (Array a) = "[" `mappend` join " " a `mappend` "]"
|
||||
output (Dictionary d) = output d
|
||||
output (Null) = "null"
|
||||
output (Reference (IndirectObjCoordinates {objectId, versionNumber})) =
|
||||
Output.string (printf "%d %d R" (getObjectId objectId) versionNumber)
|
||||
Output.string (printf "%d %d R" objectId versionNumber)
|
||||
|
||||
directObject :: Parser u DirectObject
|
||||
directObject =
|
||||
|
@ -235,26 +229,25 @@ data Object =
|
|||
|
||||
instance Output Object where
|
||||
output (Direct d) = output d
|
||||
output (Stream {header, streamContent}) = Output.concat [
|
||||
output header, newLine
|
||||
, Output.line "stream"
|
||||
, byteString streamContent
|
||||
, "endstream"
|
||||
]
|
||||
output (Stream {header, streamContent}) =
|
||||
output header
|
||||
`nextLine` "stream"
|
||||
`nextLine` byteString streamContent
|
||||
`mappend` "endstream"
|
||||
|
||||
--
|
||||
-- Occurrence
|
||||
--
|
||||
data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show
|
||||
|
||||
outputOccurrence :: IndexedObjects -> Occurrence -> OBuilder
|
||||
outputOccurrence _ (Comment c) = Output.line c
|
||||
outputOccurrence :: Map Int Object -> Occurrence -> OBuilder
|
||||
outputOccurrence _ (Comment c) =
|
||||
Output.string c `mappend` newLine
|
||||
outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) =
|
||||
saveOffset (Object objectId) >> Output.concat [
|
||||
Output.line (printf "%d %d obj" (getObjectId objectId) versionNumber)
|
||||
, output (objects ! objectId), newLine
|
||||
, Output.line "endobj"
|
||||
]
|
||||
saveOffset (ObjectId objectId)
|
||||
>> Output.string (printf "%d %d obj" objectId versionNumber)
|
||||
`nextLine` output (objects ! objectId)
|
||||
`nextLine` "endobj" `mappend` newLine
|
||||
|
||||
-------------------------------------
|
||||
-- XREF TABLE
|
||||
|
@ -264,18 +257,18 @@ outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNum
|
|||
-- XRefEntry
|
||||
--
|
||||
data XRefEntry = InUse {
|
||||
offset :: Offset
|
||||
offset :: Int
|
||||
, generation :: Int
|
||||
} | Free {
|
||||
nextFree :: ObjectId
|
||||
nextFree :: Int
|
||||
, generation :: Int
|
||||
} deriving Show
|
||||
|
||||
instance Output XRefEntry where
|
||||
output (InUse {offset, generation}) =
|
||||
Output.line (printf "%010d %05d n " (getOffset offset) generation)
|
||||
Output.string (printf "%010d %05d n " offset generation) `mappend` newLine
|
||||
output (Free {nextFree, generation}) =
|
||||
Output.line (printf "%010d %05d f " (getObjectId nextFree) generation)
|
||||
Output.string (printf "%010d %05d f " nextFree generation) `mappend` newLine
|
||||
|
||||
entry :: Parser u XRefEntry
|
||||
entry = do
|
||||
|
@ -283,56 +276,31 @@ entry = do
|
|||
(inUse big small <|> free big small <?> "XRef entry") <* blank
|
||||
where
|
||||
inUse :: Int -> Int -> Parser u XRefEntry
|
||||
inUse big generation =
|
||||
char 'n' *> return (InUse {offset = Offset big, generation})
|
||||
inUse offset generation = char 'n' *> return (InUse {offset, generation})
|
||||
free :: Int -> Int -> Parser u XRefEntry
|
||||
free big generation =
|
||||
char 'f' *> return (Free {nextFree = ObjectId big, generation})
|
||||
free nextFree generation = char 'f' *> return (Free {nextFree, generation})
|
||||
|
||||
--
|
||||
-- XRefSubSection
|
||||
--
|
||||
data XRefSubSection = XRefSubSection {
|
||||
firstObjectId :: ObjectId
|
||||
, entries :: [XRefEntry]
|
||||
firstObjectId :: Int
|
||||
, entriesNumber :: Int
|
||||
, entries :: Map Int XRefEntry
|
||||
} deriving Show
|
||||
|
||||
instance Output XRefSubSection where
|
||||
output (XRefSubSection {firstObjectId, entries}) =
|
||||
Output.line (printf "%d %d" (getObjectId firstObjectId) (length entries))
|
||||
`mappend` output entries
|
||||
output (XRefSubSection {firstObjectId, entriesNumber, entries}) =
|
||||
Output.string (printf "%d %d" firstObjectId entriesNumber)
|
||||
`nextLine` output (Map.elems entries)
|
||||
|
||||
xRefSubSection :: Parser u XRefSubSection
|
||||
xRefSubSection = do
|
||||
(firstId, entriesNumber) <- (,) <$> integer <*> integer <?> "XRef subsection"
|
||||
entries <- count entriesNumber entry
|
||||
return $ XRefSubSection {firstObjectId = ObjectId firstId, entries}
|
||||
xrefSubSection :: Parser u XRefSubSection
|
||||
xrefSubSection = do
|
||||
(firstObjectId, entriesNumber) <- (,) <$> integer <*> integer <?> "XRef subsection"
|
||||
entries <- Map.fromList . zip [firstObjectId..] <$> count entriesNumber entry
|
||||
return $ XRefSubSection {firstObjectId, entriesNumber, entries}
|
||||
|
||||
type XRefSection = Map ObjectId XRefEntry
|
||||
|
||||
instance Output XRefSection where
|
||||
output = output . sections
|
||||
where
|
||||
sections tmp =
|
||||
case Map.minViewWithKey tmp of
|
||||
Nothing -> []
|
||||
Just ((objectId@(ObjectId value), firstEntry), rest) ->
|
||||
let (subSection, sndRest) = section objectId [firstEntry] (value + 1) rest in
|
||||
subSection : sections sndRest
|
||||
section firstObjectId stack nextValue tmp =
|
||||
let nextId = ObjectId nextValue in
|
||||
case Map.lookup nextId tmp of
|
||||
Nothing -> (XRefSubSection {firstObjectId, entries = reverse stack}, tmp)
|
||||
Just nextEntry ->
|
||||
section firstObjectId (nextEntry:stack) (nextValue + 1) (Map.delete nextId tmp)
|
||||
|
||||
|
||||
xRefSection :: Parser u XRefSection
|
||||
xRefSection = foldr addSubsection Map.empty <$>
|
||||
(line "xref" *> xRefSubSection `sepBy` many EOL.parser)
|
||||
where
|
||||
addSubsection (XRefSubSection {firstObjectId, entries}) =
|
||||
Map.union . Map.fromList $ zip ([firstObjectId..]) entries
|
||||
type XRefSection = [XRefSubSection]
|
||||
|
||||
--
|
||||
-- Structure
|
||||
|
@ -343,28 +311,34 @@ data InputStructure = InputStructure {
|
|||
}
|
||||
|
||||
data Structure = Structure {
|
||||
xRef :: XRefSection
|
||||
xrefSection :: XRefSection
|
||||
, trailer :: Dictionary
|
||||
} deriving Show
|
||||
|
||||
structure :: Parser u Structure
|
||||
structure =
|
||||
Structure
|
||||
<$> xRefSection
|
||||
<$> (line "xref" *> xrefSubSection `sepBy` EOL.parser)
|
||||
<*> (string "trailer" *> blank *> dictionary <* EOL.parser)
|
||||
|
||||
updateXrefs :: XRefSection -> Map Resource Offset -> (XRefSection, Offset)
|
||||
updateXrefs xRef offsets = (mapWithKey updateEntry xRef, offsets ! StartXRef)
|
||||
updateXrefs :: XRefSection -> Map Offset Int -> (XRefSection, Int)
|
||||
updateXrefs xrefSection offsets = (
|
||||
updateSubSection <$> xrefSection
|
||||
, offsets ! StartXRef
|
||||
)
|
||||
where
|
||||
updateEntry objectId e@(InUse {}) = e {offset = offsets ! (Object objectId)}
|
||||
updateEntry _ e = e
|
||||
updateSubSection subSection@(XRefSubSection {firstObjectId, entries}) =
|
||||
subSection {entries = mapWithKey (updateEntry firstObjectId) entries}
|
||||
updateEntry firstObjectId index e@(InUse {}) =
|
||||
e {offset = offsets ! (ObjectId $ firstObjectId + index)}
|
||||
updateEntry _ _ e = e
|
||||
|
||||
--
|
||||
-- Flow
|
||||
--
|
||||
data Flow = Flow {
|
||||
occurrencesStack :: [Occurrence]
|
||||
, tmpObjects :: IndexedObjects
|
||||
, tmpObjects :: Map Int Object
|
||||
} deriving Show
|
||||
|
||||
--
|
||||
|
@ -372,26 +346,25 @@ data Flow = Flow {
|
|||
--
|
||||
data Content = Content {
|
||||
occurrences :: [Occurrence]
|
||||
, objects :: IndexedObjects
|
||||
, objects :: Map Int Object
|
||||
, docStructure :: Structure
|
||||
} deriving Show
|
||||
|
||||
outputBody :: ([Occurrence], IndexedObjects) -> OBuilder
|
||||
outputBody :: ([Occurrence], Map Int Object) -> OBuilder
|
||||
outputBody (occurrences, objects) =
|
||||
output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef
|
||||
|
||||
instance Output Content where
|
||||
output (Content {occurrences, objects, docStructure}) =
|
||||
fmap (updateXrefs xRef) <$> getOffsets (outputBody (occurrences, objects))
|
||||
>>= \(body, (xref, startXRef)) -> Output.concat [
|
||||
fmap (updateXrefs xrefSection) <$> getOffsets (outputBody (occurrences, objects))
|
||||
>>= \(body, (xref, startXRef)) ->
|
||||
body
|
||||
, Output.line "xref"
|
||||
, output xref
|
||||
, Output.line "trailer"
|
||||
, output trailer, newLine
|
||||
, Output.line "startxref"
|
||||
, Output.line (printf "%d" (getOffset startXRef))
|
||||
, byteString eofMarker
|
||||
]
|
||||
`mappend` "xref"
|
||||
`nextLine` output xref
|
||||
`mappend` "trailer"
|
||||
`nextLine` output trailer
|
||||
`nextLine` "startxref"
|
||||
`nextLine` (Output.string (printf "%d" startXRef))
|
||||
`nextLine` byteString eofMarker
|
||||
where
|
||||
Structure {xRef, trailer} = docStructure
|
||||
Structure {xrefSection, trailer} = docStructure
|
||||
|
|
|
@ -2,21 +2,17 @@
|
|||
{-# LANGUAGE MultiParamTypeClasses #-}
|
||||
{-# LANGUAGE FlexibleInstances #-}
|
||||
{-# LANGUAGE AllowAmbiguousTypes #-}
|
||||
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
||||
module PDF.Output (
|
||||
OBuilder
|
||||
, ObjectId(..)
|
||||
, OContext(..)
|
||||
, Offset(..)
|
||||
, Output(..)
|
||||
, Resource(..)
|
||||
, byteString
|
||||
, char
|
||||
, concat
|
||||
, getOffsets
|
||||
, join
|
||||
, line
|
||||
, newLine
|
||||
, nextLine
|
||||
, saveOffset
|
||||
, string
|
||||
, render
|
||||
|
@ -32,14 +28,10 @@ import qualified Data.Map as Map (singleton)
|
|||
import Data.String (IsString(..))
|
||||
import Control.Monad.RWS (RWS, runRWS, ask, get, listen, modify, tell)
|
||||
import qualified PDF.EOL as EOL (Style(..))
|
||||
import Prelude hiding (concat)
|
||||
|
||||
newtype ObjectId = ObjectId {getObjectId :: Int} deriving (Enum, Eq, Ord, Show)
|
||||
newtype Offset = Offset {getOffset :: Int} deriving (Show)
|
||||
data Offset = StartXRef | ObjectId Int deriving (Eq, Ord)
|
||||
|
||||
data Resource = StartXRef | Object ObjectId deriving (Eq, Ord)
|
||||
|
||||
newtype OContext a = OContext (RWS EOL.Style (Map Resource Offset) Offset a)
|
||||
newtype OContext a = OContext (RWS EOL.Style (Map Offset Int) Int a)
|
||||
type OBuilder = OContext Builder
|
||||
|
||||
instance Functor OContext where
|
||||
|
@ -52,23 +44,20 @@ instance Applicative OContext where
|
|||
instance Monad OContext where
|
||||
(>>=) (OContext a) f = OContext (a >>= (\x -> let OContext y = f x in y))
|
||||
|
||||
saveOffset :: Resource -> OContext ()
|
||||
saveOffset resource = OContext $
|
||||
get >>= tell . Map.singleton resource
|
||||
saveOffset :: Offset -> OContext ()
|
||||
saveOffset offset = OContext $
|
||||
get >>= tell . Map.singleton offset
|
||||
|
||||
lift :: (a -> Builder) -> a -> OBuilder
|
||||
lift f a = return (f a)
|
||||
|
||||
getOffsets :: OBuilder -> OContext (OBuilder, Map Resource Offset)
|
||||
getOffsets :: OBuilder -> OContext (OBuilder, Map Offset Int)
|
||||
getOffsets (OContext builder) =
|
||||
OContext (listen builder >>= \(a, w) -> return (return a, w))
|
||||
|
||||
append :: OBuilder -> OBuilder -> OBuilder
|
||||
append (OContext a) (OContext b) = OContext (mappend <$> a <*> b)
|
||||
|
||||
concat :: [OBuilder] -> OBuilder
|
||||
concat = foldl mappend mempty
|
||||
|
||||
#if MIN_VERSION_base(4,11,0)
|
||||
instance Semigroup OBuilder where
|
||||
(<>) = append
|
||||
|
@ -94,7 +83,7 @@ instance Output Bool where
|
|||
output True = string "true"
|
||||
|
||||
instance Output a => Output [a] where
|
||||
output = concat . fmap output
|
||||
output = foldl mappend mempty . fmap output
|
||||
|
||||
join :: Output a => String -> [a] -> OBuilder
|
||||
join _ [] = mempty
|
||||
|
@ -102,30 +91,27 @@ join _ [a] = output a
|
|||
join separator (a:as) =
|
||||
output a `mappend` string separator `mappend` (join separator as)
|
||||
|
||||
offset :: (Int -> Int) -> OContext ()
|
||||
offset f = OContext $ modify $ \(Offset o) -> Offset (f o)
|
||||
|
||||
newLine :: OBuilder
|
||||
newLine = buildEOL =<< OContext ask
|
||||
newLine = OContext $ buildEOL =<< ask
|
||||
where
|
||||
buildEOL EOL.CR = return (char8 '\r') <* offset (+1)
|
||||
buildEOL EOL.LF = return (char8 '\n') <* offset (+1)
|
||||
buildEOL EOL.CRLF = return (string8 "\r\n") <* offset (+2)
|
||||
buildEOL EOL.CR = return (char8 '\r') <* modify (+1)
|
||||
buildEOL EOL.LF = return (char8 '\n') <* modify (+1)
|
||||
buildEOL EOL.CRLF = return (string8 "\r\n") <* modify (+2)
|
||||
|
||||
nextLine :: OBuilder -> OBuilder -> OBuilder
|
||||
nextLine a b = a `mappend` newLine `mappend` b
|
||||
|
||||
char :: Char -> OBuilder
|
||||
char c = lift char8 c <* offset (+1)
|
||||
char c = lift char8 c <* OContext (modify (+1))
|
||||
|
||||
string :: String -> OBuilder
|
||||
string s = lift string8 s <* offset (+ toEnum (length s))
|
||||
|
||||
line :: String -> OBuilder
|
||||
line l = string l `mappend` newLine
|
||||
string s = lift string8 s <* OContext (modify (+ toEnum (length s)))
|
||||
|
||||
byteString :: ByteString -> OBuilder
|
||||
byteString bs = lift B.byteString bs <* offset (+ BS.length bs)
|
||||
byteString bs = lift B.byteString bs <* OContext (modify (+ BS.length bs))
|
||||
|
||||
render :: Output a => EOL.Style -> a -> Lazy.ByteString
|
||||
render eolStyle a =
|
||||
let OContext builder = output a in
|
||||
let (outputByteString, _, _) = runRWS builder eolStyle (Offset 0) in
|
||||
let (outputByteString, _, _) = runRWS builder eolStyle 0 in
|
||||
toLazyByteString outputByteString
|
||||
|
|
|
@ -1,47 +0,0 @@
|
|||
{-# LANGUAGE NamedFieldPuns #-}
|
||||
module PDF.Update (
|
||||
unify
|
||||
) where
|
||||
|
||||
import Data.Map (member)
|
||||
import qualified Data.Map as Map (empty, union)
|
||||
import PDF.Object (
|
||||
Content(..), IndexedObjects, IndirectObjCoordinates(..), Occurrence(..)
|
||||
, Structure(..)
|
||||
)
|
||||
|
||||
emptyContent :: Content
|
||||
emptyContent = Content {
|
||||
docStructure = Structure {xRef = Map.empty, trailer = Map.empty}
|
||||
, objects = Map.empty
|
||||
, occurrences = []
|
||||
}
|
||||
|
||||
unify :: [Content] -> Content
|
||||
unify = foldl complete emptyContent
|
||||
where
|
||||
complete tmpContent older =
|
||||
let mergedObjects = Map.union (objects tmpContent) (objects older) in
|
||||
Content {
|
||||
docStructure =
|
||||
unifyDocStructure (docStructure tmpContent) (docStructure older)
|
||||
, objects = mergedObjects
|
||||
, occurrences =
|
||||
unifyOccurrences mergedObjects (occurrences tmpContent) (occurrences older)
|
||||
}
|
||||
|
||||
unifyDocStructure :: Structure -> Structure -> Structure
|
||||
unifyDocStructure update original = Structure {
|
||||
xRef = Map.union (xRef update) (xRef original)
|
||||
, trailer = Map.union (trailer update) (trailer original)
|
||||
}
|
||||
|
||||
unifyOccurrences :: IndexedObjects -> [Occurrence] -> [Occurrence] -> [Occurrence]
|
||||
unifyOccurrences objects update = foldr addOlder update
|
||||
where
|
||||
addOlder occurrence@(Comment _) existing = occurrence : existing
|
||||
addOlder occurrence@(Indirect indirect) existing =
|
||||
if objectId indirect `member` objects
|
||||
then occurrence : existing
|
||||
else existing
|
||||
|
Loading…
Reference in a new issue