Fix output implementation with dynamic XRefs
This commit is contained in:
parent
e23618da68
commit
0336baa687
4 changed files with 345 additions and 197 deletions
25
src/PDF.hs
25
src/PDF.hs
|
@ -1,8 +1,8 @@
|
|||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE NamedFieldPuns #-}
|
||||
{-# LANGUAGE MultiParamTypeClasses #-}
|
||||
module PDF (
|
||||
Document(..)
|
||||
, Content(..)
|
||||
, DirectObject(..)
|
||||
, parseDocument
|
||||
, render
|
||||
|
@ -17,7 +17,10 @@ import Data.Int (Int64)
|
|||
import qualified Data.Map as Map (lookup)
|
||||
import PDF.Body (populate)
|
||||
import qualified PDF.EOL as EOL (Style(..), charset, parser)
|
||||
import PDF.Object (Content(..), DirectObject(..), content, eofMarker, magicNumber)
|
||||
import PDF.Object (
|
||||
Content(..), DirectObject(..), Name(..), Number(..), Structure(..)
|
||||
, eofMarker, magicNumber, structure
|
||||
)
|
||||
import qualified PDF.Output as Output (render, string)
|
||||
import PDF.Output (Output(..), nextLine)
|
||||
import Text.Parsec
|
||||
|
@ -86,23 +89,23 @@ findNextSection offset input =
|
|||
then newOffset + findNextLine newInput
|
||||
else findNextSection (newOffset + 1) (BS.drop 1 newInput)
|
||||
|
||||
iterateContents :: Int64 -> ByteString -> Either ParseError [Content]
|
||||
iterateContents startXref input =
|
||||
parse content ('@':show startXref) (BS.drop startXref input) >>= stopOrFollow
|
||||
readStructures :: Int64 -> ByteString -> Either ParseError [Structure]
|
||||
readStructures startXref input =
|
||||
parse structure ('@':show startXref) (BS.drop startXref input) >>= stopOrFollow
|
||||
where
|
||||
stopOrFollow c@(Content {trailer}) =
|
||||
case Map.lookup "Prev" trailer of
|
||||
stopOrFollow c@(Structure {inputTrailer}) =
|
||||
case Map.lookup (Name "Prev") inputTrailer of
|
||||
Nothing -> Right [c {startOffset = findNextLine input}]
|
||||
Just (Number newStartXref) ->
|
||||
Just (NumberObject (Number newStartXref)) ->
|
||||
let offset = truncate newStartXref in
|
||||
let startOffset = findNextSection offset (BS.drop offset input) in
|
||||
(c {startOffset}:) <$> (iterateContents offset input)
|
||||
(c {startOffset}:) <$> (readStructures offset input)
|
||||
Just v -> parseError $ "Bad value for Prev entry in trailer: " ++ show v
|
||||
|
||||
parseDocument :: ByteString -> Either ParseError Document
|
||||
parseDocument input = do
|
||||
(pdfVersion, eolStyle) <- parse ((,) <$> version <*> EOL.parser) "" input
|
||||
startXref <- readStartXref eolStyle input
|
||||
structures <- iterateContents startXref input
|
||||
let contents = populate input <$> structures
|
||||
structuresRead <- readStructures startXref input
|
||||
let contents = populate input <$> structuresRead
|
||||
return $ Document {pdfVersion, eolStyle, contents}
|
||||
|
|
|
@ -7,33 +7,35 @@ import Data.ByteString.Lazy.Char8 (ByteString)
|
|||
import qualified Data.ByteString.Lazy.Char8 as BS (drop, pack)
|
||||
import Data.Int (Int64)
|
||||
import Data.Map ((!))
|
||||
import qualified Data.Map as Map (insert, lookup)
|
||||
import qualified Data.Map as Map (empty, insert, lookup)
|
||||
import qualified PDF.EOL as EOL (charset, parser)
|
||||
import PDF.Object (
|
||||
Content(..), DirectObject(..), IndirectObjCoordinates(..), Object(..)
|
||||
, Occurrence(..), Parser, XRefEntry(..), XRefSection, XRefSubSection(..)
|
||||
Content(..), DirectObject(..), Flow(..), IndirectObjCoordinates(..), Name(..)
|
||||
, Number(..), Object(..), Occurrence(..), Parser, Structure(..)
|
||||
, XRefEntry(..), XRefSection, XRefSubSection(..)
|
||||
, blank, dictionary, directObject, integer, line
|
||||
)
|
||||
import Text.Parsec
|
||||
|
||||
data UserState = UserState {
|
||||
input :: ByteString
|
||||
, content :: Content
|
||||
, structure :: Structure
|
||||
, flow :: Flow
|
||||
}
|
||||
|
||||
type SParser = Parser UserState
|
||||
|
||||
modifyContent :: (Content -> Content) -> SParser ()
|
||||
modifyContent f = modifyState $ \state -> state {content = f $ content state}
|
||||
modifyFlow :: (Flow -> Flow) -> SParser ()
|
||||
modifyFlow f = modifyState $ \state -> state {flow = f $ flow state}
|
||||
|
||||
addObject :: Int -> Object -> SParser ()
|
||||
addObject objectId newObject = modifyContent $ \content -> content {
|
||||
objects = Map.insert objectId newObject $ objects content
|
||||
addObject objectId newObject = modifyFlow $ \flow -> flow {
|
||||
tmpObjects = Map.insert objectId newObject $ tmpObjects flow
|
||||
}
|
||||
|
||||
pushOccurrence :: Occurrence -> SParser ()
|
||||
pushOccurrence newOccurrence = modifyContent $ \content -> content {
|
||||
body = newOccurrence : (body content)
|
||||
pushOccurrence newOccurrence = modifyFlow $ \flow -> flow {
|
||||
occurrencesStack = newOccurrence : (occurrencesStack flow)
|
||||
}
|
||||
|
||||
comment :: Parser u String
|
||||
|
@ -53,7 +55,7 @@ lookupOffset objectId (xrefSubSection:others) =
|
|||
|
||||
getOffset :: Int -> SParser Int64
|
||||
getOffset objectId = do
|
||||
Content {xrefSection} <- content <$> getState
|
||||
Structure {xrefSection} <- structure <$> getState
|
||||
case lookupOffset objectId xrefSection of
|
||||
Nothing -> fail $
|
||||
"obj " ++ show objectId ++ " is referenced but missing in XRef table"
|
||||
|
@ -69,9 +71,9 @@ loadNumber objectId = do
|
|||
offset <- getOffset objectId
|
||||
objectStart <- BS.drop offset . input <$> getState
|
||||
indirectObjCoordinates `on` objectStart >> return ()
|
||||
objectValue <- (!objectId) . objects . content <$> getState
|
||||
objectValue <- (!objectId) . tmpObjects . flow <$> getState
|
||||
case objectValue of
|
||||
Direct (Number n) -> return n
|
||||
Direct (NumberObject (Number n)) -> return n
|
||||
obj -> fail $ "Expected obj@" ++ show offset ++ " (" ++ show obj ++ ") to be a Number"
|
||||
|
||||
invalidValue :: Object -> String
|
||||
|
@ -79,12 +81,12 @@ invalidValue v = "Invalid value " ++ show v
|
|||
|
||||
getSize :: Maybe DirectObject -> SParser Float
|
||||
getSize Nothing = fail "Missing '/Length' key on stream"
|
||||
getSize (Just (Number size)) = return size
|
||||
getSize (Just (NumberObject (Number size))) = return size
|
||||
getSize (Just (Reference (IndirectObjCoordinates {objectId}))) = do
|
||||
Content {objects} <- content <$> getState
|
||||
case Map.lookup objectId objects of
|
||||
Flow {tmpObjects} <- flow <$> getState
|
||||
case Map.lookup objectId tmpObjects of
|
||||
Nothing -> loadNumber objectId
|
||||
Just (Direct (Number size)) -> return size
|
||||
Just (Direct (NumberObject (Number size))) -> return size
|
||||
Just v -> fail $
|
||||
invalidValue v ++ " for obj " ++ show objectId ++ "used as /Length"
|
||||
getSize (Just v) = fail $ invalidValue (Direct v) ++ " for /Length"
|
||||
|
@ -92,7 +94,7 @@ getSize (Just v) = fail $ invalidValue (Direct v) ++ " for /Length"
|
|||
streamObject :: SParser Object
|
||||
streamObject = try $ do
|
||||
header <- dictionary <* blank
|
||||
size <- getSize (Map.lookup "Length" header)
|
||||
size <- getSize (Map.lookup (Name "Length") header)
|
||||
streamContent <- BS.pack <$> stream (truncate size)
|
||||
return $ Stream {header, streamContent}
|
||||
where
|
||||
|
@ -112,16 +114,20 @@ indirectObjCoordinates = do
|
|||
occurrence :: SParser Occurrence
|
||||
occurrence = Comment <$> comment <|> Indirect <$> indirectObjCoordinates
|
||||
|
||||
populate :: ByteString -> Content -> Content
|
||||
populate input initialContent =
|
||||
let bodyInput = BS.drop (startOffset initialContent) input in
|
||||
populate :: ByteString -> Structure -> Content
|
||||
populate input structure =
|
||||
let bodyInput = BS.drop (startOffset structure) input in
|
||||
case runParser recurseOnOccurrences initialState "" bodyInput of
|
||||
Left _ -> initialContent
|
||||
Left _ -> emptyContent
|
||||
Right finalState ->
|
||||
let finalContent = content finalState in
|
||||
finalContent {body = reverse (body finalContent)}
|
||||
let Flow {occurrencesStack, tmpObjects} = flow finalState in
|
||||
Content {occurrences = reverse occurrencesStack, objects = tmpObjects, trailer}
|
||||
where
|
||||
initialState = UserState {input, content = initialContent}
|
||||
trailer = inputTrailer structure
|
||||
emptyContent = Content {occurrences = [], objects = Map.empty, trailer}
|
||||
initialState = UserState {
|
||||
input, structure, flow = Flow {occurrencesStack = [], tmpObjects = Map.empty}
|
||||
}
|
||||
|
||||
recurseOnOccurrences :: SParser UserState
|
||||
recurseOnOccurrences =
|
||||
|
|
|
@ -1,159 +1,46 @@
|
|||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE NamedFieldPuns #-}
|
||||
{-# LANGUAGE FlexibleInstances #-}
|
||||
{-# LANGUAGE MultiParamTypeClasses #-}
|
||||
module PDF.Object (
|
||||
Content(..)
|
||||
, DirectObject(..)
|
||||
, Flow(..)
|
||||
, IndirectObjCoordinates(..)
|
||||
, Name(..)
|
||||
, Number(..)
|
||||
, Object(..)
|
||||
, Occurrence(..)
|
||||
, Parser
|
||||
, Structure(..)
|
||||
, XRefEntry(..)
|
||||
, XRefSection
|
||||
, XRefSubSection(..)
|
||||
, blank
|
||||
, content
|
||||
, dictionary
|
||||
, directObject
|
||||
, eofMarker
|
||||
, integer
|
||||
, line
|
||||
, magicNumber
|
||||
, structure
|
||||
) where
|
||||
|
||||
import Data.ByteString.Lazy.Char8 (ByteString)
|
||||
import Data.Int (Int64)
|
||||
import Data.Map (Map, (!))
|
||||
import qualified Data.Map as Map (empty, elems, fromList, toList)
|
||||
import qualified Data.Map as Map (elems, fromList, toList)
|
||||
import qualified PDF.EOL as EOL (charset, parser)
|
||||
import qualified PDF.Output as Output (string)
|
||||
import PDF.Output (OBuilder, Output(..), byteString, join, newLine, nextLine)
|
||||
import PDF.Output (
|
||||
OBuilder, Offset(..), Output(..)
|
||||
, byteString, getOffsets, join, newLine, nextLine, saveOffset
|
||||
)
|
||||
import Text.Parsec
|
||||
import Text.Printf (printf)
|
||||
|
||||
type Parser u = Parsec ByteString u
|
||||
|
||||
type Dictionary = Map String DirectObject
|
||||
|
||||
instance Output (Map String DirectObject) where
|
||||
output dict =
|
||||
"<<" `mappend` keyValues `mappend` ">>"
|
||||
where
|
||||
keyValues = join " " $ outputKeyVal <$> Map.toList dict
|
||||
outputKeyVal (key, val) =
|
||||
Output.string (printf "/%s " key) `mappend` output val
|
||||
|
||||
data StringObj = Literal String | Hexadecimal String deriving Show
|
||||
|
||||
instance Output StringObj where
|
||||
output (Literal s) = Output.string (printf "(%s)" s)
|
||||
output (Hexadecimal s) = Output.string (printf "<%s>" s)
|
||||
|
||||
data DirectObject =
|
||||
Boolean Bool
|
||||
| Number Float
|
||||
| String StringObj
|
||||
| Name String
|
||||
| Array [DirectObject]
|
||||
| Dictionary Dictionary
|
||||
| Null
|
||||
| Reference IndirectObjCoordinates
|
||||
deriving Show
|
||||
|
||||
outputFloat :: Float -> OBuilder
|
||||
outputFloat f = Output.string $
|
||||
case properFraction f of
|
||||
(n, 0) -> printf "%d" (n :: Int)
|
||||
_ -> printf "%f" f
|
||||
|
||||
instance Output DirectObject where
|
||||
output (Boolean b) = output b
|
||||
output (Number n) = outputFloat n
|
||||
output (String s) = output s
|
||||
output (Name n) = "/" `mappend` Output.string n
|
||||
output (Array a) = "[" `mappend` join " " a `mappend` "]"
|
||||
output (Dictionary d) = output d
|
||||
output (Null) = "null"
|
||||
output (Reference (IndirectObjCoordinates {objectId, versionNumber})) =
|
||||
Output.string (printf "%d %d R" objectId versionNumber)
|
||||
|
||||
data Object =
|
||||
Direct DirectObject
|
||||
| Stream {
|
||||
header :: Dictionary
|
||||
, streamContent :: ByteString
|
||||
}
|
||||
deriving Show
|
||||
|
||||
instance Output Object where
|
||||
output (Direct d) = output d
|
||||
output (Stream {header, streamContent}) =
|
||||
output header
|
||||
`nextLine` "stream"
|
||||
`nextLine` byteString streamContent
|
||||
`mappend` "endstream"
|
||||
|
||||
data IndirectObjCoordinates = IndirectObjCoordinates {
|
||||
objectId :: Int
|
||||
, versionNumber :: Int
|
||||
} deriving Show
|
||||
|
||||
data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show
|
||||
|
||||
outputOccurrence :: Map Int Object -> Occurrence -> OBuilder
|
||||
outputOccurrence _ (Comment c) =
|
||||
Output.string (printf "%%%s" c) `mappend` newLine
|
||||
outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) =
|
||||
Output.string (printf "%d %d obj" objectId versionNumber)
|
||||
`nextLine` output (objects ! objectId)
|
||||
`nextLine` "endobj" `mappend` newLine
|
||||
|
||||
data XRefEntry = InUse {
|
||||
offset :: Int64
|
||||
, generation :: Int
|
||||
} | Free {
|
||||
nextFree :: Int64
|
||||
, generation :: Int
|
||||
} deriving Show
|
||||
|
||||
instance Output XRefEntry where
|
||||
output (InUse {offset, generation}) =
|
||||
Output.string (printf "%010d %05d n " offset generation) `mappend` newLine
|
||||
output (Free {nextFree, generation}) =
|
||||
Output.string (printf "%010d %05d f " nextFree generation) `mappend` newLine
|
||||
|
||||
data XRefSubSection = XRefSubSection {
|
||||
firstObjectId :: Int
|
||||
, entriesNumber :: Int
|
||||
, entries :: Map Int XRefEntry
|
||||
} deriving Show
|
||||
|
||||
instance Output XRefSubSection where
|
||||
output (XRefSubSection {firstObjectId, entriesNumber, entries}) =
|
||||
Output.string (printf "%d %d" firstObjectId entriesNumber)
|
||||
`nextLine` output (Map.elems entries)
|
||||
|
||||
type XRefSection = [XRefSubSection]
|
||||
|
||||
data Content = Content {
|
||||
startOffset :: Int64
|
||||
, body :: [Occurrence]
|
||||
, objects :: Map Int Object
|
||||
, xrefSection :: XRefSection
|
||||
, trailer :: Dictionary
|
||||
, startXrefPosition :: Int64
|
||||
} deriving Show
|
||||
|
||||
instance Output Content where
|
||||
output (Content {body, objects, trailer, xrefSection, startXrefPosition}) =
|
||||
output (outputOccurrence objects <$> body)
|
||||
`mappend` output xrefSection
|
||||
`mappend` "trailer"
|
||||
`nextLine` output trailer
|
||||
`nextLine` "startxref"
|
||||
`nextLine` Output.string (printf "%d" startXrefPosition)
|
||||
`nextLine` byteString eofMarker
|
||||
|
||||
line :: String -> Parser u ()
|
||||
line l = string l *> EOL.parser *> return ()
|
||||
|
||||
|
@ -181,29 +68,46 @@ regular = noneOf $ EOL.charset ++ whiteSpaceCharset ++ delimiterCharset
|
|||
integer :: (Read a, Num a) => Parser u a
|
||||
integer = read <$> many1 digit <* whiteSpace
|
||||
|
||||
directObject :: Parser u DirectObject
|
||||
directObject =
|
||||
Boolean <$> try boolean
|
||||
<|> Reference <$> try reference {- defined before Number because Number is a prefix of it -}
|
||||
<|> Number <$> try number
|
||||
<|> String <$> try stringObj
|
||||
<|> Name <$> try name
|
||||
<|> Array <$> try array
|
||||
<|> Dictionary <$> try dictionary
|
||||
<|> const Null <$> try nullObject
|
||||
-------------------------------------
|
||||
-- OBJECTS
|
||||
-------------------------------------
|
||||
|
||||
--
|
||||
-- Boolean
|
||||
--
|
||||
boolean :: Parser u Bool
|
||||
boolean = (string "true" *> return True) <|> (string "false" *> return False)
|
||||
|
||||
number :: Parser u Float
|
||||
number = read <$> (mappend <$> sign <*> (integerPart <|> ('0':) <$> floatPart))
|
||||
--
|
||||
-- Number
|
||||
--
|
||||
newtype Number = Number Float deriving Show
|
||||
|
||||
instance Output Number where
|
||||
output (Number f) = Output.string $
|
||||
case properFraction f of
|
||||
(n, 0) -> printf "%d" (n :: Int)
|
||||
_ -> printf "%f" f
|
||||
|
||||
number :: Parser u Number
|
||||
number = Number . read <$>
|
||||
(mappend <$> sign <*> (integerPart <|> ('0':) <$> floatPart))
|
||||
where
|
||||
sign = string "-" <|> option "" (char '+' >> return "")
|
||||
integerPart = mappend <$> many1 digit <*> option "" floatPart
|
||||
floatPart = (:) <$> char '.' <*> (option "0" $ many1 digit)
|
||||
|
||||
stringObj :: Parser u StringObj
|
||||
stringObj =
|
||||
--
|
||||
-- StringObject
|
||||
--
|
||||
data StringObject = Literal String | Hexadecimal String deriving Show
|
||||
|
||||
instance Output StringObject where
|
||||
output (Literal s) = Output.string (printf "(%s)" s)
|
||||
output (Hexadecimal s) = Output.string (printf "<%s>" s)
|
||||
|
||||
stringObject :: Parser u StringObject
|
||||
stringObject =
|
||||
Literal <$> (char '(' *> (concat <$> many literalStringBlock) <* char ')')
|
||||
<|> Hexadecimal <$> (char '<' *> many hexDigit <* char '>')
|
||||
where
|
||||
|
@ -213,12 +117,36 @@ stringObj =
|
|||
escapedChar = (:) <$> char '\\' <*> ((:[]) <$> oneOf "nrtbf()\\" <|> octalCode)
|
||||
octalCode = choice $ (\n -> count n octDigit) <$> [1..3]
|
||||
|
||||
name :: Parser u String
|
||||
name = char '/' *> many regular
|
||||
--
|
||||
-- Name
|
||||
--
|
||||
newtype Name = Name String deriving (Eq, Ord, Show)
|
||||
|
||||
instance Output Name where
|
||||
output (Name n) = "/" `mappend` Output.string n
|
||||
|
||||
name :: Parser u Name
|
||||
name = Name <$> (char '/' *> many regular)
|
||||
|
||||
--
|
||||
-- Array
|
||||
--
|
||||
array :: Parser u [DirectObject]
|
||||
array = char '[' *> blank *> directObject `endBy` blank <* char ']'
|
||||
|
||||
--
|
||||
-- Dictionary
|
||||
--
|
||||
type Dictionary = Map Name DirectObject
|
||||
|
||||
instance Output Dictionary where
|
||||
output dict =
|
||||
"<<" `mappend` keyValues `mappend` ">>"
|
||||
where
|
||||
keyValues = join " " $ outputKeyVal <$> Map.toList dict
|
||||
outputKeyVal :: (Name, DirectObject) -> OBuilder
|
||||
outputKeyVal (key, val) = output key `mappend` " " `mappend` output val
|
||||
|
||||
dictionary :: Parser u Dictionary
|
||||
dictionary =
|
||||
try (string "<<" *> blank *> keyValPairs <* string ">>")
|
||||
|
@ -226,12 +154,114 @@ dictionary =
|
|||
keyVal = (,) <$> name <* blank <*> directObject
|
||||
keyValPairs = Map.fromList <$> keyVal `endBy` blank
|
||||
|
||||
--
|
||||
-- Null
|
||||
--
|
||||
nullObject :: Parser u ()
|
||||
nullObject = string "null" *> return ()
|
||||
|
||||
--
|
||||
-- Reference
|
||||
--
|
||||
data IndirectObjCoordinates = IndirectObjCoordinates {
|
||||
objectId :: Int
|
||||
, versionNumber :: Int
|
||||
} deriving Show
|
||||
|
||||
reference :: Parser u IndirectObjCoordinates
|
||||
reference = IndirectObjCoordinates <$> integer <*> integer <* char 'R'
|
||||
|
||||
--
|
||||
-- DirectObject
|
||||
--
|
||||
data DirectObject =
|
||||
Boolean Bool
|
||||
| NumberObject Number
|
||||
| StringObject StringObject
|
||||
| NameObject Name
|
||||
| Array [DirectObject]
|
||||
| Dictionary Dictionary
|
||||
| Null
|
||||
| Reference IndirectObjCoordinates
|
||||
deriving Show
|
||||
|
||||
instance Output DirectObject where
|
||||
output (Boolean b) = output b
|
||||
output (NumberObject n) = output n
|
||||
output (StringObject s) = output s
|
||||
output (NameObject n) = output n
|
||||
output (Array a) = "[" `mappend` join " " a `mappend` "]"
|
||||
output (Dictionary d) = output d
|
||||
output (Null) = "null"
|
||||
output (Reference (IndirectObjCoordinates {objectId, versionNumber})) =
|
||||
Output.string (printf "%d %d R" objectId versionNumber)
|
||||
|
||||
directObject :: Parser u DirectObject
|
||||
directObject =
|
||||
Boolean <$> try boolean
|
||||
<|> Reference <$> try reference {- defined before Number because Number is a prefix of it -}
|
||||
<|> NumberObject <$> try number
|
||||
<|> StringObject <$> try stringObject
|
||||
<|> NameObject <$> try name
|
||||
<|> Array <$> try array
|
||||
<|> Dictionary <$> try dictionary
|
||||
<|> const Null <$> try nullObject
|
||||
|
||||
--
|
||||
-- Object
|
||||
--
|
||||
data Object =
|
||||
Direct DirectObject
|
||||
| Stream {
|
||||
header :: Dictionary
|
||||
, streamContent :: ByteString
|
||||
}
|
||||
deriving Show
|
||||
|
||||
instance Output Object where
|
||||
output (Direct d) = output d
|
||||
output (Stream {header, streamContent}) =
|
||||
output header
|
||||
`nextLine` "stream"
|
||||
`nextLine` byteString streamContent
|
||||
`mappend` "endstream"
|
||||
|
||||
|
||||
--
|
||||
-- Occurrence
|
||||
--
|
||||
data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show
|
||||
|
||||
outputOccurrence :: Map Int Object -> Occurrence -> OBuilder
|
||||
outputOccurrence _ (Comment c) =
|
||||
Output.string (printf "%%%s" c) `mappend` newLine
|
||||
outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) =
|
||||
saveOffset (ObjectId objectId)
|
||||
>> Output.string (printf "%d %d obj" objectId versionNumber)
|
||||
`nextLine` output (objects ! objectId)
|
||||
`nextLine` "endobj" `mappend` newLine
|
||||
|
||||
-------------------------------------
|
||||
-- XREF TABLE
|
||||
-------------------------------------
|
||||
|
||||
--
|
||||
-- XRefEntry
|
||||
--
|
||||
data XRefEntry = InUse {
|
||||
offset :: Int64
|
||||
, generation :: Int
|
||||
} | Free {
|
||||
nextFree :: Int64
|
||||
, generation :: Int
|
||||
} deriving Show
|
||||
|
||||
instance Output XRefEntry where
|
||||
output (InUse {offset, generation}) =
|
||||
Output.string (printf "%010d %05d n " offset generation) `mappend` newLine
|
||||
output (Free {nextFree, generation}) =
|
||||
Output.string (printf "%010d %05d f " nextFree generation) `mappend` newLine
|
||||
|
||||
entry :: Parser u XRefEntry
|
||||
entry = do
|
||||
(big, small) <- (,) <$> integer <*> integer
|
||||
|
@ -242,15 +272,83 @@ entry = do
|
|||
free :: Int64 -> Int -> Parser u XRefEntry
|
||||
free nextFree generation = char 'f' *> return (Free {nextFree, generation})
|
||||
|
||||
|
||||
--
|
||||
-- XRefSubSection
|
||||
--
|
||||
data XRefSubSection = XRefSubSection {
|
||||
firstObjectId :: Int
|
||||
, entriesNumber :: Int
|
||||
, entries :: Map Int XRefEntry
|
||||
} deriving Show
|
||||
|
||||
instance Output XRefSubSection where
|
||||
output (XRefSubSection {firstObjectId, entriesNumber, entries}) =
|
||||
Output.string (printf "%d %d" firstObjectId entriesNumber)
|
||||
`nextLine` output (Map.elems entries)
|
||||
|
||||
xrefSubSection :: Parser u XRefSubSection
|
||||
xrefSubSection = do
|
||||
(firstObjectId, entriesNumber) <- (,) <$> integer <*> integer
|
||||
entries <- Map.fromList . zip [firstObjectId..] <$> count entriesNumber entry
|
||||
return $ XRefSubSection {firstObjectId, entriesNumber, entries}
|
||||
|
||||
content :: Parser u Content
|
||||
content =
|
||||
Content 0 [] Map.empty
|
||||
type XRefSection = [XRefSubSection]
|
||||
|
||||
--
|
||||
-- Structure
|
||||
--
|
||||
data Structure = Structure {
|
||||
startOffset :: Int64
|
||||
, xrefSection :: XRefSection
|
||||
, inputTrailer :: Dictionary
|
||||
--, startXRef :: Int64
|
||||
} deriving Show
|
||||
|
||||
structure :: Parser u Structure
|
||||
structure =
|
||||
Structure 0
|
||||
<$> (line "xref" *> xrefSubSection `sepBy` EOL.parser)
|
||||
<*> (line "trailer" *> dictionary <* EOL.parser)
|
||||
<*> (line "startxref" *> integer)
|
||||
-- <*> (line "startxref" *> integer)
|
||||
|
||||
xrefFromOffsets :: Map Offset Int64 -> (XRefSection, Int64)
|
||||
xrefFromOffsets offsets = (
|
||||
[]
|
||||
, offsets ! StartXRef
|
||||
)
|
||||
|
||||
--
|
||||
-- Flow
|
||||
--
|
||||
data Flow = Flow {
|
||||
occurrencesStack :: [Occurrence]
|
||||
, tmpObjects :: Map Int Object
|
||||
} deriving Show
|
||||
|
||||
--
|
||||
-- Content
|
||||
--
|
||||
data Content = Content {
|
||||
occurrences :: [Occurrence]
|
||||
, objects :: Map Int Object
|
||||
, trailer :: Dictionary
|
||||
} deriving Show
|
||||
|
||||
outputBody :: ([Occurrence], Map Int Object) -> OBuilder
|
||||
outputBody (occurrences, objects) =
|
||||
output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef
|
||||
|
||||
instance Output Content where
|
||||
output (Content {occurrences, objects, trailer}) =
|
||||
fmap xrefFromOffsets <$> getOffsets (outputBody (occurrences, objects))
|
||||
>>= \(body, (xref, startXRef)) ->
|
||||
body
|
||||
`mappend` "xref"
|
||||
`nextLine` "trailer"
|
||||
--`nextLine` output xrefSection
|
||||
--`mappend` "trailer"
|
||||
`nextLine` output trailer
|
||||
`nextLine` "startxref"
|
||||
`nextLine` (Output.string (printf "%d" startXRef))
|
||||
`nextLine` byteString eofMarker
|
||||
|
|
|
@ -1,42 +1,82 @@
|
|||
{-# LANGUAGE CPP #-}
|
||||
{-# LANGUAGE MultiParamTypeClasses #-}
|
||||
{-# LANGUAGE FlexibleInstances #-}
|
||||
{-# LANGUAGE AllowAmbiguousTypes #-}
|
||||
module PDF.Output (
|
||||
OBuilder(..)
|
||||
OBuilder
|
||||
, OContext(..)
|
||||
, Offset(..)
|
||||
, Output(..)
|
||||
, byteString
|
||||
, char
|
||||
, getOffsets
|
||||
, join
|
||||
, lift
|
||||
, newLine
|
||||
, nextLine
|
||||
, saveOffset
|
||||
, string
|
||||
, render
|
||||
) where
|
||||
|
||||
--import Data.ByteString.Builder (Builder, char8, lazyByteString, string8)
|
||||
import Data.ByteString.Builder (Builder, char8, lazyByteString, string8, toLazyByteString)
|
||||
import Data.ByteString.Lazy.Char8 (ByteString)
|
||||
import qualified Data.ByteString.Lazy.Char8 as BS (length)
|
||||
import Data.Int (Int64)
|
||||
import Data.Map (Map)
|
||||
import qualified Data.Map as Map (singleton)
|
||||
-- #if MIN_VERSION_base(4,9,0)
|
||||
-- import qualified Data.Semigroup as Sem
|
||||
-- #endif
|
||||
import Data.String (IsString(..))
|
||||
import Control.Monad.Reader (MonadReader(..), Reader, runReader)
|
||||
--import Control.Monad.Reader (MonadReader(..), Reader, runReader)
|
||||
--import Control.Monad.Reader (MonadReader(..), Reader, runReader)
|
||||
--import Control.Monad.Reader (MonadReader(..), Reader, runReader)
|
||||
import Control.Monad.RWS (RWS, runRWS, ask, get, listen, modify, tell)
|
||||
import qualified PDF.EOL as EOL (Style(..))
|
||||
|
||||
newtype OBuilder = OBuilder (Reader EOL.Style Builder)
|
||||
data Offset = StartXRef | ObjectId Int deriving (Eq, Ord)
|
||||
|
||||
{-
|
||||
incrOffset :: (Int64 -> Int64) -> OutputState
|
||||
-}
|
||||
|
||||
newtype OContext a = OContext (RWS EOL.Style (Map Offset Int64) Int64 a)
|
||||
type OBuilder = OContext Builder
|
||||
|
||||
instance Functor OContext where
|
||||
fmap f (OContext oc) = OContext $ fmap f oc
|
||||
|
||||
instance Applicative OContext where
|
||||
pure = OContext . pure
|
||||
(<*>) (OContext f) (OContext a) = OContext (f <*> a)
|
||||
|
||||
instance Monad OContext where
|
||||
(>>=) (OContext a) f = OContext (a >>= (\x -> let OContext y = f x in y))
|
||||
|
||||
saveOffset :: Offset -> OContext ()
|
||||
saveOffset offset = OContext $
|
||||
get >>= tell . Map.singleton offset
|
||||
|
||||
lift :: (a -> Builder) -> a -> OBuilder
|
||||
lift f a = OBuilder $ return (f a)
|
||||
lift f a = return (f a)
|
||||
|
||||
getOffsets :: OBuilder -> OContext (OBuilder, Map Offset Int64)
|
||||
getOffsets (OContext builder) =
|
||||
OContext (listen builder >>= \(a, w) -> return (return a, w))
|
||||
|
||||
append :: OBuilder -> OBuilder -> OBuilder
|
||||
append (OBuilder a) (OBuilder b) = OBuilder (mappend <$> a <*> b)
|
||||
append (OContext a) (OContext b) = OContext (mappend <$> a <*> b)
|
||||
|
||||
#if MIN_VERSION_base(4,9,0)
|
||||
#if MIN_VERSION_base(4,11,0)
|
||||
instance Semigroup OBuilder where
|
||||
(<>) = append
|
||||
#endif
|
||||
|
||||
instance Monoid OBuilder where
|
||||
mempty = OBuilder (return mempty)
|
||||
#if !(MIN_VERSION_base(4,9,0))
|
||||
mempty = OContext (return mempty)
|
||||
#if !(MIN_VERSION_base(4,11,0))
|
||||
mappend = append
|
||||
#endif
|
||||
|
||||
|
@ -63,25 +103,26 @@ join separator (a:as) =
|
|||
output a `mappend` string separator `mappend` (join separator as)
|
||||
|
||||
newLine :: OBuilder
|
||||
newLine = OBuilder $ buildEOL <$> ask
|
||||
newLine = OContext $ buildEOL =<< ask
|
||||
where
|
||||
buildEOL EOL.CR = char8 '\r'
|
||||
buildEOL EOL.LF = char8 '\n'
|
||||
buildEOL EOL.CRLF = string8 "\r\n"
|
||||
buildEOL EOL.CR = return (char8 '\r') <* modify (+1)
|
||||
buildEOL EOL.LF = return (char8 '\n') <* modify (+1)
|
||||
buildEOL EOL.CRLF = return (string8 "\r\n") <* modify (+2)
|
||||
|
||||
nextLine :: OBuilder -> OBuilder -> OBuilder
|
||||
nextLine a b = a `mappend` newLine `mappend` b
|
||||
|
||||
char :: Char -> OBuilder
|
||||
char = lift char8
|
||||
char c = lift char8 c <* OContext (modify (+1))
|
||||
|
||||
string :: String -> OBuilder
|
||||
string = lift string8
|
||||
string s = lift string8 s <* OContext (modify (+ toEnum (length s)))
|
||||
|
||||
byteString :: ByteString -> OBuilder
|
||||
byteString = lift lazyByteString
|
||||
byteString bs = lift lazyByteString bs <* OContext (modify (+ BS.length bs))
|
||||
|
||||
render :: Output a => EOL.Style -> a -> ByteString
|
||||
render eolStyle a =
|
||||
let OBuilder r = output a in
|
||||
toLazyByteString $ runReader r eolStyle
|
||||
let OContext builder = output a in
|
||||
let (outputByteString, _, _) = runRWS builder eolStyle 0 in
|
||||
toLazyByteString outputByteString
|
||||
|
|
Loading…
Reference in a new issue