Fix output implementation with dynamic XRefs

This commit is contained in:
Tissevert 2019-05-17 16:14:06 +02:00
parent e23618da68
commit 0336baa687
4 changed files with 345 additions and 197 deletions

View file

@ -1,8 +1,8 @@
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module PDF ( module PDF (
Document(..) Document(..)
, Content(..)
, DirectObject(..) , DirectObject(..)
, parseDocument , parseDocument
, render , render
@ -17,7 +17,10 @@ import Data.Int (Int64)
import qualified Data.Map as Map (lookup) import qualified Data.Map as Map (lookup)
import PDF.Body (populate) import PDF.Body (populate)
import qualified PDF.EOL as EOL (Style(..), charset, parser) import qualified PDF.EOL as EOL (Style(..), charset, parser)
import PDF.Object (Content(..), DirectObject(..), content, eofMarker, magicNumber) import PDF.Object (
Content(..), DirectObject(..), Name(..), Number(..), Structure(..)
, eofMarker, magicNumber, structure
)
import qualified PDF.Output as Output (render, string) import qualified PDF.Output as Output (render, string)
import PDF.Output (Output(..), nextLine) import PDF.Output (Output(..), nextLine)
import Text.Parsec import Text.Parsec
@ -86,23 +89,23 @@ findNextSection offset input =
then newOffset + findNextLine newInput then newOffset + findNextLine newInput
else findNextSection (newOffset + 1) (BS.drop 1 newInput) else findNextSection (newOffset + 1) (BS.drop 1 newInput)
iterateContents :: Int64 -> ByteString -> Either ParseError [Content] readStructures :: Int64 -> ByteString -> Either ParseError [Structure]
iterateContents startXref input = readStructures startXref input =
parse content ('@':show startXref) (BS.drop startXref input) >>= stopOrFollow parse structure ('@':show startXref) (BS.drop startXref input) >>= stopOrFollow
where where
stopOrFollow c@(Content {trailer}) = stopOrFollow c@(Structure {inputTrailer}) =
case Map.lookup "Prev" trailer of case Map.lookup (Name "Prev") inputTrailer of
Nothing -> Right [c {startOffset = findNextLine input}] Nothing -> Right [c {startOffset = findNextLine input}]
Just (Number newStartXref) -> Just (NumberObject (Number newStartXref)) ->
let offset = truncate newStartXref in let offset = truncate newStartXref in
let startOffset = findNextSection offset (BS.drop offset input) in let startOffset = findNextSection offset (BS.drop offset input) in
(c {startOffset}:) <$> (iterateContents offset input) (c {startOffset}:) <$> (readStructures offset input)
Just v -> parseError $ "Bad value for Prev entry in trailer: " ++ show v Just v -> parseError $ "Bad value for Prev entry in trailer: " ++ show v
parseDocument :: ByteString -> Either ParseError Document parseDocument :: ByteString -> Either ParseError Document
parseDocument input = do parseDocument input = do
(pdfVersion, eolStyle) <- parse ((,) <$> version <*> EOL.parser) "" input (pdfVersion, eolStyle) <- parse ((,) <$> version <*> EOL.parser) "" input
startXref <- readStartXref eolStyle input startXref <- readStartXref eolStyle input
structures <- iterateContents startXref input structuresRead <- readStructures startXref input
let contents = populate input <$> structures let contents = populate input <$> structuresRead
return $ Document {pdfVersion, eolStyle, contents} return $ Document {pdfVersion, eolStyle, contents}

View file

@ -7,33 +7,35 @@ import Data.ByteString.Lazy.Char8 (ByteString)
import qualified Data.ByteString.Lazy.Char8 as BS (drop, pack) import qualified Data.ByteString.Lazy.Char8 as BS (drop, pack)
import Data.Int (Int64) import Data.Int (Int64)
import Data.Map ((!)) import Data.Map ((!))
import qualified Data.Map as Map (insert, lookup) import qualified Data.Map as Map (empty, insert, lookup)
import qualified PDF.EOL as EOL (charset, parser) import qualified PDF.EOL as EOL (charset, parser)
import PDF.Object ( import PDF.Object (
Content(..), DirectObject(..), IndirectObjCoordinates(..), Object(..) Content(..), DirectObject(..), Flow(..), IndirectObjCoordinates(..), Name(..)
, Occurrence(..), Parser, XRefEntry(..), XRefSection, XRefSubSection(..) , Number(..), Object(..), Occurrence(..), Parser, Structure(..)
, XRefEntry(..), XRefSection, XRefSubSection(..)
, blank, dictionary, directObject, integer, line , blank, dictionary, directObject, integer, line
) )
import Text.Parsec import Text.Parsec
data UserState = UserState { data UserState = UserState {
input :: ByteString input :: ByteString
, content :: Content , structure :: Structure
, flow :: Flow
} }
type SParser = Parser UserState type SParser = Parser UserState
modifyContent :: (Content -> Content) -> SParser () modifyFlow :: (Flow -> Flow) -> SParser ()
modifyContent f = modifyState $ \state -> state {content = f $ content state} modifyFlow f = modifyState $ \state -> state {flow = f $ flow state}
addObject :: Int -> Object -> SParser () addObject :: Int -> Object -> SParser ()
addObject objectId newObject = modifyContent $ \content -> content { addObject objectId newObject = modifyFlow $ \flow -> flow {
objects = Map.insert objectId newObject $ objects content tmpObjects = Map.insert objectId newObject $ tmpObjects flow
} }
pushOccurrence :: Occurrence -> SParser () pushOccurrence :: Occurrence -> SParser ()
pushOccurrence newOccurrence = modifyContent $ \content -> content { pushOccurrence newOccurrence = modifyFlow $ \flow -> flow {
body = newOccurrence : (body content) occurrencesStack = newOccurrence : (occurrencesStack flow)
} }
comment :: Parser u String comment :: Parser u String
@ -53,7 +55,7 @@ lookupOffset objectId (xrefSubSection:others) =
getOffset :: Int -> SParser Int64 getOffset :: Int -> SParser Int64
getOffset objectId = do getOffset objectId = do
Content {xrefSection} <- content <$> getState Structure {xrefSection} <- structure <$> getState
case lookupOffset objectId xrefSection of case lookupOffset objectId xrefSection of
Nothing -> fail $ Nothing -> fail $
"obj " ++ show objectId ++ " is referenced but missing in XRef table" "obj " ++ show objectId ++ " is referenced but missing in XRef table"
@ -69,9 +71,9 @@ loadNumber objectId = do
offset <- getOffset objectId offset <- getOffset objectId
objectStart <- BS.drop offset . input <$> getState objectStart <- BS.drop offset . input <$> getState
indirectObjCoordinates `on` objectStart >> return () indirectObjCoordinates `on` objectStart >> return ()
objectValue <- (!objectId) . objects . content <$> getState objectValue <- (!objectId) . tmpObjects . flow <$> getState
case objectValue of case objectValue of
Direct (Number n) -> return n Direct (NumberObject (Number n)) -> return n
obj -> fail $ "Expected obj@" ++ show offset ++ " (" ++ show obj ++ ") to be a Number" obj -> fail $ "Expected obj@" ++ show offset ++ " (" ++ show obj ++ ") to be a Number"
invalidValue :: Object -> String invalidValue :: Object -> String
@ -79,12 +81,12 @@ invalidValue v = "Invalid value " ++ show v
getSize :: Maybe DirectObject -> SParser Float getSize :: Maybe DirectObject -> SParser Float
getSize Nothing = fail "Missing '/Length' key on stream" getSize Nothing = fail "Missing '/Length' key on stream"
getSize (Just (Number size)) = return size getSize (Just (NumberObject (Number size))) = return size
getSize (Just (Reference (IndirectObjCoordinates {objectId}))) = do getSize (Just (Reference (IndirectObjCoordinates {objectId}))) = do
Content {objects} <- content <$> getState Flow {tmpObjects} <- flow <$> getState
case Map.lookup objectId objects of case Map.lookup objectId tmpObjects of
Nothing -> loadNumber objectId Nothing -> loadNumber objectId
Just (Direct (Number size)) -> return size Just (Direct (NumberObject (Number size))) -> return size
Just v -> fail $ Just v -> fail $
invalidValue v ++ " for obj " ++ show objectId ++ "used as /Length" invalidValue v ++ " for obj " ++ show objectId ++ "used as /Length"
getSize (Just v) = fail $ invalidValue (Direct v) ++ " for /Length" getSize (Just v) = fail $ invalidValue (Direct v) ++ " for /Length"
@ -92,7 +94,7 @@ getSize (Just v) = fail $ invalidValue (Direct v) ++ " for /Length"
streamObject :: SParser Object streamObject :: SParser Object
streamObject = try $ do streamObject = try $ do
header <- dictionary <* blank header <- dictionary <* blank
size <- getSize (Map.lookup "Length" header) size <- getSize (Map.lookup (Name "Length") header)
streamContent <- BS.pack <$> stream (truncate size) streamContent <- BS.pack <$> stream (truncate size)
return $ Stream {header, streamContent} return $ Stream {header, streamContent}
where where
@ -112,16 +114,20 @@ indirectObjCoordinates = do
occurrence :: SParser Occurrence occurrence :: SParser Occurrence
occurrence = Comment <$> comment <|> Indirect <$> indirectObjCoordinates occurrence = Comment <$> comment <|> Indirect <$> indirectObjCoordinates
populate :: ByteString -> Content -> Content populate :: ByteString -> Structure -> Content
populate input initialContent = populate input structure =
let bodyInput = BS.drop (startOffset initialContent) input in let bodyInput = BS.drop (startOffset structure) input in
case runParser recurseOnOccurrences initialState "" bodyInput of case runParser recurseOnOccurrences initialState "" bodyInput of
Left _ -> initialContent Left _ -> emptyContent
Right finalState -> Right finalState ->
let finalContent = content finalState in let Flow {occurrencesStack, tmpObjects} = flow finalState in
finalContent {body = reverse (body finalContent)} Content {occurrences = reverse occurrencesStack, objects = tmpObjects, trailer}
where where
initialState = UserState {input, content = initialContent} trailer = inputTrailer structure
emptyContent = Content {occurrences = [], objects = Map.empty, trailer}
initialState = UserState {
input, structure, flow = Flow {occurrencesStack = [], tmpObjects = Map.empty}
}
recurseOnOccurrences :: SParser UserState recurseOnOccurrences :: SParser UserState
recurseOnOccurrences = recurseOnOccurrences =

View file

@ -1,159 +1,46 @@
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module PDF.Object ( module PDF.Object (
Content(..) Content(..)
, DirectObject(..) , DirectObject(..)
, Flow(..)
, IndirectObjCoordinates(..) , IndirectObjCoordinates(..)
, Name(..)
, Number(..)
, Object(..) , Object(..)
, Occurrence(..) , Occurrence(..)
, Parser , Parser
, Structure(..)
, XRefEntry(..) , XRefEntry(..)
, XRefSection , XRefSection
, XRefSubSection(..) , XRefSubSection(..)
, blank , blank
, content
, dictionary , dictionary
, directObject , directObject
, eofMarker , eofMarker
, integer , integer
, line , line
, magicNumber , magicNumber
, structure
) where ) where
import Data.ByteString.Lazy.Char8 (ByteString) import Data.ByteString.Lazy.Char8 (ByteString)
import Data.Int (Int64) import Data.Int (Int64)
import Data.Map (Map, (!)) import Data.Map (Map, (!))
import qualified Data.Map as Map (empty, elems, fromList, toList) import qualified Data.Map as Map (elems, fromList, toList)
import qualified PDF.EOL as EOL (charset, parser) import qualified PDF.EOL as EOL (charset, parser)
import qualified PDF.Output as Output (string) import qualified PDF.Output as Output (string)
import PDF.Output (OBuilder, Output(..), byteString, join, newLine, nextLine) import PDF.Output (
OBuilder, Offset(..), Output(..)
, byteString, getOffsets, join, newLine, nextLine, saveOffset
)
import Text.Parsec import Text.Parsec
import Text.Printf (printf) import Text.Printf (printf)
type Parser u = Parsec ByteString u type Parser u = Parsec ByteString u
type Dictionary = Map String DirectObject
instance Output (Map String DirectObject) where
output dict =
"<<" `mappend` keyValues `mappend` ">>"
where
keyValues = join " " $ outputKeyVal <$> Map.toList dict
outputKeyVal (key, val) =
Output.string (printf "/%s " key) `mappend` output val
data StringObj = Literal String | Hexadecimal String deriving Show
instance Output StringObj where
output (Literal s) = Output.string (printf "(%s)" s)
output (Hexadecimal s) = Output.string (printf "<%s>" s)
data DirectObject =
Boolean Bool
| Number Float
| String StringObj
| Name String
| Array [DirectObject]
| Dictionary Dictionary
| Null
| Reference IndirectObjCoordinates
deriving Show
outputFloat :: Float -> OBuilder
outputFloat f = Output.string $
case properFraction f of
(n, 0) -> printf "%d" (n :: Int)
_ -> printf "%f" f
instance Output DirectObject where
output (Boolean b) = output b
output (Number n) = outputFloat n
output (String s) = output s
output (Name n) = "/" `mappend` Output.string n
output (Array a) = "[" `mappend` join " " a `mappend` "]"
output (Dictionary d) = output d
output (Null) = "null"
output (Reference (IndirectObjCoordinates {objectId, versionNumber})) =
Output.string (printf "%d %d R" objectId versionNumber)
data Object =
Direct DirectObject
| Stream {
header :: Dictionary
, streamContent :: ByteString
}
deriving Show
instance Output Object where
output (Direct d) = output d
output (Stream {header, streamContent}) =
output header
`nextLine` "stream"
`nextLine` byteString streamContent
`mappend` "endstream"
data IndirectObjCoordinates = IndirectObjCoordinates {
objectId :: Int
, versionNumber :: Int
} deriving Show
data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show
outputOccurrence :: Map Int Object -> Occurrence -> OBuilder
outputOccurrence _ (Comment c) =
Output.string (printf "%%%s" c) `mappend` newLine
outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) =
Output.string (printf "%d %d obj" objectId versionNumber)
`nextLine` output (objects ! objectId)
`nextLine` "endobj" `mappend` newLine
data XRefEntry = InUse {
offset :: Int64
, generation :: Int
} | Free {
nextFree :: Int64
, generation :: Int
} deriving Show
instance Output XRefEntry where
output (InUse {offset, generation}) =
Output.string (printf "%010d %05d n " offset generation) `mappend` newLine
output (Free {nextFree, generation}) =
Output.string (printf "%010d %05d f " nextFree generation) `mappend` newLine
data XRefSubSection = XRefSubSection {
firstObjectId :: Int
, entriesNumber :: Int
, entries :: Map Int XRefEntry
} deriving Show
instance Output XRefSubSection where
output (XRefSubSection {firstObjectId, entriesNumber, entries}) =
Output.string (printf "%d %d" firstObjectId entriesNumber)
`nextLine` output (Map.elems entries)
type XRefSection = [XRefSubSection]
data Content = Content {
startOffset :: Int64
, body :: [Occurrence]
, objects :: Map Int Object
, xrefSection :: XRefSection
, trailer :: Dictionary
, startXrefPosition :: Int64
} deriving Show
instance Output Content where
output (Content {body, objects, trailer, xrefSection, startXrefPosition}) =
output (outputOccurrence objects <$> body)
`mappend` output xrefSection
`mappend` "trailer"
`nextLine` output trailer
`nextLine` "startxref"
`nextLine` Output.string (printf "%d" startXrefPosition)
`nextLine` byteString eofMarker
line :: String -> Parser u () line :: String -> Parser u ()
line l = string l *> EOL.parser *> return () line l = string l *> EOL.parser *> return ()
@ -181,29 +68,46 @@ regular = noneOf $ EOL.charset ++ whiteSpaceCharset ++ delimiterCharset
integer :: (Read a, Num a) => Parser u a integer :: (Read a, Num a) => Parser u a
integer = read <$> many1 digit <* whiteSpace integer = read <$> many1 digit <* whiteSpace
directObject :: Parser u DirectObject -------------------------------------
directObject = -- OBJECTS
Boolean <$> try boolean -------------------------------------
<|> Reference <$> try reference {- defined before Number because Number is a prefix of it -}
<|> Number <$> try number
<|> String <$> try stringObj
<|> Name <$> try name
<|> Array <$> try array
<|> Dictionary <$> try dictionary
<|> const Null <$> try nullObject
--
-- Boolean
--
boolean :: Parser u Bool boolean :: Parser u Bool
boolean = (string "true" *> return True) <|> (string "false" *> return False) boolean = (string "true" *> return True) <|> (string "false" *> return False)
number :: Parser u Float --
number = read <$> (mappend <$> sign <*> (integerPart <|> ('0':) <$> floatPart)) -- Number
--
newtype Number = Number Float deriving Show
instance Output Number where
output (Number f) = Output.string $
case properFraction f of
(n, 0) -> printf "%d" (n :: Int)
_ -> printf "%f" f
number :: Parser u Number
number = Number . read <$>
(mappend <$> sign <*> (integerPart <|> ('0':) <$> floatPart))
where where
sign = string "-" <|> option "" (char '+' >> return "") sign = string "-" <|> option "" (char '+' >> return "")
integerPart = mappend <$> many1 digit <*> option "" floatPart integerPart = mappend <$> many1 digit <*> option "" floatPart
floatPart = (:) <$> char '.' <*> (option "0" $ many1 digit) floatPart = (:) <$> char '.' <*> (option "0" $ many1 digit)
stringObj :: Parser u StringObj --
stringObj = -- StringObject
--
data StringObject = Literal String | Hexadecimal String deriving Show
instance Output StringObject where
output (Literal s) = Output.string (printf "(%s)" s)
output (Hexadecimal s) = Output.string (printf "<%s>" s)
stringObject :: Parser u StringObject
stringObject =
Literal <$> (char '(' *> (concat <$> many literalStringBlock) <* char ')') Literal <$> (char '(' *> (concat <$> many literalStringBlock) <* char ')')
<|> Hexadecimal <$> (char '<' *> many hexDigit <* char '>') <|> Hexadecimal <$> (char '<' *> many hexDigit <* char '>')
where where
@ -213,12 +117,36 @@ stringObj =
escapedChar = (:) <$> char '\\' <*> ((:[]) <$> oneOf "nrtbf()\\" <|> octalCode) escapedChar = (:) <$> char '\\' <*> ((:[]) <$> oneOf "nrtbf()\\" <|> octalCode)
octalCode = choice $ (\n -> count n octDigit) <$> [1..3] octalCode = choice $ (\n -> count n octDigit) <$> [1..3]
name :: Parser u String --
name = char '/' *> many regular -- Name
--
newtype Name = Name String deriving (Eq, Ord, Show)
instance Output Name where
output (Name n) = "/" `mappend` Output.string n
name :: Parser u Name
name = Name <$> (char '/' *> many regular)
--
-- Array
--
array :: Parser u [DirectObject] array :: Parser u [DirectObject]
array = char '[' *> blank *> directObject `endBy` blank <* char ']' array = char '[' *> blank *> directObject `endBy` blank <* char ']'
--
-- Dictionary
--
type Dictionary = Map Name DirectObject
instance Output Dictionary where
output dict =
"<<" `mappend` keyValues `mappend` ">>"
where
keyValues = join " " $ outputKeyVal <$> Map.toList dict
outputKeyVal :: (Name, DirectObject) -> OBuilder
outputKeyVal (key, val) = output key `mappend` " " `mappend` output val
dictionary :: Parser u Dictionary dictionary :: Parser u Dictionary
dictionary = dictionary =
try (string "<<" *> blank *> keyValPairs <* string ">>") try (string "<<" *> blank *> keyValPairs <* string ">>")
@ -226,12 +154,114 @@ dictionary =
keyVal = (,) <$> name <* blank <*> directObject keyVal = (,) <$> name <* blank <*> directObject
keyValPairs = Map.fromList <$> keyVal `endBy` blank keyValPairs = Map.fromList <$> keyVal `endBy` blank
--
-- Null
--
nullObject :: Parser u () nullObject :: Parser u ()
nullObject = string "null" *> return () nullObject = string "null" *> return ()
--
-- Reference
--
data IndirectObjCoordinates = IndirectObjCoordinates {
objectId :: Int
, versionNumber :: Int
} deriving Show
reference :: Parser u IndirectObjCoordinates reference :: Parser u IndirectObjCoordinates
reference = IndirectObjCoordinates <$> integer <*> integer <* char 'R' reference = IndirectObjCoordinates <$> integer <*> integer <* char 'R'
--
-- DirectObject
--
data DirectObject =
Boolean Bool
| NumberObject Number
| StringObject StringObject
| NameObject Name
| Array [DirectObject]
| Dictionary Dictionary
| Null
| Reference IndirectObjCoordinates
deriving Show
instance Output DirectObject where
output (Boolean b) = output b
output (NumberObject n) = output n
output (StringObject s) = output s
output (NameObject n) = output n
output (Array a) = "[" `mappend` join " " a `mappend` "]"
output (Dictionary d) = output d
output (Null) = "null"
output (Reference (IndirectObjCoordinates {objectId, versionNumber})) =
Output.string (printf "%d %d R" objectId versionNumber)
directObject :: Parser u DirectObject
directObject =
Boolean <$> try boolean
<|> Reference <$> try reference {- defined before Number because Number is a prefix of it -}
<|> NumberObject <$> try number
<|> StringObject <$> try stringObject
<|> NameObject <$> try name
<|> Array <$> try array
<|> Dictionary <$> try dictionary
<|> const Null <$> try nullObject
--
-- Object
--
data Object =
Direct DirectObject
| Stream {
header :: Dictionary
, streamContent :: ByteString
}
deriving Show
instance Output Object where
output (Direct d) = output d
output (Stream {header, streamContent}) =
output header
`nextLine` "stream"
`nextLine` byteString streamContent
`mappend` "endstream"
--
-- Occurrence
--
data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show
outputOccurrence :: Map Int Object -> Occurrence -> OBuilder
outputOccurrence _ (Comment c) =
Output.string (printf "%%%s" c) `mappend` newLine
outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) =
saveOffset (ObjectId objectId)
>> Output.string (printf "%d %d obj" objectId versionNumber)
`nextLine` output (objects ! objectId)
`nextLine` "endobj" `mappend` newLine
-------------------------------------
-- XREF TABLE
-------------------------------------
--
-- XRefEntry
--
data XRefEntry = InUse {
offset :: Int64
, generation :: Int
} | Free {
nextFree :: Int64
, generation :: Int
} deriving Show
instance Output XRefEntry where
output (InUse {offset, generation}) =
Output.string (printf "%010d %05d n " offset generation) `mappend` newLine
output (Free {nextFree, generation}) =
Output.string (printf "%010d %05d f " nextFree generation) `mappend` newLine
entry :: Parser u XRefEntry entry :: Parser u XRefEntry
entry = do entry = do
(big, small) <- (,) <$> integer <*> integer (big, small) <- (,) <$> integer <*> integer
@ -242,15 +272,83 @@ entry = do
free :: Int64 -> Int -> Parser u XRefEntry free :: Int64 -> Int -> Parser u XRefEntry
free nextFree generation = char 'f' *> return (Free {nextFree, generation}) free nextFree generation = char 'f' *> return (Free {nextFree, generation})
--
-- XRefSubSection
--
data XRefSubSection = XRefSubSection {
firstObjectId :: Int
, entriesNumber :: Int
, entries :: Map Int XRefEntry
} deriving Show
instance Output XRefSubSection where
output (XRefSubSection {firstObjectId, entriesNumber, entries}) =
Output.string (printf "%d %d" firstObjectId entriesNumber)
`nextLine` output (Map.elems entries)
xrefSubSection :: Parser u XRefSubSection xrefSubSection :: Parser u XRefSubSection
xrefSubSection = do xrefSubSection = do
(firstObjectId, entriesNumber) <- (,) <$> integer <*> integer (firstObjectId, entriesNumber) <- (,) <$> integer <*> integer
entries <- Map.fromList . zip [firstObjectId..] <$> count entriesNumber entry entries <- Map.fromList . zip [firstObjectId..] <$> count entriesNumber entry
return $ XRefSubSection {firstObjectId, entriesNumber, entries} return $ XRefSubSection {firstObjectId, entriesNumber, entries}
content :: Parser u Content type XRefSection = [XRefSubSection]
content =
Content 0 [] Map.empty --
-- Structure
--
data Structure = Structure {
startOffset :: Int64
, xrefSection :: XRefSection
, inputTrailer :: Dictionary
--, startXRef :: Int64
} deriving Show
structure :: Parser u Structure
structure =
Structure 0
<$> (line "xref" *> xrefSubSection `sepBy` EOL.parser) <$> (line "xref" *> xrefSubSection `sepBy` EOL.parser)
<*> (line "trailer" *> dictionary <* EOL.parser) <*> (line "trailer" *> dictionary <* EOL.parser)
<*> (line "startxref" *> integer) -- <*> (line "startxref" *> integer)
xrefFromOffsets :: Map Offset Int64 -> (XRefSection, Int64)
xrefFromOffsets offsets = (
[]
, offsets ! StartXRef
)
--
-- Flow
--
data Flow = Flow {
occurrencesStack :: [Occurrence]
, tmpObjects :: Map Int Object
} deriving Show
--
-- Content
--
data Content = Content {
occurrences :: [Occurrence]
, objects :: Map Int Object
, trailer :: Dictionary
} deriving Show
outputBody :: ([Occurrence], Map Int Object) -> OBuilder
outputBody (occurrences, objects) =
output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef
instance Output Content where
output (Content {occurrences, objects, trailer}) =
fmap xrefFromOffsets <$> getOffsets (outputBody (occurrences, objects))
>>= \(body, (xref, startXRef)) ->
body
`mappend` "xref"
`nextLine` "trailer"
--`nextLine` output xrefSection
--`mappend` "trailer"
`nextLine` output trailer
`nextLine` "startxref"
`nextLine` (Output.string (printf "%d" startXRef))
`nextLine` byteString eofMarker

View file

@ -1,42 +1,82 @@
{-# LANGUAGE CPP #-} {-# LANGUAGE CPP #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE AllowAmbiguousTypes #-}
module PDF.Output ( module PDF.Output (
OBuilder(..) OBuilder
, OContext(..)
, Offset(..)
, Output(..) , Output(..)
, byteString , byteString
, char , char
, getOffsets
, join , join
, lift , lift
, newLine , newLine
, nextLine , nextLine
, saveOffset
, string , string
, render , render
) where ) where
--import Data.ByteString.Builder (Builder, char8, lazyByteString, string8)
import Data.ByteString.Builder (Builder, char8, lazyByteString, string8, toLazyByteString) import Data.ByteString.Builder (Builder, char8, lazyByteString, string8, toLazyByteString)
import Data.ByteString.Lazy.Char8 (ByteString) import Data.ByteString.Lazy.Char8 (ByteString)
import qualified Data.ByteString.Lazy.Char8 as BS (length)
import Data.Int (Int64)
import Data.Map (Map)
import qualified Data.Map as Map (singleton)
-- #if MIN_VERSION_base(4,9,0) -- #if MIN_VERSION_base(4,9,0)
-- import qualified Data.Semigroup as Sem -- import qualified Data.Semigroup as Sem
-- #endif -- #endif
import Data.String (IsString(..)) import Data.String (IsString(..))
import Control.Monad.Reader (MonadReader(..), Reader, runReader) --import Control.Monad.Reader (MonadReader(..), Reader, runReader)
--import Control.Monad.Reader (MonadReader(..), Reader, runReader)
--import Control.Monad.Reader (MonadReader(..), Reader, runReader)
import Control.Monad.RWS (RWS, runRWS, ask, get, listen, modify, tell)
import qualified PDF.EOL as EOL (Style(..)) import qualified PDF.EOL as EOL (Style(..))
newtype OBuilder = OBuilder (Reader EOL.Style Builder) data Offset = StartXRef | ObjectId Int deriving (Eq, Ord)
{-
incrOffset :: (Int64 -> Int64) -> OutputState
-}
newtype OContext a = OContext (RWS EOL.Style (Map Offset Int64) Int64 a)
type OBuilder = OContext Builder
instance Functor OContext where
fmap f (OContext oc) = OContext $ fmap f oc
instance Applicative OContext where
pure = OContext . pure
(<*>) (OContext f) (OContext a) = OContext (f <*> a)
instance Monad OContext where
(>>=) (OContext a) f = OContext (a >>= (\x -> let OContext y = f x in y))
saveOffset :: Offset -> OContext ()
saveOffset offset = OContext $
get >>= tell . Map.singleton offset
lift :: (a -> Builder) -> a -> OBuilder lift :: (a -> Builder) -> a -> OBuilder
lift f a = OBuilder $ return (f a) lift f a = return (f a)
getOffsets :: OBuilder -> OContext (OBuilder, Map Offset Int64)
getOffsets (OContext builder) =
OContext (listen builder >>= \(a, w) -> return (return a, w))
append :: OBuilder -> OBuilder -> OBuilder append :: OBuilder -> OBuilder -> OBuilder
append (OBuilder a) (OBuilder b) = OBuilder (mappend <$> a <*> b) append (OContext a) (OContext b) = OContext (mappend <$> a <*> b)
#if MIN_VERSION_base(4,9,0) #if MIN_VERSION_base(4,11,0)
instance Semigroup OBuilder where instance Semigroup OBuilder where
(<>) = append (<>) = append
#endif #endif
instance Monoid OBuilder where instance Monoid OBuilder where
mempty = OBuilder (return mempty) mempty = OContext (return mempty)
#if !(MIN_VERSION_base(4,9,0)) #if !(MIN_VERSION_base(4,11,0))
mappend = append mappend = append
#endif #endif
@ -63,25 +103,26 @@ join separator (a:as) =
output a `mappend` string separator `mappend` (join separator as) output a `mappend` string separator `mappend` (join separator as)
newLine :: OBuilder newLine :: OBuilder
newLine = OBuilder $ buildEOL <$> ask newLine = OContext $ buildEOL =<< ask
where where
buildEOL EOL.CR = char8 '\r' buildEOL EOL.CR = return (char8 '\r') <* modify (+1)
buildEOL EOL.LF = char8 '\n' buildEOL EOL.LF = return (char8 '\n') <* modify (+1)
buildEOL EOL.CRLF = string8 "\r\n" buildEOL EOL.CRLF = return (string8 "\r\n") <* modify (+2)
nextLine :: OBuilder -> OBuilder -> OBuilder nextLine :: OBuilder -> OBuilder -> OBuilder
nextLine a b = a `mappend` newLine `mappend` b nextLine a b = a `mappend` newLine `mappend` b
char :: Char -> OBuilder char :: Char -> OBuilder
char = lift char8 char c = lift char8 c <* OContext (modify (+1))
string :: String -> OBuilder string :: String -> OBuilder
string = lift string8 string s = lift string8 s <* OContext (modify (+ toEnum (length s)))
byteString :: ByteString -> OBuilder byteString :: ByteString -> OBuilder
byteString = lift lazyByteString byteString bs = lift lazyByteString bs <* OContext (modify (+ BS.length bs))
render :: Output a => EOL.Style -> a -> ByteString render :: Output a => EOL.Style -> a -> ByteString
render eolStyle a = render eolStyle a =
let OBuilder r = output a in let OContext builder = output a in
toLazyByteString $ runReader r eolStyle let (outputByteString, _, _) = runRWS builder eolStyle 0 in
toLazyByteString outputByteString