Generate valid PDF

This commit is contained in:
Tissevert 2019-05-18 09:01:13 +02:00
parent 0336baa687
commit 5614a25048
4 changed files with 49 additions and 38 deletions

View file

@ -1,4 +1,3 @@
{-# LANGUAGE NamedFieldPuns #-}
module Data.ByteString.Lazy.Char8.Util (
previous
, subBS

View file

@ -1,6 +1,4 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module PDF (
Document(..)
, DirectObject(..)
@ -18,7 +16,8 @@ import qualified Data.Map as Map (lookup)
import PDF.Body (populate)
import qualified PDF.EOL as EOL (Style(..), charset, parser)
import PDF.Object (
Content(..), DirectObject(..), Name(..), Number(..), Structure(..)
Content(..), DirectObject(..), InputStructure(..), Name(..), Number(..)
, Structure(..)
, eofMarker, magicNumber, structure
)
import qualified PDF.Output as Output (render, string)
@ -89,17 +88,17 @@ findNextSection offset input =
then newOffset + findNextLine newInput
else findNextSection (newOffset + 1) (BS.drop 1 newInput)
readStructures :: Int64 -> ByteString -> Either ParseError [Structure]
readStructures :: Int64 -> ByteString -> Either ParseError [InputStructure]
readStructures startXref input =
parse structure ('@':show startXref) (BS.drop startXref input) >>= stopOrFollow
where
stopOrFollow c@(Structure {inputTrailer}) =
case Map.lookup (Name "Prev") inputTrailer of
Nothing -> Right [c {startOffset = findNextLine input}]
stopOrFollow s@(Structure {trailer}) =
case Map.lookup (Name "Prev") trailer of
Nothing -> Right [InputStructure (findNextLine input) s]
Just (NumberObject (Number newStartXref)) ->
let offset = truncate newStartXref in
let startOffset = findNextSection offset (BS.drop offset input) in
(c {startOffset}:) <$> (readStructures offset input)
(InputStructure startOffset s:) <$> (readStructures offset input)
Just v -> parseError $ "Bad value for Prev entry in trailer: " ++ show v
parseDocument :: ByteString -> Either ParseError Document

View file

@ -10,16 +10,16 @@ import Data.Map ((!))
import qualified Data.Map as Map (empty, insert, lookup)
import qualified PDF.EOL as EOL (charset, parser)
import PDF.Object (
Content(..), DirectObject(..), Flow(..), IndirectObjCoordinates(..), Name(..)
, Number(..), Object(..), Occurrence(..), Parser, Structure(..)
, XRefEntry(..), XRefSection, XRefSubSection(..)
Content(..), DirectObject(..), Flow(..), IndirectObjCoordinates(..)
, InputStructure(..), Name(..), Number(..), Object(..), Occurrence(..)
, Parser, Structure(..), XRefEntry(..), XRefSection, XRefSubSection(..)
, blank, dictionary, directObject, integer, line
)
import Text.Parsec
data UserState = UserState {
input :: ByteString
, structure :: Structure
, xreferences :: XRefSection
, flow :: Flow
}
@ -55,8 +55,8 @@ lookupOffset objectId (xrefSubSection:others) =
getOffset :: Int -> SParser Int64
getOffset objectId = do
Structure {xrefSection} <- structure <$> getState
case lookupOffset objectId xrefSection of
table <- xreferences <$> getState
case lookupOffset objectId table of
Nothing -> fail $
"obj " ++ show objectId ++ " is referenced but missing in XRef table"
Just offset -> return offset
@ -114,19 +114,23 @@ indirectObjCoordinates = do
occurrence :: SParser Occurrence
occurrence = Comment <$> comment <|> Indirect <$> indirectObjCoordinates
populate :: ByteString -> Structure -> Content
populate :: ByteString -> InputStructure -> Content
populate input structure =
let bodyInput = BS.drop (startOffset structure) input in
case runParser recurseOnOccurrences initialState "" bodyInput of
Left _ -> emptyContent
Left _ -> Content {occurrences = [], objects = Map.empty, docStructure}
Right finalState ->
let Flow {occurrencesStack, tmpObjects} = flow finalState in
Content {occurrences = reverse occurrencesStack, objects = tmpObjects, trailer}
Content {
occurrences = reverse occurrencesStack, objects = tmpObjects, docStructure
}
where
trailer = inputTrailer structure
emptyContent = Content {occurrences = [], objects = Map.empty, trailer}
docStructure = inputStructure structure
xreferences = xrefSection docStructure
initialState = UserState {
input, structure, flow = Flow {occurrencesStack = [], tmpObjects = Map.empty}
input, xreferences, flow = Flow {
occurrencesStack = [], tmpObjects = Map.empty
}
}
recurseOnOccurrences :: SParser UserState

View file

@ -1,12 +1,12 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module PDF.Object (
Content(..)
, DirectObject(..)
, Flow(..)
, IndirectObjCoordinates(..)
, InputStructure(..)
, Name(..)
, Number(..)
, Object(..)
@ -28,7 +28,7 @@ module PDF.Object (
import Data.ByteString.Lazy.Char8 (ByteString)
import Data.Int (Int64)
import Data.Map (Map, (!))
import Data.Map (Map, (!), mapWithKey)
import qualified Data.Map as Map (elems, fromList, toList)
import qualified PDF.EOL as EOL (charset, parser)
import qualified PDF.Output as Output (string)
@ -298,25 +298,33 @@ type XRefSection = [XRefSubSection]
--
-- Structure
--
data Structure = Structure {
data InputStructure = InputStructure {
startOffset :: Int64
, xrefSection :: XRefSection
, inputTrailer :: Dictionary
--, startXRef :: Int64
, inputStructure :: Structure
}
data Structure = Structure {
xrefSection :: XRefSection
, trailer :: Dictionary
} deriving Show
structure :: Parser u Structure
structure =
Structure 0
Structure
<$> (line "xref" *> xrefSubSection `sepBy` EOL.parser)
<*> (line "trailer" *> dictionary <* EOL.parser)
-- <*> (line "startxref" *> integer)
xrefFromOffsets :: Map Offset Int64 -> (XRefSection, Int64)
xrefFromOffsets offsets = (
[]
updateXrefs :: XRefSection -> Map Offset Int64 -> (XRefSection, Int64)
updateXrefs xrefSection offsets = (
updateSubSection <$> xrefSection
, offsets ! StartXRef
)
where
updateSubSection subSection@(XRefSubSection {firstObjectId, entries}) =
subSection {entries = mapWithKey (updateEntry firstObjectId) entries}
updateEntry firstObjectId index e@(InUse {}) =
e {offset = offsets ! (ObjectId $ firstObjectId + index)}
updateEntry _ _ e = e
--
-- Flow
@ -332,7 +340,7 @@ data Flow = Flow {
data Content = Content {
occurrences :: [Occurrence]
, objects :: Map Int Object
, trailer :: Dictionary
, docStructure :: Structure
} deriving Show
outputBody :: ([Occurrence], Map Int Object) -> OBuilder
@ -340,15 +348,16 @@ outputBody (occurrences, objects) =
output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef
instance Output Content where
output (Content {occurrences, objects, trailer}) =
fmap xrefFromOffsets <$> getOffsets (outputBody (occurrences, objects))
output (Content {occurrences, objects, docStructure}) =
fmap (updateXrefs xrefSection) <$> getOffsets (outputBody (occurrences, objects))
>>= \(body, (xref, startXRef)) ->
body
`mappend` "xref"
`nextLine` "trailer"
--`nextLine` output xrefSection
--`mappend` "trailer"
`nextLine` output xref
`mappend` "trailer"
`nextLine` output trailer
`nextLine` "startxref"
`nextLine` (Output.string (printf "%d" startXRef))
`nextLine` byteString eofMarker
where
Structure {xrefSection, trailer} = docStructure