Generate valid PDF

This commit is contained in:
Tissevert 2019-05-18 09:01:13 +02:00
parent 0336baa687
commit 5614a25048
4 changed files with 49 additions and 38 deletions

View file

@ -1,4 +1,3 @@
{-# LANGUAGE NamedFieldPuns #-}
module Data.ByteString.Lazy.Char8.Util ( module Data.ByteString.Lazy.Char8.Util (
previous previous
, subBS , subBS

View file

@ -1,6 +1,4 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module PDF ( module PDF (
Document(..) Document(..)
, DirectObject(..) , DirectObject(..)
@ -18,7 +16,8 @@ import qualified Data.Map as Map (lookup)
import PDF.Body (populate) import PDF.Body (populate)
import qualified PDF.EOL as EOL (Style(..), charset, parser) import qualified PDF.EOL as EOL (Style(..), charset, parser)
import PDF.Object ( import PDF.Object (
Content(..), DirectObject(..), Name(..), Number(..), Structure(..) Content(..), DirectObject(..), InputStructure(..), Name(..), Number(..)
, Structure(..)
, eofMarker, magicNumber, structure , eofMarker, magicNumber, structure
) )
import qualified PDF.Output as Output (render, string) import qualified PDF.Output as Output (render, string)
@ -89,17 +88,17 @@ findNextSection offset input =
then newOffset + findNextLine newInput then newOffset + findNextLine newInput
else findNextSection (newOffset + 1) (BS.drop 1 newInput) else findNextSection (newOffset + 1) (BS.drop 1 newInput)
readStructures :: Int64 -> ByteString -> Either ParseError [Structure] readStructures :: Int64 -> ByteString -> Either ParseError [InputStructure]
readStructures startXref input = readStructures startXref input =
parse structure ('@':show startXref) (BS.drop startXref input) >>= stopOrFollow parse structure ('@':show startXref) (BS.drop startXref input) >>= stopOrFollow
where where
stopOrFollow c@(Structure {inputTrailer}) = stopOrFollow s@(Structure {trailer}) =
case Map.lookup (Name "Prev") inputTrailer of case Map.lookup (Name "Prev") trailer of
Nothing -> Right [c {startOffset = findNextLine input}] Nothing -> Right [InputStructure (findNextLine input) s]
Just (NumberObject (Number newStartXref)) -> Just (NumberObject (Number newStartXref)) ->
let offset = truncate newStartXref in let offset = truncate newStartXref in
let startOffset = findNextSection offset (BS.drop offset input) in let startOffset = findNextSection offset (BS.drop offset input) in
(c {startOffset}:) <$> (readStructures offset input) (InputStructure startOffset s:) <$> (readStructures offset input)
Just v -> parseError $ "Bad value for Prev entry in trailer: " ++ show v Just v -> parseError $ "Bad value for Prev entry in trailer: " ++ show v
parseDocument :: ByteString -> Either ParseError Document parseDocument :: ByteString -> Either ParseError Document

View file

@ -10,16 +10,16 @@ import Data.Map ((!))
import qualified Data.Map as Map (empty, insert, lookup) import qualified Data.Map as Map (empty, insert, lookup)
import qualified PDF.EOL as EOL (charset, parser) import qualified PDF.EOL as EOL (charset, parser)
import PDF.Object ( import PDF.Object (
Content(..), DirectObject(..), Flow(..), IndirectObjCoordinates(..), Name(..) Content(..), DirectObject(..), Flow(..), IndirectObjCoordinates(..)
, Number(..), Object(..), Occurrence(..), Parser, Structure(..) , InputStructure(..), Name(..), Number(..), Object(..), Occurrence(..)
, XRefEntry(..), XRefSection, XRefSubSection(..) , Parser, Structure(..), XRefEntry(..), XRefSection, XRefSubSection(..)
, blank, dictionary, directObject, integer, line , blank, dictionary, directObject, integer, line
) )
import Text.Parsec import Text.Parsec
data UserState = UserState { data UserState = UserState {
input :: ByteString input :: ByteString
, structure :: Structure , xreferences :: XRefSection
, flow :: Flow , flow :: Flow
} }
@ -55,8 +55,8 @@ lookupOffset objectId (xrefSubSection:others) =
getOffset :: Int -> SParser Int64 getOffset :: Int -> SParser Int64
getOffset objectId = do getOffset objectId = do
Structure {xrefSection} <- structure <$> getState table <- xreferences <$> getState
case lookupOffset objectId xrefSection of case lookupOffset objectId table of
Nothing -> fail $ Nothing -> fail $
"obj " ++ show objectId ++ " is referenced but missing in XRef table" "obj " ++ show objectId ++ " is referenced but missing in XRef table"
Just offset -> return offset Just offset -> return offset
@ -114,19 +114,23 @@ indirectObjCoordinates = do
occurrence :: SParser Occurrence occurrence :: SParser Occurrence
occurrence = Comment <$> comment <|> Indirect <$> indirectObjCoordinates occurrence = Comment <$> comment <|> Indirect <$> indirectObjCoordinates
populate :: ByteString -> Structure -> Content populate :: ByteString -> InputStructure -> Content
populate input structure = populate input structure =
let bodyInput = BS.drop (startOffset structure) input in let bodyInput = BS.drop (startOffset structure) input in
case runParser recurseOnOccurrences initialState "" bodyInput of case runParser recurseOnOccurrences initialState "" bodyInput of
Left _ -> emptyContent Left _ -> Content {occurrences = [], objects = Map.empty, docStructure}
Right finalState -> Right finalState ->
let Flow {occurrencesStack, tmpObjects} = flow finalState in let Flow {occurrencesStack, tmpObjects} = flow finalState in
Content {occurrences = reverse occurrencesStack, objects = tmpObjects, trailer} Content {
occurrences = reverse occurrencesStack, objects = tmpObjects, docStructure
}
where where
trailer = inputTrailer structure docStructure = inputStructure structure
emptyContent = Content {occurrences = [], objects = Map.empty, trailer} xreferences = xrefSection docStructure
initialState = UserState { initialState = UserState {
input, structure, flow = Flow {occurrencesStack = [], tmpObjects = Map.empty} input, xreferences, flow = Flow {
occurrencesStack = [], tmpObjects = Map.empty
}
} }
recurseOnOccurrences :: SParser UserState recurseOnOccurrences :: SParser UserState

View file

@ -1,12 +1,12 @@
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module PDF.Object ( module PDF.Object (
Content(..) Content(..)
, DirectObject(..) , DirectObject(..)
, Flow(..) , Flow(..)
, IndirectObjCoordinates(..) , IndirectObjCoordinates(..)
, InputStructure(..)
, Name(..) , Name(..)
, Number(..) , Number(..)
, Object(..) , Object(..)
@ -28,7 +28,7 @@ module PDF.Object (
import Data.ByteString.Lazy.Char8 (ByteString) import Data.ByteString.Lazy.Char8 (ByteString)
import Data.Int (Int64) import Data.Int (Int64)
import Data.Map (Map, (!)) import Data.Map (Map, (!), mapWithKey)
import qualified Data.Map as Map (elems, fromList, toList) import qualified Data.Map as Map (elems, fromList, toList)
import qualified PDF.EOL as EOL (charset, parser) import qualified PDF.EOL as EOL (charset, parser)
import qualified PDF.Output as Output (string) import qualified PDF.Output as Output (string)
@ -298,25 +298,33 @@ type XRefSection = [XRefSubSection]
-- --
-- Structure -- Structure
-- --
data Structure = Structure { data InputStructure = InputStructure {
startOffset :: Int64 startOffset :: Int64
, xrefSection :: XRefSection , inputStructure :: Structure
, inputTrailer :: Dictionary }
--, startXRef :: Int64
data Structure = Structure {
xrefSection :: XRefSection
, trailer :: Dictionary
} deriving Show } deriving Show
structure :: Parser u Structure structure :: Parser u Structure
structure = structure =
Structure 0 Structure
<$> (line "xref" *> xrefSubSection `sepBy` EOL.parser) <$> (line "xref" *> xrefSubSection `sepBy` EOL.parser)
<*> (line "trailer" *> dictionary <* EOL.parser) <*> (line "trailer" *> dictionary <* EOL.parser)
-- <*> (line "startxref" *> integer)
xrefFromOffsets :: Map Offset Int64 -> (XRefSection, Int64) updateXrefs :: XRefSection -> Map Offset Int64 -> (XRefSection, Int64)
xrefFromOffsets offsets = ( updateXrefs xrefSection offsets = (
[] updateSubSection <$> xrefSection
, offsets ! StartXRef , offsets ! StartXRef
) )
where
updateSubSection subSection@(XRefSubSection {firstObjectId, entries}) =
subSection {entries = mapWithKey (updateEntry firstObjectId) entries}
updateEntry firstObjectId index e@(InUse {}) =
e {offset = offsets ! (ObjectId $ firstObjectId + index)}
updateEntry _ _ e = e
-- --
-- Flow -- Flow
@ -332,7 +340,7 @@ data Flow = Flow {
data Content = Content { data Content = Content {
occurrences :: [Occurrence] occurrences :: [Occurrence]
, objects :: Map Int Object , objects :: Map Int Object
, trailer :: Dictionary , docStructure :: Structure
} deriving Show } deriving Show
outputBody :: ([Occurrence], Map Int Object) -> OBuilder outputBody :: ([Occurrence], Map Int Object) -> OBuilder
@ -340,15 +348,16 @@ outputBody (occurrences, objects) =
output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef
instance Output Content where instance Output Content where
output (Content {occurrences, objects, trailer}) = output (Content {occurrences, objects, docStructure}) =
fmap xrefFromOffsets <$> getOffsets (outputBody (occurrences, objects)) fmap (updateXrefs xrefSection) <$> getOffsets (outputBody (occurrences, objects))
>>= \(body, (xref, startXRef)) -> >>= \(body, (xref, startXRef)) ->
body body
`mappend` "xref" `mappend` "xref"
`nextLine` "trailer" `nextLine` output xref
--`nextLine` output xrefSection `mappend` "trailer"
--`mappend` "trailer"
`nextLine` output trailer `nextLine` output trailer
`nextLine` "startxref" `nextLine` "startxref"
`nextLine` (Output.string (printf "%d" startXRef)) `nextLine` (Output.string (printf "%d" startXRef))
`nextLine` byteString eofMarker `nextLine` byteString eofMarker
where
Structure {xrefSection, trailer} = docStructure