Generate valid PDF
This commit is contained in:
parent
0336baa687
commit
5614a25048
4 changed files with 49 additions and 38 deletions
|
@ -1,4 +1,3 @@
|
||||||
{-# LANGUAGE NamedFieldPuns #-}
|
|
||||||
module Data.ByteString.Lazy.Char8.Util (
|
module Data.ByteString.Lazy.Char8.Util (
|
||||||
previous
|
previous
|
||||||
, subBS
|
, subBS
|
||||||
|
|
15
src/PDF.hs
15
src/PDF.hs
|
@ -1,6 +1,4 @@
|
||||||
{-# LANGUAGE OverloadedStrings #-}
|
|
||||||
{-# LANGUAGE NamedFieldPuns #-}
|
{-# LANGUAGE NamedFieldPuns #-}
|
||||||
{-# LANGUAGE MultiParamTypeClasses #-}
|
|
||||||
module PDF (
|
module PDF (
|
||||||
Document(..)
|
Document(..)
|
||||||
, DirectObject(..)
|
, DirectObject(..)
|
||||||
|
@ -18,7 +16,8 @@ import qualified Data.Map as Map (lookup)
|
||||||
import PDF.Body (populate)
|
import PDF.Body (populate)
|
||||||
import qualified PDF.EOL as EOL (Style(..), charset, parser)
|
import qualified PDF.EOL as EOL (Style(..), charset, parser)
|
||||||
import PDF.Object (
|
import PDF.Object (
|
||||||
Content(..), DirectObject(..), Name(..), Number(..), Structure(..)
|
Content(..), DirectObject(..), InputStructure(..), Name(..), Number(..)
|
||||||
|
, Structure(..)
|
||||||
, eofMarker, magicNumber, structure
|
, eofMarker, magicNumber, structure
|
||||||
)
|
)
|
||||||
import qualified PDF.Output as Output (render, string)
|
import qualified PDF.Output as Output (render, string)
|
||||||
|
@ -89,17 +88,17 @@ findNextSection offset input =
|
||||||
then newOffset + findNextLine newInput
|
then newOffset + findNextLine newInput
|
||||||
else findNextSection (newOffset + 1) (BS.drop 1 newInput)
|
else findNextSection (newOffset + 1) (BS.drop 1 newInput)
|
||||||
|
|
||||||
readStructures :: Int64 -> ByteString -> Either ParseError [Structure]
|
readStructures :: Int64 -> ByteString -> Either ParseError [InputStructure]
|
||||||
readStructures startXref input =
|
readStructures startXref input =
|
||||||
parse structure ('@':show startXref) (BS.drop startXref input) >>= stopOrFollow
|
parse structure ('@':show startXref) (BS.drop startXref input) >>= stopOrFollow
|
||||||
where
|
where
|
||||||
stopOrFollow c@(Structure {inputTrailer}) =
|
stopOrFollow s@(Structure {trailer}) =
|
||||||
case Map.lookup (Name "Prev") inputTrailer of
|
case Map.lookup (Name "Prev") trailer of
|
||||||
Nothing -> Right [c {startOffset = findNextLine input}]
|
Nothing -> Right [InputStructure (findNextLine input) s]
|
||||||
Just (NumberObject (Number newStartXref)) ->
|
Just (NumberObject (Number newStartXref)) ->
|
||||||
let offset = truncate newStartXref in
|
let offset = truncate newStartXref in
|
||||||
let startOffset = findNextSection offset (BS.drop offset input) in
|
let startOffset = findNextSection offset (BS.drop offset input) in
|
||||||
(c {startOffset}:) <$> (readStructures offset input)
|
(InputStructure startOffset s:) <$> (readStructures offset input)
|
||||||
Just v -> parseError $ "Bad value for Prev entry in trailer: " ++ show v
|
Just v -> parseError $ "Bad value for Prev entry in trailer: " ++ show v
|
||||||
|
|
||||||
parseDocument :: ByteString -> Either ParseError Document
|
parseDocument :: ByteString -> Either ParseError Document
|
||||||
|
|
|
@ -10,16 +10,16 @@ import Data.Map ((!))
|
||||||
import qualified Data.Map as Map (empty, insert, lookup)
|
import qualified Data.Map as Map (empty, insert, lookup)
|
||||||
import qualified PDF.EOL as EOL (charset, parser)
|
import qualified PDF.EOL as EOL (charset, parser)
|
||||||
import PDF.Object (
|
import PDF.Object (
|
||||||
Content(..), DirectObject(..), Flow(..), IndirectObjCoordinates(..), Name(..)
|
Content(..), DirectObject(..), Flow(..), IndirectObjCoordinates(..)
|
||||||
, Number(..), Object(..), Occurrence(..), Parser, Structure(..)
|
, InputStructure(..), Name(..), Number(..), Object(..), Occurrence(..)
|
||||||
, XRefEntry(..), XRefSection, XRefSubSection(..)
|
, Parser, Structure(..), XRefEntry(..), XRefSection, XRefSubSection(..)
|
||||||
, blank, dictionary, directObject, integer, line
|
, blank, dictionary, directObject, integer, line
|
||||||
)
|
)
|
||||||
import Text.Parsec
|
import Text.Parsec
|
||||||
|
|
||||||
data UserState = UserState {
|
data UserState = UserState {
|
||||||
input :: ByteString
|
input :: ByteString
|
||||||
, structure :: Structure
|
, xreferences :: XRefSection
|
||||||
, flow :: Flow
|
, flow :: Flow
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -55,8 +55,8 @@ lookupOffset objectId (xrefSubSection:others) =
|
||||||
|
|
||||||
getOffset :: Int -> SParser Int64
|
getOffset :: Int -> SParser Int64
|
||||||
getOffset objectId = do
|
getOffset objectId = do
|
||||||
Structure {xrefSection} <- structure <$> getState
|
table <- xreferences <$> getState
|
||||||
case lookupOffset objectId xrefSection of
|
case lookupOffset objectId table of
|
||||||
Nothing -> fail $
|
Nothing -> fail $
|
||||||
"obj " ++ show objectId ++ " is referenced but missing in XRef table"
|
"obj " ++ show objectId ++ " is referenced but missing in XRef table"
|
||||||
Just offset -> return offset
|
Just offset -> return offset
|
||||||
|
@ -114,19 +114,23 @@ indirectObjCoordinates = do
|
||||||
occurrence :: SParser Occurrence
|
occurrence :: SParser Occurrence
|
||||||
occurrence = Comment <$> comment <|> Indirect <$> indirectObjCoordinates
|
occurrence = Comment <$> comment <|> Indirect <$> indirectObjCoordinates
|
||||||
|
|
||||||
populate :: ByteString -> Structure -> Content
|
populate :: ByteString -> InputStructure -> Content
|
||||||
populate input structure =
|
populate input structure =
|
||||||
let bodyInput = BS.drop (startOffset structure) input in
|
let bodyInput = BS.drop (startOffset structure) input in
|
||||||
case runParser recurseOnOccurrences initialState "" bodyInput of
|
case runParser recurseOnOccurrences initialState "" bodyInput of
|
||||||
Left _ -> emptyContent
|
Left _ -> Content {occurrences = [], objects = Map.empty, docStructure}
|
||||||
Right finalState ->
|
Right finalState ->
|
||||||
let Flow {occurrencesStack, tmpObjects} = flow finalState in
|
let Flow {occurrencesStack, tmpObjects} = flow finalState in
|
||||||
Content {occurrences = reverse occurrencesStack, objects = tmpObjects, trailer}
|
Content {
|
||||||
|
occurrences = reverse occurrencesStack, objects = tmpObjects, docStructure
|
||||||
|
}
|
||||||
where
|
where
|
||||||
trailer = inputTrailer structure
|
docStructure = inputStructure structure
|
||||||
emptyContent = Content {occurrences = [], objects = Map.empty, trailer}
|
xreferences = xrefSection docStructure
|
||||||
initialState = UserState {
|
initialState = UserState {
|
||||||
input, structure, flow = Flow {occurrencesStack = [], tmpObjects = Map.empty}
|
input, xreferences, flow = Flow {
|
||||||
|
occurrencesStack = [], tmpObjects = Map.empty
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
recurseOnOccurrences :: SParser UserState
|
recurseOnOccurrences :: SParser UserState
|
||||||
|
|
|
@ -1,12 +1,12 @@
|
||||||
{-# LANGUAGE OverloadedStrings #-}
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
{-# LANGUAGE NamedFieldPuns #-}
|
{-# LANGUAGE NamedFieldPuns #-}
|
||||||
{-# LANGUAGE FlexibleInstances #-}
|
{-# LANGUAGE FlexibleInstances #-}
|
||||||
{-# LANGUAGE MultiParamTypeClasses #-}
|
|
||||||
module PDF.Object (
|
module PDF.Object (
|
||||||
Content(..)
|
Content(..)
|
||||||
, DirectObject(..)
|
, DirectObject(..)
|
||||||
, Flow(..)
|
, Flow(..)
|
||||||
, IndirectObjCoordinates(..)
|
, IndirectObjCoordinates(..)
|
||||||
|
, InputStructure(..)
|
||||||
, Name(..)
|
, Name(..)
|
||||||
, Number(..)
|
, Number(..)
|
||||||
, Object(..)
|
, Object(..)
|
||||||
|
@ -28,7 +28,7 @@ module PDF.Object (
|
||||||
|
|
||||||
import Data.ByteString.Lazy.Char8 (ByteString)
|
import Data.ByteString.Lazy.Char8 (ByteString)
|
||||||
import Data.Int (Int64)
|
import Data.Int (Int64)
|
||||||
import Data.Map (Map, (!))
|
import Data.Map (Map, (!), mapWithKey)
|
||||||
import qualified Data.Map as Map (elems, fromList, toList)
|
import qualified Data.Map as Map (elems, fromList, toList)
|
||||||
import qualified PDF.EOL as EOL (charset, parser)
|
import qualified PDF.EOL as EOL (charset, parser)
|
||||||
import qualified PDF.Output as Output (string)
|
import qualified PDF.Output as Output (string)
|
||||||
|
@ -298,25 +298,33 @@ type XRefSection = [XRefSubSection]
|
||||||
--
|
--
|
||||||
-- Structure
|
-- Structure
|
||||||
--
|
--
|
||||||
data Structure = Structure {
|
data InputStructure = InputStructure {
|
||||||
startOffset :: Int64
|
startOffset :: Int64
|
||||||
, xrefSection :: XRefSection
|
, inputStructure :: Structure
|
||||||
, inputTrailer :: Dictionary
|
}
|
||||||
--, startXRef :: Int64
|
|
||||||
|
data Structure = Structure {
|
||||||
|
xrefSection :: XRefSection
|
||||||
|
, trailer :: Dictionary
|
||||||
} deriving Show
|
} deriving Show
|
||||||
|
|
||||||
structure :: Parser u Structure
|
structure :: Parser u Structure
|
||||||
structure =
|
structure =
|
||||||
Structure 0
|
Structure
|
||||||
<$> (line "xref" *> xrefSubSection `sepBy` EOL.parser)
|
<$> (line "xref" *> xrefSubSection `sepBy` EOL.parser)
|
||||||
<*> (line "trailer" *> dictionary <* EOL.parser)
|
<*> (line "trailer" *> dictionary <* EOL.parser)
|
||||||
-- <*> (line "startxref" *> integer)
|
|
||||||
|
|
||||||
xrefFromOffsets :: Map Offset Int64 -> (XRefSection, Int64)
|
updateXrefs :: XRefSection -> Map Offset Int64 -> (XRefSection, Int64)
|
||||||
xrefFromOffsets offsets = (
|
updateXrefs xrefSection offsets = (
|
||||||
[]
|
updateSubSection <$> xrefSection
|
||||||
, offsets ! StartXRef
|
, offsets ! StartXRef
|
||||||
)
|
)
|
||||||
|
where
|
||||||
|
updateSubSection subSection@(XRefSubSection {firstObjectId, entries}) =
|
||||||
|
subSection {entries = mapWithKey (updateEntry firstObjectId) entries}
|
||||||
|
updateEntry firstObjectId index e@(InUse {}) =
|
||||||
|
e {offset = offsets ! (ObjectId $ firstObjectId + index)}
|
||||||
|
updateEntry _ _ e = e
|
||||||
|
|
||||||
--
|
--
|
||||||
-- Flow
|
-- Flow
|
||||||
|
@ -332,7 +340,7 @@ data Flow = Flow {
|
||||||
data Content = Content {
|
data Content = Content {
|
||||||
occurrences :: [Occurrence]
|
occurrences :: [Occurrence]
|
||||||
, objects :: Map Int Object
|
, objects :: Map Int Object
|
||||||
, trailer :: Dictionary
|
, docStructure :: Structure
|
||||||
} deriving Show
|
} deriving Show
|
||||||
|
|
||||||
outputBody :: ([Occurrence], Map Int Object) -> OBuilder
|
outputBody :: ([Occurrence], Map Int Object) -> OBuilder
|
||||||
|
@ -340,15 +348,16 @@ outputBody (occurrences, objects) =
|
||||||
output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef
|
output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef
|
||||||
|
|
||||||
instance Output Content where
|
instance Output Content where
|
||||||
output (Content {occurrences, objects, trailer}) =
|
output (Content {occurrences, objects, docStructure}) =
|
||||||
fmap xrefFromOffsets <$> getOffsets (outputBody (occurrences, objects))
|
fmap (updateXrefs xrefSection) <$> getOffsets (outputBody (occurrences, objects))
|
||||||
>>= \(body, (xref, startXRef)) ->
|
>>= \(body, (xref, startXRef)) ->
|
||||||
body
|
body
|
||||||
`mappend` "xref"
|
`mappend` "xref"
|
||||||
`nextLine` "trailer"
|
`nextLine` output xref
|
||||||
--`nextLine` output xrefSection
|
`mappend` "trailer"
|
||||||
--`mappend` "trailer"
|
|
||||||
`nextLine` output trailer
|
`nextLine` output trailer
|
||||||
`nextLine` "startxref"
|
`nextLine` "startxref"
|
||||||
`nextLine` (Output.string (printf "%d" startXRef))
|
`nextLine` (Output.string (printf "%d" startXRef))
|
||||||
`nextLine` byteString eofMarker
|
`nextLine` byteString eofMarker
|
||||||
|
where
|
||||||
|
Structure {xrefSection, trailer} = docStructure
|
||||||
|
|
Loading…
Reference in a new issue