Fix output implementation with dynamic XRefs
This commit is contained in:
parent
e23618da68
commit
0336baa687
4 changed files with 345 additions and 197 deletions
25
src/PDF.hs
25
src/PDF.hs
|
@ -1,8 +1,8 @@
|
||||||
{-# LANGUAGE OverloadedStrings #-}
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
{-# LANGUAGE NamedFieldPuns #-}
|
{-# LANGUAGE NamedFieldPuns #-}
|
||||||
|
{-# LANGUAGE MultiParamTypeClasses #-}
|
||||||
module PDF (
|
module PDF (
|
||||||
Document(..)
|
Document(..)
|
||||||
, Content(..)
|
|
||||||
, DirectObject(..)
|
, DirectObject(..)
|
||||||
, parseDocument
|
, parseDocument
|
||||||
, render
|
, render
|
||||||
|
@ -17,7 +17,10 @@ import Data.Int (Int64)
|
||||||
import qualified Data.Map as Map (lookup)
|
import qualified Data.Map as Map (lookup)
|
||||||
import PDF.Body (populate)
|
import PDF.Body (populate)
|
||||||
import qualified PDF.EOL as EOL (Style(..), charset, parser)
|
import qualified PDF.EOL as EOL (Style(..), charset, parser)
|
||||||
import PDF.Object (Content(..), DirectObject(..), content, eofMarker, magicNumber)
|
import PDF.Object (
|
||||||
|
Content(..), DirectObject(..), Name(..), Number(..), Structure(..)
|
||||||
|
, eofMarker, magicNumber, structure
|
||||||
|
)
|
||||||
import qualified PDF.Output as Output (render, string)
|
import qualified PDF.Output as Output (render, string)
|
||||||
import PDF.Output (Output(..), nextLine)
|
import PDF.Output (Output(..), nextLine)
|
||||||
import Text.Parsec
|
import Text.Parsec
|
||||||
|
@ -86,23 +89,23 @@ findNextSection offset input =
|
||||||
then newOffset + findNextLine newInput
|
then newOffset + findNextLine newInput
|
||||||
else findNextSection (newOffset + 1) (BS.drop 1 newInput)
|
else findNextSection (newOffset + 1) (BS.drop 1 newInput)
|
||||||
|
|
||||||
iterateContents :: Int64 -> ByteString -> Either ParseError [Content]
|
readStructures :: Int64 -> ByteString -> Either ParseError [Structure]
|
||||||
iterateContents startXref input =
|
readStructures startXref input =
|
||||||
parse content ('@':show startXref) (BS.drop startXref input) >>= stopOrFollow
|
parse structure ('@':show startXref) (BS.drop startXref input) >>= stopOrFollow
|
||||||
where
|
where
|
||||||
stopOrFollow c@(Content {trailer}) =
|
stopOrFollow c@(Structure {inputTrailer}) =
|
||||||
case Map.lookup "Prev" trailer of
|
case Map.lookup (Name "Prev") inputTrailer of
|
||||||
Nothing -> Right [c {startOffset = findNextLine input}]
|
Nothing -> Right [c {startOffset = findNextLine input}]
|
||||||
Just (Number newStartXref) ->
|
Just (NumberObject (Number newStartXref)) ->
|
||||||
let offset = truncate newStartXref in
|
let offset = truncate newStartXref in
|
||||||
let startOffset = findNextSection offset (BS.drop offset input) in
|
let startOffset = findNextSection offset (BS.drop offset input) in
|
||||||
(c {startOffset}:) <$> (iterateContents offset input)
|
(c {startOffset}:) <$> (readStructures offset input)
|
||||||
Just v -> parseError $ "Bad value for Prev entry in trailer: " ++ show v
|
Just v -> parseError $ "Bad value for Prev entry in trailer: " ++ show v
|
||||||
|
|
||||||
parseDocument :: ByteString -> Either ParseError Document
|
parseDocument :: ByteString -> Either ParseError Document
|
||||||
parseDocument input = do
|
parseDocument input = do
|
||||||
(pdfVersion, eolStyle) <- parse ((,) <$> version <*> EOL.parser) "" input
|
(pdfVersion, eolStyle) <- parse ((,) <$> version <*> EOL.parser) "" input
|
||||||
startXref <- readStartXref eolStyle input
|
startXref <- readStartXref eolStyle input
|
||||||
structures <- iterateContents startXref input
|
structuresRead <- readStructures startXref input
|
||||||
let contents = populate input <$> structures
|
let contents = populate input <$> structuresRead
|
||||||
return $ Document {pdfVersion, eolStyle, contents}
|
return $ Document {pdfVersion, eolStyle, contents}
|
||||||
|
|
|
@ -7,33 +7,35 @@ import Data.ByteString.Lazy.Char8 (ByteString)
|
||||||
import qualified Data.ByteString.Lazy.Char8 as BS (drop, pack)
|
import qualified Data.ByteString.Lazy.Char8 as BS (drop, pack)
|
||||||
import Data.Int (Int64)
|
import Data.Int (Int64)
|
||||||
import Data.Map ((!))
|
import Data.Map ((!))
|
||||||
import qualified Data.Map as Map (insert, lookup)
|
import qualified Data.Map as Map (empty, insert, lookup)
|
||||||
import qualified PDF.EOL as EOL (charset, parser)
|
import qualified PDF.EOL as EOL (charset, parser)
|
||||||
import PDF.Object (
|
import PDF.Object (
|
||||||
Content(..), DirectObject(..), IndirectObjCoordinates(..), Object(..)
|
Content(..), DirectObject(..), Flow(..), IndirectObjCoordinates(..), Name(..)
|
||||||
, Occurrence(..), Parser, XRefEntry(..), XRefSection, XRefSubSection(..)
|
, Number(..), Object(..), Occurrence(..), Parser, Structure(..)
|
||||||
|
, XRefEntry(..), XRefSection, XRefSubSection(..)
|
||||||
, blank, dictionary, directObject, integer, line
|
, blank, dictionary, directObject, integer, line
|
||||||
)
|
)
|
||||||
import Text.Parsec
|
import Text.Parsec
|
||||||
|
|
||||||
data UserState = UserState {
|
data UserState = UserState {
|
||||||
input :: ByteString
|
input :: ByteString
|
||||||
, content :: Content
|
, structure :: Structure
|
||||||
|
, flow :: Flow
|
||||||
}
|
}
|
||||||
|
|
||||||
type SParser = Parser UserState
|
type SParser = Parser UserState
|
||||||
|
|
||||||
modifyContent :: (Content -> Content) -> SParser ()
|
modifyFlow :: (Flow -> Flow) -> SParser ()
|
||||||
modifyContent f = modifyState $ \state -> state {content = f $ content state}
|
modifyFlow f = modifyState $ \state -> state {flow = f $ flow state}
|
||||||
|
|
||||||
addObject :: Int -> Object -> SParser ()
|
addObject :: Int -> Object -> SParser ()
|
||||||
addObject objectId newObject = modifyContent $ \content -> content {
|
addObject objectId newObject = modifyFlow $ \flow -> flow {
|
||||||
objects = Map.insert objectId newObject $ objects content
|
tmpObjects = Map.insert objectId newObject $ tmpObjects flow
|
||||||
}
|
}
|
||||||
|
|
||||||
pushOccurrence :: Occurrence -> SParser ()
|
pushOccurrence :: Occurrence -> SParser ()
|
||||||
pushOccurrence newOccurrence = modifyContent $ \content -> content {
|
pushOccurrence newOccurrence = modifyFlow $ \flow -> flow {
|
||||||
body = newOccurrence : (body content)
|
occurrencesStack = newOccurrence : (occurrencesStack flow)
|
||||||
}
|
}
|
||||||
|
|
||||||
comment :: Parser u String
|
comment :: Parser u String
|
||||||
|
@ -53,7 +55,7 @@ lookupOffset objectId (xrefSubSection:others) =
|
||||||
|
|
||||||
getOffset :: Int -> SParser Int64
|
getOffset :: Int -> SParser Int64
|
||||||
getOffset objectId = do
|
getOffset objectId = do
|
||||||
Content {xrefSection} <- content <$> getState
|
Structure {xrefSection} <- structure <$> getState
|
||||||
case lookupOffset objectId xrefSection of
|
case lookupOffset objectId xrefSection of
|
||||||
Nothing -> fail $
|
Nothing -> fail $
|
||||||
"obj " ++ show objectId ++ " is referenced but missing in XRef table"
|
"obj " ++ show objectId ++ " is referenced but missing in XRef table"
|
||||||
|
@ -69,9 +71,9 @@ loadNumber objectId = do
|
||||||
offset <- getOffset objectId
|
offset <- getOffset objectId
|
||||||
objectStart <- BS.drop offset . input <$> getState
|
objectStart <- BS.drop offset . input <$> getState
|
||||||
indirectObjCoordinates `on` objectStart >> return ()
|
indirectObjCoordinates `on` objectStart >> return ()
|
||||||
objectValue <- (!objectId) . objects . content <$> getState
|
objectValue <- (!objectId) . tmpObjects . flow <$> getState
|
||||||
case objectValue of
|
case objectValue of
|
||||||
Direct (Number n) -> return n
|
Direct (NumberObject (Number n)) -> return n
|
||||||
obj -> fail $ "Expected obj@" ++ show offset ++ " (" ++ show obj ++ ") to be a Number"
|
obj -> fail $ "Expected obj@" ++ show offset ++ " (" ++ show obj ++ ") to be a Number"
|
||||||
|
|
||||||
invalidValue :: Object -> String
|
invalidValue :: Object -> String
|
||||||
|
@ -79,12 +81,12 @@ invalidValue v = "Invalid value " ++ show v
|
||||||
|
|
||||||
getSize :: Maybe DirectObject -> SParser Float
|
getSize :: Maybe DirectObject -> SParser Float
|
||||||
getSize Nothing = fail "Missing '/Length' key on stream"
|
getSize Nothing = fail "Missing '/Length' key on stream"
|
||||||
getSize (Just (Number size)) = return size
|
getSize (Just (NumberObject (Number size))) = return size
|
||||||
getSize (Just (Reference (IndirectObjCoordinates {objectId}))) = do
|
getSize (Just (Reference (IndirectObjCoordinates {objectId}))) = do
|
||||||
Content {objects} <- content <$> getState
|
Flow {tmpObjects} <- flow <$> getState
|
||||||
case Map.lookup objectId objects of
|
case Map.lookup objectId tmpObjects of
|
||||||
Nothing -> loadNumber objectId
|
Nothing -> loadNumber objectId
|
||||||
Just (Direct (Number size)) -> return size
|
Just (Direct (NumberObject (Number size))) -> return size
|
||||||
Just v -> fail $
|
Just v -> fail $
|
||||||
invalidValue v ++ " for obj " ++ show objectId ++ "used as /Length"
|
invalidValue v ++ " for obj " ++ show objectId ++ "used as /Length"
|
||||||
getSize (Just v) = fail $ invalidValue (Direct v) ++ " for /Length"
|
getSize (Just v) = fail $ invalidValue (Direct v) ++ " for /Length"
|
||||||
|
@ -92,7 +94,7 @@ getSize (Just v) = fail $ invalidValue (Direct v) ++ " for /Length"
|
||||||
streamObject :: SParser Object
|
streamObject :: SParser Object
|
||||||
streamObject = try $ do
|
streamObject = try $ do
|
||||||
header <- dictionary <* blank
|
header <- dictionary <* blank
|
||||||
size <- getSize (Map.lookup "Length" header)
|
size <- getSize (Map.lookup (Name "Length") header)
|
||||||
streamContent <- BS.pack <$> stream (truncate size)
|
streamContent <- BS.pack <$> stream (truncate size)
|
||||||
return $ Stream {header, streamContent}
|
return $ Stream {header, streamContent}
|
||||||
where
|
where
|
||||||
|
@ -112,16 +114,20 @@ indirectObjCoordinates = do
|
||||||
occurrence :: SParser Occurrence
|
occurrence :: SParser Occurrence
|
||||||
occurrence = Comment <$> comment <|> Indirect <$> indirectObjCoordinates
|
occurrence = Comment <$> comment <|> Indirect <$> indirectObjCoordinates
|
||||||
|
|
||||||
populate :: ByteString -> Content -> Content
|
populate :: ByteString -> Structure -> Content
|
||||||
populate input initialContent =
|
populate input structure =
|
||||||
let bodyInput = BS.drop (startOffset initialContent) input in
|
let bodyInput = BS.drop (startOffset structure) input in
|
||||||
case runParser recurseOnOccurrences initialState "" bodyInput of
|
case runParser recurseOnOccurrences initialState "" bodyInput of
|
||||||
Left _ -> initialContent
|
Left _ -> emptyContent
|
||||||
Right finalState ->
|
Right finalState ->
|
||||||
let finalContent = content finalState in
|
let Flow {occurrencesStack, tmpObjects} = flow finalState in
|
||||||
finalContent {body = reverse (body finalContent)}
|
Content {occurrences = reverse occurrencesStack, objects = tmpObjects, trailer}
|
||||||
where
|
where
|
||||||
initialState = UserState {input, content = initialContent}
|
trailer = inputTrailer structure
|
||||||
|
emptyContent = Content {occurrences = [], objects = Map.empty, trailer}
|
||||||
|
initialState = UserState {
|
||||||
|
input, structure, flow = Flow {occurrencesStack = [], tmpObjects = Map.empty}
|
||||||
|
}
|
||||||
|
|
||||||
recurseOnOccurrences :: SParser UserState
|
recurseOnOccurrences :: SParser UserState
|
||||||
recurseOnOccurrences =
|
recurseOnOccurrences =
|
||||||
|
|
|
@ -1,159 +1,46 @@
|
||||||
{-# LANGUAGE OverloadedStrings #-}
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
{-# LANGUAGE NamedFieldPuns #-}
|
{-# LANGUAGE NamedFieldPuns #-}
|
||||||
{-# LANGUAGE FlexibleInstances #-}
|
{-# LANGUAGE FlexibleInstances #-}
|
||||||
|
{-# LANGUAGE MultiParamTypeClasses #-}
|
||||||
module PDF.Object (
|
module PDF.Object (
|
||||||
Content(..)
|
Content(..)
|
||||||
, DirectObject(..)
|
, DirectObject(..)
|
||||||
|
, Flow(..)
|
||||||
, IndirectObjCoordinates(..)
|
, IndirectObjCoordinates(..)
|
||||||
|
, Name(..)
|
||||||
|
, Number(..)
|
||||||
, Object(..)
|
, Object(..)
|
||||||
, Occurrence(..)
|
, Occurrence(..)
|
||||||
, Parser
|
, Parser
|
||||||
|
, Structure(..)
|
||||||
, XRefEntry(..)
|
, XRefEntry(..)
|
||||||
, XRefSection
|
, XRefSection
|
||||||
, XRefSubSection(..)
|
, XRefSubSection(..)
|
||||||
, blank
|
, blank
|
||||||
, content
|
|
||||||
, dictionary
|
, dictionary
|
||||||
, directObject
|
, directObject
|
||||||
, eofMarker
|
, eofMarker
|
||||||
, integer
|
, integer
|
||||||
, line
|
, line
|
||||||
, magicNumber
|
, magicNumber
|
||||||
|
, structure
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Data.ByteString.Lazy.Char8 (ByteString)
|
import Data.ByteString.Lazy.Char8 (ByteString)
|
||||||
import Data.Int (Int64)
|
import Data.Int (Int64)
|
||||||
import Data.Map (Map, (!))
|
import Data.Map (Map, (!))
|
||||||
import qualified Data.Map as Map (empty, elems, fromList, toList)
|
import qualified Data.Map as Map (elems, fromList, toList)
|
||||||
import qualified PDF.EOL as EOL (charset, parser)
|
import qualified PDF.EOL as EOL (charset, parser)
|
||||||
import qualified PDF.Output as Output (string)
|
import qualified PDF.Output as Output (string)
|
||||||
import PDF.Output (OBuilder, Output(..), byteString, join, newLine, nextLine)
|
import PDF.Output (
|
||||||
|
OBuilder, Offset(..), Output(..)
|
||||||
|
, byteString, getOffsets, join, newLine, nextLine, saveOffset
|
||||||
|
)
|
||||||
import Text.Parsec
|
import Text.Parsec
|
||||||
import Text.Printf (printf)
|
import Text.Printf (printf)
|
||||||
|
|
||||||
type Parser u = Parsec ByteString u
|
type Parser u = Parsec ByteString u
|
||||||
|
|
||||||
type Dictionary = Map String DirectObject
|
|
||||||
|
|
||||||
instance Output (Map String DirectObject) where
|
|
||||||
output dict =
|
|
||||||
"<<" `mappend` keyValues `mappend` ">>"
|
|
||||||
where
|
|
||||||
keyValues = join " " $ outputKeyVal <$> Map.toList dict
|
|
||||||
outputKeyVal (key, val) =
|
|
||||||
Output.string (printf "/%s " key) `mappend` output val
|
|
||||||
|
|
||||||
data StringObj = Literal String | Hexadecimal String deriving Show
|
|
||||||
|
|
||||||
instance Output StringObj where
|
|
||||||
output (Literal s) = Output.string (printf "(%s)" s)
|
|
||||||
output (Hexadecimal s) = Output.string (printf "<%s>" s)
|
|
||||||
|
|
||||||
data DirectObject =
|
|
||||||
Boolean Bool
|
|
||||||
| Number Float
|
|
||||||
| String StringObj
|
|
||||||
| Name String
|
|
||||||
| Array [DirectObject]
|
|
||||||
| Dictionary Dictionary
|
|
||||||
| Null
|
|
||||||
| Reference IndirectObjCoordinates
|
|
||||||
deriving Show
|
|
||||||
|
|
||||||
outputFloat :: Float -> OBuilder
|
|
||||||
outputFloat f = Output.string $
|
|
||||||
case properFraction f of
|
|
||||||
(n, 0) -> printf "%d" (n :: Int)
|
|
||||||
_ -> printf "%f" f
|
|
||||||
|
|
||||||
instance Output DirectObject where
|
|
||||||
output (Boolean b) = output b
|
|
||||||
output (Number n) = outputFloat n
|
|
||||||
output (String s) = output s
|
|
||||||
output (Name n) = "/" `mappend` Output.string n
|
|
||||||
output (Array a) = "[" `mappend` join " " a `mappend` "]"
|
|
||||||
output (Dictionary d) = output d
|
|
||||||
output (Null) = "null"
|
|
||||||
output (Reference (IndirectObjCoordinates {objectId, versionNumber})) =
|
|
||||||
Output.string (printf "%d %d R" objectId versionNumber)
|
|
||||||
|
|
||||||
data Object =
|
|
||||||
Direct DirectObject
|
|
||||||
| Stream {
|
|
||||||
header :: Dictionary
|
|
||||||
, streamContent :: ByteString
|
|
||||||
}
|
|
||||||
deriving Show
|
|
||||||
|
|
||||||
instance Output Object where
|
|
||||||
output (Direct d) = output d
|
|
||||||
output (Stream {header, streamContent}) =
|
|
||||||
output header
|
|
||||||
`nextLine` "stream"
|
|
||||||
`nextLine` byteString streamContent
|
|
||||||
`mappend` "endstream"
|
|
||||||
|
|
||||||
data IndirectObjCoordinates = IndirectObjCoordinates {
|
|
||||||
objectId :: Int
|
|
||||||
, versionNumber :: Int
|
|
||||||
} deriving Show
|
|
||||||
|
|
||||||
data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show
|
|
||||||
|
|
||||||
outputOccurrence :: Map Int Object -> Occurrence -> OBuilder
|
|
||||||
outputOccurrence _ (Comment c) =
|
|
||||||
Output.string (printf "%%%s" c) `mappend` newLine
|
|
||||||
outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) =
|
|
||||||
Output.string (printf "%d %d obj" objectId versionNumber)
|
|
||||||
`nextLine` output (objects ! objectId)
|
|
||||||
`nextLine` "endobj" `mappend` newLine
|
|
||||||
|
|
||||||
data XRefEntry = InUse {
|
|
||||||
offset :: Int64
|
|
||||||
, generation :: Int
|
|
||||||
} | Free {
|
|
||||||
nextFree :: Int64
|
|
||||||
, generation :: Int
|
|
||||||
} deriving Show
|
|
||||||
|
|
||||||
instance Output XRefEntry where
|
|
||||||
output (InUse {offset, generation}) =
|
|
||||||
Output.string (printf "%010d %05d n " offset generation) `mappend` newLine
|
|
||||||
output (Free {nextFree, generation}) =
|
|
||||||
Output.string (printf "%010d %05d f " nextFree generation) `mappend` newLine
|
|
||||||
|
|
||||||
data XRefSubSection = XRefSubSection {
|
|
||||||
firstObjectId :: Int
|
|
||||||
, entriesNumber :: Int
|
|
||||||
, entries :: Map Int XRefEntry
|
|
||||||
} deriving Show
|
|
||||||
|
|
||||||
instance Output XRefSubSection where
|
|
||||||
output (XRefSubSection {firstObjectId, entriesNumber, entries}) =
|
|
||||||
Output.string (printf "%d %d" firstObjectId entriesNumber)
|
|
||||||
`nextLine` output (Map.elems entries)
|
|
||||||
|
|
||||||
type XRefSection = [XRefSubSection]
|
|
||||||
|
|
||||||
data Content = Content {
|
|
||||||
startOffset :: Int64
|
|
||||||
, body :: [Occurrence]
|
|
||||||
, objects :: Map Int Object
|
|
||||||
, xrefSection :: XRefSection
|
|
||||||
, trailer :: Dictionary
|
|
||||||
, startXrefPosition :: Int64
|
|
||||||
} deriving Show
|
|
||||||
|
|
||||||
instance Output Content where
|
|
||||||
output (Content {body, objects, trailer, xrefSection, startXrefPosition}) =
|
|
||||||
output (outputOccurrence objects <$> body)
|
|
||||||
`mappend` output xrefSection
|
|
||||||
`mappend` "trailer"
|
|
||||||
`nextLine` output trailer
|
|
||||||
`nextLine` "startxref"
|
|
||||||
`nextLine` Output.string (printf "%d" startXrefPosition)
|
|
||||||
`nextLine` byteString eofMarker
|
|
||||||
|
|
||||||
line :: String -> Parser u ()
|
line :: String -> Parser u ()
|
||||||
line l = string l *> EOL.parser *> return ()
|
line l = string l *> EOL.parser *> return ()
|
||||||
|
|
||||||
|
@ -181,29 +68,46 @@ regular = noneOf $ EOL.charset ++ whiteSpaceCharset ++ delimiterCharset
|
||||||
integer :: (Read a, Num a) => Parser u a
|
integer :: (Read a, Num a) => Parser u a
|
||||||
integer = read <$> many1 digit <* whiteSpace
|
integer = read <$> many1 digit <* whiteSpace
|
||||||
|
|
||||||
directObject :: Parser u DirectObject
|
-------------------------------------
|
||||||
directObject =
|
-- OBJECTS
|
||||||
Boolean <$> try boolean
|
-------------------------------------
|
||||||
<|> Reference <$> try reference {- defined before Number because Number is a prefix of it -}
|
|
||||||
<|> Number <$> try number
|
|
||||||
<|> String <$> try stringObj
|
|
||||||
<|> Name <$> try name
|
|
||||||
<|> Array <$> try array
|
|
||||||
<|> Dictionary <$> try dictionary
|
|
||||||
<|> const Null <$> try nullObject
|
|
||||||
|
|
||||||
|
--
|
||||||
|
-- Boolean
|
||||||
|
--
|
||||||
boolean :: Parser u Bool
|
boolean :: Parser u Bool
|
||||||
boolean = (string "true" *> return True) <|> (string "false" *> return False)
|
boolean = (string "true" *> return True) <|> (string "false" *> return False)
|
||||||
|
|
||||||
number :: Parser u Float
|
--
|
||||||
number = read <$> (mappend <$> sign <*> (integerPart <|> ('0':) <$> floatPart))
|
-- Number
|
||||||
|
--
|
||||||
|
newtype Number = Number Float deriving Show
|
||||||
|
|
||||||
|
instance Output Number where
|
||||||
|
output (Number f) = Output.string $
|
||||||
|
case properFraction f of
|
||||||
|
(n, 0) -> printf "%d" (n :: Int)
|
||||||
|
_ -> printf "%f" f
|
||||||
|
|
||||||
|
number :: Parser u Number
|
||||||
|
number = Number . read <$>
|
||||||
|
(mappend <$> sign <*> (integerPart <|> ('0':) <$> floatPart))
|
||||||
where
|
where
|
||||||
sign = string "-" <|> option "" (char '+' >> return "")
|
sign = string "-" <|> option "" (char '+' >> return "")
|
||||||
integerPart = mappend <$> many1 digit <*> option "" floatPart
|
integerPart = mappend <$> many1 digit <*> option "" floatPart
|
||||||
floatPart = (:) <$> char '.' <*> (option "0" $ many1 digit)
|
floatPart = (:) <$> char '.' <*> (option "0" $ many1 digit)
|
||||||
|
|
||||||
stringObj :: Parser u StringObj
|
--
|
||||||
stringObj =
|
-- StringObject
|
||||||
|
--
|
||||||
|
data StringObject = Literal String | Hexadecimal String deriving Show
|
||||||
|
|
||||||
|
instance Output StringObject where
|
||||||
|
output (Literal s) = Output.string (printf "(%s)" s)
|
||||||
|
output (Hexadecimal s) = Output.string (printf "<%s>" s)
|
||||||
|
|
||||||
|
stringObject :: Parser u StringObject
|
||||||
|
stringObject =
|
||||||
Literal <$> (char '(' *> (concat <$> many literalStringBlock) <* char ')')
|
Literal <$> (char '(' *> (concat <$> many literalStringBlock) <* char ')')
|
||||||
<|> Hexadecimal <$> (char '<' *> many hexDigit <* char '>')
|
<|> Hexadecimal <$> (char '<' *> many hexDigit <* char '>')
|
||||||
where
|
where
|
||||||
|
@ -213,12 +117,36 @@ stringObj =
|
||||||
escapedChar = (:) <$> char '\\' <*> ((:[]) <$> oneOf "nrtbf()\\" <|> octalCode)
|
escapedChar = (:) <$> char '\\' <*> ((:[]) <$> oneOf "nrtbf()\\" <|> octalCode)
|
||||||
octalCode = choice $ (\n -> count n octDigit) <$> [1..3]
|
octalCode = choice $ (\n -> count n octDigit) <$> [1..3]
|
||||||
|
|
||||||
name :: Parser u String
|
--
|
||||||
name = char '/' *> many regular
|
-- Name
|
||||||
|
--
|
||||||
|
newtype Name = Name String deriving (Eq, Ord, Show)
|
||||||
|
|
||||||
|
instance Output Name where
|
||||||
|
output (Name n) = "/" `mappend` Output.string n
|
||||||
|
|
||||||
|
name :: Parser u Name
|
||||||
|
name = Name <$> (char '/' *> many regular)
|
||||||
|
|
||||||
|
--
|
||||||
|
-- Array
|
||||||
|
--
|
||||||
array :: Parser u [DirectObject]
|
array :: Parser u [DirectObject]
|
||||||
array = char '[' *> blank *> directObject `endBy` blank <* char ']'
|
array = char '[' *> blank *> directObject `endBy` blank <* char ']'
|
||||||
|
|
||||||
|
--
|
||||||
|
-- Dictionary
|
||||||
|
--
|
||||||
|
type Dictionary = Map Name DirectObject
|
||||||
|
|
||||||
|
instance Output Dictionary where
|
||||||
|
output dict =
|
||||||
|
"<<" `mappend` keyValues `mappend` ">>"
|
||||||
|
where
|
||||||
|
keyValues = join " " $ outputKeyVal <$> Map.toList dict
|
||||||
|
outputKeyVal :: (Name, DirectObject) -> OBuilder
|
||||||
|
outputKeyVal (key, val) = output key `mappend` " " `mappend` output val
|
||||||
|
|
||||||
dictionary :: Parser u Dictionary
|
dictionary :: Parser u Dictionary
|
||||||
dictionary =
|
dictionary =
|
||||||
try (string "<<" *> blank *> keyValPairs <* string ">>")
|
try (string "<<" *> blank *> keyValPairs <* string ">>")
|
||||||
|
@ -226,12 +154,114 @@ dictionary =
|
||||||
keyVal = (,) <$> name <* blank <*> directObject
|
keyVal = (,) <$> name <* blank <*> directObject
|
||||||
keyValPairs = Map.fromList <$> keyVal `endBy` blank
|
keyValPairs = Map.fromList <$> keyVal `endBy` blank
|
||||||
|
|
||||||
|
--
|
||||||
|
-- Null
|
||||||
|
--
|
||||||
nullObject :: Parser u ()
|
nullObject :: Parser u ()
|
||||||
nullObject = string "null" *> return ()
|
nullObject = string "null" *> return ()
|
||||||
|
|
||||||
|
--
|
||||||
|
-- Reference
|
||||||
|
--
|
||||||
|
data IndirectObjCoordinates = IndirectObjCoordinates {
|
||||||
|
objectId :: Int
|
||||||
|
, versionNumber :: Int
|
||||||
|
} deriving Show
|
||||||
|
|
||||||
reference :: Parser u IndirectObjCoordinates
|
reference :: Parser u IndirectObjCoordinates
|
||||||
reference = IndirectObjCoordinates <$> integer <*> integer <* char 'R'
|
reference = IndirectObjCoordinates <$> integer <*> integer <* char 'R'
|
||||||
|
|
||||||
|
--
|
||||||
|
-- DirectObject
|
||||||
|
--
|
||||||
|
data DirectObject =
|
||||||
|
Boolean Bool
|
||||||
|
| NumberObject Number
|
||||||
|
| StringObject StringObject
|
||||||
|
| NameObject Name
|
||||||
|
| Array [DirectObject]
|
||||||
|
| Dictionary Dictionary
|
||||||
|
| Null
|
||||||
|
| Reference IndirectObjCoordinates
|
||||||
|
deriving Show
|
||||||
|
|
||||||
|
instance Output DirectObject where
|
||||||
|
output (Boolean b) = output b
|
||||||
|
output (NumberObject n) = output n
|
||||||
|
output (StringObject s) = output s
|
||||||
|
output (NameObject n) = output n
|
||||||
|
output (Array a) = "[" `mappend` join " " a `mappend` "]"
|
||||||
|
output (Dictionary d) = output d
|
||||||
|
output (Null) = "null"
|
||||||
|
output (Reference (IndirectObjCoordinates {objectId, versionNumber})) =
|
||||||
|
Output.string (printf "%d %d R" objectId versionNumber)
|
||||||
|
|
||||||
|
directObject :: Parser u DirectObject
|
||||||
|
directObject =
|
||||||
|
Boolean <$> try boolean
|
||||||
|
<|> Reference <$> try reference {- defined before Number because Number is a prefix of it -}
|
||||||
|
<|> NumberObject <$> try number
|
||||||
|
<|> StringObject <$> try stringObject
|
||||||
|
<|> NameObject <$> try name
|
||||||
|
<|> Array <$> try array
|
||||||
|
<|> Dictionary <$> try dictionary
|
||||||
|
<|> const Null <$> try nullObject
|
||||||
|
|
||||||
|
--
|
||||||
|
-- Object
|
||||||
|
--
|
||||||
|
data Object =
|
||||||
|
Direct DirectObject
|
||||||
|
| Stream {
|
||||||
|
header :: Dictionary
|
||||||
|
, streamContent :: ByteString
|
||||||
|
}
|
||||||
|
deriving Show
|
||||||
|
|
||||||
|
instance Output Object where
|
||||||
|
output (Direct d) = output d
|
||||||
|
output (Stream {header, streamContent}) =
|
||||||
|
output header
|
||||||
|
`nextLine` "stream"
|
||||||
|
`nextLine` byteString streamContent
|
||||||
|
`mappend` "endstream"
|
||||||
|
|
||||||
|
|
||||||
|
--
|
||||||
|
-- Occurrence
|
||||||
|
--
|
||||||
|
data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show
|
||||||
|
|
||||||
|
outputOccurrence :: Map Int Object -> Occurrence -> OBuilder
|
||||||
|
outputOccurrence _ (Comment c) =
|
||||||
|
Output.string (printf "%%%s" c) `mappend` newLine
|
||||||
|
outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) =
|
||||||
|
saveOffset (ObjectId objectId)
|
||||||
|
>> Output.string (printf "%d %d obj" objectId versionNumber)
|
||||||
|
`nextLine` output (objects ! objectId)
|
||||||
|
`nextLine` "endobj" `mappend` newLine
|
||||||
|
|
||||||
|
-------------------------------------
|
||||||
|
-- XREF TABLE
|
||||||
|
-------------------------------------
|
||||||
|
|
||||||
|
--
|
||||||
|
-- XRefEntry
|
||||||
|
--
|
||||||
|
data XRefEntry = InUse {
|
||||||
|
offset :: Int64
|
||||||
|
, generation :: Int
|
||||||
|
} | Free {
|
||||||
|
nextFree :: Int64
|
||||||
|
, generation :: Int
|
||||||
|
} deriving Show
|
||||||
|
|
||||||
|
instance Output XRefEntry where
|
||||||
|
output (InUse {offset, generation}) =
|
||||||
|
Output.string (printf "%010d %05d n " offset generation) `mappend` newLine
|
||||||
|
output (Free {nextFree, generation}) =
|
||||||
|
Output.string (printf "%010d %05d f " nextFree generation) `mappend` newLine
|
||||||
|
|
||||||
entry :: Parser u XRefEntry
|
entry :: Parser u XRefEntry
|
||||||
entry = do
|
entry = do
|
||||||
(big, small) <- (,) <$> integer <*> integer
|
(big, small) <- (,) <$> integer <*> integer
|
||||||
|
@ -242,15 +272,83 @@ entry = do
|
||||||
free :: Int64 -> Int -> Parser u XRefEntry
|
free :: Int64 -> Int -> Parser u XRefEntry
|
||||||
free nextFree generation = char 'f' *> return (Free {nextFree, generation})
|
free nextFree generation = char 'f' *> return (Free {nextFree, generation})
|
||||||
|
|
||||||
|
|
||||||
|
--
|
||||||
|
-- XRefSubSection
|
||||||
|
--
|
||||||
|
data XRefSubSection = XRefSubSection {
|
||||||
|
firstObjectId :: Int
|
||||||
|
, entriesNumber :: Int
|
||||||
|
, entries :: Map Int XRefEntry
|
||||||
|
} deriving Show
|
||||||
|
|
||||||
|
instance Output XRefSubSection where
|
||||||
|
output (XRefSubSection {firstObjectId, entriesNumber, entries}) =
|
||||||
|
Output.string (printf "%d %d" firstObjectId entriesNumber)
|
||||||
|
`nextLine` output (Map.elems entries)
|
||||||
|
|
||||||
xrefSubSection :: Parser u XRefSubSection
|
xrefSubSection :: Parser u XRefSubSection
|
||||||
xrefSubSection = do
|
xrefSubSection = do
|
||||||
(firstObjectId, entriesNumber) <- (,) <$> integer <*> integer
|
(firstObjectId, entriesNumber) <- (,) <$> integer <*> integer
|
||||||
entries <- Map.fromList . zip [firstObjectId..] <$> count entriesNumber entry
|
entries <- Map.fromList . zip [firstObjectId..] <$> count entriesNumber entry
|
||||||
return $ XRefSubSection {firstObjectId, entriesNumber, entries}
|
return $ XRefSubSection {firstObjectId, entriesNumber, entries}
|
||||||
|
|
||||||
content :: Parser u Content
|
type XRefSection = [XRefSubSection]
|
||||||
content =
|
|
||||||
Content 0 [] Map.empty
|
--
|
||||||
|
-- Structure
|
||||||
|
--
|
||||||
|
data Structure = Structure {
|
||||||
|
startOffset :: Int64
|
||||||
|
, xrefSection :: XRefSection
|
||||||
|
, inputTrailer :: Dictionary
|
||||||
|
--, startXRef :: Int64
|
||||||
|
} deriving Show
|
||||||
|
|
||||||
|
structure :: Parser u Structure
|
||||||
|
structure =
|
||||||
|
Structure 0
|
||||||
<$> (line "xref" *> xrefSubSection `sepBy` EOL.parser)
|
<$> (line "xref" *> xrefSubSection `sepBy` EOL.parser)
|
||||||
<*> (line "trailer" *> dictionary <* EOL.parser)
|
<*> (line "trailer" *> dictionary <* EOL.parser)
|
||||||
<*> (line "startxref" *> integer)
|
-- <*> (line "startxref" *> integer)
|
||||||
|
|
||||||
|
xrefFromOffsets :: Map Offset Int64 -> (XRefSection, Int64)
|
||||||
|
xrefFromOffsets offsets = (
|
||||||
|
[]
|
||||||
|
, offsets ! StartXRef
|
||||||
|
)
|
||||||
|
|
||||||
|
--
|
||||||
|
-- Flow
|
||||||
|
--
|
||||||
|
data Flow = Flow {
|
||||||
|
occurrencesStack :: [Occurrence]
|
||||||
|
, tmpObjects :: Map Int Object
|
||||||
|
} deriving Show
|
||||||
|
|
||||||
|
--
|
||||||
|
-- Content
|
||||||
|
--
|
||||||
|
data Content = Content {
|
||||||
|
occurrences :: [Occurrence]
|
||||||
|
, objects :: Map Int Object
|
||||||
|
, trailer :: Dictionary
|
||||||
|
} deriving Show
|
||||||
|
|
||||||
|
outputBody :: ([Occurrence], Map Int Object) -> OBuilder
|
||||||
|
outputBody (occurrences, objects) =
|
||||||
|
output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef
|
||||||
|
|
||||||
|
instance Output Content where
|
||||||
|
output (Content {occurrences, objects, trailer}) =
|
||||||
|
fmap xrefFromOffsets <$> getOffsets (outputBody (occurrences, objects))
|
||||||
|
>>= \(body, (xref, startXRef)) ->
|
||||||
|
body
|
||||||
|
`mappend` "xref"
|
||||||
|
`nextLine` "trailer"
|
||||||
|
--`nextLine` output xrefSection
|
||||||
|
--`mappend` "trailer"
|
||||||
|
`nextLine` output trailer
|
||||||
|
`nextLine` "startxref"
|
||||||
|
`nextLine` (Output.string (printf "%d" startXRef))
|
||||||
|
`nextLine` byteString eofMarker
|
||||||
|
|
|
@ -1,42 +1,82 @@
|
||||||
{-# LANGUAGE CPP #-}
|
{-# LANGUAGE CPP #-}
|
||||||
|
{-# LANGUAGE MultiParamTypeClasses #-}
|
||||||
|
{-# LANGUAGE FlexibleInstances #-}
|
||||||
|
{-# LANGUAGE AllowAmbiguousTypes #-}
|
||||||
module PDF.Output (
|
module PDF.Output (
|
||||||
OBuilder(..)
|
OBuilder
|
||||||
|
, OContext(..)
|
||||||
|
, Offset(..)
|
||||||
, Output(..)
|
, Output(..)
|
||||||
, byteString
|
, byteString
|
||||||
, char
|
, char
|
||||||
|
, getOffsets
|
||||||
, join
|
, join
|
||||||
, lift
|
, lift
|
||||||
, newLine
|
, newLine
|
||||||
, nextLine
|
, nextLine
|
||||||
|
, saveOffset
|
||||||
, string
|
, string
|
||||||
, render
|
, render
|
||||||
) where
|
) where
|
||||||
|
|
||||||
|
--import Data.ByteString.Builder (Builder, char8, lazyByteString, string8)
|
||||||
import Data.ByteString.Builder (Builder, char8, lazyByteString, string8, toLazyByteString)
|
import Data.ByteString.Builder (Builder, char8, lazyByteString, string8, toLazyByteString)
|
||||||
import Data.ByteString.Lazy.Char8 (ByteString)
|
import Data.ByteString.Lazy.Char8 (ByteString)
|
||||||
|
import qualified Data.ByteString.Lazy.Char8 as BS (length)
|
||||||
|
import Data.Int (Int64)
|
||||||
|
import Data.Map (Map)
|
||||||
|
import qualified Data.Map as Map (singleton)
|
||||||
-- #if MIN_VERSION_base(4,9,0)
|
-- #if MIN_VERSION_base(4,9,0)
|
||||||
-- import qualified Data.Semigroup as Sem
|
-- import qualified Data.Semigroup as Sem
|
||||||
-- #endif
|
-- #endif
|
||||||
import Data.String (IsString(..))
|
import Data.String (IsString(..))
|
||||||
import Control.Monad.Reader (MonadReader(..), Reader, runReader)
|
--import Control.Monad.Reader (MonadReader(..), Reader, runReader)
|
||||||
|
--import Control.Monad.Reader (MonadReader(..), Reader, runReader)
|
||||||
|
--import Control.Monad.Reader (MonadReader(..), Reader, runReader)
|
||||||
|
import Control.Monad.RWS (RWS, runRWS, ask, get, listen, modify, tell)
|
||||||
import qualified PDF.EOL as EOL (Style(..))
|
import qualified PDF.EOL as EOL (Style(..))
|
||||||
|
|
||||||
newtype OBuilder = OBuilder (Reader EOL.Style Builder)
|
data Offset = StartXRef | ObjectId Int deriving (Eq, Ord)
|
||||||
|
|
||||||
|
{-
|
||||||
|
incrOffset :: (Int64 -> Int64) -> OutputState
|
||||||
|
-}
|
||||||
|
|
||||||
|
newtype OContext a = OContext (RWS EOL.Style (Map Offset Int64) Int64 a)
|
||||||
|
type OBuilder = OContext Builder
|
||||||
|
|
||||||
|
instance Functor OContext where
|
||||||
|
fmap f (OContext oc) = OContext $ fmap f oc
|
||||||
|
|
||||||
|
instance Applicative OContext where
|
||||||
|
pure = OContext . pure
|
||||||
|
(<*>) (OContext f) (OContext a) = OContext (f <*> a)
|
||||||
|
|
||||||
|
instance Monad OContext where
|
||||||
|
(>>=) (OContext a) f = OContext (a >>= (\x -> let OContext y = f x in y))
|
||||||
|
|
||||||
|
saveOffset :: Offset -> OContext ()
|
||||||
|
saveOffset offset = OContext $
|
||||||
|
get >>= tell . Map.singleton offset
|
||||||
|
|
||||||
lift :: (a -> Builder) -> a -> OBuilder
|
lift :: (a -> Builder) -> a -> OBuilder
|
||||||
lift f a = OBuilder $ return (f a)
|
lift f a = return (f a)
|
||||||
|
|
||||||
|
getOffsets :: OBuilder -> OContext (OBuilder, Map Offset Int64)
|
||||||
|
getOffsets (OContext builder) =
|
||||||
|
OContext (listen builder >>= \(a, w) -> return (return a, w))
|
||||||
|
|
||||||
append :: OBuilder -> OBuilder -> OBuilder
|
append :: OBuilder -> OBuilder -> OBuilder
|
||||||
append (OBuilder a) (OBuilder b) = OBuilder (mappend <$> a <*> b)
|
append (OContext a) (OContext b) = OContext (mappend <$> a <*> b)
|
||||||
|
|
||||||
#if MIN_VERSION_base(4,9,0)
|
#if MIN_VERSION_base(4,11,0)
|
||||||
instance Semigroup OBuilder where
|
instance Semigroup OBuilder where
|
||||||
(<>) = append
|
(<>) = append
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
instance Monoid OBuilder where
|
instance Monoid OBuilder where
|
||||||
mempty = OBuilder (return mempty)
|
mempty = OContext (return mempty)
|
||||||
#if !(MIN_VERSION_base(4,9,0))
|
#if !(MIN_VERSION_base(4,11,0))
|
||||||
mappend = append
|
mappend = append
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -63,25 +103,26 @@ join separator (a:as) =
|
||||||
output a `mappend` string separator `mappend` (join separator as)
|
output a `mappend` string separator `mappend` (join separator as)
|
||||||
|
|
||||||
newLine :: OBuilder
|
newLine :: OBuilder
|
||||||
newLine = OBuilder $ buildEOL <$> ask
|
newLine = OContext $ buildEOL =<< ask
|
||||||
where
|
where
|
||||||
buildEOL EOL.CR = char8 '\r'
|
buildEOL EOL.CR = return (char8 '\r') <* modify (+1)
|
||||||
buildEOL EOL.LF = char8 '\n'
|
buildEOL EOL.LF = return (char8 '\n') <* modify (+1)
|
||||||
buildEOL EOL.CRLF = string8 "\r\n"
|
buildEOL EOL.CRLF = return (string8 "\r\n") <* modify (+2)
|
||||||
|
|
||||||
nextLine :: OBuilder -> OBuilder -> OBuilder
|
nextLine :: OBuilder -> OBuilder -> OBuilder
|
||||||
nextLine a b = a `mappend` newLine `mappend` b
|
nextLine a b = a `mappend` newLine `mappend` b
|
||||||
|
|
||||||
char :: Char -> OBuilder
|
char :: Char -> OBuilder
|
||||||
char = lift char8
|
char c = lift char8 c <* OContext (modify (+1))
|
||||||
|
|
||||||
string :: String -> OBuilder
|
string :: String -> OBuilder
|
||||||
string = lift string8
|
string s = lift string8 s <* OContext (modify (+ toEnum (length s)))
|
||||||
|
|
||||||
byteString :: ByteString -> OBuilder
|
byteString :: ByteString -> OBuilder
|
||||||
byteString = lift lazyByteString
|
byteString bs = lift lazyByteString bs <* OContext (modify (+ BS.length bs))
|
||||||
|
|
||||||
render :: Output a => EOL.Style -> a -> ByteString
|
render :: Output a => EOL.Style -> a -> ByteString
|
||||||
render eolStyle a =
|
render eolStyle a =
|
||||||
let OBuilder r = output a in
|
let OContext builder = output a in
|
||||||
toLazyByteString $ runReader r eolStyle
|
let (outputByteString, _, _) = runRWS builder eolStyle 0 in
|
||||||
|
toLazyByteString outputByteString
|
||||||
|
|
Loading…
Reference in a new issue