Implement output

This commit is contained in:
Tissevert 2019-05-16 22:41:14 +02:00
parent 088637b2c0
commit e23618da68
5 changed files with 60 additions and 32 deletions

View file

@ -1,10 +1,11 @@
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE NamedFieldPuns #-}
module PDF ( module PDF (
parseDocument Document(..)
, Document(..)
, Content(..) , Content(..)
, DirectObject(..) , DirectObject(..)
, parseDocument
, render
) where ) where
import Data.ByteString.Lazy.Char8 (ByteString) import Data.ByteString.Lazy.Char8 (ByteString)
@ -14,20 +15,33 @@ import qualified Data.ByteString.Lazy.Char8 as BS (
import Data.ByteString.Lazy.Char8.Util (previous, subBS) import Data.ByteString.Lazy.Char8.Util (previous, subBS)
import Data.Int (Int64) import Data.Int (Int64)
import qualified Data.Map as Map (lookup) import qualified Data.Map as Map (lookup)
import PDF.Object (Content(..), DirectObject(..), EOLStyle(..), content, eol, eolCharset)
import PDF.Body (populate) import PDF.Body (populate)
import qualified PDF.EOL as EOL (Style(..), charset, parser)
import PDF.Object (Content(..), DirectObject(..), content, eofMarker, magicNumber)
import qualified PDF.Output as Output (render, string)
import PDF.Output (Output(..), nextLine)
import Text.Parsec import Text.Parsec
import Text.Parsec.ByteString.Lazy (Parser) import Text.Parsec.ByteString.Lazy (Parser)
import Text.Parsec.Pos (newPos) import Text.Parsec.Pos (newPos)
import Text.ParserCombinators.Parsec.Error (Message(..), newErrorMessage) import Text.ParserCombinators.Parsec.Error (Message(..), newErrorMessage)
import Text.Printf (printf)
data Document = Document { data Document = Document {
pdfVersion :: String pdfVersion :: String
, eolStyle :: EOL.Style
, contents :: [Content] , contents :: [Content]
} deriving Show } deriving Show
instance Output Document where
output (Document {pdfVersion, contents}) =
Output.string (printf "%%PDF-%s" pdfVersion)
`nextLine` output contents
render :: Document -> ByteString
render document@(Document {eolStyle}) = Output.render eolStyle document
version :: Parser String version :: Parser String
version = string magicNumber *> many (noneOf eolCharset) version = string magicNumber *> many (noneOf EOL.charset)
parseError :: String -> Either ParseError a parseError :: String -> Either ParseError a
parseError errorMessage = parseError errorMessage =
@ -36,15 +50,15 @@ parseError errorMessage =
check :: Bool -> String -> Either ParseError () check :: Bool -> String -> Either ParseError ()
check test errorMessage = if test then return () else parseError errorMessage check test errorMessage = if test then return () else parseError errorMessage
readStartXref :: EOLStyle -> ByteString -> Either ParseError Int64 readStartXref :: EOL.Style -> ByteString -> Either ParseError Int64
readStartXref eolStyle input = readStartXref eolStyle input =
check (eofMarker `BS.isPrefixOf` (BS.drop eofMarkerPosition input)) check (eofMarker `BS.isPrefixOf` (BS.drop eofMarkerPosition input))
"Badly formed document : missing EOF marker at the end" "Badly formed document : missing EOF marker at the end"
>> return (read . BS.unpack $ subBS startXrefPosition startXrefLength input) >> return (read . BS.unpack $ subBS startXrefPosition startXrefLength input)
where where
(eolOffset, eolLastByte) = case eolStyle of (eolOffset, eolLastByte) = case eolStyle of
CRLF -> (2, '\n') EOL.CRLF -> (2, '\n')
CR -> (1, '\r') EOL.CR -> (1, '\r')
_ -> (1, '\n') _ -> (1, '\n')
eofMarkerPosition = eofMarkerPosition =
BS.length input - BS.length eofMarker BS.length input - BS.length eofMarker
@ -53,24 +67,24 @@ readStartXref eolStyle input =
previous eolLastByte (eofMarkerPosition - eolOffset - 1) input + 1 previous eolLastByte (eofMarkerPosition - eolOffset - 1) input + 1
startXrefLength = eofMarkerPosition - eolOffset - startXrefPosition startXrefLength = eofMarkerPosition - eolOffset - startXrefPosition
nextLine :: ByteString -> Int64 findNextLine :: ByteString -> Int64
nextLine input = findNextLine input =
let (line, eolPrefixed) = BS.span notInEol input in let (line, eolPrefixed) = BS.span notInEol input in
let nextNotInEol = BS.findIndex notInEol eolPrefixed in let nextNotInEol = BS.findIndex notInEol eolPrefixed in
BS.length line + (maybe (BS.length eolPrefixed) id nextNotInEol) BS.length line + (maybe (BS.length eolPrefixed) id nextNotInEol)
where where
notInEol = not . (`elem` eolCharset) notInEol = not . (`elem` EOL.charset)
nextSection :: Int64 -> ByteString -> Int64 findNextSection :: Int64 -> ByteString -> Int64
nextSection offset input = findNextSection offset input =
case BS.findIndex (== BS.head eofMarker) input of case BS.findIndex (== BS.head eofMarker) input of
Nothing -> 0 Nothing -> 0
Just delta -> Just delta ->
let newInput = BS.drop delta input in let newInput = BS.drop delta input in
let newOffset = offset + delta in let newOffset = offset + delta in
if BS.isPrefixOf eofMarker newInput if BS.isPrefixOf eofMarker newInput
then newOffset + nextLine newInput then newOffset + findNextLine newInput
else nextSection (newOffset + 1) (BS.drop 1 newInput) else findNextSection (newOffset + 1) (BS.drop 1 newInput)
iterateContents :: Int64 -> ByteString -> Either ParseError [Content] iterateContents :: Int64 -> ByteString -> Either ParseError [Content]
iterateContents startXref input = iterateContents startXref input =
@ -78,17 +92,17 @@ iterateContents startXref input =
where where
stopOrFollow c@(Content {trailer}) = stopOrFollow c@(Content {trailer}) =
case Map.lookup "Prev" trailer of case Map.lookup "Prev" trailer of
Nothing -> Right [c {startOffset = nextLine input}] Nothing -> Right [c {startOffset = findNextLine input}]
Just (Number newStartXref) -> Just (Number newStartXref) ->
let offset = truncate newStartXref in let offset = truncate newStartXref in
let startOffset = nextSection offset (BS.drop offset input) in let startOffset = findNextSection offset (BS.drop offset input) in
(c {startOffset}:) <$> (iterateContents offset input) (c {startOffset}:) <$> (iterateContents offset input)
Just v -> parseError $ "Bad value for Prev entry in trailer: " ++ show v Just v -> parseError $ "Bad value for Prev entry in trailer: " ++ show v
parseDocument :: ByteString -> Either ParseError Document parseDocument :: ByteString -> Either ParseError Document
parseDocument input = do parseDocument input = do
(pdfVersion, eolStyle) <- parse ((,) <$> version <*> eol) "" input (pdfVersion, eolStyle) <- parse ((,) <$> version <*> EOL.parser) "" input
startXref <- readStartXref eolStyle input startXref <- readStartXref eolStyle input
structures <- iterateContents startXref input structures <- iterateContents startXref input
let contents = populate input <$> structures let contents = populate input <$> structures
return $ Document {pdfVersion, contents} return $ Document {pdfVersion, eolStyle, contents}

View file

@ -8,10 +8,11 @@ import qualified Data.ByteString.Lazy.Char8 as BS (drop, pack)
import Data.Int (Int64) import Data.Int (Int64)
import Data.Map ((!)) import Data.Map ((!))
import qualified Data.Map as Map (insert, lookup) import qualified Data.Map as Map (insert, lookup)
import qualified PDF.EOL as EOL (charset, parser)
import PDF.Object ( import PDF.Object (
Content(..), DirectObject(..), IndirectObjCoordinates(..), Object(..) Content(..), DirectObject(..), IndirectObjCoordinates(..), Object(..)
, Occurrence(..), Parser, XRefEntry(..), XRefSection, XRefSubSection(..) , Occurrence(..), Parser, XRefEntry(..), XRefSection, XRefSubSection(..)
, blank, eol, eolCharset, dictionary, directObject, integer, line , blank, dictionary, directObject, integer, line
) )
import Text.Parsec import Text.Parsec
@ -36,7 +37,7 @@ pushOccurrence newOccurrence = modifyContent $ \content -> content {
} }
comment :: Parser u String comment :: Parser u String
comment = char '%' *> many (noneOf eolCharset) <* eol comment = char '%' *> many (noneOf EOL.charset) <* EOL.parser
lookupOffset :: Int -> XRefSection -> Maybe Int64 lookupOffset :: Int -> XRefSection -> Maybe Int64
lookupOffset _ [] = Nothing lookupOffset _ [] = Nothing

View file

@ -7,7 +7,7 @@ module PDF.EOL (
import Data.ByteString.Lazy.Char8 (ByteString) import Data.ByteString.Lazy.Char8 (ByteString)
import Text.Parsec ((<|>), Parsec, string, try) import Text.Parsec ((<|>), Parsec, string, try)
data Style = CR | LF | CRLF data Style = CR | LF | CRLF deriving Show
charset :: String charset :: String
charset = "\r\n" charset = "\r\n"

View file

@ -23,11 +23,11 @@ module PDF.Object (
import Data.ByteString.Lazy.Char8 (ByteString) import Data.ByteString.Lazy.Char8 (ByteString)
import Data.Int (Int64) import Data.Int (Int64)
import Data.Map (Map) import Data.Map (Map, (!))
import qualified Data.Map as Map (empty, elems, fromList, toList) import qualified Data.Map as Map (empty, elems, fromList, toList)
import qualified PDF.EOL as EOL (charset, parser) import qualified PDF.EOL as EOL (charset, parser)
import qualified PDF.Output as Output (string) import qualified PDF.Output as Output (string)
import PDF.Output (Output(..), byteString, join, newLine, nextLine) import PDF.Output (OBuilder, Output(..), byteString, join, newLine, nextLine)
import Text.Parsec import Text.Parsec
import Text.Printf (printf) import Text.Printf (printf)
@ -60,12 +60,18 @@ data DirectObject =
| Reference IndirectObjCoordinates | Reference IndirectObjCoordinates
deriving Show deriving Show
outputFloat :: Float -> OBuilder
outputFloat f = Output.string $
case properFraction f of
(n, 0) -> printf "%d" (n :: Int)
_ -> printf "%f" f
instance Output DirectObject where instance Output DirectObject where
output (Boolean b) = output b output (Boolean b) = output b
output (Number n) = output n output (Number n) = outputFloat n
output (String s) = output s output (String s) = output s
output (Name n) = "/" `mappend` Output.string n output (Name n) = "/" `mappend` Output.string n
output (Array a) = join " " a output (Array a) = "[" `mappend` join " " a `mappend` "]"
output (Dictionary d) = output d output (Dictionary d) = output d
output (Null) = "null" output (Null) = "null"
output (Reference (IndirectObjCoordinates {objectId, versionNumber})) = output (Reference (IndirectObjCoordinates {objectId, versionNumber})) =
@ -85,7 +91,7 @@ instance Output Object where
output header output header
`nextLine` "stream" `nextLine` "stream"
`nextLine` byteString streamContent `nextLine` byteString streamContent
`mappend` "endstream" `mappend` newLine `mappend` "endstream"
data IndirectObjCoordinates = IndirectObjCoordinates { data IndirectObjCoordinates = IndirectObjCoordinates {
objectId :: Int objectId :: Int
@ -94,6 +100,14 @@ data IndirectObjCoordinates = IndirectObjCoordinates {
data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show
outputOccurrence :: Map Int Object -> Occurrence -> OBuilder
outputOccurrence _ (Comment c) =
Output.string (printf "%%%s" c) `mappend` newLine
outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) =
Output.string (printf "%d %d obj" objectId versionNumber)
`nextLine` output (objects ! objectId)
`nextLine` "endobj" `mappend` newLine
data XRefEntry = InUse { data XRefEntry = InUse {
offset :: Int64 offset :: Int64
, generation :: Int , generation :: Int
@ -117,7 +131,7 @@ data XRefSubSection = XRefSubSection {
instance Output XRefSubSection where instance Output XRefSubSection where
output (XRefSubSection {firstObjectId, entriesNumber, entries}) = output (XRefSubSection {firstObjectId, entriesNumber, entries}) =
Output.string (printf "%d %d" firstObjectId entriesNumber) Output.string (printf "%d %d" firstObjectId entriesNumber)
`nextLine` output (Map.elems entries) `mappend` newLine `nextLine` output (Map.elems entries)
type XRefSection = [XRefSubSection] type XRefSection = [XRefSubSection]
@ -131,9 +145,11 @@ data Content = Content {
} deriving Show } deriving Show
instance Output Content where instance Output Content where
output (Content {xrefSection, startXrefPosition}) = output (Content {body, objects, trailer, xrefSection, startXrefPosition}) =
output xrefSection output (outputOccurrence objects <$> body)
`mappend` output xrefSection
`mappend` "trailer" `mappend` "trailer"
`nextLine` output trailer
`nextLine` "startxref" `nextLine` "startxref"
`nextLine` Output.string (printf "%d" startXrefPosition) `nextLine` Output.string (printf "%d" startXrefPosition)
`nextLine` byteString eofMarker `nextLine` byteString eofMarker

View file

@ -53,9 +53,6 @@ instance Output Bool where
output False = string "false" output False = string "false"
output True = string "true" output True = string "true"
instance Output Float where
output = string . show
instance Output a => Output [a] where instance Output a => Output [a] where
output = foldl mappend mempty . fmap output output = foldl mappend mempty . fmap output