From e23618da68ad3618ade1fc81f49faefd1f774075 Mon Sep 17 00:00:00 2001 From: Tissevert Date: Thu, 16 May 2019 22:41:14 +0200 Subject: [PATCH] Implement output --- src/PDF.hs | 50 ++++++++++++++++++++++++++++++----------------- src/PDF/Body.hs | 5 +++-- src/PDF/EOL.hs | 2 +- src/PDF/Object.hs | 32 ++++++++++++++++++++++-------- src/PDF/Output.hs | 3 --- 5 files changed, 60 insertions(+), 32 deletions(-) diff --git a/src/PDF.hs b/src/PDF.hs index a85de33..d2fad68 100644 --- a/src/PDF.hs +++ b/src/PDF.hs @@ -1,10 +1,11 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE NamedFieldPuns #-} module PDF ( - parseDocument - , Document(..) + Document(..) , Content(..) , DirectObject(..) + , parseDocument + , render ) where import Data.ByteString.Lazy.Char8 (ByteString) @@ -14,20 +15,33 @@ import qualified Data.ByteString.Lazy.Char8 as BS ( import Data.ByteString.Lazy.Char8.Util (previous, subBS) import Data.Int (Int64) import qualified Data.Map as Map (lookup) -import PDF.Object (Content(..), DirectObject(..), EOLStyle(..), content, eol, eolCharset) import PDF.Body (populate) +import qualified PDF.EOL as EOL (Style(..), charset, parser) +import PDF.Object (Content(..), DirectObject(..), content, eofMarker, magicNumber) +import qualified PDF.Output as Output (render, string) +import PDF.Output (Output(..), nextLine) import Text.Parsec import Text.Parsec.ByteString.Lazy (Parser) import Text.Parsec.Pos (newPos) import Text.ParserCombinators.Parsec.Error (Message(..), newErrorMessage) +import Text.Printf (printf) data Document = Document { pdfVersion :: String + , eolStyle :: EOL.Style , contents :: [Content] } deriving Show +instance Output Document where + output (Document {pdfVersion, contents}) = + Output.string (printf "%%PDF-%s" pdfVersion) + `nextLine` output contents + +render :: Document -> ByteString +render document@(Document {eolStyle}) = Output.render eolStyle document + version :: Parser String -version = string magicNumber *> many (noneOf eolCharset) +version = string magicNumber *> many (noneOf EOL.charset) parseError :: String -> Either ParseError a parseError errorMessage = @@ -36,15 +50,15 @@ parseError errorMessage = check :: Bool -> String -> Either ParseError () check test errorMessage = if test then return () else parseError errorMessage -readStartXref :: EOLStyle -> ByteString -> Either ParseError Int64 +readStartXref :: EOL.Style -> ByteString -> Either ParseError Int64 readStartXref eolStyle input = check (eofMarker `BS.isPrefixOf` (BS.drop eofMarkerPosition input)) "Badly formed document : missing EOF marker at the end" >> return (read . BS.unpack $ subBS startXrefPosition startXrefLength input) where (eolOffset, eolLastByte) = case eolStyle of - CRLF -> (2, '\n') - CR -> (1, '\r') + EOL.CRLF -> (2, '\n') + EOL.CR -> (1, '\r') _ -> (1, '\n') eofMarkerPosition = BS.length input - BS.length eofMarker @@ -53,24 +67,24 @@ readStartXref eolStyle input = previous eolLastByte (eofMarkerPosition - eolOffset - 1) input + 1 startXrefLength = eofMarkerPosition - eolOffset - startXrefPosition -nextLine :: ByteString -> Int64 -nextLine input = +findNextLine :: ByteString -> Int64 +findNextLine input = let (line, eolPrefixed) = BS.span notInEol input in let nextNotInEol = BS.findIndex notInEol eolPrefixed in BS.length line + (maybe (BS.length eolPrefixed) id nextNotInEol) where - notInEol = not . (`elem` eolCharset) + notInEol = not . (`elem` EOL.charset) -nextSection :: Int64 -> ByteString -> Int64 -nextSection offset input = +findNextSection :: Int64 -> ByteString -> Int64 +findNextSection offset input = case BS.findIndex (== BS.head eofMarker) input of Nothing -> 0 Just delta -> let newInput = BS.drop delta input in let newOffset = offset + delta in if BS.isPrefixOf eofMarker newInput - then newOffset + nextLine newInput - else nextSection (newOffset + 1) (BS.drop 1 newInput) + then newOffset + findNextLine newInput + else findNextSection (newOffset + 1) (BS.drop 1 newInput) iterateContents :: Int64 -> ByteString -> Either ParseError [Content] iterateContents startXref input = @@ -78,17 +92,17 @@ iterateContents startXref input = where stopOrFollow c@(Content {trailer}) = case Map.lookup "Prev" trailer of - Nothing -> Right [c {startOffset = nextLine input}] + Nothing -> Right [c {startOffset = findNextLine input}] Just (Number newStartXref) -> let offset = truncate newStartXref in - let startOffset = nextSection offset (BS.drop offset input) in + let startOffset = findNextSection offset (BS.drop offset input) in (c {startOffset}:) <$> (iterateContents offset input) Just v -> parseError $ "Bad value for Prev entry in trailer: " ++ show v parseDocument :: ByteString -> Either ParseError Document parseDocument input = do - (pdfVersion, eolStyle) <- parse ((,) <$> version <*> eol) "" input + (pdfVersion, eolStyle) <- parse ((,) <$> version <*> EOL.parser) "" input startXref <- readStartXref eolStyle input structures <- iterateContents startXref input let contents = populate input <$> structures - return $ Document {pdfVersion, contents} + return $ Document {pdfVersion, eolStyle, contents} diff --git a/src/PDF/Body.hs b/src/PDF/Body.hs index cf8b836..46da7b8 100644 --- a/src/PDF/Body.hs +++ b/src/PDF/Body.hs @@ -8,10 +8,11 @@ import qualified Data.ByteString.Lazy.Char8 as BS (drop, pack) import Data.Int (Int64) import Data.Map ((!)) import qualified Data.Map as Map (insert, lookup) +import qualified PDF.EOL as EOL (charset, parser) import PDF.Object ( Content(..), DirectObject(..), IndirectObjCoordinates(..), Object(..) , Occurrence(..), Parser, XRefEntry(..), XRefSection, XRefSubSection(..) - , blank, eol, eolCharset, dictionary, directObject, integer, line + , blank, dictionary, directObject, integer, line ) import Text.Parsec @@ -36,7 +37,7 @@ pushOccurrence newOccurrence = modifyContent $ \content -> content { } comment :: Parser u String -comment = char '%' *> many (noneOf eolCharset) <* eol +comment = char '%' *> many (noneOf EOL.charset) <* EOL.parser lookupOffset :: Int -> XRefSection -> Maybe Int64 lookupOffset _ [] = Nothing diff --git a/src/PDF/EOL.hs b/src/PDF/EOL.hs index 5a36c3a..4d88751 100644 --- a/src/PDF/EOL.hs +++ b/src/PDF/EOL.hs @@ -7,7 +7,7 @@ module PDF.EOL ( import Data.ByteString.Lazy.Char8 (ByteString) import Text.Parsec ((<|>), Parsec, string, try) -data Style = CR | LF | CRLF +data Style = CR | LF | CRLF deriving Show charset :: String charset = "\r\n" diff --git a/src/PDF/Object.hs b/src/PDF/Object.hs index 313c748..9fa4068 100644 --- a/src/PDF/Object.hs +++ b/src/PDF/Object.hs @@ -23,11 +23,11 @@ module PDF.Object ( import Data.ByteString.Lazy.Char8 (ByteString) import Data.Int (Int64) -import Data.Map (Map) +import Data.Map (Map, (!)) import qualified Data.Map as Map (empty, elems, fromList, toList) import qualified PDF.EOL as EOL (charset, parser) import qualified PDF.Output as Output (string) -import PDF.Output (Output(..), byteString, join, newLine, nextLine) +import PDF.Output (OBuilder, Output(..), byteString, join, newLine, nextLine) import Text.Parsec import Text.Printf (printf) @@ -60,12 +60,18 @@ data DirectObject = | Reference IndirectObjCoordinates deriving Show +outputFloat :: Float -> OBuilder +outputFloat f = Output.string $ + case properFraction f of + (n, 0) -> printf "%d" (n :: Int) + _ -> printf "%f" f + instance Output DirectObject where output (Boolean b) = output b - output (Number n) = output n + output (Number n) = outputFloat n output (String s) = output s output (Name n) = "/" `mappend` Output.string n - output (Array a) = join " " a + output (Array a) = "[" `mappend` join " " a `mappend` "]" output (Dictionary d) = output d output (Null) = "null" output (Reference (IndirectObjCoordinates {objectId, versionNumber})) = @@ -85,7 +91,7 @@ instance Output Object where output header `nextLine` "stream" `nextLine` byteString streamContent - `mappend` "endstream" `mappend` newLine + `mappend` "endstream" data IndirectObjCoordinates = IndirectObjCoordinates { objectId :: Int @@ -94,6 +100,14 @@ data IndirectObjCoordinates = IndirectObjCoordinates { data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show +outputOccurrence :: Map Int Object -> Occurrence -> OBuilder +outputOccurrence _ (Comment c) = + Output.string (printf "%%%s" c) `mappend` newLine +outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) = + Output.string (printf "%d %d obj" objectId versionNumber) + `nextLine` output (objects ! objectId) + `nextLine` "endobj" `mappend` newLine + data XRefEntry = InUse { offset :: Int64 , generation :: Int @@ -117,7 +131,7 @@ data XRefSubSection = XRefSubSection { instance Output XRefSubSection where output (XRefSubSection {firstObjectId, entriesNumber, entries}) = Output.string (printf "%d %d" firstObjectId entriesNumber) - `nextLine` output (Map.elems entries) `mappend` newLine + `nextLine` output (Map.elems entries) type XRefSection = [XRefSubSection] @@ -131,9 +145,11 @@ data Content = Content { } deriving Show instance Output Content where - output (Content {xrefSection, startXrefPosition}) = - output xrefSection + output (Content {body, objects, trailer, xrefSection, startXrefPosition}) = + output (outputOccurrence objects <$> body) + `mappend` output xrefSection `mappend` "trailer" + `nextLine` output trailer `nextLine` "startxref" `nextLine` Output.string (printf "%d" startXrefPosition) `nextLine` byteString eofMarker diff --git a/src/PDF/Output.hs b/src/PDF/Output.hs index d253332..a4a3762 100644 --- a/src/PDF/Output.hs +++ b/src/PDF/Output.hs @@ -53,9 +53,6 @@ instance Output Bool where output False = string "false" output True = string "true" -instance Output Float where - output = string . show - instance Output a => Output [a] where output = foldl mappend mempty . fmap output