From 645466024a82988f6653fce5c7fe10bd9339eaa0 Mon Sep 17 00:00:00 2001 From: Tissevert Date: Thu, 16 May 2019 17:04:45 +0200 Subject: [PATCH] Starting to implement output with String builder --- src/PDF.hs | 6 ---- src/PDF/EOL.hs | 19 ++++++++++ src/PDF/Object.hs | 88 +++++++++++++++++++++++++++++++++++++---------- src/PDF/Output.hs | 76 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 164 insertions(+), 25 deletions(-) create mode 100644 src/PDF/EOL.hs create mode 100644 src/PDF/Output.hs diff --git a/src/PDF.hs b/src/PDF.hs index ec68971..a85de33 100644 --- a/src/PDF.hs +++ b/src/PDF.hs @@ -26,15 +26,9 @@ data Document = Document { , contents :: [Content] } deriving Show -magicNumber :: String -magicNumber = "%PDF-" - version :: Parser String version = string magicNumber *> many (noneOf eolCharset) -eofMarker :: ByteString -eofMarker = "%%EOF" - parseError :: String -> Either ParseError a parseError errorMessage = Left $ newErrorMessage (Message errorMessage) (newPos "" 0 0) diff --git a/src/PDF/EOL.hs b/src/PDF/EOL.hs new file mode 100644 index 0000000..5a36c3a --- /dev/null +++ b/src/PDF/EOL.hs @@ -0,0 +1,19 @@ +module PDF.EOL ( + Style(..) + , charset + , parser + ) where + +import Data.ByteString.Lazy.Char8 (ByteString) +import Text.Parsec ((<|>), Parsec, string, try) + +data Style = CR | LF | CRLF + +charset :: String +charset = "\r\n" + +parser :: Parsec ByteString u Style +parser = + try (string "\r\n" >> return CRLF) + <|> (string "\r" >> return CR) + <|> (string "\n" >> return LF) diff --git a/src/PDF/Object.hs b/src/PDF/Object.hs index 5d5d5f8..313c748 100644 --- a/src/PDF/Object.hs +++ b/src/PDF/Object.hs @@ -1,8 +1,9 @@ +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE FlexibleInstances #-} module PDF.Object ( Content(..) , DirectObject(..) - , EOLStyle(..) , IndirectObjCoordinates(..) , Object(..) , Occurrence(..) @@ -14,26 +15,40 @@ module PDF.Object ( , content , dictionary , directObject - , eol - , eolCharset + , eofMarker , integer , line + , magicNumber ) where import Data.ByteString.Lazy.Char8 (ByteString) import Data.Int (Int64) import Data.Map (Map) -import qualified Data.Map as Map (empty, fromList) +import qualified Data.Map as Map (empty, elems, fromList, toList) +import qualified PDF.EOL as EOL (charset, parser) +import qualified PDF.Output as Output (string) +import PDF.Output (Output(..), byteString, join, newLine, nextLine) import Text.Parsec +import Text.Printf (printf) type Parser u = Parsec ByteString u -data EOLStyle = CR | LF | CRLF - type Dictionary = Map String DirectObject +instance Output (Map String DirectObject) where + output dict = + "<<" `mappend` keyValues `mappend` ">>" + where + keyValues = join " " $ outputKeyVal <$> Map.toList dict + outputKeyVal (key, val) = + Output.string (printf "/%s " key) `mappend` output val + data StringObj = Literal String | Hexadecimal String deriving Show +instance Output StringObj where + output (Literal s) = Output.string (printf "(%s)" s) + output (Hexadecimal s) = Output.string (printf "<%s>" s) + data DirectObject = Boolean Bool | Number Float @@ -45,6 +60,17 @@ data DirectObject = | Reference IndirectObjCoordinates deriving Show +instance Output DirectObject where + output (Boolean b) = output b + output (Number n) = output n + output (String s) = output s + output (Name n) = "/" `mappend` Output.string n + output (Array a) = join " " a + output (Dictionary d) = output d + output (Null) = "null" + output (Reference (IndirectObjCoordinates {objectId, versionNumber})) = + Output.string (printf "%d %d R" objectId versionNumber) + data Object = Direct DirectObject | Stream { @@ -53,6 +79,14 @@ data Object = } deriving Show +instance Output Object where + output (Direct d) = output d + output (Stream {header, streamContent}) = + output header + `nextLine` "stream" + `nextLine` byteString streamContent + `mappend` "endstream" `mappend` newLine + data IndirectObjCoordinates = IndirectObjCoordinates { objectId :: Int , versionNumber :: Int @@ -68,12 +102,23 @@ data XRefEntry = InUse { , generation :: Int } deriving Show +instance Output XRefEntry where + output (InUse {offset, generation}) = + Output.string (printf "%010d %05d n " offset generation) `mappend` newLine + output (Free {nextFree, generation}) = + Output.string (printf "%010d %05d f " nextFree generation) `mappend` newLine + data XRefSubSection = XRefSubSection { firstObjectId :: Int , entriesNumber :: Int , entries :: Map Int XRefEntry } deriving Show +instance Output XRefSubSection where + output (XRefSubSection {firstObjectId, entriesNumber, entries}) = + Output.string (printf "%d %d" firstObjectId entriesNumber) + `nextLine` output (Map.elems entries) `mappend` newLine + type XRefSection = [XRefSubSection] data Content = Content { @@ -85,23 +130,28 @@ data Content = Content { , startXrefPosition :: Int64 } deriving Show -eolCharset :: String -eolCharset = "\r\n" - -eol :: Parser u EOLStyle -eol = - try (string "\r\n" >> return CRLF) - <|> (string "\r" >> return CR) - <|> (string "\n" >> return LF) +instance Output Content where + output (Content {xrefSection, startXrefPosition}) = + output xrefSection + `mappend` "trailer" + `nextLine` "startxref" + `nextLine` Output.string (printf "%d" startXrefPosition) + `nextLine` byteString eofMarker line :: String -> Parser u () -line l = string l *> eol *> return () +line l = string l *> EOL.parser *> return () + +magicNumber :: String +magicNumber = "%PDF-" + +eofMarker :: ByteString +eofMarker = "%%EOF" whiteSpaceCharset :: String whiteSpaceCharset = "\0\t\12 " whiteSpace :: Parser u () -whiteSpace = oneOf whiteSpaceCharset *> return () <|> eol *> return () +whiteSpace = oneOf whiteSpaceCharset *> return () <|> EOL.parser *> return () blank :: Parser u () blank = skipMany whiteSpace @@ -110,7 +160,7 @@ delimiterCharset :: String delimiterCharset = "()<>[]{}/%" regular :: Parser u Char -regular = noneOf $ eolCharset ++ whiteSpaceCharset ++ delimiterCharset +regular = noneOf $ EOL.charset ++ whiteSpaceCharset ++ delimiterCharset integer :: (Read a, Num a) => Parser u a integer = read <$> many1 digit <* whiteSpace @@ -185,6 +235,6 @@ xrefSubSection = do content :: Parser u Content content = Content 0 [] Map.empty - <$> (line "xref" *> xrefSubSection `sepBy` eol) - <*> (line "trailer" *> dictionary <* eol) + <$> (line "xref" *> xrefSubSection `sepBy` EOL.parser) + <*> (line "trailer" *> dictionary <* EOL.parser) <*> (line "startxref" *> integer) diff --git a/src/PDF/Output.hs b/src/PDF/Output.hs new file mode 100644 index 0000000..18b7161 --- /dev/null +++ b/src/PDF/Output.hs @@ -0,0 +1,76 @@ +module PDF.Output ( + OBuilder(..) + , Output(..) + , byteString + , char + , join + , lift + , newLine + , nextLine + , string + , render + ) where + +import Data.ByteString.Builder (Builder, char8, lazyByteString, string8, toLazyByteString) +import Data.ByteString.Lazy.Char8 (ByteString) +import Data.String (IsString(..)) +import Control.Monad.Reader (MonadReader(..), Reader, runReader) +import qualified PDF.EOL as EOL (Style(..)) + +newtype OBuilder = OBuilder (Reader EOL.Style Builder) + +lift :: (a -> Builder) -> a -> OBuilder +lift f a = OBuilder $ return (f a) + +instance Monoid OBuilder where + mempty = OBuilder (return mempty) + mappend (OBuilder a) (OBuilder b) = OBuilder (mappend <$> a <*> b) + +instance IsString OBuilder where + fromString = string + +class Output a where + output :: a -> OBuilder + +instance Output OBuilder where + output = id + +instance Output Bool where + output False = string "false" + output True = string "true" + +instance Output Float where + output = string . show + +instance Output a => Output [a] where + output = foldl mappend mempty . fmap output + +join :: Output a => String -> [a] -> OBuilder +join _ [] = mempty +join _ [a] = output a +join separator (a:as) = + output a `mappend` string separator `mappend` (join separator as) + +newLine :: OBuilder +newLine = OBuilder $ buildEOL <$> ask + where + buildEOL EOL.CR = char8 '\r' + buildEOL EOL.LF = char8 '\n' + buildEOL EOL.CRLF = string8 "\r\n" + +nextLine :: OBuilder -> OBuilder -> OBuilder +nextLine a b = a `mappend` newLine `mappend` b + +char :: Char -> OBuilder +char = lift char8 + +string :: String -> OBuilder +string = lift string8 + +byteString :: ByteString -> OBuilder +byteString = lift lazyByteString + +render :: Output a => EOL.Style -> a -> ByteString +render eolStyle a = + let OBuilder r = output a in + toLazyByteString $ runReader r eolStyle