Starting to implement output with String builder

This commit is contained in:
Tissevert 2019-05-16 17:04:45 +02:00
parent 9b2f890227
commit 645466024a
4 changed files with 164 additions and 25 deletions

View file

@ -26,15 +26,9 @@ data Document = Document {
, contents :: [Content] , contents :: [Content]
} deriving Show } deriving Show
magicNumber :: String
magicNumber = "%PDF-"
version :: Parser String version :: Parser String
version = string magicNumber *> many (noneOf eolCharset) version = string magicNumber *> many (noneOf eolCharset)
eofMarker :: ByteString
eofMarker = "%%EOF"
parseError :: String -> Either ParseError a parseError :: String -> Either ParseError a
parseError errorMessage = parseError errorMessage =
Left $ newErrorMessage (Message errorMessage) (newPos "" 0 0) Left $ newErrorMessage (Message errorMessage) (newPos "" 0 0)

19
src/PDF/EOL.hs Normal file
View file

@ -0,0 +1,19 @@
module PDF.EOL (
Style(..)
, charset
, parser
) where
import Data.ByteString.Lazy.Char8 (ByteString)
import Text.Parsec ((<|>), Parsec, string, try)
data Style = CR | LF | CRLF
charset :: String
charset = "\r\n"
parser :: Parsec ByteString u Style
parser =
try (string "\r\n" >> return CRLF)
<|> (string "\r" >> return CR)
<|> (string "\n" >> return LF)

View file

@ -1,8 +1,9 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE FlexibleInstances #-}
module PDF.Object ( module PDF.Object (
Content(..) Content(..)
, DirectObject(..) , DirectObject(..)
, EOLStyle(..)
, IndirectObjCoordinates(..) , IndirectObjCoordinates(..)
, Object(..) , Object(..)
, Occurrence(..) , Occurrence(..)
@ -14,26 +15,40 @@ module PDF.Object (
, content , content
, dictionary , dictionary
, directObject , directObject
, eol , eofMarker
, eolCharset
, integer , integer
, line , line
, magicNumber
) where ) where
import Data.ByteString.Lazy.Char8 (ByteString) import Data.ByteString.Lazy.Char8 (ByteString)
import Data.Int (Int64) import Data.Int (Int64)
import Data.Map (Map) import Data.Map (Map)
import qualified Data.Map as Map (empty, fromList) import qualified Data.Map as Map (empty, elems, fromList, toList)
import qualified PDF.EOL as EOL (charset, parser)
import qualified PDF.Output as Output (string)
import PDF.Output (Output(..), byteString, join, newLine, nextLine)
import Text.Parsec import Text.Parsec
import Text.Printf (printf)
type Parser u = Parsec ByteString u type Parser u = Parsec ByteString u
data EOLStyle = CR | LF | CRLF
type Dictionary = Map String DirectObject type Dictionary = Map String DirectObject
instance Output (Map String DirectObject) where
output dict =
"<<" `mappend` keyValues `mappend` ">>"
where
keyValues = join " " $ outputKeyVal <$> Map.toList dict
outputKeyVal (key, val) =
Output.string (printf "/%s " key) `mappend` output val
data StringObj = Literal String | Hexadecimal String deriving Show data StringObj = Literal String | Hexadecimal String deriving Show
instance Output StringObj where
output (Literal s) = Output.string (printf "(%s)" s)
output (Hexadecimal s) = Output.string (printf "<%s>" s)
data DirectObject = data DirectObject =
Boolean Bool Boolean Bool
| Number Float | Number Float
@ -45,6 +60,17 @@ data DirectObject =
| Reference IndirectObjCoordinates | Reference IndirectObjCoordinates
deriving Show deriving Show
instance Output DirectObject where
output (Boolean b) = output b
output (Number n) = output n
output (String s) = output s
output (Name n) = "/" `mappend` Output.string n
output (Array a) = join " " a
output (Dictionary d) = output d
output (Null) = "null"
output (Reference (IndirectObjCoordinates {objectId, versionNumber})) =
Output.string (printf "%d %d R" objectId versionNumber)
data Object = data Object =
Direct DirectObject Direct DirectObject
| Stream { | Stream {
@ -53,6 +79,14 @@ data Object =
} }
deriving Show deriving Show
instance Output Object where
output (Direct d) = output d
output (Stream {header, streamContent}) =
output header
`nextLine` "stream"
`nextLine` byteString streamContent
`mappend` "endstream" `mappend` newLine
data IndirectObjCoordinates = IndirectObjCoordinates { data IndirectObjCoordinates = IndirectObjCoordinates {
objectId :: Int objectId :: Int
, versionNumber :: Int , versionNumber :: Int
@ -68,12 +102,23 @@ data XRefEntry = InUse {
, generation :: Int , generation :: Int
} deriving Show } deriving Show
instance Output XRefEntry where
output (InUse {offset, generation}) =
Output.string (printf "%010d %05d n " offset generation) `mappend` newLine
output (Free {nextFree, generation}) =
Output.string (printf "%010d %05d f " nextFree generation) `mappend` newLine
data XRefSubSection = XRefSubSection { data XRefSubSection = XRefSubSection {
firstObjectId :: Int firstObjectId :: Int
, entriesNumber :: Int , entriesNumber :: Int
, entries :: Map Int XRefEntry , entries :: Map Int XRefEntry
} deriving Show } deriving Show
instance Output XRefSubSection where
output (XRefSubSection {firstObjectId, entriesNumber, entries}) =
Output.string (printf "%d %d" firstObjectId entriesNumber)
`nextLine` output (Map.elems entries) `mappend` newLine
type XRefSection = [XRefSubSection] type XRefSection = [XRefSubSection]
data Content = Content { data Content = Content {
@ -85,23 +130,28 @@ data Content = Content {
, startXrefPosition :: Int64 , startXrefPosition :: Int64
} deriving Show } deriving Show
eolCharset :: String instance Output Content where
eolCharset = "\r\n" output (Content {xrefSection, startXrefPosition}) =
output xrefSection
eol :: Parser u EOLStyle `mappend` "trailer"
eol = `nextLine` "startxref"
try (string "\r\n" >> return CRLF) `nextLine` Output.string (printf "%d" startXrefPosition)
<|> (string "\r" >> return CR) `nextLine` byteString eofMarker
<|> (string "\n" >> return LF)
line :: String -> Parser u () line :: String -> Parser u ()
line l = string l *> eol *> return () line l = string l *> EOL.parser *> return ()
magicNumber :: String
magicNumber = "%PDF-"
eofMarker :: ByteString
eofMarker = "%%EOF"
whiteSpaceCharset :: String whiteSpaceCharset :: String
whiteSpaceCharset = "\0\t\12 " whiteSpaceCharset = "\0\t\12 "
whiteSpace :: Parser u () whiteSpace :: Parser u ()
whiteSpace = oneOf whiteSpaceCharset *> return () <|> eol *> return () whiteSpace = oneOf whiteSpaceCharset *> return () <|> EOL.parser *> return ()
blank :: Parser u () blank :: Parser u ()
blank = skipMany whiteSpace blank = skipMany whiteSpace
@ -110,7 +160,7 @@ delimiterCharset :: String
delimiterCharset = "()<>[]{}/%" delimiterCharset = "()<>[]{}/%"
regular :: Parser u Char regular :: Parser u Char
regular = noneOf $ eolCharset ++ whiteSpaceCharset ++ delimiterCharset regular = noneOf $ EOL.charset ++ whiteSpaceCharset ++ delimiterCharset
integer :: (Read a, Num a) => Parser u a integer :: (Read a, Num a) => Parser u a
integer = read <$> many1 digit <* whiteSpace integer = read <$> many1 digit <* whiteSpace
@ -185,6 +235,6 @@ xrefSubSection = do
content :: Parser u Content content :: Parser u Content
content = content =
Content 0 [] Map.empty Content 0 [] Map.empty
<$> (line "xref" *> xrefSubSection `sepBy` eol) <$> (line "xref" *> xrefSubSection `sepBy` EOL.parser)
<*> (line "trailer" *> dictionary <* eol) <*> (line "trailer" *> dictionary <* EOL.parser)
<*> (line "startxref" *> integer) <*> (line "startxref" *> integer)

76
src/PDF/Output.hs Normal file
View file

@ -0,0 +1,76 @@
module PDF.Output (
OBuilder(..)
, Output(..)
, byteString
, char
, join
, lift
, newLine
, nextLine
, string
, render
) where
import Data.ByteString.Builder (Builder, char8, lazyByteString, string8, toLazyByteString)
import Data.ByteString.Lazy.Char8 (ByteString)
import Data.String (IsString(..))
import Control.Monad.Reader (MonadReader(..), Reader, runReader)
import qualified PDF.EOL as EOL (Style(..))
newtype OBuilder = OBuilder (Reader EOL.Style Builder)
lift :: (a -> Builder) -> a -> OBuilder
lift f a = OBuilder $ return (f a)
instance Monoid OBuilder where
mempty = OBuilder (return mempty)
mappend (OBuilder a) (OBuilder b) = OBuilder (mappend <$> a <*> b)
instance IsString OBuilder where
fromString = string
class Output a where
output :: a -> OBuilder
instance Output OBuilder where
output = id
instance Output Bool where
output False = string "false"
output True = string "true"
instance Output Float where
output = string . show
instance Output a => Output [a] where
output = foldl mappend mempty . fmap output
join :: Output a => String -> [a] -> OBuilder
join _ [] = mempty
join _ [a] = output a
join separator (a:as) =
output a `mappend` string separator `mappend` (join separator as)
newLine :: OBuilder
newLine = OBuilder $ buildEOL <$> ask
where
buildEOL EOL.CR = char8 '\r'
buildEOL EOL.LF = char8 '\n'
buildEOL EOL.CRLF = string8 "\r\n"
nextLine :: OBuilder -> OBuilder -> OBuilder
nextLine a b = a `mappend` newLine `mappend` b
char :: Char -> OBuilder
char = lift char8
string :: String -> OBuilder
string = lift string8
byteString :: ByteString -> OBuilder
byteString = lift lazyByteString
render :: Output a => EOL.Style -> a -> ByteString
render eolStyle a =
let OBuilder r = output a in
toLazyByteString $ runReader r eolStyle