Implement output

This commit is contained in:
Tissevert 2019-05-16 22:41:14 +02:00
parent 088637b2c0
commit e23618da68
5 changed files with 60 additions and 32 deletions

View file

@ -1,10 +1,11 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NamedFieldPuns #-}
module PDF (
parseDocument
, Document(..)
Document(..)
, Content(..)
, DirectObject(..)
, parseDocument
, render
) where
import Data.ByteString.Lazy.Char8 (ByteString)
@ -14,20 +15,33 @@ import qualified Data.ByteString.Lazy.Char8 as BS (
import Data.ByteString.Lazy.Char8.Util (previous, subBS)
import Data.Int (Int64)
import qualified Data.Map as Map (lookup)
import PDF.Object (Content(..), DirectObject(..), EOLStyle(..), content, eol, eolCharset)
import PDF.Body (populate)
import qualified PDF.EOL as EOL (Style(..), charset, parser)
import PDF.Object (Content(..), DirectObject(..), content, eofMarker, magicNumber)
import qualified PDF.Output as Output (render, string)
import PDF.Output (Output(..), nextLine)
import Text.Parsec
import Text.Parsec.ByteString.Lazy (Parser)
import Text.Parsec.Pos (newPos)
import Text.ParserCombinators.Parsec.Error (Message(..), newErrorMessage)
import Text.Printf (printf)
data Document = Document {
pdfVersion :: String
, eolStyle :: EOL.Style
, contents :: [Content]
} deriving Show
instance Output Document where
output (Document {pdfVersion, contents}) =
Output.string (printf "%%PDF-%s" pdfVersion)
`nextLine` output contents
render :: Document -> ByteString
render document@(Document {eolStyle}) = Output.render eolStyle document
version :: Parser String
version = string magicNumber *> many (noneOf eolCharset)
version = string magicNumber *> many (noneOf EOL.charset)
parseError :: String -> Either ParseError a
parseError errorMessage =
@ -36,15 +50,15 @@ parseError errorMessage =
check :: Bool -> String -> Either ParseError ()
check test errorMessage = if test then return () else parseError errorMessage
readStartXref :: EOLStyle -> ByteString -> Either ParseError Int64
readStartXref :: EOL.Style -> ByteString -> Either ParseError Int64
readStartXref eolStyle input =
check (eofMarker `BS.isPrefixOf` (BS.drop eofMarkerPosition input))
"Badly formed document : missing EOF marker at the end"
>> return (read . BS.unpack $ subBS startXrefPosition startXrefLength input)
where
(eolOffset, eolLastByte) = case eolStyle of
CRLF -> (2, '\n')
CR -> (1, '\r')
EOL.CRLF -> (2, '\n')
EOL.CR -> (1, '\r')
_ -> (1, '\n')
eofMarkerPosition =
BS.length input - BS.length eofMarker
@ -53,24 +67,24 @@ readStartXref eolStyle input =
previous eolLastByte (eofMarkerPosition - eolOffset - 1) input + 1
startXrefLength = eofMarkerPosition - eolOffset - startXrefPosition
nextLine :: ByteString -> Int64
nextLine input =
findNextLine :: ByteString -> Int64
findNextLine input =
let (line, eolPrefixed) = BS.span notInEol input in
let nextNotInEol = BS.findIndex notInEol eolPrefixed in
BS.length line + (maybe (BS.length eolPrefixed) id nextNotInEol)
where
notInEol = not . (`elem` eolCharset)
notInEol = not . (`elem` EOL.charset)
nextSection :: Int64 -> ByteString -> Int64
nextSection offset input =
findNextSection :: Int64 -> ByteString -> Int64
findNextSection offset input =
case BS.findIndex (== BS.head eofMarker) input of
Nothing -> 0
Just delta ->
let newInput = BS.drop delta input in
let newOffset = offset + delta in
if BS.isPrefixOf eofMarker newInput
then newOffset + nextLine newInput
else nextSection (newOffset + 1) (BS.drop 1 newInput)
then newOffset + findNextLine newInput
else findNextSection (newOffset + 1) (BS.drop 1 newInput)
iterateContents :: Int64 -> ByteString -> Either ParseError [Content]
iterateContents startXref input =
@ -78,17 +92,17 @@ iterateContents startXref input =
where
stopOrFollow c@(Content {trailer}) =
case Map.lookup "Prev" trailer of
Nothing -> Right [c {startOffset = nextLine input}]
Nothing -> Right [c {startOffset = findNextLine input}]
Just (Number newStartXref) ->
let offset = truncate newStartXref in
let startOffset = nextSection offset (BS.drop offset input) in
let startOffset = findNextSection offset (BS.drop offset input) in
(c {startOffset}:) <$> (iterateContents offset input)
Just v -> parseError $ "Bad value for Prev entry in trailer: " ++ show v
parseDocument :: ByteString -> Either ParseError Document
parseDocument input = do
(pdfVersion, eolStyle) <- parse ((,) <$> version <*> eol) "" input
(pdfVersion, eolStyle) <- parse ((,) <$> version <*> EOL.parser) "" input
startXref <- readStartXref eolStyle input
structures <- iterateContents startXref input
let contents = populate input <$> structures
return $ Document {pdfVersion, contents}
return $ Document {pdfVersion, eolStyle, contents}

View file

@ -8,10 +8,11 @@ import qualified Data.ByteString.Lazy.Char8 as BS (drop, pack)
import Data.Int (Int64)
import Data.Map ((!))
import qualified Data.Map as Map (insert, lookup)
import qualified PDF.EOL as EOL (charset, parser)
import PDF.Object (
Content(..), DirectObject(..), IndirectObjCoordinates(..), Object(..)
, Occurrence(..), Parser, XRefEntry(..), XRefSection, XRefSubSection(..)
, blank, eol, eolCharset, dictionary, directObject, integer, line
, blank, dictionary, directObject, integer, line
)
import Text.Parsec
@ -36,7 +37,7 @@ pushOccurrence newOccurrence = modifyContent $ \content -> content {
}
comment :: Parser u String
comment = char '%' *> many (noneOf eolCharset) <* eol
comment = char '%' *> many (noneOf EOL.charset) <* EOL.parser
lookupOffset :: Int -> XRefSection -> Maybe Int64
lookupOffset _ [] = Nothing

View file

@ -7,7 +7,7 @@ module PDF.EOL (
import Data.ByteString.Lazy.Char8 (ByteString)
import Text.Parsec ((<|>), Parsec, string, try)
data Style = CR | LF | CRLF
data Style = CR | LF | CRLF deriving Show
charset :: String
charset = "\r\n"

View file

@ -23,11 +23,11 @@ module PDF.Object (
import Data.ByteString.Lazy.Char8 (ByteString)
import Data.Int (Int64)
import Data.Map (Map)
import Data.Map (Map, (!))
import qualified Data.Map as Map (empty, elems, fromList, toList)
import qualified PDF.EOL as EOL (charset, parser)
import qualified PDF.Output as Output (string)
import PDF.Output (Output(..), byteString, join, newLine, nextLine)
import PDF.Output (OBuilder, Output(..), byteString, join, newLine, nextLine)
import Text.Parsec
import Text.Printf (printf)
@ -60,12 +60,18 @@ data DirectObject =
| Reference IndirectObjCoordinates
deriving Show
outputFloat :: Float -> OBuilder
outputFloat f = Output.string $
case properFraction f of
(n, 0) -> printf "%d" (n :: Int)
_ -> printf "%f" f
instance Output DirectObject where
output (Boolean b) = output b
output (Number n) = output n
output (Number n) = outputFloat n
output (String s) = output s
output (Name n) = "/" `mappend` Output.string n
output (Array a) = join " " a
output (Array a) = "[" `mappend` join " " a `mappend` "]"
output (Dictionary d) = output d
output (Null) = "null"
output (Reference (IndirectObjCoordinates {objectId, versionNumber})) =
@ -85,7 +91,7 @@ instance Output Object where
output header
`nextLine` "stream"
`nextLine` byteString streamContent
`mappend` "endstream" `mappend` newLine
`mappend` "endstream"
data IndirectObjCoordinates = IndirectObjCoordinates {
objectId :: Int
@ -94,6 +100,14 @@ data IndirectObjCoordinates = IndirectObjCoordinates {
data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show
outputOccurrence :: Map Int Object -> Occurrence -> OBuilder
outputOccurrence _ (Comment c) =
Output.string (printf "%%%s" c) `mappend` newLine
outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) =
Output.string (printf "%d %d obj" objectId versionNumber)
`nextLine` output (objects ! objectId)
`nextLine` "endobj" `mappend` newLine
data XRefEntry = InUse {
offset :: Int64
, generation :: Int
@ -117,7 +131,7 @@ data XRefSubSection = XRefSubSection {
instance Output XRefSubSection where
output (XRefSubSection {firstObjectId, entriesNumber, entries}) =
Output.string (printf "%d %d" firstObjectId entriesNumber)
`nextLine` output (Map.elems entries) `mappend` newLine
`nextLine` output (Map.elems entries)
type XRefSection = [XRefSubSection]
@ -131,9 +145,11 @@ data Content = Content {
} deriving Show
instance Output Content where
output (Content {xrefSection, startXrefPosition}) =
output xrefSection
output (Content {body, objects, trailer, xrefSection, startXrefPosition}) =
output (outputOccurrence objects <$> body)
`mappend` output xrefSection
`mappend` "trailer"
`nextLine` output trailer
`nextLine` "startxref"
`nextLine` Output.string (printf "%d" startXrefPosition)
`nextLine` byteString eofMarker

View file

@ -53,9 +53,6 @@ instance Output Bool where
output False = string "false"
output True = string "true"
instance Output Float where
output = string . show
instance Output a => Output [a] where
output = foldl mappend mempty . fmap output