Implement output
This commit is contained in:
parent
088637b2c0
commit
e23618da68
5 changed files with 60 additions and 32 deletions
50
src/PDF.hs
50
src/PDF.hs
|
@ -1,10 +1,11 @@
|
||||||
{-# LANGUAGE OverloadedStrings #-}
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
{-# LANGUAGE NamedFieldPuns #-}
|
{-# LANGUAGE NamedFieldPuns #-}
|
||||||
module PDF (
|
module PDF (
|
||||||
parseDocument
|
Document(..)
|
||||||
, Document(..)
|
|
||||||
, Content(..)
|
, Content(..)
|
||||||
, DirectObject(..)
|
, DirectObject(..)
|
||||||
|
, parseDocument
|
||||||
|
, render
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Data.ByteString.Lazy.Char8 (ByteString)
|
import Data.ByteString.Lazy.Char8 (ByteString)
|
||||||
|
@ -14,20 +15,33 @@ import qualified Data.ByteString.Lazy.Char8 as BS (
|
||||||
import Data.ByteString.Lazy.Char8.Util (previous, subBS)
|
import Data.ByteString.Lazy.Char8.Util (previous, subBS)
|
||||||
import Data.Int (Int64)
|
import Data.Int (Int64)
|
||||||
import qualified Data.Map as Map (lookup)
|
import qualified Data.Map as Map (lookup)
|
||||||
import PDF.Object (Content(..), DirectObject(..), EOLStyle(..), content, eol, eolCharset)
|
|
||||||
import PDF.Body (populate)
|
import PDF.Body (populate)
|
||||||
|
import qualified PDF.EOL as EOL (Style(..), charset, parser)
|
||||||
|
import PDF.Object (Content(..), DirectObject(..), content, eofMarker, magicNumber)
|
||||||
|
import qualified PDF.Output as Output (render, string)
|
||||||
|
import PDF.Output (Output(..), nextLine)
|
||||||
import Text.Parsec
|
import Text.Parsec
|
||||||
import Text.Parsec.ByteString.Lazy (Parser)
|
import Text.Parsec.ByteString.Lazy (Parser)
|
||||||
import Text.Parsec.Pos (newPos)
|
import Text.Parsec.Pos (newPos)
|
||||||
import Text.ParserCombinators.Parsec.Error (Message(..), newErrorMessage)
|
import Text.ParserCombinators.Parsec.Error (Message(..), newErrorMessage)
|
||||||
|
import Text.Printf (printf)
|
||||||
|
|
||||||
data Document = Document {
|
data Document = Document {
|
||||||
pdfVersion :: String
|
pdfVersion :: String
|
||||||
|
, eolStyle :: EOL.Style
|
||||||
, contents :: [Content]
|
, contents :: [Content]
|
||||||
} deriving Show
|
} deriving Show
|
||||||
|
|
||||||
|
instance Output Document where
|
||||||
|
output (Document {pdfVersion, contents}) =
|
||||||
|
Output.string (printf "%%PDF-%s" pdfVersion)
|
||||||
|
`nextLine` output contents
|
||||||
|
|
||||||
|
render :: Document -> ByteString
|
||||||
|
render document@(Document {eolStyle}) = Output.render eolStyle document
|
||||||
|
|
||||||
version :: Parser String
|
version :: Parser String
|
||||||
version = string magicNumber *> many (noneOf eolCharset)
|
version = string magicNumber *> many (noneOf EOL.charset)
|
||||||
|
|
||||||
parseError :: String -> Either ParseError a
|
parseError :: String -> Either ParseError a
|
||||||
parseError errorMessage =
|
parseError errorMessage =
|
||||||
|
@ -36,15 +50,15 @@ parseError errorMessage =
|
||||||
check :: Bool -> String -> Either ParseError ()
|
check :: Bool -> String -> Either ParseError ()
|
||||||
check test errorMessage = if test then return () else parseError errorMessage
|
check test errorMessage = if test then return () else parseError errorMessage
|
||||||
|
|
||||||
readStartXref :: EOLStyle -> ByteString -> Either ParseError Int64
|
readStartXref :: EOL.Style -> ByteString -> Either ParseError Int64
|
||||||
readStartXref eolStyle input =
|
readStartXref eolStyle input =
|
||||||
check (eofMarker `BS.isPrefixOf` (BS.drop eofMarkerPosition input))
|
check (eofMarker `BS.isPrefixOf` (BS.drop eofMarkerPosition input))
|
||||||
"Badly formed document : missing EOF marker at the end"
|
"Badly formed document : missing EOF marker at the end"
|
||||||
>> return (read . BS.unpack $ subBS startXrefPosition startXrefLength input)
|
>> return (read . BS.unpack $ subBS startXrefPosition startXrefLength input)
|
||||||
where
|
where
|
||||||
(eolOffset, eolLastByte) = case eolStyle of
|
(eolOffset, eolLastByte) = case eolStyle of
|
||||||
CRLF -> (2, '\n')
|
EOL.CRLF -> (2, '\n')
|
||||||
CR -> (1, '\r')
|
EOL.CR -> (1, '\r')
|
||||||
_ -> (1, '\n')
|
_ -> (1, '\n')
|
||||||
eofMarkerPosition =
|
eofMarkerPosition =
|
||||||
BS.length input - BS.length eofMarker
|
BS.length input - BS.length eofMarker
|
||||||
|
@ -53,24 +67,24 @@ readStartXref eolStyle input =
|
||||||
previous eolLastByte (eofMarkerPosition - eolOffset - 1) input + 1
|
previous eolLastByte (eofMarkerPosition - eolOffset - 1) input + 1
|
||||||
startXrefLength = eofMarkerPosition - eolOffset - startXrefPosition
|
startXrefLength = eofMarkerPosition - eolOffset - startXrefPosition
|
||||||
|
|
||||||
nextLine :: ByteString -> Int64
|
findNextLine :: ByteString -> Int64
|
||||||
nextLine input =
|
findNextLine input =
|
||||||
let (line, eolPrefixed) = BS.span notInEol input in
|
let (line, eolPrefixed) = BS.span notInEol input in
|
||||||
let nextNotInEol = BS.findIndex notInEol eolPrefixed in
|
let nextNotInEol = BS.findIndex notInEol eolPrefixed in
|
||||||
BS.length line + (maybe (BS.length eolPrefixed) id nextNotInEol)
|
BS.length line + (maybe (BS.length eolPrefixed) id nextNotInEol)
|
||||||
where
|
where
|
||||||
notInEol = not . (`elem` eolCharset)
|
notInEol = not . (`elem` EOL.charset)
|
||||||
|
|
||||||
nextSection :: Int64 -> ByteString -> Int64
|
findNextSection :: Int64 -> ByteString -> Int64
|
||||||
nextSection offset input =
|
findNextSection offset input =
|
||||||
case BS.findIndex (== BS.head eofMarker) input of
|
case BS.findIndex (== BS.head eofMarker) input of
|
||||||
Nothing -> 0
|
Nothing -> 0
|
||||||
Just delta ->
|
Just delta ->
|
||||||
let newInput = BS.drop delta input in
|
let newInput = BS.drop delta input in
|
||||||
let newOffset = offset + delta in
|
let newOffset = offset + delta in
|
||||||
if BS.isPrefixOf eofMarker newInput
|
if BS.isPrefixOf eofMarker newInput
|
||||||
then newOffset + nextLine newInput
|
then newOffset + findNextLine newInput
|
||||||
else nextSection (newOffset + 1) (BS.drop 1 newInput)
|
else findNextSection (newOffset + 1) (BS.drop 1 newInput)
|
||||||
|
|
||||||
iterateContents :: Int64 -> ByteString -> Either ParseError [Content]
|
iterateContents :: Int64 -> ByteString -> Either ParseError [Content]
|
||||||
iterateContents startXref input =
|
iterateContents startXref input =
|
||||||
|
@ -78,17 +92,17 @@ iterateContents startXref input =
|
||||||
where
|
where
|
||||||
stopOrFollow c@(Content {trailer}) =
|
stopOrFollow c@(Content {trailer}) =
|
||||||
case Map.lookup "Prev" trailer of
|
case Map.lookup "Prev" trailer of
|
||||||
Nothing -> Right [c {startOffset = nextLine input}]
|
Nothing -> Right [c {startOffset = findNextLine input}]
|
||||||
Just (Number newStartXref) ->
|
Just (Number newStartXref) ->
|
||||||
let offset = truncate newStartXref in
|
let offset = truncate newStartXref in
|
||||||
let startOffset = nextSection offset (BS.drop offset input) in
|
let startOffset = findNextSection offset (BS.drop offset input) in
|
||||||
(c {startOffset}:) <$> (iterateContents offset input)
|
(c {startOffset}:) <$> (iterateContents offset input)
|
||||||
Just v -> parseError $ "Bad value for Prev entry in trailer: " ++ show v
|
Just v -> parseError $ "Bad value for Prev entry in trailer: " ++ show v
|
||||||
|
|
||||||
parseDocument :: ByteString -> Either ParseError Document
|
parseDocument :: ByteString -> Either ParseError Document
|
||||||
parseDocument input = do
|
parseDocument input = do
|
||||||
(pdfVersion, eolStyle) <- parse ((,) <$> version <*> eol) "" input
|
(pdfVersion, eolStyle) <- parse ((,) <$> version <*> EOL.parser) "" input
|
||||||
startXref <- readStartXref eolStyle input
|
startXref <- readStartXref eolStyle input
|
||||||
structures <- iterateContents startXref input
|
structures <- iterateContents startXref input
|
||||||
let contents = populate input <$> structures
|
let contents = populate input <$> structures
|
||||||
return $ Document {pdfVersion, contents}
|
return $ Document {pdfVersion, eolStyle, contents}
|
||||||
|
|
|
@ -8,10 +8,11 @@ import qualified Data.ByteString.Lazy.Char8 as BS (drop, pack)
|
||||||
import Data.Int (Int64)
|
import Data.Int (Int64)
|
||||||
import Data.Map ((!))
|
import Data.Map ((!))
|
||||||
import qualified Data.Map as Map (insert, lookup)
|
import qualified Data.Map as Map (insert, lookup)
|
||||||
|
import qualified PDF.EOL as EOL (charset, parser)
|
||||||
import PDF.Object (
|
import PDF.Object (
|
||||||
Content(..), DirectObject(..), IndirectObjCoordinates(..), Object(..)
|
Content(..), DirectObject(..), IndirectObjCoordinates(..), Object(..)
|
||||||
, Occurrence(..), Parser, XRefEntry(..), XRefSection, XRefSubSection(..)
|
, Occurrence(..), Parser, XRefEntry(..), XRefSection, XRefSubSection(..)
|
||||||
, blank, eol, eolCharset, dictionary, directObject, integer, line
|
, blank, dictionary, directObject, integer, line
|
||||||
)
|
)
|
||||||
import Text.Parsec
|
import Text.Parsec
|
||||||
|
|
||||||
|
@ -36,7 +37,7 @@ pushOccurrence newOccurrence = modifyContent $ \content -> content {
|
||||||
}
|
}
|
||||||
|
|
||||||
comment :: Parser u String
|
comment :: Parser u String
|
||||||
comment = char '%' *> many (noneOf eolCharset) <* eol
|
comment = char '%' *> many (noneOf EOL.charset) <* EOL.parser
|
||||||
|
|
||||||
lookupOffset :: Int -> XRefSection -> Maybe Int64
|
lookupOffset :: Int -> XRefSection -> Maybe Int64
|
||||||
lookupOffset _ [] = Nothing
|
lookupOffset _ [] = Nothing
|
||||||
|
|
|
@ -7,7 +7,7 @@ module PDF.EOL (
|
||||||
import Data.ByteString.Lazy.Char8 (ByteString)
|
import Data.ByteString.Lazy.Char8 (ByteString)
|
||||||
import Text.Parsec ((<|>), Parsec, string, try)
|
import Text.Parsec ((<|>), Parsec, string, try)
|
||||||
|
|
||||||
data Style = CR | LF | CRLF
|
data Style = CR | LF | CRLF deriving Show
|
||||||
|
|
||||||
charset :: String
|
charset :: String
|
||||||
charset = "\r\n"
|
charset = "\r\n"
|
||||||
|
|
|
@ -23,11 +23,11 @@ module PDF.Object (
|
||||||
|
|
||||||
import Data.ByteString.Lazy.Char8 (ByteString)
|
import Data.ByteString.Lazy.Char8 (ByteString)
|
||||||
import Data.Int (Int64)
|
import Data.Int (Int64)
|
||||||
import Data.Map (Map)
|
import Data.Map (Map, (!))
|
||||||
import qualified Data.Map as Map (empty, elems, fromList, toList)
|
import qualified Data.Map as Map (empty, elems, fromList, toList)
|
||||||
import qualified PDF.EOL as EOL (charset, parser)
|
import qualified PDF.EOL as EOL (charset, parser)
|
||||||
import qualified PDF.Output as Output (string)
|
import qualified PDF.Output as Output (string)
|
||||||
import PDF.Output (Output(..), byteString, join, newLine, nextLine)
|
import PDF.Output (OBuilder, Output(..), byteString, join, newLine, nextLine)
|
||||||
import Text.Parsec
|
import Text.Parsec
|
||||||
import Text.Printf (printf)
|
import Text.Printf (printf)
|
||||||
|
|
||||||
|
@ -60,12 +60,18 @@ data DirectObject =
|
||||||
| Reference IndirectObjCoordinates
|
| Reference IndirectObjCoordinates
|
||||||
deriving Show
|
deriving Show
|
||||||
|
|
||||||
|
outputFloat :: Float -> OBuilder
|
||||||
|
outputFloat f = Output.string $
|
||||||
|
case properFraction f of
|
||||||
|
(n, 0) -> printf "%d" (n :: Int)
|
||||||
|
_ -> printf "%f" f
|
||||||
|
|
||||||
instance Output DirectObject where
|
instance Output DirectObject where
|
||||||
output (Boolean b) = output b
|
output (Boolean b) = output b
|
||||||
output (Number n) = output n
|
output (Number n) = outputFloat n
|
||||||
output (String s) = output s
|
output (String s) = output s
|
||||||
output (Name n) = "/" `mappend` Output.string n
|
output (Name n) = "/" `mappend` Output.string n
|
||||||
output (Array a) = join " " a
|
output (Array a) = "[" `mappend` join " " a `mappend` "]"
|
||||||
output (Dictionary d) = output d
|
output (Dictionary d) = output d
|
||||||
output (Null) = "null"
|
output (Null) = "null"
|
||||||
output (Reference (IndirectObjCoordinates {objectId, versionNumber})) =
|
output (Reference (IndirectObjCoordinates {objectId, versionNumber})) =
|
||||||
|
@ -85,7 +91,7 @@ instance Output Object where
|
||||||
output header
|
output header
|
||||||
`nextLine` "stream"
|
`nextLine` "stream"
|
||||||
`nextLine` byteString streamContent
|
`nextLine` byteString streamContent
|
||||||
`mappend` "endstream" `mappend` newLine
|
`mappend` "endstream"
|
||||||
|
|
||||||
data IndirectObjCoordinates = IndirectObjCoordinates {
|
data IndirectObjCoordinates = IndirectObjCoordinates {
|
||||||
objectId :: Int
|
objectId :: Int
|
||||||
|
@ -94,6 +100,14 @@ data IndirectObjCoordinates = IndirectObjCoordinates {
|
||||||
|
|
||||||
data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show
|
data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show
|
||||||
|
|
||||||
|
outputOccurrence :: Map Int Object -> Occurrence -> OBuilder
|
||||||
|
outputOccurrence _ (Comment c) =
|
||||||
|
Output.string (printf "%%%s" c) `mappend` newLine
|
||||||
|
outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) =
|
||||||
|
Output.string (printf "%d %d obj" objectId versionNumber)
|
||||||
|
`nextLine` output (objects ! objectId)
|
||||||
|
`nextLine` "endobj" `mappend` newLine
|
||||||
|
|
||||||
data XRefEntry = InUse {
|
data XRefEntry = InUse {
|
||||||
offset :: Int64
|
offset :: Int64
|
||||||
, generation :: Int
|
, generation :: Int
|
||||||
|
@ -117,7 +131,7 @@ data XRefSubSection = XRefSubSection {
|
||||||
instance Output XRefSubSection where
|
instance Output XRefSubSection where
|
||||||
output (XRefSubSection {firstObjectId, entriesNumber, entries}) =
|
output (XRefSubSection {firstObjectId, entriesNumber, entries}) =
|
||||||
Output.string (printf "%d %d" firstObjectId entriesNumber)
|
Output.string (printf "%d %d" firstObjectId entriesNumber)
|
||||||
`nextLine` output (Map.elems entries) `mappend` newLine
|
`nextLine` output (Map.elems entries)
|
||||||
|
|
||||||
type XRefSection = [XRefSubSection]
|
type XRefSection = [XRefSubSection]
|
||||||
|
|
||||||
|
@ -131,9 +145,11 @@ data Content = Content {
|
||||||
} deriving Show
|
} deriving Show
|
||||||
|
|
||||||
instance Output Content where
|
instance Output Content where
|
||||||
output (Content {xrefSection, startXrefPosition}) =
|
output (Content {body, objects, trailer, xrefSection, startXrefPosition}) =
|
||||||
output xrefSection
|
output (outputOccurrence objects <$> body)
|
||||||
|
`mappend` output xrefSection
|
||||||
`mappend` "trailer"
|
`mappend` "trailer"
|
||||||
|
`nextLine` output trailer
|
||||||
`nextLine` "startxref"
|
`nextLine` "startxref"
|
||||||
`nextLine` Output.string (printf "%d" startXrefPosition)
|
`nextLine` Output.string (printf "%d" startXrefPosition)
|
||||||
`nextLine` byteString eofMarker
|
`nextLine` byteString eofMarker
|
||||||
|
|
|
@ -53,9 +53,6 @@ instance Output Bool where
|
||||||
output False = string "false"
|
output False = string "false"
|
||||||
output True = string "true"
|
output True = string "true"
|
||||||
|
|
||||||
instance Output Float where
|
|
||||||
output = string . show
|
|
||||||
|
|
||||||
instance Output a => Output [a] where
|
instance Output a => Output [a] where
|
||||||
output = foldl mappend mempty . fmap output
|
output = foldl mappend mempty . fmap output
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue