Hufflepdf/src/PDF.hs

110 lines
4.1 KiB
Haskell

{-# LANGUAGE NamedFieldPuns #-}
module PDF (
Document(..)
, parseDocument
, render
) where
import Data.ByteString.Lazy.Char8 (ByteString)
import qualified Data.ByteString.Lazy.Char8 as BS (
drop, findIndex, head, isPrefixOf, last, length, span, unpack
)
import Data.ByteString.Lazy.Char8.Util (previous, subBS)
import Data.Int (Int64)
import qualified Data.Map as Map (lookup)
import PDF.Body (populate)
import qualified PDF.EOL as EOL (Style(..), charset, parser)
import PDF.Object (
Content(..), DirectObject(..), InputStructure(..), Name(..), Number(..)
, Structure(..)
, eofMarker, magicNumber, structure
)
import qualified PDF.Output as Output (render, string)
import PDF.Output (Output(..), nextLine)
import Text.Parsec
import Text.Parsec.ByteString.Lazy (Parser)
import Text.Parsec.Pos (newPos)
import Text.ParserCombinators.Parsec.Error (Message(..), newErrorMessage)
import Text.Printf (printf)
data Document = Document {
pdfVersion :: String
, eolStyle :: EOL.Style
, contents :: [Content]
} deriving Show
instance Output Document where
output (Document {pdfVersion, contents}) =
Output.string (printf "%%PDF-%s" pdfVersion)
`nextLine` output contents
render :: Document -> ByteString
render document@(Document {eolStyle}) = Output.render eolStyle document
version :: Parser String
version = string magicNumber *> many (noneOf EOL.charset)
parseError :: String -> Either ParseError a
parseError errorMessage =
Left $ newErrorMessage (Message errorMessage) (newPos "" 0 0)
check :: Bool -> String -> Either ParseError ()
check test errorMessage = if test then return () else parseError errorMessage
readStartXref :: EOL.Style -> ByteString -> Either ParseError Int64
readStartXref eolStyle input =
check (eofMarker `BS.isPrefixOf` (BS.drop eofMarkerPosition input))
"Badly formed document : missing EOF marker at the end"
>> return (read . BS.unpack $ subBS startXrefPosition startXrefLength input)
where
(eolOffset, eolLastByte) = case eolStyle of
EOL.CRLF -> (2, '\n')
EOL.CR -> (1, '\r')
_ -> (1, '\n')
eofMarkerPosition =
BS.length input - BS.length eofMarker
- if BS.last input == BS.last eofMarker then 0 else eolOffset
startXrefPosition =
previous eolLastByte (eofMarkerPosition - eolOffset - 1) input + 1
startXrefLength = eofMarkerPosition - eolOffset - startXrefPosition
findNextLine :: ByteString -> Int64
findNextLine input =
let (line, eolPrefixed) = BS.span notInEol input in
let nextNotInEol = BS.findIndex notInEol eolPrefixed in
BS.length line + (maybe (BS.length eolPrefixed) id nextNotInEol)
where
notInEol = not . (`elem` EOL.charset)
findNextSection :: Int64 -> ByteString -> Int64
findNextSection offset input =
case BS.findIndex (== BS.head eofMarker) input of
Nothing -> 0
Just delta ->
let newInput = BS.drop delta input in
let newOffset = offset + delta in
if BS.isPrefixOf eofMarker newInput
then newOffset + findNextLine newInput
else findNextSection (newOffset + 1) (BS.drop 1 newInput)
readStructures :: Int64 -> ByteString -> Either ParseError [InputStructure]
readStructures startXref input =
parse structure ('@':show startXref) (BS.drop startXref input) >>= stopOrFollow
where
stopOrFollow s@(Structure {trailer}) =
case Map.lookup (Name "Prev") trailer of
Nothing -> Right [InputStructure (findNextLine input) s]
Just (NumberObject (Number newStartXref)) ->
let offset = truncate newStartXref in
let startOffset = findNextSection offset (BS.drop offset input) in
(InputStructure startOffset s:) <$> (readStructures offset input)
Just v -> parseError $ "Bad value for Prev entry in trailer: " ++ show v
parseDocument :: ByteString -> Either ParseError Document
parseDocument input = do
(pdfVersion, eolStyle) <- parse ((,) <$> version <*> EOL.parser) "" input
startXref <- readStartXref eolStyle input
structuresRead <- readStructures startXref input
let contents = populate input <$> structuresRead
return $ Document {pdfVersion, eolStyle, contents}