{-# LANGUAGE NamedFieldPuns #-} module PDF ( Document(..) , DirectObject(..) , parseDocument , render ) where import Data.ByteString.Lazy.Char8 (ByteString) import qualified Data.ByteString.Lazy.Char8 as BS ( drop, findIndex, head, isPrefixOf, last, length, span, unpack ) import Data.ByteString.Lazy.Char8.Util (previous, subBS) import Data.Int (Int64) import qualified Data.Map as Map (lookup) import PDF.Body (populate) import qualified PDF.EOL as EOL (Style(..), charset, parser) import PDF.Object ( Content(..), DirectObject(..), InputStructure(..), Name(..), Number(..) , Structure(..) , eofMarker, magicNumber, structure ) import qualified PDF.Output as Output (render, string) import PDF.Output (Output(..), nextLine) import Text.Parsec import Text.Parsec.ByteString.Lazy (Parser) import Text.Parsec.Pos (newPos) import Text.ParserCombinators.Parsec.Error (Message(..), newErrorMessage) import Text.Printf (printf) data Document = Document { pdfVersion :: String , eolStyle :: EOL.Style , contents :: [Content] } deriving Show instance Output Document where output (Document {pdfVersion, contents}) = Output.string (printf "%%PDF-%s" pdfVersion) `nextLine` output contents render :: Document -> ByteString render document@(Document {eolStyle}) = Output.render eolStyle document version :: Parser String version = string magicNumber *> many (noneOf EOL.charset) parseError :: String -> Either ParseError a parseError errorMessage = Left $ newErrorMessage (Message errorMessage) (newPos "" 0 0) check :: Bool -> String -> Either ParseError () check test errorMessage = if test then return () else parseError errorMessage readStartXref :: EOL.Style -> ByteString -> Either ParseError Int64 readStartXref eolStyle input = check (eofMarker `BS.isPrefixOf` (BS.drop eofMarkerPosition input)) "Badly formed document : missing EOF marker at the end" >> return (read . BS.unpack $ subBS startXrefPosition startXrefLength input) where (eolOffset, eolLastByte) = case eolStyle of EOL.CRLF -> (2, '\n') EOL.CR -> (1, '\r') _ -> (1, '\n') eofMarkerPosition = BS.length input - BS.length eofMarker - if BS.last input == BS.last eofMarker then 0 else eolOffset startXrefPosition = previous eolLastByte (eofMarkerPosition - eolOffset - 1) input + 1 startXrefLength = eofMarkerPosition - eolOffset - startXrefPosition findNextLine :: ByteString -> Int64 findNextLine input = let (line, eolPrefixed) = BS.span notInEol input in let nextNotInEol = BS.findIndex notInEol eolPrefixed in BS.length line + (maybe (BS.length eolPrefixed) id nextNotInEol) where notInEol = not . (`elem` EOL.charset) findNextSection :: Int64 -> ByteString -> Int64 findNextSection offset input = case BS.findIndex (== BS.head eofMarker) input of Nothing -> 0 Just delta -> let newInput = BS.drop delta input in let newOffset = offset + delta in if BS.isPrefixOf eofMarker newInput then newOffset + findNextLine newInput else findNextSection (newOffset + 1) (BS.drop 1 newInput) readStructures :: Int64 -> ByteString -> Either ParseError [InputStructure] readStructures startXref input = parse structure ('@':show startXref) (BS.drop startXref input) >>= stopOrFollow where stopOrFollow s@(Structure {trailer}) = case Map.lookup (Name "Prev") trailer of Nothing -> Right [InputStructure (findNextLine input) s] Just (NumberObject (Number newStartXref)) -> let offset = truncate newStartXref in let startOffset = findNextSection offset (BS.drop offset input) in (InputStructure startOffset s:) <$> (readStructures offset input) Just v -> parseError $ "Bad value for Prev entry in trailer: " ++ show v parseDocument :: ByteString -> Either ParseError Document parseDocument input = do (pdfVersion, eolStyle) <- parse ((,) <$> version <*> EOL.parser) "" input startXref <- readStartXref eolStyle input structuresRead <- readStructures startXref input let contents = populate input <$> structuresRead return $ Document {pdfVersion, eolStyle, contents}