{-# LANGUAGE NamedFieldPuns #-} module PDF ( Document(..) , parseDocument , render ) where import Data.ByteString.Char8 (ByteString) import qualified Data.ByteString.Char8 as BS ( drop, findIndex, head, isPrefixOf, last, length, span, unpack ) import Data.ByteString.Char8.Util (previous, subBS) import qualified Data.ByteString.Lazy as Lazy (ByteString) import qualified Data.Map as Map (lookup) import PDF.Body (populate) import qualified PDF.EOL as EOL (Style(..), charset, parser) import PDF.Object ( Content(..), DirectObject(..), InputStructure(..), Name(..), Number(..) , Structure(..) , eofMarker, magicNumber, structure ) import qualified PDF.Output as Output (render, line) import PDF.Output (Output(..)) import PDF.Parser (Parser, evalParser, string, takeAll) import Text.Printf (printf) data Document = Document { pdfVersion :: String , eolStyle :: EOL.Style , updates :: [Content] } deriving Show instance Output Document where output (Document {pdfVersion, updates}) = Output.line (printf "%%PDF-%s" pdfVersion) `mappend` output updates render :: Document -> Lazy.ByteString render document@(Document {eolStyle}) = Output.render eolStyle document version :: Parser () String version = BS.unpack <$> (string magicNumber *> takeAll (not . (`elem` EOL.charset))) check :: Bool -> String -> Either String () check test errorMessage = if test then return () else Left errorMessage readStartXref :: EOL.Style -> ByteString -> Either String Int readStartXref eolStyle input = check (eofMarker `BS.isPrefixOf` (BS.drop eofMarkerPosition input)) "Badly formed document : missing EOF marker at the end" >> return (read . BS.unpack $ subBS startXrefPosition startXrefLength input) where (eolOffset, eolLastByte) = case eolStyle of EOL.CRLF -> (2, '\n') EOL.CR -> (1, '\r') _ -> (1, '\n') eofMarkerPosition = BS.length input - BS.length eofMarker - if BS.last input == BS.last eofMarker then 0 else eolOffset startXrefPosition = previous eolLastByte (eofMarkerPosition - eolOffset - 1) input + 1 startXrefLength = eofMarkerPosition - eolOffset - startXrefPosition findNextLine :: ByteString -> Int findNextLine input = let (line, eolPrefixed) = BS.span notInEol input in let nextNotInEol = BS.findIndex notInEol eolPrefixed in BS.length line + (maybe (BS.length eolPrefixed) id nextNotInEol) where notInEol = not . (`elem` EOL.charset) findNextSection :: Int -> ByteString -> Int findNextSection offset input = case BS.findIndex (== BS.head eofMarker) input of Nothing -> 0 Just delta -> let newInput = BS.drop delta input in let newOffset = offset + delta in if BS.isPrefixOf eofMarker newInput then newOffset + findNextLine newInput else findNextSection (newOffset + 1) (BS.drop 1 newInput) readStructures :: Int -> ByteString -> Either String [InputStructure] readStructures startXref input = evalParser structure () (BS.drop startXref input) >>= stopOrFollow where stopOrFollow s@(Structure {trailer}) = case Map.lookup (Name "Prev") trailer of Nothing -> Right [InputStructure (findNextLine input) s] Just (NumberObject (Number newStartXref)) -> let offset = truncate newStartXref in let startOffset = findNextSection offset (BS.drop offset input) in (InputStructure startOffset s:) <$> (readStructures offset input) Just v -> Left $ "Bad value for Prev entry in trailer: " ++ show v parseDocument :: ByteString -> Either String Document parseDocument input = do (pdfVersion, eolStyle) <- evalParser ((,) <$> version <*> EOL.parser) () input startXref <- readStartXref eolStyle input structuresRead <- readStructures startXref input let updates = populate input <$> structuresRead return $ Document {pdfVersion, eolStyle, updates}