{-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} module PDF ( Document(..) , parseDocument , render ) where import Control.Monad.Fail (MonadFail(..)) import Data.ByteString.Char8 (ByteString) import qualified Data.ByteString.Char8 as BS ( drop, findIndex, head, isPrefixOf, last, length, span, unpack ) import Data.ByteString.Char8.Util (previous, subBS) import qualified Data.ByteString.Lazy as Lazy (ByteString) import qualified Data.Map as Map (lookup) import PDF.Body (populate) import PDF.Box (Box(..), edit) import qualified PDF.EOL as EOL (Style(..), charset, parser) import PDF.Layer (Layer, unify) import PDF.Object ( DirectObject(..), InputStructure(..), Name(..), Number(..) , Structure(..) , eofMarker, magicNumber, structure ) import qualified PDF.Output as Output (render, line) import PDF.Output (Output(..)) import PDF.Parser (Parser, evalParser, string, takeAll) import Prelude hiding (fail) import Text.Printf (printf) data Document = Document { pdfVersion :: String , eolStyle :: EOL.Style , layers :: [Layer] } deriving Show instance Output Document where output (Document {pdfVersion, layers}) = Output.line (printf "%%PDF-%s" pdfVersion) `mappend` output layers data EOLStyleK = EOLStyleK data AllLayers = AllLayers data UnifiedLayers = UnifiedLayers instance Monad m => Box m EOLStyleK Document EOL.Style where r EOLStyleK = return . eolStyle w EOLStyleK document eolStyle = return $ document {eolStyle} instance Monad m => Box m UnifiedLayers Document Layer where r UnifiedLayers = return . unify . layers w UnifiedLayers document layer = w AllLayers document [layer] instance Monad m => Box m AllLayers Document [Layer] where r AllLayers = return . layers w AllLayers document layers = return $ document {layers = layers} instance MonadFail m => Box m Int Document Layer where r i = at i . layers where at _ [] = fail $ "Layer out of bounds " ++ show i at 0 (x:_) = return x at k (_:xs) = at (k+1) xs w i document layer = edit AllLayers (setAt i) document where setAt _ [] = fail $ "Layer out of bounds " ++ show i setAt 0 (_:xs) = return (layer:xs) setAt k (x:xs) = (x:)<$>(setAt (k-1) xs) render :: Document -> Lazy.ByteString render document@(Document {eolStyle}) = Output.render eolStyle document version :: Parser () String version = BS.unpack <$> (string magicNumber *> takeAll (not . (`elem` EOL.charset))) check :: Bool -> String -> Either String () check test errorMessage = if test then return () else Left errorMessage readStartXref :: EOL.Style -> ByteString -> Either String Int readStartXref eolStyle input = check (eofMarker `BS.isPrefixOf` (BS.drop eofMarkerPosition input)) "Badly formed document : missing EOF marker at the end" >> return (read . BS.unpack $ subBS startXrefPosition startXrefLength input) where (eolOffset, eolLastByte) = case eolStyle of EOL.CRLF -> (2, '\n') EOL.CR -> (1, '\r') _ -> (1, '\n') eofMarkerPosition = BS.length input - BS.length eofMarker - if BS.last input == BS.last eofMarker then 0 else eolOffset startXrefPosition = previous eolLastByte (eofMarkerPosition - eolOffset - 1) input + 1 startXrefLength = eofMarkerPosition - eolOffset - startXrefPosition findNextLine :: ByteString -> Int findNextLine input = let (line, eolPrefixed) = BS.span notInEol input in let nextNotInEol = BS.findIndex notInEol eolPrefixed in BS.length line + (maybe (BS.length eolPrefixed) id nextNotInEol) where notInEol = not . (`elem` EOL.charset) findNextSection :: Int -> ByteString -> Int findNextSection offset input = case BS.findIndex (== BS.head eofMarker) input of Nothing -> 0 Just delta -> let newInput = BS.drop delta input in let newOffset = offset + delta in if BS.isPrefixOf eofMarker newInput then newOffset + findNextLine newInput else findNextSection (newOffset + 1) (BS.drop 1 newInput) readStructures :: Int -> ByteString -> Either String [InputStructure] readStructures startXref input = evalParser structure () (BS.drop startXref input) >>= stopOrFollow where stopOrFollow s@(Structure {trailer}) = case Map.lookup (Name "Prev") trailer of Nothing -> Right [InputStructure (findNextLine input) s] Just (NumberObject (Number newStartXref)) -> let offset = truncate newStartXref in let startOffset = findNextSection offset (BS.drop offset input) in (InputStructure startOffset s:) <$> (readStructures offset input) Just v -> Left $ "Bad value for Prev entry in trailer: " ++ show v parseDocument :: ByteString -> Either String Document parseDocument input = do (pdfVersion, eolStyle) <- evalParser ((,) <$> version <*> EOL.parser) () input startXref <- readStartXref eolStyle input structuresRead <- readStructures startXref input let layers = populate input <$> structuresRead return $ Document {pdfVersion, eolStyle, layers}