{-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE TypeSynonymInstances #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} module PDF.Content ( Content(..) , ContentUnit(..) , GraphicContextUnit(..) , Id(..) , Indexed , TextContext , content , parse ) where import Control.Applicative ((<|>)) import Control.Monad.Reader (asks, runReader) import Control.Monad.State (MonadState, evalStateT, gets, modify, runStateT) import Data.Attoparsec.ByteString.Char8 (sepBy) import Data.ByteString (ByteString) import Data.Map (Map, (!)) import qualified Data.Map as Map (empty, insert, size) import PDF.Box (Box(..)) import PDF.Content.Operator (Instruction, operator) import PDF.Object (blank, directObject) import PDF.Output (Output(..), line) import PDF.Parser (MonadParser, evalParser, string) newtype Id a = Id Int deriving (Eq, Ord, Show) data Instructions = Instructions type Indexed a = Map (Id a) a data GraphicContextUnit = GraphicInstruction (Id Instruction) | ContentUnit ContentUnit deriving Show type TextContext = [Id Instruction] data ContentUnit = GraphicContext [GraphicContextUnit] | TextContext TextContext deriving Show data Content = Content { contentUnits :: [ContentUnit] , indexedInstructions :: Indexed Instruction } deriving Show type InstructionParser m = (MonadParser m, MonadState (Indexed Instruction) m) instance Monad m => Box m Instructions Content (Indexed Instruction) where r Instructions = return . indexedInstructions w Instructions indexedInstructions someContent = return $ someContent {indexedInstructions} register :: MonadState (Indexed Instruction) m => Instruction -> m (Id Instruction) register newInstruction = do newInstructionID <- gets (Id . Map.size) modify (Map.insert newInstructionID newInstruction) return newInstructionID content :: MonadParser m => m Content content = do (contentUnits, indexedInstructions) <- runInstructionParser return $ Content {contentUnits, indexedInstructions} where runInstructionParser = runStateT (contentUnit `sepBy` blank) Map.empty contentUnit :: InstructionParser m => m ContentUnit contentUnit = (GraphicContext <$> graphicContext) <|> (TextContext <$> textContext) where graphicContext = string "q" *> blank *> graphicContextUnit `sepBy` blank <* blank <* string "Q" graphicContextUnit :: InstructionParser m => m GraphicContextUnit graphicContextUnit = (GraphicInstruction <$> instruction) <|> (ContentUnit <$> contentUnit) instruction :: InstructionParser m => m (Id Instruction) instruction = evalStateT stackParser [] >>= register where stackParser = ((directObject <* blank) >>= push) <|> operator push arg = modify (arg:) *> stackParser parse :: ByteString -> Either String Content parse = evalParser content () textContext :: InstructionParser m => m TextContext textContext = string "BT" *> blank *> instruction `sepBy` blank <* blank <* string "ET" instance Output Content where output (Content {contentUnits, indexedInstructions}) = runReader (mconcat <$> mapM outputCU contentUnits) indexedInstructions where outputCU (GraphicContext gc) = do inside <- mconcat <$> mapM outputGCU gc return (line "q" `mappend` inside `mappend` line "Q") outputCU (TextContext tc) = do inside <- mconcat <$> mapM outputIId tc return (line "BT" `mappend` inside `mappend` line "ET") outputGCU (GraphicInstruction gi) = outputIId gi outputGCU (ContentUnit cu) = outputCU cu outputIId instructionId = asks (output . (! instructionId))