{-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE TypeSynonymInstances #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} module PDF.Content ( Content(..) , ContentUnit(..) , GraphicContextUnit(..) , Id(..) , Indexed , TextContext , parse ) where import Control.Applicative ((<|>)) import Control.Monad.Fail (MonadFail) import Control.Monad.Reader (asks, runReader) import Control.Monad.State (evalStateT, gets, modify) import Data.Attoparsec.ByteString.Char8 (sepBy) import Data.ByteString (ByteString) import Data.Map (Map, (!)) import qualified Data.Map as Map (empty, insert, size) import PDF.Box (Box(..)) import PDF.Content.Operator (Instruction, operator) import PDF.Object (blank, directObject) import PDF.Output (Output(..), line) import PDF.Parser (Parser, runParser, string) newtype Id a = Id Int deriving (Eq, Ord, Show) data Instructions = Instructions type Indexed a = Map (Id a) a data GraphicContextUnit = GraphicInstruction (Id Instruction) | ContentUnit ContentUnit deriving Show type TextContext = [Id Instruction] data ContentUnit = GraphicContext [GraphicContextUnit] | TextContext TextContext deriving Show data Content = Content { contentUnits :: [ContentUnit] , indexedInstructions :: Indexed Instruction } deriving Show type InstructionParser = Parser (Indexed Instruction) instance Monad m => Box m Instructions Content (Indexed Instruction) where r Instructions = return . indexedInstructions w Instructions indexedInstructions someContent = return $ someContent {indexedInstructions} register :: Instruction -> InstructionParser (Id Instruction) register newInstruction = do newInstructionID <- gets (Id . Map.size) modify (Map.insert newInstructionID newInstruction) return newInstructionID parse :: MonadFail m => ByteString -> m Content parse = either fail (return . uncurry Content) . runParser contentUnits Map.empty where contentUnits = contentUnit `sepBy` blank contentUnit :: InstructionParser ContentUnit contentUnit = (GraphicContext <$> graphicContext) <|> (TextContext <$> textContext) where graphicContext = string "q" *> blank *> graphicContextUnit `sepBy` blank <* blank <* string "Q" graphicContextUnit :: InstructionParser GraphicContextUnit graphicContextUnit = (GraphicInstruction <$> instruction) <|> (ContentUnit <$> contentUnit) instruction :: InstructionParser (Id Instruction) instruction = evalStateT stackParser [] >>= register where stackParser = ((directObject <* blank) >>= push) <|> operator push arg = modify (arg:) *> stackParser textContext :: InstructionParser TextContext textContext = string "BT" *> blank *> instruction `sepBy` blank <* blank <* string "ET" instance Output Content where output (Content {contentUnits, indexedInstructions}) = runReader (mconcat <$> mapM outputCU contentUnits) indexedInstructions where outputCU (GraphicContext gc) = do inside <- mconcat <$> mapM outputGCU gc return (line "q" `mappend` inside `mappend` line "Q") outputCU (TextContext tc) = do inside <- mconcat <$> mapM outputIId tc return (line "BT" `mappend` inside `mappend` line "ET") outputGCU (GraphicInstruction gi) = outputIId gi outputGCU (ContentUnit cu) = outputCU cu outputIId instructionId = asks (output . (! instructionId))