{-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE FlexibleContexts #-} module PDF.Content ( Content(..) , ContentUnit(..) , GraphicContextUnit(..) , IndexedInstructions , InstructionId , TextContext , content , parse ) where import Control.Applicative ((<|>)) import Control.Monad.Reader (asks, runReader) import Control.Monad.State (MonadState, evalStateT, gets, modify, runStateT) import Data.Attoparsec.ByteString.Char8 (sepBy) import Data.ByteString (ByteString) import Data.Map (Map, (!)) import qualified Data.Map as Map (empty, insert, size) import PDF.Content.Operator (Instruction, operator) import PDF.Object (blank, directObject) import PDF.Output (Output(..), line) import PDF.Parser (MonadParser, evalParser, string) newtype InstructionId = InstructionId Int deriving (Eq, Ord, Show) type IndexedInstructions = Map InstructionId Instruction data GraphicContextUnit = GraphicInstruction InstructionId | ContentUnit ContentUnit deriving Show type TextContext = [InstructionId] data ContentUnit = GraphicContext [GraphicContextUnit] | TextContext TextContext deriving Show data Content = Content { contentUnits :: [ContentUnit] , indexedInstructions :: IndexedInstructions } deriving Show type InstructionParser m = (MonadParser m, MonadState IndexedInstructions m) register :: MonadState IndexedInstructions m => Instruction -> m InstructionId register newInstruction = do newInstructionID <- gets (InstructionId . Map.size) modify (Map.insert newInstructionID newInstruction) return newInstructionID content :: MonadParser m => m Content content = do (contentUnits, indexedInstructions) <- runInstructionParser return $ Content {contentUnits, indexedInstructions} where runInstructionParser = runStateT (contentUnit `sepBy` blank) Map.empty contentUnit :: InstructionParser m => m ContentUnit contentUnit = (GraphicContext <$> graphicContext) <|> (TextContext <$> textContext) where graphicContext = string "q" *> blank *> graphicContextUnit `sepBy` blank <* blank <* string "Q" graphicContextUnit :: InstructionParser m => m GraphicContextUnit graphicContextUnit = (GraphicInstruction <$> instruction) <|> (ContentUnit <$> contentUnit) instruction :: InstructionParser m => m InstructionId instruction = evalStateT stackParser [] >>= register where stackParser = ((directObject <* blank) >>= push) <|> operator push arg = modify (arg:) *> stackParser parse :: ByteString -> Either String Content parse = evalParser content () textContext :: InstructionParser m => m TextContext textContext = string "BT" *> blank *> instruction `sepBy` blank <* blank <* string "ET" instance Output Content where output (Content {contentUnits, indexedInstructions}) = runReader (mconcat <$> mapM outputCU contentUnits) indexedInstructions where outputCU (GraphicContext gc) = do inside <- mconcat <$> mapM outputGCU gc return (line "q" `mappend` inside `mappend` line "Q") outputCU (TextContext tc) = do inside <- mconcat <$> mapM outputIId tc return (line "BT" `mappend` inside `mappend` line "ET") outputGCU (GraphicInstruction gi) = outputIId gi outputGCU (ContentUnit cu) = outputCU cu outputIId instructionId = asks (output . (! instructionId))