{-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE TypeSynonymInstances #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} module PDF.Content ( Content(..) , ContentUnit(..) , GraphicContextUnit(..) , TextContext , parse ) where import Control.Applicative ((<|>)) import Control.Monad.Fail (MonadFail) import Control.Monad.Reader (asks, runReader) import Control.Monad.State (evalStateT, modify) import Data.Attoparsec.ByteString.Char8 (sepBy) import Data.ByteString (ByteString) import Data.Id (Id(..), Indexed, at, empty, register) import PDF.Box (Box(..)) import PDF.Content.Operator (Instruction, operator) import PDF.Object (blank, directObject) import PDF.Output (Output(..), line) import PDF.Parser (Parser, runParser, string) data Instructions = Instructions data GraphicContextUnit = GraphicInstruction (Id Instruction) | ContentUnit ContentUnit deriving Show type TextContext = [Id Instruction] data ContentUnit = GraphicContext [GraphicContextUnit] | TextContext TextContext deriving Show data Content = Content { contentUnits :: [ContentUnit] , indexedInstructions :: Indexed Instruction } deriving Show type InstructionParser = Parser (Indexed Instruction) instance Monad m => Box m Instructions Content (Indexed Instruction) where r Instructions = return . indexedInstructions w Instructions indexedInstructions someContent = return $ someContent {indexedInstructions} parse :: MonadFail m => ByteString -> m Content parse = either fail (return . uncurry Content) . runParser contentUnits empty where contentUnits = contentUnit `sepBy` blank contentUnit :: InstructionParser ContentUnit contentUnit = (GraphicContext <$> graphicContext) <|> (TextContext <$> textContext) where graphicContext = string "q" *> blank *> graphicContextUnit `sepBy` blank <* blank <* string "Q" graphicContextUnit :: InstructionParser GraphicContextUnit graphicContextUnit = (GraphicInstruction <$> instruction) <|> (ContentUnit <$> contentUnit) instruction :: InstructionParser (Id Instruction) instruction = evalStateT stackParser [] >>= register where stackParser = ((directObject <* blank) >>= push) <|> operator push arg = modify (arg:) *> stackParser textContext :: InstructionParser TextContext textContext = string "BT" *> blank *> instruction `sepBy` blank <* blank <* string "ET" instance Output Content where output (Content {contentUnits, indexedInstructions}) = runReader (mconcat <$> mapM outputCU contentUnits) indexedInstructions where outputCU (GraphicContext gc) = do inside <- mconcat <$> mapM outputGCU gc return (line "q" `mappend` inside `mappend` line "Q") outputCU (TextContext tc) = do inside <- mconcat <$> mapM outputIId tc return (line "BT" `mappend` inside `mappend` line "ET") outputGCU (GraphicInstruction gi) = outputIId gi outputGCU (ContentUnit cu) = outputCU cu outputIId instructionId = asks (output . (`at` instructionId))