{-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE TypeSynonymInstances #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} module PDF.Content ( Content(..) , ContentUnit(..) , GraphicContextUnit(..) , IdContentUnit , IdGraphicContextUnit , IdTextContext , Instructions(..) , TextContext , parse ) where import Control.Applicative ((<|>)) import Control.Monad.Reader (asks, runReader) import Control.Monad.State.Strict (runState, evalStateT, modify) import Data.Attoparsec.ByteString.Char8 (sepBy) import qualified Data.Attoparsec.ByteString.Char8 as Atto (Parser, parseOnly) import Data.ByteString (ByteString) import Data.Id (Id(..), Indexed, at, empty, register) import PDF.Box (Box(..)) import PDF.Content.Operator (Instruction, operator) import PDF.Object (blank, directObject) import PDF.Output (Output(..), line) import PDF.Parser (string) data Instructions = Instructions data GraphicContextUnit a = GraphicInstruction a | ContentUnit (ContentUnit a) deriving Show type TextContext a = [a] data ContentUnit a = GraphicContext [GraphicContextUnit a] | TextContext (TextContext a) deriving Show data Content = Content { contentUnits :: [IdContentUnit] , indexedInstructions :: Indexed Instruction , firstError :: Maybe String } deriving Show type TmpContentUnit = ContentUnit Instruction type TmpGraphicContextUnit = GraphicContextUnit Instruction type TmpTextContext = TextContext Instruction type IdContentUnit = ContentUnit (Id Instruction) type IdGraphicContextUnit = GraphicContextUnit (Id Instruction) type IdTextContext = TextContext (Id Instruction) instance Monad m => Box m Instructions Content (Indexed Instruction) where r Instructions = return . indexedInstructions w Instructions indexedInstructions someContent = return $ someContent {indexedInstructions} parse :: ByteString -> Content parse input = let result = Atto.parseOnly (contentUnit `sepBy` blank) input in let (contentUnits, indexedInstructions) = either (const ([], empty)) buildContent result in let firstError = either Just (const Nothing) result in Content {contentUnits, indexedInstructions, firstError} buildContent :: [TmpContentUnit] -> ([IdContentUnit], Indexed Instruction) buildContent instructionContentUnits = runState (mapM registerContentUnit instructionContentUnits) empty where registerContentUnit (GraphicContext gc) = GraphicContext <$> (mapM registerGraphicContext gc) registerContentUnit (TextContext tc) = TextContext <$> (mapM register tc) registerGraphicContext (GraphicInstruction gi) = GraphicInstruction <$> (register gi) registerGraphicContext (ContentUnit cu) = ContentUnit <$> (registerContentUnit cu) contentUnit :: Atto.Parser TmpContentUnit contentUnit = (GraphicContext <$> graphicContext) <|> (TextContext <$> textContext) where graphicContext = string "q" *> blank *> graphicContextUnit `sepBy` blank <* blank <* string "Q" graphicContextUnit :: Atto.Parser TmpGraphicContextUnit graphicContextUnit = (GraphicInstruction <$> instruction) <|> (ContentUnit <$> contentUnit) instruction :: Atto.Parser Instruction instruction = evalStateT stackParser [] where stackParser = ((directObject <* blank) >>= push) <|> operator push arg = modify (arg:) *> stackParser textContext :: Atto.Parser TmpTextContext textContext = string "BT" *> blank *> instruction `sepBy` blank <* blank <* string "ET" instance Output Content where output (Content {contentUnits, indexedInstructions}) = runReader (mconcat <$> mapM outputCU contentUnits) indexedInstructions where outputCU (GraphicContext gc) = do inside <- mconcat <$> mapM outputGCU gc return (line "q" `mappend` inside `mappend` line "Q") outputCU (TextContext tc) = do inside <- mconcat <$> mapM outputIId tc return (line "BT" `mappend` inside `mappend` line "ET") outputGCU (GraphicInstruction gi) = outputIId gi outputGCU (ContentUnit cu) = outputCU cu outputIId instructionId = asks (output . (`at` instructionId))