{-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE TypeSynonymInstances #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} module PDF.Content.Text ( Chunks(..) , format , renderText ) where import Control.Monad (foldM) import Control.Monad.Fail (MonadFail(..)) import Control.Monad.Reader (MonadReader(..), ReaderT, asks, runReaderT) import Control.Monad.State (MonadState(..), StateT, evalStateT) import Data.Id (Id(..), Indexed) import Data.Map ((!)) import qualified Data.Map as Map (empty, singleton) import Data.Text (Text, breakOn) import qualified Data.Text as Text (drop) import PDF.Box (Box(..)) import PDF.Content ( Content, ContentUnit(..), GraphicContextUnit(..), contentUnits ) import qualified PDF.Content as Content (Content(..)) import PDF.Content.Operator (Instruction, Operator(..)) import PDF.Content.Operator.Text (Operator(..)) import PDF.Font (Font(..), FontSet, emptyFont) import PDF.Object (DirectObject(..), StringObject(..), toByteString) import Prelude hiding (fail) data ROContext = ROContext { indexedInstructions :: Indexed Instruction , fontSet :: FontSet } type TextContent m = ReaderT ROContext m type FontContext m = StateT Font (TextContent m) decodeString :: MonadFail m => StringObject -> FontContext m Text decodeString input = do Font {decode} <- get either fail return . decode $ toByteString input data Chunks = Chunks instance MonadFail m => Box (ReaderT FontSet m) Chunks Content (Indexed Text) where r Chunks content = do fontSet <- ask renderText fontSet content renderText :: MonadFail m => FontSet -> Content -> m (Indexed Text) renderText fontSet (Content.Content {contentUnits, Content.indexedInstructions}) = runReaderT (mconcat <$> mapM renderContentUnit contentUnits) roContext where roContext = ROContext {indexedInstructions, fontSet} renderContentUnit :: MonadFail m => ContentUnit -> TextContent m (Indexed Text) renderContentUnit (GraphicContext graphicContextUnits) = mconcat <$> mapM renderGraphicContextUnit graphicContextUnits renderContentUnit (TextContext instructionIds) = evalStateT (mconcat <$> mapM renderInstructionId instructionIds) emptyFont renderGraphicContextUnit :: MonadFail m => GraphicContextUnit -> TextContent m (Indexed Text) renderGraphicContextUnit (GraphicInstruction _) = return Map.empty renderGraphicContextUnit (ContentUnit contentUnit) = renderContentUnit contentUnit renderInstructionId :: MonadFail m => Id Instruction -> FontContext m (Indexed Text) renderInstructionId instructionId@(Id n) = toMap <$> (asks ((! instructionId) . indexedInstructions) >>= renderInstruction) where toMap = maybe Map.empty (Map.singleton (Id n)) renderInstruction :: MonadFail m => Instruction -> FontContext m (Maybe Text) renderInstruction (Text Tf, [NameObject fontName, _]) = asks ((! fontName) . fontSet) >>= put >> return Nothing renderInstruction (Text Tstar, []) = return $ Just "\n" renderInstruction (Text TJ, [Array arrayObject]) = Just <$> foldM appendText "" arrayObject where appendText t (StringObject outputString) = mappend t <$> decodeString outputString appendText t _ = return t renderInstruction (Text Tj, [StringObject outputString]) = Just <$> decodeString outputString renderInstruction (Text Quote, [StringObject outputString]) = (Just . mappend "\n") <$> decodeString outputString renderInstruction (Text DQuote, [_, _, StringObject outputString]) = (Just . mappend "\n") <$> decodeString outputString renderInstruction _ = return Nothing format :: (MonadFail m, MonadReader Font m) => Text -> m [Instruction] format input = case breakOn "\n" input of ("", "") -> return [] ("", left) -> ((Text Tstar, []) :) <$> format (Text.drop 1 left) (line, left) -> (:) <$> tj line <*> format left where tj t = do literal <- either fail (return . Literal) =<< asks (($t) . encode) return (Text Tj, [StringObject literal])