{-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE TypeSynonymInstances #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} module PDF.Content.Text ( format , renderText ) where import Control.Monad (foldM) import Control.Monad.Fail (MonadFail(..)) import Control.Monad.Reader (MonadReader(..), asks, runReaderT) import Control.Monad.State (MonadState(..), evalStateT) import Data.Map ((!)) import Data.Text (Text, breakOn) import qualified Data.Text as Text (drop) import PDF.Box (Box(..)) import PDF.Content ( Content, ContentUnit(..), Id(..), Indexed, GraphicContextUnit(..) , contentUnits ) import qualified PDF.Content as Content (Content(..)) import PDF.Content.Operator (Instruction, Operator(..)) import PDF.Content.Operator.Text (Operator(..)) import PDF.Font (Font(..), FontSet, emptyFont) import PDF.Object (DirectObject(..), StringObject(..), toByteString) import PDF.Pages (Page(..)) import Prelude hiding (fail) data ROContext = ROContext { indexedInstructions :: Indexed Instruction , fontSet :: FontSet } type TextContent m = (MonadReader ROContext m, MonadFail m) type FontContext m = (MonadState Font m, TextContent m) decodeString :: FontContext m => StringObject -> m Text decodeString input = do Font {decode} <- get either fail return . decode $ toByteString input data Chunks = Chunks instance Monad m => Box m Chunks Content (Indexed Text) renderText :: MonadFail m => FontSet -> Content -> m [Text] renderText fontSet (Content.Content {contentUnits, Content.indexedInstructions}) = runReaderT (concat <$> mapM renderContentUnit contentUnits) roContext where roContext = ROContext {indexedInstructions, fontSet} renderContentUnit :: TextContent m => ContentUnit -> m [Text] renderContentUnit (GraphicContext graphicContextUnits) = concat <$> mapM renderGraphicContextUnit graphicContextUnits renderContentUnit (TextContext instructionIds) = evalStateT (concat <$> mapM renderInstructionId instructionIds) emptyFont renderGraphicContextUnit :: TextContent m => GraphicContextUnit -> m [Text] renderGraphicContextUnit (GraphicInstruction _) = return [] renderGraphicContextUnit (ContentUnit contentUnit) = renderContentUnit contentUnit renderInstructionId :: FontContext m => Id Instruction -> m [Text] renderInstructionId instructionId = asks ((! instructionId) . indexedInstructions) >>= renderInstruction renderInstruction :: FontContext m => Instruction -> m [Text] renderInstruction (Text Tf, [NameObject fontName, _]) = asks ((! fontName) . fontSet) >>= put >> return [] renderInstruction (Text Tstar, []) = return ["\n"] renderInstruction (Text TJ, [Array arrayObject]) = replicate 1 <$> foldM appendText "" arrayObject where appendText t (StringObject outputString) = mappend t <$> decodeString outputString appendText t _ = return t renderInstruction (Text Tj, [StringObject outputString]) = replicate 1 <$> decodeString outputString renderInstruction (Text Quote, [StringObject outputString]) = (\t -> ["\n", t]) <$> decodeString outputString renderInstruction (Text DQuote, [StringObject outputString]) = (\t -> ["\n", t]) <$> decodeString outputString renderInstruction _ = return [] format :: (MonadFail m, MonadReader Font m) => Text -> m [Instruction] format input = case breakOn "\n" input of ("", "") -> return [] ("", left) -> ((Text Tstar, []) :) <$> format (Text.drop 1 left) (line, left) -> (:) <$> tj line <*> format left where tj t = do literal <- either fail (return . Literal) =<< asks (($t) . encode) return (Text Tj, [StringObject literal])