{-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE TypeSynonymInstances #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} module PDF.Content.Text ( Chunks(..) , chunk , format , renderText ) where import Control.Monad (foldM) import Control.Monad.Fail (MonadFail(..)) import Control.Monad.Reader (MonadReader(..), ReaderT, asks, runReaderT) import Control.Monad.State ( MonadState(..), StateT, evalStateT, gets, modify, runStateT ) import Control.Monad.Trans (lift) import Data.Id (Id(..), Indexed, at, empty, singleton) import qualified Data.Id as Id (delete, lookup, register) import Data.Map ((!)) import Data.Text (Text, breakOn) import qualified Data.Text as Text (drop) import PDF.Box (Box(..)) import PDF.Content ( Content, ContentUnit(..), GraphicContextUnit(..), contentUnits ) import qualified PDF.Content as Content (Content(..)) import PDF.Content.Operator (Instruction, Operator(..)) import PDF.Content.Operator.Text (Operator(..)) import PDF.Font (Font(..), FontSet, emptyFont) import PDF.Object (DirectObject(..), StringObject(..), toByteString) import Prelude hiding (fail) data ROContext = ROContext { indexedInstructions :: Indexed Instruction , fontSet :: FontSet } type TextContent m = ReaderT ROContext m type FontContext m = StateT Font (TextContent m) type Updater m = StateT (Indexed Instruction) (ReaderT (Indexed Text) (ReaderT FontSet m)) type TextUpdater m = StateT Font (Updater m) decodeString :: MonadFail m => StringObject -> FontContext m Text decodeString input = do Font {decode} <- get either fail return . decode $ toByteString input data Chunks = Chunks chunk :: Int -> Id Text chunk = Id instance MonadFail m => Box (ReaderT FontSet m) Chunks Content (Indexed Text) where r Chunks content = do fontSet <- ask renderText fontSet content w Chunks indexedText (Content.Content {contentUnits, Content.indexedInstructions}) = uncurry Content.Content <$> runReaderT ( runStateT (mapM updateContentUnit contentUnits) indexedInstructions ) indexedText renderText :: MonadFail m => FontSet -> Content -> m (Indexed Text) renderText fontSet (Content.Content {contentUnits, Content.indexedInstructions}) = runReaderT (mconcat <$> mapM renderContentUnit contentUnits) roContext where roContext = ROContext {indexedInstructions, fontSet} renderContentUnit :: MonadFail m => ContentUnit -> TextContent m (Indexed Text) renderContentUnit (GraphicContext graphicContextUnits) = mconcat <$> mapM renderGraphicContextUnit graphicContextUnits renderContentUnit (TextContext instructionIds) = evalStateT (mconcat <$> mapM renderInstructionId instructionIds) emptyFont updateContentUnit :: MonadFail m => ContentUnit -> Updater m ContentUnit updateContentUnit (GraphicContext graphicContextUnits) = GraphicContext <$> mapM updateGraphicContextUnit graphicContextUnits updateContentUnit (TextContext instructionIds) = TextContext . concat <$> evalStateT (mapM updateInstructionId instructionIds) emptyFont renderGraphicContextUnit :: MonadFail m => GraphicContextUnit -> TextContent m (Indexed Text) renderGraphicContextUnit (GraphicInstruction _) = return empty renderGraphicContextUnit (ContentUnit contentUnit) = renderContentUnit contentUnit updateGraphicContextUnit :: MonadFail m => GraphicContextUnit -> Updater m GraphicContextUnit updateGraphicContextUnit gI@(GraphicInstruction _) = return gI updateGraphicContextUnit (ContentUnit contentUnit) = ContentUnit <$> updateContentUnit contentUnit renderInstructionId :: MonadFail m => Id Instruction -> FontContext m (Indexed Text) renderInstructionId instructionId@(Id n) = toMap <$> (asks ((`at` instructionId) . indexedInstructions) >>= renderInstruction) where toMap = maybe empty (singleton (Id n)) updateInstructionId :: MonadFail m => Id Instruction -> TextUpdater m [Id Instruction] updateInstructionId instructionId = lift (gets (`at` instructionId)) >>= updateInstruction instructionId renderInstruction :: MonadFail m => Instruction -> FontContext m (Maybe Text) renderInstruction (Text Tf, [NameObject fontName, _]) = asks ((! fontName) . fontSet) >>= put >> return Nothing renderInstruction (Text Tstar, []) = return $ Just "\n" renderInstruction (Text TJ, [Array arrayObject]) = Just <$> foldM appendText "" arrayObject where appendText t (StringObject outputString) = mappend t <$> decodeString outputString appendText t _ = return t renderInstruction (Text Tj, [StringObject outputString]) = Just <$> decodeString outputString renderInstruction (Text Quote, [StringObject outputString]) = (Just . mappend "\n") <$> decodeString outputString renderInstruction (Text DQuote, [_, _, StringObject outputString]) = (Just . mappend "\n") <$> decodeString outputString renderInstruction _ = return Nothing updateInstruction :: MonadFail m => Id Instruction -> Instruction -> TextUpdater m [Id Instruction] updateInstruction instructionId (Text Tf, [NameObject fontName, _]) = (lift . lift . lift $ asks (!fontName)) >>= put >> return [instructionId] updateInstruction instructionId@(Id n) instruction = do if emitsText $ fst instruction then asks (Id.lookup (Id n)) >>= replaceText else return [instructionId] where emitsText (Text Tstar) = True emitsText (Text TJ) = True emitsText (Text Tj) = True emitsText (Text Quote) = True emitsText (Text DQuote) = True emitsText _ = False replaceText = maybe (return []) $ \text -> do lift $ modify (Id.delete instructionId) format text >>= mapM (lift . Id.register) format :: MonadFail m => Text -> StateT Font m [Instruction] format input = do case breakOn "\n" input of ("", "") -> return [] ("", left) -> ((Text Tstar, []) :) <$> format (Text.drop 1 left) (line, left) -> (:) <$> tj line <*> format left where tj t = do literal <- either fail (return . Literal) =<< gets (($t) . encode) return (Text Tj, [StringObject literal])