{-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} module PDF.Content.Text ( Chunks(..) , chunk , format ) where import Control.Monad (foldM) import Control.Monad.Fail (MonadFail(..)) import Control.Monad.Reader (ReaderT, asks, runReaderT) import Control.Monad.State ( MonadState(..), StateT, evalStateT, gets, modify, runStateT ) import Control.Monad.Trans (lift) import qualified Data.ByteString.Char8 as BS (concatMap, singleton) import Data.Id (Id(..), Indexed, at, empty, singleton) import qualified Data.Id as Id (delete, lookup, register) import Data.Map ((!)) import Data.Text (Text, breakOn) import qualified Data.Text as Text (drop) import PDF.Box (Box(..)) import PDF.Content ( Content(..), ContentUnit(..), GraphicContextUnit(..), IdContentUnit , IdGraphicContextUnit ) import PDF.Content.Operator (Instruction, Operator(..)) import PDF.Content.Operator.Text (Operator(..)) import PDF.Font (Font(..), FontSet, emptyFont) import PDF.Object (DirectObject(..), StringObject(..), toByteString) import Prelude hiding (fail) type TmpFont = StateT Font type Renderer m = ReaderT (Indexed Instruction) (ReaderT FontSet m) type Updater m = StateT (Indexed Instruction) (ReaderT (Indexed Text) (ReaderT FontSet m)) decodeString :: MonadFail m => StringObject -> TmpFont (Renderer m) Text decodeString input = do Font {decode} <- get either fail return . decode $ toByteString input data Chunks = Chunks chunk :: Int -> Id Text chunk = Id instance MonadFail m => Box (ReaderT FontSet m) Chunks Content (Indexed Text) where r Chunks content = runReaderT (mconcat <$> renderer) $ indexedInstructions content where renderer = mapM renderContentUnit (contentUnits content) w Chunks indexedText content = do (contentUnits, indexedInstructions) <- runReaderT readerUpdate indexedText return $ content {contentUnits, indexedInstructions} where stateUpdate = mapM updateContentUnit (contentUnits content) readerUpdate = runStateT stateUpdate (indexedInstructions content) renderContentUnit :: MonadFail m => IdContentUnit -> Renderer m (Indexed Text) renderContentUnit (GraphicContext graphicContextUnits) = mconcat <$> mapM renderGraphicContextUnit graphicContextUnits renderContentUnit (TextContext instructionIds) = evalStateT (mconcat <$> mapM renderInstructionId instructionIds) emptyFont updateContentUnit :: MonadFail m => IdContentUnit -> Updater m IdContentUnit updateContentUnit (GraphicContext graphicContextUnits) = GraphicContext <$> mapM updateGraphicContextUnit graphicContextUnits updateContentUnit (TextContext instructionIds) = TextContext . concat <$> evalStateT (mapM updateInstructionId instructionIds) emptyFont renderGraphicContextUnit :: MonadFail m => IdGraphicContextUnit -> Renderer m (Indexed Text) renderGraphicContextUnit (GraphicInstruction _) = return empty renderGraphicContextUnit (ContentUnit contentUnit) = renderContentUnit contentUnit updateGraphicContextUnit :: MonadFail m => IdGraphicContextUnit -> Updater m IdGraphicContextUnit updateGraphicContextUnit gI@(GraphicInstruction _) = return gI updateGraphicContextUnit (ContentUnit contentUnit) = ContentUnit <$> updateContentUnit contentUnit renderInstructionId :: MonadFail m => Id Instruction -> TmpFont (Renderer m) (Indexed Text) renderInstructionId instructionId@(Id n) = toMap <$> (asks ((`at` instructionId)) >>= renderInstruction) where toMap = maybe empty (singleton (Id n)) updateInstructionId :: MonadFail m => Id Instruction -> TmpFont (Updater m) [Id Instruction] updateInstructionId instructionId = lift (gets (`at` instructionId)) >>= updateInstruction instructionId renderInstruction :: MonadFail m => Instruction -> TmpFont (Renderer m) (Maybe Text) renderInstruction (Text Tf, [NameObject fontName, _]) = lift (lift $ asks (! fontName)) >>= put >> return Nothing renderInstruction (Text Tstar, []) = return $ Just "\n" renderInstruction (Text TJ, [Array arrayObject]) = Just <$> foldM appendText "" arrayObject where appendText t (StringObject outputString) = mappend t <$> decodeString outputString appendText t _ = return t renderInstruction (Text Tj, [StringObject outputString]) = Just <$> decodeString outputString renderInstruction (Text Quote, [StringObject outputString]) = (Just . mappend "\n") <$> decodeString outputString renderInstruction (Text DQuote, [_, _, StringObject outputString]) = (Just . mappend "\n") <$> decodeString outputString renderInstruction _ = return Nothing updateInstruction :: MonadFail m => Id Instruction -> Instruction -> TmpFont (Updater m) [Id Instruction] updateInstruction instructionId (Text Tf, [NameObject fontName, _]) = (lift . lift . lift $ asks (!fontName)) >>= put >> return [instructionId] updateInstruction instructionId@(Id n) instruction = do if emitsText $ fst instruction then asks (Id.lookup (Id n)) >>= replaceText else return [instructionId] where emitsText (Text Tstar) = True emitsText (Text TJ) = True emitsText (Text Tj) = True emitsText (Text Quote) = True emitsText (Text DQuote) = True emitsText _ = False replaceText = maybe (return []) $ \text -> do lift $ modify (Id.delete instructionId) format text >>= mapM (lift . Id.register) format :: MonadFail m => Text -> StateT Font m [Instruction] format input = do case breakOn "\n" input of ("", "") -> return [] ("", left) -> ((Text Tstar, []) :) <$> format (Text.drop 1 left) (line, left) -> (:) <$> tj line <*> format left where tj t = do encoded <- either fail return =<< gets (($t) . encode) return (Text Tj, [StringObject . Literal $ BS.concatMap escape encoded]) escape '\\' = "\\\\" escape '(' = "\\(" escape ')' = "\\)" escape c = BS.singleton c