{-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE TypeSynonymInstances #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} module PDF.Content.Text ( format , renderText ) where import Control.Monad (foldM) import Control.Monad.Fail (MonadFail(..)) import Control.Monad.Reader (MonadReader(..), asks, runReaderT) import Control.Monad.State (MonadState(..), evalStateT) import Data.ByteString.Char8 (pack) import Data.Map ((!)) import Data.Text (Text) import PDF.Box (Box(..)) import PDF.Content ( Content, ContentUnit(..), Id(..), Indexed, GraphicContextUnit(..) , contentUnits ) import qualified PDF.Content as Content (Content(..)) import PDF.Content.Operator (Instruction, Operator(..)) import PDF.Content.Operator.Text (Operator(..)) import PDF.Font (Font(..), FontSet, emptyFont) import PDF.Object (DirectObject(..), StringObject(..), toByteString) import Prelude hiding (fail) data ROContext = ROContext { indexedInstructions :: Indexed Instruction , fontSet :: FontSet } type TextContent m = (MonadReader ROContext m, MonadFail m) type FontContext m = (MonadState Font m, TextContent m) decodeString :: FontContext m => StringObject -> m Text decodeString input = do Font {decode} <- get either fail return . decode $ toByteString input data Chunks = Chunks instance Monad m => Box m Chunks Content (Indexed Text) renderText :: MonadFail m => FontSet -> Content -> m [Text] renderText fontSet (Content.Content {contentUnits, Content.indexedInstructions}) = runReaderT (concat <$> mapM renderContentUnit contentUnits) roContext where roContext = ROContext {indexedInstructions, fontSet} renderContentUnit :: TextContent m => ContentUnit -> m [Text] renderContentUnit (GraphicContext graphicContextUnits) = concat <$> mapM renderGraphicContextUnit graphicContextUnits renderContentUnit (TextContext instructionIds) = evalStateT (concat <$> mapM renderInstructionId instructionIds) emptyFont renderGraphicContextUnit :: TextContent m => GraphicContextUnit -> m [Text] renderGraphicContextUnit (GraphicInstruction _) = return [] renderGraphicContextUnit (ContentUnit contentUnit) = renderContentUnit contentUnit renderInstructionId :: FontContext m => Id Instruction -> m [Text] renderInstructionId instructionId = asks ((! instructionId) . indexedInstructions) >>= renderInstruction renderInstruction :: FontContext m => Instruction -> m [Text] renderInstruction (Text Tf, [NameObject fontName, _]) = asks ((! fontName) . fontSet) >>= put >> return [] renderInstruction (Text Tstar, []) = return ["\n"] renderInstruction (Text TJ, [Array arrayObject]) = replicate 1 <$> foldM appendText "" arrayObject where appendText t (StringObject outputString) = mappend t <$> decodeString outputString appendText t _ = return t renderInstruction (Text Tj, [StringObject outputString]) = replicate 1 <$> decodeString outputString renderInstruction (Text Quote, [StringObject outputString]) = (\t -> ["\n", t]) <$> decodeString outputString renderInstruction (Text DQuote, [StringObject outputString]) = (\t -> ["\n", t]) <$> decodeString outputString renderInstruction _ = return [] format :: String -> [Instruction] format s = case break (== '\n') s of ("", "") -> [] ("", left) -> (Text Tstar, []) : format (drop 1 left) (line, left) -> (Text Tj, [StringObject . Literal $ pack line]) : format left