{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE FlexibleContexts #-} module PDF.Content.Text ( format , renderText ) where import Control.Monad (foldM) import Control.Monad.Fail (MonadFail(..)) import Control.Monad.Reader (MonadReader(..), asks, runReaderT) import Control.Monad.State (MonadState(..), evalStateT) import Data.ByteString.Char8 (pack) import Data.Map ((!)) import Data.Text (Text) import PDF.Content (Content(..), ContentUnit(..), GraphicContextUnit(..)) import PDF.Content.Operator (Instruction, Operator(..)) import PDF.Content.Operator.Text (Operator(..)) import PDF.Font (Font, FontSet, emptyFont) import PDF.Object (DirectObject(..), StringObject(..), toByteString) import Prelude hiding (fail) type TextContent m = (MonadReader FontSet m, MonadFail m) type FontContext m = (MonadState Font m, TextContent m) decodeString :: FontContext m => StringObject -> m Text decodeString input = do font <- get either fail return . font $ toByteString input renderText :: MonadFail m => FontSet -> Content -> m [Text] renderText fontSet (Content contentUnits) = runReaderT (concat <$> mapM renderContentUnit contentUnits) fontSet renderContentUnit :: TextContent m => ContentUnit -> m [Text] renderContentUnit (GraphicContext graphicContextUnits) = concat <$> mapM renderGraphicContextUnit graphicContextUnits renderContentUnit (TextContext instructions) = evalStateT (concat <$> mapM renderInstruction instructions) emptyFont renderGraphicContextUnit :: TextContent m => GraphicContextUnit -> m [Text] renderGraphicContextUnit (GraphicInstruction _) = return [] renderGraphicContextUnit (ContentUnit contentUnit) = renderContentUnit contentUnit renderInstruction :: FontContext m => Instruction -> m [Text] renderInstruction (Text Tf, [NameObject fontName, _]) = asks (! fontName) >>= put >> return [] renderInstruction (Text Tstar, []) = return ["\n"] renderInstruction (Text TJ, [Array arrayObject]) = replicate 1 <$> foldM appendText "" arrayObject where appendText t (StringObject outputString) = mappend t <$> decodeString outputString appendText t _ = return t renderInstruction (Text Tj, [StringObject outputString]) = replicate 1 <$> decodeString outputString renderInstruction (Text Quote, [StringObject outputString]) = (\t -> ["\n", t]) <$> decodeString outputString renderInstruction (Text DQuote, [StringObject outputString]) = (\t -> ["\n", t]) <$> decodeString outputString renderInstruction _ = return [] format :: String -> [Instruction] format s = case break (== '\n') s of ("", "") -> [] ("", left) -> (Text Tstar, []) : format (drop 1 left) (line, left) -> (Text Tj, [StringObject . Literal $ pack line]) : format left