Hufflepdf/src/PDF/Content/Text.hs

111 lines
4.0 KiB
Haskell

{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module PDF.Content.Text (
Chunks(..)
, format
, renderText
) where
import Control.Monad (foldM)
import Control.Monad.Fail (MonadFail(..))
import Control.Monad.Reader (MonadReader(..), ReaderT, asks, runReaderT)
import Control.Monad.State (MonadState(..), StateT, evalStateT)
import Data.Map ((!))
import qualified Data.Map as Map (empty, fromList, singleton)
import Data.Text (Text, breakOn)
import qualified Data.Text as Text (drop)
import PDF.Box (Box(..))
import PDF.Content (
Content, ContentUnit(..), Id(..), Indexed, GraphicContextUnit(..)
, contentUnits
)
import qualified PDF.Content as Content (Content(..))
import PDF.Content.Operator (Instruction, Operator(..))
import PDF.Content.Operator.Text (Operator(..))
import PDF.Font (Font(..), FontSet, emptyFont)
import PDF.Object (DirectObject(..), StringObject(..), toByteString)
import Prelude hiding (fail)
data ROContext = ROContext {
indexedInstructions :: Indexed Instruction
, fontSet :: FontSet
}
type TextContent m = ReaderT ROContext m
type FontContext m = StateT Font (TextContent m)
decodeString :: MonadFail m => StringObject -> FontContext m Text
decodeString input = do
Font {decode} <- get
either fail return . decode $ toByteString input
data Chunks = Chunks
instance (MonadReader FontSet m, MonadFail m) => Box m Chunks Content (Indexed Text) where
r Chunks content = do
fontSet <- ask
renderText fontSet content
renderText :: MonadFail m => FontSet -> Content -> m (Indexed Text)
renderText fontSet (Content.Content {contentUnits, Content.indexedInstructions}) =
runReaderT (mconcat <$> mapM renderContentUnit contentUnits) roContext
where
roContext = ROContext {indexedInstructions, fontSet}
renderContentUnit :: MonadFail m => ContentUnit -> TextContent m (Indexed Text)
renderContentUnit (GraphicContext graphicContextUnits) =
mconcat <$> mapM renderGraphicContextUnit graphicContextUnits
renderContentUnit (TextContext instructionIds) =
evalStateT (mconcat <$> mapM renderInstructionId instructionIds) emptyFont
renderGraphicContextUnit :: MonadFail m => GraphicContextUnit -> TextContent m (Indexed Text)
renderGraphicContextUnit (GraphicInstruction _) = return Map.empty
renderGraphicContextUnit (ContentUnit contentUnit) =
renderContentUnit contentUnit
renderInstructionId :: MonadFail m => Id Instruction -> FontContext m (Indexed Text)
renderInstructionId instructionId@(Id n) = toMap <$>
(asks ((! instructionId) . indexedInstructions) >>= renderInstruction)
where
toMap = maybe Map.empty (Map.singleton (Id n))
renderInstruction :: MonadFail m => Instruction -> FontContext m (Maybe Text)
renderInstruction (Text Tf, [NameObject fontName, _]) =
asks ((! fontName) . fontSet) >>= put >> return Nothing
renderInstruction (Text Tstar, []) = return $ Just "\n"
renderInstruction (Text TJ, [Array arrayObject]) =
Just <$> foldM appendText "" arrayObject
where
appendText t (StringObject outputString) =
mappend t <$> decodeString outputString
appendText t _ = return t
renderInstruction (Text Tj, [StringObject outputString]) =
Just <$> decodeString outputString
renderInstruction (Text Quote, [StringObject outputString]) =
(Just . mappend "\n") <$> decodeString outputString
renderInstruction (Text DQuote, [_, _, StringObject outputString]) =
(Just . mappend "\n") <$> decodeString outputString
renderInstruction _ = return Nothing
format :: (MonadFail m, MonadReader Font m) => Text -> m [Instruction]
format input =
case breakOn "\n" input of
("", "") -> return []
("", left) -> ((Text Tstar, []) :) <$> format (Text.drop 1 left)
(line, left) -> (:) <$> tj line <*> format left
where
tj t = do
literal <- either fail (return . Literal) =<< asks (($t) . encode)
return (Text Tj, [StringObject literal])