Hufflepdf/src/PDF/Content/Text.hs

160 lines
6.0 KiB
Haskell

{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module PDF.Content.Text (
Chunks(..)
, chunk
, format
, renderText
) where
import Control.Monad (foldM)
import Control.Monad.Fail (MonadFail(..))
import Control.Monad.Reader (MonadReader(..), ReaderT, asks, runReaderT)
import Control.Monad.State (
MonadState(..), StateT, evalStateT, gets, modify, runStateT
)
import Control.Monad.Trans (lift)
import Data.Id (Id(..), Indexed, at, empty, singleton)
import qualified Data.Id as Id (delete, lookup, register)
import Data.Map ((!))
import Data.Text (Text, breakOn)
import qualified Data.Text as Text (drop)
import PDF.Box (Box(..))
import PDF.Content (
Content, ContentUnit(..), GraphicContextUnit(..), contentUnits
)
import qualified PDF.Content as Content (Content(..))
import PDF.Content.Operator (Instruction, Operator(..))
import PDF.Content.Operator.Text (Operator(..))
import PDF.Font (Font(..), FontSet, emptyFont)
import PDF.Object (DirectObject(..), StringObject(..), toByteString)
import Prelude hiding (fail)
data ROContext = ROContext {
indexedInstructions :: Indexed Instruction
, fontSet :: FontSet
}
type TextContent m = ReaderT ROContext m
type FontContext m = StateT Font (TextContent m)
type Updater m = StateT (Indexed Instruction) (ReaderT (Indexed Text) (ReaderT FontSet m))
type TextUpdater m = StateT Font (Updater m)
decodeString :: MonadFail m => StringObject -> FontContext m Text
decodeString input = do
Font {decode} <- get
either fail return . decode $ toByteString input
data Chunks = Chunks
chunk :: Int -> Id Text
chunk = Id
instance MonadFail m => Box (ReaderT FontSet m) Chunks Content (Indexed Text) where
r Chunks content = do
fontSet <- ask
renderText fontSet content
w Chunks indexedText (Content.Content {contentUnits, Content.indexedInstructions}) =
uncurry Content.Content <$> runReaderT (
runStateT (mapM updateContentUnit contentUnits) indexedInstructions
) indexedText
renderText :: MonadFail m => FontSet -> Content -> m (Indexed Text)
renderText fontSet (Content.Content {contentUnits, Content.indexedInstructions}) =
runReaderT (mconcat <$> mapM renderContentUnit contentUnits) roContext
where
roContext = ROContext {indexedInstructions, fontSet}
renderContentUnit :: MonadFail m => ContentUnit -> TextContent m (Indexed Text)
renderContentUnit (GraphicContext graphicContextUnits) =
mconcat <$> mapM renderGraphicContextUnit graphicContextUnits
renderContentUnit (TextContext instructionIds) =
evalStateT (mconcat <$> mapM renderInstructionId instructionIds) emptyFont
updateContentUnit :: MonadFail m => ContentUnit -> Updater m ContentUnit
updateContentUnit (GraphicContext graphicContextUnits) = GraphicContext <$>
mapM updateGraphicContextUnit graphicContextUnits
updateContentUnit (TextContext instructionIds) = TextContext . concat <$>
evalStateT (mapM updateInstructionId instructionIds) emptyFont
renderGraphicContextUnit :: MonadFail m => GraphicContextUnit -> TextContent m (Indexed Text)
renderGraphicContextUnit (GraphicInstruction _) = return empty
renderGraphicContextUnit (ContentUnit contentUnit) =
renderContentUnit contentUnit
updateGraphicContextUnit :: MonadFail m => GraphicContextUnit -> Updater m GraphicContextUnit
updateGraphicContextUnit gI@(GraphicInstruction _) = return gI
updateGraphicContextUnit (ContentUnit contentUnit) =
ContentUnit <$> updateContentUnit contentUnit
renderInstructionId :: MonadFail m => Id Instruction -> FontContext m (Indexed Text)
renderInstructionId instructionId@(Id n) = toMap <$>
(asks ((`at` instructionId) . indexedInstructions) >>= renderInstruction)
where
toMap = maybe empty (singleton (Id n))
updateInstructionId :: MonadFail m => Id Instruction -> TextUpdater m [Id Instruction]
updateInstructionId instructionId =
lift (gets (`at` instructionId)) >>= updateInstruction instructionId
renderInstruction :: MonadFail m => Instruction -> FontContext m (Maybe Text)
renderInstruction (Text Tf, [NameObject fontName, _]) =
asks ((! fontName) . fontSet) >>= put >> return Nothing
renderInstruction (Text Tstar, []) = return $ Just "\n"
renderInstruction (Text TJ, [Array arrayObject]) =
Just <$> foldM appendText "" arrayObject
where
appendText t (StringObject outputString) =
mappend t <$> decodeString outputString
appendText t _ = return t
renderInstruction (Text Tj, [StringObject outputString]) =
Just <$> decodeString outputString
renderInstruction (Text Quote, [StringObject outputString]) =
(Just . mappend "\n") <$> decodeString outputString
renderInstruction (Text DQuote, [_, _, StringObject outputString]) =
(Just . mappend "\n") <$> decodeString outputString
renderInstruction _ = return Nothing
updateInstruction :: MonadFail m => Id Instruction -> Instruction -> TextUpdater m [Id Instruction]
updateInstruction instructionId (Text Tf, [NameObject fontName, _]) =
(lift . lift . lift $ asks (!fontName)) >>= put >> return [instructionId]
updateInstruction instructionId@(Id n) instruction = do
if emitsText $ fst instruction
then asks (Id.lookup (Id n)) >>= replaceText
else return [instructionId]
where
emitsText (Text Tstar) = True
emitsText (Text TJ) = True
emitsText (Text Tj) = True
emitsText (Text Quote) = True
emitsText (Text DQuote) = True
emitsText _ = False
replaceText = maybe (return []) $ \text -> do
lift $ modify (Id.delete instructionId)
format text >>= mapM (lift . Id.register)
format :: MonadFail m => Text -> StateT Font m [Instruction]
format input = do
case breakOn "\n" input of
("", "") -> return []
("", left) -> ((Text Tstar, []) :) <$> format (Text.drop 1 left)
(line, left) -> (:) <$> tj line <*> format left
where
tj t = do
literal <- either fail (return . Literal) =<< gets (($t) . encode)
return (Text Tj, [StringObject literal])