Hufflepdf/src/PDF/Content/Text.hs

110 lines
3.9 KiB
Haskell
Raw Normal View History

{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module PDF.Content.Text (
Chunks(..)
, format
2020-02-11 17:26:47 +01:00
, renderText
) where
import Control.Monad (foldM)
import Control.Monad.Fail (MonadFail(..))
import Control.Monad.Reader (MonadReader(..), ReaderT, asks, runReaderT)
import Control.Monad.State (MonadState(..), StateT, evalStateT)
2020-03-15 15:13:00 +01:00
import Data.Id (Id(..), Indexed, at, empty, singleton)
import Data.Map ((!))
import Data.Text (Text, breakOn)
import qualified Data.Text as Text (drop)
import PDF.Box (Box(..))
import PDF.Content (
Content, ContentUnit(..), GraphicContextUnit(..), contentUnits
)
import qualified PDF.Content as Content (Content(..))
import PDF.Content.Operator (Instruction, Operator(..))
import PDF.Content.Operator.Text (Operator(..))
import PDF.Font (Font(..), FontSet, emptyFont)
2020-02-11 17:26:47 +01:00
import PDF.Object (DirectObject(..), StringObject(..), toByteString)
import Prelude hiding (fail)
data ROContext = ROContext {
indexedInstructions :: Indexed Instruction
, fontSet :: FontSet
}
type TextContent m = ReaderT ROContext m
type FontContext m = StateT Font (TextContent m)
decodeString :: MonadFail m => StringObject -> FontContext m Text
decodeString input = do
Font {decode} <- get
either fail return . decode $ toByteString input
data Chunks = Chunks
instance MonadFail m => Box (ReaderT FontSet m) Chunks Content (Indexed Text) where
r Chunks content = do
fontSet <- ask
renderText fontSet content
renderText :: MonadFail m => FontSet -> Content -> m (Indexed Text)
renderText fontSet (Content.Content {contentUnits, Content.indexedInstructions}) =
runReaderT (mconcat <$> mapM renderContentUnit contentUnits) roContext
where
roContext = ROContext {indexedInstructions, fontSet}
renderContentUnit :: MonadFail m => ContentUnit -> TextContent m (Indexed Text)
renderContentUnit (GraphicContext graphicContextUnits) =
mconcat <$> mapM renderGraphicContextUnit graphicContextUnits
renderContentUnit (TextContext instructionIds) =
evalStateT (mconcat <$> mapM renderInstructionId instructionIds) emptyFont
renderGraphicContextUnit :: MonadFail m => GraphicContextUnit -> TextContent m (Indexed Text)
2020-03-15 15:13:00 +01:00
renderGraphicContextUnit (GraphicInstruction _) = return empty
renderGraphicContextUnit (ContentUnit contentUnit) =
renderContentUnit contentUnit
renderInstructionId :: MonadFail m => Id Instruction -> FontContext m (Indexed Text)
renderInstructionId instructionId@(Id n) = toMap <$>
2020-03-15 15:13:00 +01:00
(asks ((`at` instructionId) . indexedInstructions) >>= renderInstruction)
where
2020-03-15 15:13:00 +01:00
toMap = maybe empty (singleton (Id n))
renderInstruction :: MonadFail m => Instruction -> FontContext m (Maybe Text)
renderInstruction (Text Tf, [NameObject fontName, _]) =
asks ((! fontName) . fontSet) >>= put >> return Nothing
renderInstruction (Text Tstar, []) = return $ Just "\n"
renderInstruction (Text TJ, [Array arrayObject]) =
Just <$> foldM appendText "" arrayObject
where
appendText t (StringObject outputString) =
mappend t <$> decodeString outputString
appendText t _ = return t
renderInstruction (Text Tj, [StringObject outputString]) =
Just <$> decodeString outputString
renderInstruction (Text Quote, [StringObject outputString]) =
(Just . mappend "\n") <$> decodeString outputString
renderInstruction (Text DQuote, [_, _, StringObject outputString]) =
(Just . mappend "\n") <$> decodeString outputString
renderInstruction _ = return Nothing
2020-02-11 17:26:47 +01:00
format :: (MonadFail m, MonadReader Font m) => Text -> m [Instruction]
format input =
case breakOn "\n" input of
("", "") -> return []
("", left) -> ((Text Tstar, []) :) <$> format (Text.drop 1 left)
(line, left) -> (:) <$> tj line <*> format left
where
tj t = do
literal <- either fail (return . Literal) =<< asks (($t) . encode)
return (Text Tj, [StringObject literal])