111 lines
4.0 KiB
Haskell
111 lines
4.0 KiB
Haskell
{-# LANGUAGE NamedFieldPuns #-}
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
{-# LANGUAGE ConstraintKinds #-}
|
|
{-# LANGUAGE FlexibleContexts #-}
|
|
{-# LANGUAGE TypeSynonymInstances #-}
|
|
{-# LANGUAGE FlexibleInstances #-}
|
|
{-# LANGUAGE MultiParamTypeClasses #-}
|
|
module PDF.Content.Text (
|
|
Chunks(..)
|
|
, format
|
|
, renderText
|
|
) where
|
|
|
|
import Control.Monad (foldM)
|
|
import Control.Monad.Fail (MonadFail(..))
|
|
import Control.Monad.Reader (MonadReader(..), ReaderT, asks, runReaderT)
|
|
import Control.Monad.State (MonadState(..), StateT, evalStateT)
|
|
import Data.Map ((!))
|
|
import qualified Data.Map as Map (empty, singleton)
|
|
import Data.Text (Text, breakOn)
|
|
import qualified Data.Text as Text (drop)
|
|
import PDF.Box (Box(..))
|
|
import PDF.Content (
|
|
Content, ContentUnit(..), Id(..), Indexed, GraphicContextUnit(..)
|
|
, contentUnits
|
|
)
|
|
import qualified PDF.Content as Content (Content(..))
|
|
import PDF.Content.Operator (Instruction, Operator(..))
|
|
import PDF.Content.Operator.Text (Operator(..))
|
|
import PDF.Font (Font(..), FontSet, emptyFont)
|
|
import PDF.Object (DirectObject(..), StringObject(..), toByteString)
|
|
import Prelude hiding (fail)
|
|
|
|
data ROContext = ROContext {
|
|
indexedInstructions :: Indexed Instruction
|
|
, fontSet :: FontSet
|
|
}
|
|
|
|
type TextContent m = ReaderT ROContext m
|
|
type FontContext m = StateT Font (TextContent m)
|
|
|
|
decodeString :: MonadFail m => StringObject -> FontContext m Text
|
|
decodeString input = do
|
|
Font {decode} <- get
|
|
either fail return . decode $ toByteString input
|
|
|
|
data Chunks = Chunks
|
|
|
|
instance MonadFail m => Box (ReaderT FontSet m) Chunks Content (Indexed Text) where
|
|
r Chunks content = do
|
|
fontSet <- ask
|
|
renderText fontSet content
|
|
|
|
renderText :: MonadFail m => FontSet -> Content -> m (Indexed Text)
|
|
renderText fontSet (Content.Content {contentUnits, Content.indexedInstructions}) =
|
|
runReaderT (mconcat <$> mapM renderContentUnit contentUnits) roContext
|
|
where
|
|
roContext = ROContext {indexedInstructions, fontSet}
|
|
|
|
renderContentUnit :: MonadFail m => ContentUnit -> TextContent m (Indexed Text)
|
|
renderContentUnit (GraphicContext graphicContextUnits) =
|
|
mconcat <$> mapM renderGraphicContextUnit graphicContextUnits
|
|
renderContentUnit (TextContext instructionIds) =
|
|
evalStateT (mconcat <$> mapM renderInstructionId instructionIds) emptyFont
|
|
|
|
renderGraphicContextUnit :: MonadFail m => GraphicContextUnit -> TextContent m (Indexed Text)
|
|
renderGraphicContextUnit (GraphicInstruction _) = return Map.empty
|
|
renderGraphicContextUnit (ContentUnit contentUnit) =
|
|
renderContentUnit contentUnit
|
|
|
|
renderInstructionId :: MonadFail m => Id Instruction -> FontContext m (Indexed Text)
|
|
renderInstructionId instructionId@(Id n) = toMap <$>
|
|
(asks ((! instructionId) . indexedInstructions) >>= renderInstruction)
|
|
where
|
|
toMap = maybe Map.empty (Map.singleton (Id n))
|
|
|
|
renderInstruction :: MonadFail m => Instruction -> FontContext m (Maybe Text)
|
|
renderInstruction (Text Tf, [NameObject fontName, _]) =
|
|
asks ((! fontName) . fontSet) >>= put >> return Nothing
|
|
|
|
renderInstruction (Text Tstar, []) = return $ Just "\n"
|
|
|
|
renderInstruction (Text TJ, [Array arrayObject]) =
|
|
Just <$> foldM appendText "" arrayObject
|
|
where
|
|
appendText t (StringObject outputString) =
|
|
mappend t <$> decodeString outputString
|
|
appendText t _ = return t
|
|
|
|
renderInstruction (Text Tj, [StringObject outputString]) =
|
|
Just <$> decodeString outputString
|
|
|
|
renderInstruction (Text Quote, [StringObject outputString]) =
|
|
(Just . mappend "\n") <$> decodeString outputString
|
|
|
|
renderInstruction (Text DQuote, [_, _, StringObject outputString]) =
|
|
(Just . mappend "\n") <$> decodeString outputString
|
|
|
|
renderInstruction _ = return Nothing
|
|
|
|
format :: (MonadFail m, MonadReader Font m) => Text -> m [Instruction]
|
|
format input =
|
|
case breakOn "\n" input of
|
|
("", "") -> return []
|
|
("", left) -> ((Text Tstar, []) :) <$> format (Text.drop 1 left)
|
|
(line, left) -> (:) <$> tj line <*> format left
|
|
where
|
|
tj t = do
|
|
literal <- either fail (return . Literal) =<< asks (($t) . encode)
|
|
return (Text Tj, [StringObject literal])
|