Hufflepdf/src/PDF/Content/Text.hs

105 lines
3.7 KiB
Haskell

{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module PDF.Content.Text (
format
, renderText
) where
import Control.Monad (foldM)
import Control.Monad.Fail (MonadFail(..))
import Control.Monad.Reader (MonadReader(..), asks, runReaderT)
import Control.Monad.State (MonadState(..), evalStateT)
import Data.Map ((!))
import Data.Text (Text, breakOn)
import qualified Data.Text as Text (drop)
import PDF.Box (Box(..))
import PDF.Content (
Content, ContentUnit(..), Id(..), Indexed, GraphicContextUnit(..)
, contentUnits
)
import qualified PDF.Content as Content (Content(..))
import PDF.Content.Operator (Instruction, Operator(..))
import PDF.Content.Operator.Text (Operator(..))
import PDF.Font (Font(..), FontSet, emptyFont)
import PDF.Object (DirectObject(..), StringObject(..), toByteString)
import PDF.Pages (Page(..))
import Prelude hiding (fail)
data ROContext = ROContext {
indexedInstructions :: Indexed Instruction
, fontSet :: FontSet
}
type TextContent m = (MonadReader ROContext m, MonadFail m)
type FontContext m = (MonadState Font m, TextContent m)
decodeString :: FontContext m => StringObject -> m Text
decodeString input = do
Font {decode} <- get
either fail return . decode $ toByteString input
data Chunks = Chunks
instance Monad m => Box m Chunks Content (Indexed Text)
renderText :: MonadFail m => FontSet -> Content -> m [Text]
renderText fontSet (Content.Content {contentUnits, Content.indexedInstructions}) =
runReaderT (concat <$> mapM renderContentUnit contentUnits) roContext
where
roContext = ROContext {indexedInstructions, fontSet}
renderContentUnit :: TextContent m => ContentUnit -> m [Text]
renderContentUnit (GraphicContext graphicContextUnits) =
concat <$> mapM renderGraphicContextUnit graphicContextUnits
renderContentUnit (TextContext instructionIds) =
evalStateT (concat <$> mapM renderInstructionId instructionIds) emptyFont
renderGraphicContextUnit :: TextContent m => GraphicContextUnit -> m [Text]
renderGraphicContextUnit (GraphicInstruction _) = return []
renderGraphicContextUnit (ContentUnit contentUnit) =
renderContentUnit contentUnit
renderInstructionId :: FontContext m => Id Instruction -> m [Text]
renderInstructionId instructionId =
asks ((! instructionId) . indexedInstructions) >>= renderInstruction
renderInstruction :: FontContext m => Instruction -> m [Text]
renderInstruction (Text Tf, [NameObject fontName, _]) =
asks ((! fontName) . fontSet) >>= put >> return []
renderInstruction (Text Tstar, []) = return ["\n"]
renderInstruction (Text TJ, [Array arrayObject]) =
replicate 1 <$> foldM appendText "" arrayObject
where
appendText t (StringObject outputString) =
mappend t <$> decodeString outputString
appendText t _ = return t
renderInstruction (Text Tj, [StringObject outputString]) =
replicate 1 <$> decodeString outputString
renderInstruction (Text Quote, [StringObject outputString]) =
(\t -> ["\n", t]) <$> decodeString outputString
renderInstruction (Text DQuote, [StringObject outputString]) =
(\t -> ["\n", t]) <$> decodeString outputString
renderInstruction _ = return []
format :: (MonadFail m, MonadReader Font m) => Text -> m [Instruction]
format input =
case breakOn "\n" input of
("", "") -> return []
("", left) -> ((Text Tstar, []) :) <$> format (Text.drop 1 left)
(line, left) -> (:) <$> tj line <*> format left
where
tj t = do
literal <- either fail (return . Literal) =<< asks (($t) . encode)
return (Text Tj, [StringObject literal])