Hufflepdf/src/PDF/Content/Text.hs

152 lines
5.8 KiB
Haskell

{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module PDF.Content.Text (
Chunks(..)
, chunk
, format
) where
import Control.Monad (foldM)
import Control.Monad.Fail (MonadFail(..))
import Control.Monad.Reader (ReaderT, asks, runReaderT)
import Control.Monad.State (
MonadState(..), StateT, evalStateT, gets, modify, runStateT
)
import Control.Monad.Trans (lift)
import qualified Data.ByteString.Char8 as BS (concatMap, singleton)
import Data.Id (Id(..), Indexed, at, empty, singleton)
import qualified Data.Id as Id (delete, lookup, register)
import Data.Map ((!))
import Data.Text (Text, breakOn)
import qualified Data.Text as Text (drop)
import PDF.Box (Box(..))
import PDF.Content (
Content(..), ContentUnit(..), GraphicContextUnit(..), IdContentUnit
, IdGraphicContextUnit
)
import PDF.Content.Operator (Instruction, Operator(..))
import PDF.Content.Operator.Text (Operator(..))
import PDF.Font (Font(..), FontSet, emptyFont)
import PDF.Object (DirectObject(..), StringObject(..), toByteString)
import Prelude hiding (fail)
type TmpFont = StateT Font
type Renderer m = ReaderT (Indexed Instruction) (ReaderT FontSet m)
type Updater m = StateT (Indexed Instruction) (ReaderT (Indexed Text) (ReaderT FontSet m))
decodeString :: MonadFail m => StringObject -> TmpFont (Renderer m) Text
decodeString input = do
Font {decode} <- get
either fail return . decode $ toByteString input
data Chunks = Chunks
chunk :: Int -> Id Text
chunk = Id
instance MonadFail m => Box (ReaderT FontSet m) Chunks Content (Indexed Text) where
r Chunks content =
runReaderT (mconcat <$> renderer) $ indexedInstructions content
where
renderer = mapM renderContentUnit (contentUnits content)
w Chunks indexedText content = do
(contentUnits, indexedInstructions) <- runReaderT readerUpdate indexedText
return $ content {contentUnits, indexedInstructions}
where
stateUpdate = mapM updateContentUnit (contentUnits content)
readerUpdate = runStateT stateUpdate (indexedInstructions content)
renderContentUnit :: MonadFail m => IdContentUnit -> Renderer m (Indexed Text)
renderContentUnit (GraphicContext graphicContextUnits) =
mconcat <$> mapM renderGraphicContextUnit graphicContextUnits
renderContentUnit (TextContext instructionIds) =
evalStateT (mconcat <$> mapM renderInstructionId instructionIds) emptyFont
updateContentUnit :: MonadFail m => IdContentUnit -> Updater m IdContentUnit
updateContentUnit (GraphicContext graphicContextUnits) = GraphicContext <$>
mapM updateGraphicContextUnit graphicContextUnits
updateContentUnit (TextContext instructionIds) = TextContext . concat <$>
evalStateT (mapM updateInstructionId instructionIds) emptyFont
renderGraphicContextUnit :: MonadFail m => IdGraphicContextUnit -> Renderer m (Indexed Text)
renderGraphicContextUnit (GraphicInstruction _) = return empty
renderGraphicContextUnit (ContentUnit contentUnit) =
renderContentUnit contentUnit
updateGraphicContextUnit :: MonadFail m => IdGraphicContextUnit -> Updater m IdGraphicContextUnit
updateGraphicContextUnit gI@(GraphicInstruction _) = return gI
updateGraphicContextUnit (ContentUnit contentUnit) =
ContentUnit <$> updateContentUnit contentUnit
renderInstructionId :: MonadFail m => Id Instruction -> TmpFont (Renderer m) (Indexed Text)
renderInstructionId instructionId@(Id n) = toMap <$>
(asks ((`at` instructionId)) >>= renderInstruction)
where
toMap = maybe empty (singleton (Id n))
updateInstructionId :: MonadFail m => Id Instruction -> TmpFont (Updater m) [Id Instruction]
updateInstructionId instructionId =
lift (gets (`at` instructionId)) >>= updateInstruction instructionId
renderInstruction :: MonadFail m => Instruction -> TmpFont (Renderer m) (Maybe Text)
renderInstruction (Text Tf, [NameObject fontName, _]) =
lift (lift $ asks (! fontName)) >>= put >> return Nothing
renderInstruction (Text Tstar, []) = return $ Just "\n"
renderInstruction (Text TJ, [Array arrayObject]) =
Just <$> foldM appendText "" arrayObject
where
appendText t (StringObject outputString) =
mappend t <$> decodeString outputString
appendText t _ = return t
renderInstruction (Text Tj, [StringObject outputString]) =
Just <$> decodeString outputString
renderInstruction (Text Quote, [StringObject outputString]) =
(Just . mappend "\n") <$> decodeString outputString
renderInstruction (Text DQuote, [_, _, StringObject outputString]) =
(Just . mappend "\n") <$> decodeString outputString
renderInstruction _ = return Nothing
updateInstruction :: MonadFail m => Id Instruction -> Instruction -> TmpFont (Updater m) [Id Instruction]
updateInstruction instructionId (Text Tf, [NameObject fontName, _]) =
(lift . lift . lift $ asks (!fontName)) >>= put >> return [instructionId]
updateInstruction instructionId@(Id n) instruction = do
if emitsText $ fst instruction
then asks (Id.lookup (Id n)) >>= replaceText
else return [instructionId]
where
emitsText (Text Tstar) = True
emitsText (Text TJ) = True
emitsText (Text Tj) = True
emitsText (Text Quote) = True
emitsText (Text DQuote) = True
emitsText _ = False
replaceText = maybe (return []) $ \text -> do
lift $ modify (Id.delete instructionId)
format text >>= mapM (lift . Id.register)
format :: MonadFail m => Text -> StateT Font m [Instruction]
format input = do
case breakOn "\n" input of
("", "") -> return []
("", left) -> ((Text Tstar, []) :) <$> format (Text.drop 1 left)
(line, left) -> (:) <$> tj line <*> format left
where
tj t = do
encoded <- either fail return =<< gets (($t) . encode)
return (Text Tj, [StringObject . Literal $ BS.concatMap escape encoded])
escape '\\' = "\\\\"
escape '(' = "\\("
escape ')' = "\\)"
escape c = BS.singleton c