152 lines
5.8 KiB
Haskell
152 lines
5.8 KiB
Haskell
{-# LANGUAGE NamedFieldPuns #-}
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
{-# LANGUAGE FlexibleContexts #-}
|
|
{-# LANGUAGE FlexibleInstances #-}
|
|
{-# LANGUAGE MultiParamTypeClasses #-}
|
|
module PDF.Content.Text (
|
|
Chunks(..)
|
|
, chunk
|
|
, format
|
|
) where
|
|
|
|
import Control.Monad (foldM)
|
|
import Control.Monad.Fail (MonadFail(..))
|
|
import Control.Monad.Reader (ReaderT, asks, runReaderT)
|
|
import Control.Monad.State (
|
|
MonadState(..), StateT, evalStateT, gets, modify, runStateT
|
|
)
|
|
import Control.Monad.Trans (lift)
|
|
import qualified Data.ByteString.Char8 as BS (concatMap, singleton)
|
|
import Data.Id (Id(..), Indexed, at, empty, singleton)
|
|
import qualified Data.Id as Id (delete, lookup, register)
|
|
import Data.Map ((!))
|
|
import Data.Text (Text, breakOn)
|
|
import qualified Data.Text as Text (drop)
|
|
import PDF.Box (Box(..))
|
|
import PDF.Content (
|
|
Content(..), ContentUnit(..), GraphicContextUnit(..), IdContentUnit
|
|
, IdGraphicContextUnit
|
|
)
|
|
import PDF.Content.Operator (Instruction, Operator(..))
|
|
import PDF.Content.Operator.Text (Operator(..))
|
|
import PDF.Font (Font(..), FontSet, emptyFont)
|
|
import PDF.Object (DirectObject(..), StringObject(..), toByteString)
|
|
import Prelude hiding (fail)
|
|
|
|
type TmpFont = StateT Font
|
|
type Renderer m = ReaderT (Indexed Instruction) (ReaderT FontSet m)
|
|
type Updater m = StateT (Indexed Instruction) (ReaderT (Indexed Text) (ReaderT FontSet m))
|
|
|
|
decodeString :: MonadFail m => StringObject -> TmpFont (Renderer m) Text
|
|
decodeString input = do
|
|
Font {decode} <- get
|
|
either fail return . decode $ toByteString input
|
|
|
|
data Chunks = Chunks
|
|
|
|
chunk :: Int -> Id Text
|
|
chunk = Id
|
|
|
|
instance MonadFail m => Box (ReaderT FontSet m) Chunks Content (Indexed Text) where
|
|
r Chunks content =
|
|
runReaderT (mconcat <$> renderer) $ indexedInstructions content
|
|
where
|
|
renderer = mapM renderContentUnit (contentUnits content)
|
|
|
|
w Chunks indexedText content = do
|
|
(contentUnits, indexedInstructions) <- runReaderT readerUpdate indexedText
|
|
return $ content {contentUnits, indexedInstructions}
|
|
where
|
|
stateUpdate = mapM updateContentUnit (contentUnits content)
|
|
readerUpdate = runStateT stateUpdate (indexedInstructions content)
|
|
|
|
renderContentUnit :: MonadFail m => IdContentUnit -> Renderer m (Indexed Text)
|
|
renderContentUnit (GraphicContext graphicContextUnits) =
|
|
mconcat <$> mapM renderGraphicContextUnit graphicContextUnits
|
|
renderContentUnit (TextContext instructionIds) =
|
|
evalStateT (mconcat <$> mapM renderInstructionId instructionIds) emptyFont
|
|
|
|
updateContentUnit :: MonadFail m => IdContentUnit -> Updater m IdContentUnit
|
|
updateContentUnit (GraphicContext graphicContextUnits) = GraphicContext <$>
|
|
mapM updateGraphicContextUnit graphicContextUnits
|
|
updateContentUnit (TextContext instructionIds) = TextContext . concat <$>
|
|
evalStateT (mapM updateInstructionId instructionIds) emptyFont
|
|
|
|
renderGraphicContextUnit :: MonadFail m => IdGraphicContextUnit -> Renderer m (Indexed Text)
|
|
renderGraphicContextUnit (GraphicInstruction _) = return empty
|
|
renderGraphicContextUnit (ContentUnit contentUnit) =
|
|
renderContentUnit contentUnit
|
|
|
|
updateGraphicContextUnit :: MonadFail m => IdGraphicContextUnit -> Updater m IdGraphicContextUnit
|
|
updateGraphicContextUnit gI@(GraphicInstruction _) = return gI
|
|
updateGraphicContextUnit (ContentUnit contentUnit) =
|
|
ContentUnit <$> updateContentUnit contentUnit
|
|
|
|
renderInstructionId :: MonadFail m => Id Instruction -> TmpFont (Renderer m) (Indexed Text)
|
|
renderInstructionId instructionId@(Id n) = toMap <$>
|
|
(asks ((`at` instructionId)) >>= renderInstruction)
|
|
where
|
|
toMap = maybe empty (singleton (Id n))
|
|
|
|
updateInstructionId :: MonadFail m => Id Instruction -> TmpFont (Updater m) [Id Instruction]
|
|
updateInstructionId instructionId =
|
|
lift (gets (`at` instructionId)) >>= updateInstruction instructionId
|
|
|
|
renderInstruction :: MonadFail m => Instruction -> TmpFont (Renderer m) (Maybe Text)
|
|
renderInstruction (Text Tf, [NameObject fontName, _]) =
|
|
lift (lift $ asks (! fontName)) >>= put >> return Nothing
|
|
|
|
renderInstruction (Text Tstar, []) = return $ Just "\n"
|
|
|
|
renderInstruction (Text TJ, [Array arrayObject]) =
|
|
Just <$> foldM appendText "" arrayObject
|
|
where
|
|
appendText t (StringObject outputString) =
|
|
mappend t <$> decodeString outputString
|
|
appendText t _ = return t
|
|
|
|
renderInstruction (Text Tj, [StringObject outputString]) =
|
|
Just <$> decodeString outputString
|
|
|
|
renderInstruction (Text Quote, [StringObject outputString]) =
|
|
(Just . mappend "\n") <$> decodeString outputString
|
|
|
|
renderInstruction (Text DQuote, [_, _, StringObject outputString]) =
|
|
(Just . mappend "\n") <$> decodeString outputString
|
|
|
|
renderInstruction _ = return Nothing
|
|
|
|
updateInstruction :: MonadFail m => Id Instruction -> Instruction -> TmpFont (Updater m) [Id Instruction]
|
|
updateInstruction instructionId (Text Tf, [NameObject fontName, _]) =
|
|
(lift . lift . lift $ asks (!fontName)) >>= put >> return [instructionId]
|
|
|
|
updateInstruction instructionId@(Id n) instruction = do
|
|
if emitsText $ fst instruction
|
|
then asks (Id.lookup (Id n)) >>= replaceText
|
|
else return [instructionId]
|
|
where
|
|
emitsText (Text Tstar) = True
|
|
emitsText (Text TJ) = True
|
|
emitsText (Text Tj) = True
|
|
emitsText (Text Quote) = True
|
|
emitsText (Text DQuote) = True
|
|
emitsText _ = False
|
|
replaceText = maybe (return []) $ \text -> do
|
|
lift $ modify (Id.delete instructionId)
|
|
format text >>= mapM (lift . Id.register)
|
|
|
|
format :: MonadFail m => Text -> StateT Font m [Instruction]
|
|
format input = do
|
|
case breakOn "\n" input of
|
|
("", "") -> return []
|
|
("", left) -> ((Text Tstar, []) :) <$> format (Text.drop 1 left)
|
|
(line, left) -> (:) <$> tj line <*> format left
|
|
where
|
|
tj t = do
|
|
encoded <- either fail return =<< gets (($t) . encode)
|
|
return (Text Tj, [StringObject . Literal $ BS.concatMap escape encoded])
|
|
escape '\\' = "\\\\"
|
|
escape '(' = "\\("
|
|
escape ')' = "\\)"
|
|
escape c = BS.singleton c
|