Generalize the Indexed type to handle both arbitrary Content instructions and text-related ones that can be viewed as text chunks

This commit is contained in:
Tissevert 2020-03-06 19:21:16 +01:00
parent 1ec47c5d07
commit 44bc898ed3
2 changed files with 27 additions and 19 deletions

View file

@ -9,8 +9,8 @@ module PDF.Content (
Content(..)
, ContentUnit(..)
, GraphicContextUnit(..)
, IndexedInstructions
, InstructionId
, Id(..)
, Indexed
, TextContext
, content
, parse
@ -29,34 +29,34 @@ import PDF.Object (blank, directObject)
import PDF.Output (Output(..), line)
import PDF.Parser (MonadParser, evalParser, string)
newtype InstructionId = InstructionId Int deriving (Eq, Ord, Show)
newtype Id a = Id Int deriving (Eq, Ord, Show)
data Instructions = Instructions
type IndexedInstructions = Map InstructionId Instruction
type Indexed a = Map (Id a) a
data GraphicContextUnit =
GraphicInstruction InstructionId
GraphicInstruction (Id Instruction)
| ContentUnit ContentUnit
deriving Show
type TextContext = [InstructionId]
type TextContext = [Id Instruction]
data ContentUnit =
GraphicContext [GraphicContextUnit]
| TextContext TextContext
deriving Show
data Content = Content {
contentUnits :: [ContentUnit]
, indexedInstructions :: IndexedInstructions
, indexedInstructions :: Indexed Instruction
} deriving Show
type InstructionParser m = (MonadParser m, MonadState IndexedInstructions m)
type InstructionParser m = (MonadParser m, MonadState (Indexed Instruction) m)
instance Monad m => Box m Instructions Content IndexedInstructions where
instance Monad m => Box m Instructions Content (Indexed Instruction) where
r Instructions = return . indexedInstructions
w Instructions indexedInstructions someContent =
return $ someContent {indexedInstructions}
register :: MonadState IndexedInstructions m => Instruction -> m InstructionId
register :: MonadState (Indexed Instruction) m => Instruction -> m (Id Instruction)
register newInstruction = do
newInstructionID <- gets (InstructionId . Map.size)
newInstructionID <- gets (Id . Map.size)
modify (Map.insert newInstructionID newInstruction)
return newInstructionID
@ -80,7 +80,7 @@ graphicContextUnit =
(GraphicInstruction <$> instruction)
<|> (ContentUnit <$> contentUnit)
instruction :: InstructionParser m => m InstructionId
instruction :: InstructionParser m => m (Id Instruction)
instruction = evalStateT stackParser [] >>= register
where
stackParser = ((directObject <* blank) >>= push) <|> operator

View file

@ -2,6 +2,9 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module PDF.Content.Text (
format
, renderText
@ -14,19 +17,20 @@ import Control.Monad.State (MonadState(..), evalStateT)
import Data.ByteString.Char8 (pack)
import Data.Map ((!))
import Data.Text (Text)
import PDF.Box (Box(..))
import PDF.Content (
Content, ContentUnit(..), IndexedInstructions, InstructionId
, GraphicContextUnit(..), contentUnits
Content, ContentUnit(..), Id(..), Indexed, GraphicContextUnit(..)
, contentUnits
)
import qualified PDF.Content as Content (Content(..))
import PDF.Content.Operator (Instruction, Operator(..))
import PDF.Content.Operator.Text (Operator(..))
import PDF.Font (Font, FontSet, emptyFont)
import PDF.Font (Font(..), FontSet, emptyFont)
import PDF.Object (DirectObject(..), StringObject(..), toByteString)
import Prelude hiding (fail)
data ROContext = ROContext {
indexedInstructions :: IndexedInstructions
indexedInstructions :: Indexed Instruction
, fontSet :: FontSet
}
@ -35,8 +39,12 @@ type FontContext m = (MonadState Font m, TextContent m)
decodeString :: FontContext m => StringObject -> m Text
decodeString input = do
font <- get
either fail return . font $ toByteString input
Font {decode} <- get
either fail return . decode $ toByteString input
data Chunks = Chunks
instance Monad m => Box m Chunks Content (Indexed Text)
renderText :: MonadFail m => FontSet -> Content -> m [Text]
renderText fontSet (Content.Content {contentUnits, Content.indexedInstructions}) =
@ -55,7 +63,7 @@ renderGraphicContextUnit (GraphicInstruction _) = return []
renderGraphicContextUnit (ContentUnit contentUnit) =
renderContentUnit contentUnit
renderInstructionId :: FontContext m => InstructionId -> m [Text]
renderInstructionId :: FontContext m => Id Instruction -> m [Text]
renderInstructionId instructionId =
asks ((! instructionId) . indexedInstructions) >>= renderInstruction