Generalize the Indexed type to handle both arbitrary Content instructions and text-related ones that can be viewed as text chunks

This commit is contained in:
Tissevert 2020-03-06 19:21:16 +01:00
parent 1ec47c5d07
commit 44bc898ed3
2 changed files with 27 additions and 19 deletions

View file

@ -9,8 +9,8 @@ module PDF.Content (
Content(..) Content(..)
, ContentUnit(..) , ContentUnit(..)
, GraphicContextUnit(..) , GraphicContextUnit(..)
, IndexedInstructions , Id(..)
, InstructionId , Indexed
, TextContext , TextContext
, content , content
, parse , parse
@ -29,34 +29,34 @@ import PDF.Object (blank, directObject)
import PDF.Output (Output(..), line) import PDF.Output (Output(..), line)
import PDF.Parser (MonadParser, evalParser, string) import PDF.Parser (MonadParser, evalParser, string)
newtype InstructionId = InstructionId Int deriving (Eq, Ord, Show) newtype Id a = Id Int deriving (Eq, Ord, Show)
data Instructions = Instructions data Instructions = Instructions
type IndexedInstructions = Map InstructionId Instruction type Indexed a = Map (Id a) a
data GraphicContextUnit = data GraphicContextUnit =
GraphicInstruction InstructionId GraphicInstruction (Id Instruction)
| ContentUnit ContentUnit | ContentUnit ContentUnit
deriving Show deriving Show
type TextContext = [InstructionId] type TextContext = [Id Instruction]
data ContentUnit = data ContentUnit =
GraphicContext [GraphicContextUnit] GraphicContext [GraphicContextUnit]
| TextContext TextContext | TextContext TextContext
deriving Show deriving Show
data Content = Content { data Content = Content {
contentUnits :: [ContentUnit] contentUnits :: [ContentUnit]
, indexedInstructions :: IndexedInstructions , indexedInstructions :: Indexed Instruction
} deriving Show } deriving Show
type InstructionParser m = (MonadParser m, MonadState IndexedInstructions m) type InstructionParser m = (MonadParser m, MonadState (Indexed Instruction) m)
instance Monad m => Box m Instructions Content IndexedInstructions where instance Monad m => Box m Instructions Content (Indexed Instruction) where
r Instructions = return . indexedInstructions r Instructions = return . indexedInstructions
w Instructions indexedInstructions someContent = w Instructions indexedInstructions someContent =
return $ someContent {indexedInstructions} return $ someContent {indexedInstructions}
register :: MonadState IndexedInstructions m => Instruction -> m InstructionId register :: MonadState (Indexed Instruction) m => Instruction -> m (Id Instruction)
register newInstruction = do register newInstruction = do
newInstructionID <- gets (InstructionId . Map.size) newInstructionID <- gets (Id . Map.size)
modify (Map.insert newInstructionID newInstruction) modify (Map.insert newInstructionID newInstruction)
return newInstructionID return newInstructionID
@ -80,7 +80,7 @@ graphicContextUnit =
(GraphicInstruction <$> instruction) (GraphicInstruction <$> instruction)
<|> (ContentUnit <$> contentUnit) <|> (ContentUnit <$> contentUnit)
instruction :: InstructionParser m => m InstructionId instruction :: InstructionParser m => m (Id Instruction)
instruction = evalStateT stackParser [] >>= register instruction = evalStateT stackParser [] >>= register
where where
stackParser = ((directObject <* blank) >>= push) <|> operator stackParser = ((directObject <* blank) >>= push) <|> operator

View file

@ -2,6 +2,9 @@
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module PDF.Content.Text ( module PDF.Content.Text (
format format
, renderText , renderText
@ -14,19 +17,20 @@ import Control.Monad.State (MonadState(..), evalStateT)
import Data.ByteString.Char8 (pack) import Data.ByteString.Char8 (pack)
import Data.Map ((!)) import Data.Map ((!))
import Data.Text (Text) import Data.Text (Text)
import PDF.Box (Box(..))
import PDF.Content ( import PDF.Content (
Content, ContentUnit(..), IndexedInstructions, InstructionId Content, ContentUnit(..), Id(..), Indexed, GraphicContextUnit(..)
, GraphicContextUnit(..), contentUnits , contentUnits
) )
import qualified PDF.Content as Content (Content(..)) import qualified PDF.Content as Content (Content(..))
import PDF.Content.Operator (Instruction, Operator(..)) import PDF.Content.Operator (Instruction, Operator(..))
import PDF.Content.Operator.Text (Operator(..)) import PDF.Content.Operator.Text (Operator(..))
import PDF.Font (Font, FontSet, emptyFont) import PDF.Font (Font(..), FontSet, emptyFont)
import PDF.Object (DirectObject(..), StringObject(..), toByteString) import PDF.Object (DirectObject(..), StringObject(..), toByteString)
import Prelude hiding (fail) import Prelude hiding (fail)
data ROContext = ROContext { data ROContext = ROContext {
indexedInstructions :: IndexedInstructions indexedInstructions :: Indexed Instruction
, fontSet :: FontSet , fontSet :: FontSet
} }
@ -35,8 +39,12 @@ type FontContext m = (MonadState Font m, TextContent m)
decodeString :: FontContext m => StringObject -> m Text decodeString :: FontContext m => StringObject -> m Text
decodeString input = do decodeString input = do
font <- get Font {decode} <- get
either fail return . font $ toByteString input either fail return . decode $ toByteString input
data Chunks = Chunks
instance Monad m => Box m Chunks Content (Indexed Text)
renderText :: MonadFail m => FontSet -> Content -> m [Text] renderText :: MonadFail m => FontSet -> Content -> m [Text]
renderText fontSet (Content.Content {contentUnits, Content.indexedInstructions}) = renderText fontSet (Content.Content {contentUnits, Content.indexedInstructions}) =
@ -55,7 +63,7 @@ renderGraphicContextUnit (GraphicInstruction _) = return []
renderGraphicContextUnit (ContentUnit contentUnit) = renderGraphicContextUnit (ContentUnit contentUnit) =
renderContentUnit contentUnit renderContentUnit contentUnit
renderInstructionId :: FontContext m => InstructionId -> m [Text] renderInstructionId :: FontContext m => Id Instruction -> m [Text]
renderInstructionId instructionId = renderInstructionId instructionId =
asks ((! instructionId) . indexedInstructions) >>= renderInstruction asks ((! instructionId) . indexedInstructions) >>= renderInstruction