Generalize the Indexed type to handle both arbitrary Content instructions and text-related ones that can be viewed as text chunks
This commit is contained in:
parent
1ec47c5d07
commit
44bc898ed3
2 changed files with 27 additions and 19 deletions
|
@ -9,8 +9,8 @@ module PDF.Content (
|
|||
Content(..)
|
||||
, ContentUnit(..)
|
||||
, GraphicContextUnit(..)
|
||||
, IndexedInstructions
|
||||
, InstructionId
|
||||
, Id(..)
|
||||
, Indexed
|
||||
, TextContext
|
||||
, content
|
||||
, parse
|
||||
|
@ -29,34 +29,34 @@ import PDF.Object (blank, directObject)
|
|||
import PDF.Output (Output(..), line)
|
||||
import PDF.Parser (MonadParser, evalParser, string)
|
||||
|
||||
newtype InstructionId = InstructionId Int deriving (Eq, Ord, Show)
|
||||
newtype Id a = Id Int deriving (Eq, Ord, Show)
|
||||
data Instructions = Instructions
|
||||
type IndexedInstructions = Map InstructionId Instruction
|
||||
type Indexed a = Map (Id a) a
|
||||
|
||||
data GraphicContextUnit =
|
||||
GraphicInstruction InstructionId
|
||||
GraphicInstruction (Id Instruction)
|
||||
| ContentUnit ContentUnit
|
||||
deriving Show
|
||||
type TextContext = [InstructionId]
|
||||
type TextContext = [Id Instruction]
|
||||
data ContentUnit =
|
||||
GraphicContext [GraphicContextUnit]
|
||||
| TextContext TextContext
|
||||
deriving Show
|
||||
data Content = Content {
|
||||
contentUnits :: [ContentUnit]
|
||||
, indexedInstructions :: IndexedInstructions
|
||||
, indexedInstructions :: Indexed Instruction
|
||||
} deriving Show
|
||||
|
||||
type InstructionParser m = (MonadParser m, MonadState IndexedInstructions m)
|
||||
type InstructionParser m = (MonadParser m, MonadState (Indexed Instruction) m)
|
||||
|
||||
instance Monad m => Box m Instructions Content IndexedInstructions where
|
||||
instance Monad m => Box m Instructions Content (Indexed Instruction) where
|
||||
r Instructions = return . indexedInstructions
|
||||
w Instructions indexedInstructions someContent =
|
||||
return $ someContent {indexedInstructions}
|
||||
|
||||
register :: MonadState IndexedInstructions m => Instruction -> m InstructionId
|
||||
register :: MonadState (Indexed Instruction) m => Instruction -> m (Id Instruction)
|
||||
register newInstruction = do
|
||||
newInstructionID <- gets (InstructionId . Map.size)
|
||||
newInstructionID <- gets (Id . Map.size)
|
||||
modify (Map.insert newInstructionID newInstruction)
|
||||
return newInstructionID
|
||||
|
||||
|
@ -80,7 +80,7 @@ graphicContextUnit =
|
|||
(GraphicInstruction <$> instruction)
|
||||
<|> (ContentUnit <$> contentUnit)
|
||||
|
||||
instruction :: InstructionParser m => m InstructionId
|
||||
instruction :: InstructionParser m => m (Id Instruction)
|
||||
instruction = evalStateT stackParser [] >>= register
|
||||
where
|
||||
stackParser = ((directObject <* blank) >>= push) <|> operator
|
||||
|
|
|
@ -2,6 +2,9 @@
|
|||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE ConstraintKinds #-}
|
||||
{-# LANGUAGE FlexibleContexts #-}
|
||||
{-# LANGUAGE TypeSynonymInstances #-}
|
||||
{-# LANGUAGE FlexibleInstances #-}
|
||||
{-# LANGUAGE MultiParamTypeClasses #-}
|
||||
module PDF.Content.Text (
|
||||
format
|
||||
, renderText
|
||||
|
@ -14,19 +17,20 @@ import Control.Monad.State (MonadState(..), evalStateT)
|
|||
import Data.ByteString.Char8 (pack)
|
||||
import Data.Map ((!))
|
||||
import Data.Text (Text)
|
||||
import PDF.Box (Box(..))
|
||||
import PDF.Content (
|
||||
Content, ContentUnit(..), IndexedInstructions, InstructionId
|
||||
, GraphicContextUnit(..), contentUnits
|
||||
Content, ContentUnit(..), Id(..), Indexed, GraphicContextUnit(..)
|
||||
, contentUnits
|
||||
)
|
||||
import qualified PDF.Content as Content (Content(..))
|
||||
import PDF.Content.Operator (Instruction, Operator(..))
|
||||
import PDF.Content.Operator.Text (Operator(..))
|
||||
import PDF.Font (Font, FontSet, emptyFont)
|
||||
import PDF.Font (Font(..), FontSet, emptyFont)
|
||||
import PDF.Object (DirectObject(..), StringObject(..), toByteString)
|
||||
import Prelude hiding (fail)
|
||||
|
||||
data ROContext = ROContext {
|
||||
indexedInstructions :: IndexedInstructions
|
||||
indexedInstructions :: Indexed Instruction
|
||||
, fontSet :: FontSet
|
||||
}
|
||||
|
||||
|
@ -35,8 +39,12 @@ type FontContext m = (MonadState Font m, TextContent m)
|
|||
|
||||
decodeString :: FontContext m => StringObject -> m Text
|
||||
decodeString input = do
|
||||
font <- get
|
||||
either fail return . font $ toByteString input
|
||||
Font {decode} <- get
|
||||
either fail return . decode $ toByteString input
|
||||
|
||||
data Chunks = Chunks
|
||||
|
||||
instance Monad m => Box m Chunks Content (Indexed Text)
|
||||
|
||||
renderText :: MonadFail m => FontSet -> Content -> m [Text]
|
||||
renderText fontSet (Content.Content {contentUnits, Content.indexedInstructions}) =
|
||||
|
@ -55,7 +63,7 @@ renderGraphicContextUnit (GraphicInstruction _) = return []
|
|||
renderGraphicContextUnit (ContentUnit contentUnit) =
|
||||
renderContentUnit contentUnit
|
||||
|
||||
renderInstructionId :: FontContext m => InstructionId -> m [Text]
|
||||
renderInstructionId :: FontContext m => Id Instruction -> m [Text]
|
||||
renderInstructionId instructionId =
|
||||
asks ((! instructionId) . indexedInstructions) >>= renderInstruction
|
||||
|
||||
|
|
Loading…
Reference in a new issue