Generalize the Indexed type to handle both arbitrary Content instructions and text-related ones that can be viewed as text chunks
This commit is contained in:
parent
1ec47c5d07
commit
44bc898ed3
2 changed files with 27 additions and 19 deletions
|
@ -9,8 +9,8 @@ module PDF.Content (
|
||||||
Content(..)
|
Content(..)
|
||||||
, ContentUnit(..)
|
, ContentUnit(..)
|
||||||
, GraphicContextUnit(..)
|
, GraphicContextUnit(..)
|
||||||
, IndexedInstructions
|
, Id(..)
|
||||||
, InstructionId
|
, Indexed
|
||||||
, TextContext
|
, TextContext
|
||||||
, content
|
, content
|
||||||
, parse
|
, parse
|
||||||
|
@ -29,34 +29,34 @@ import PDF.Object (blank, directObject)
|
||||||
import PDF.Output (Output(..), line)
|
import PDF.Output (Output(..), line)
|
||||||
import PDF.Parser (MonadParser, evalParser, string)
|
import PDF.Parser (MonadParser, evalParser, string)
|
||||||
|
|
||||||
newtype InstructionId = InstructionId Int deriving (Eq, Ord, Show)
|
newtype Id a = Id Int deriving (Eq, Ord, Show)
|
||||||
data Instructions = Instructions
|
data Instructions = Instructions
|
||||||
type IndexedInstructions = Map InstructionId Instruction
|
type Indexed a = Map (Id a) a
|
||||||
|
|
||||||
data GraphicContextUnit =
|
data GraphicContextUnit =
|
||||||
GraphicInstruction InstructionId
|
GraphicInstruction (Id Instruction)
|
||||||
| ContentUnit ContentUnit
|
| ContentUnit ContentUnit
|
||||||
deriving Show
|
deriving Show
|
||||||
type TextContext = [InstructionId]
|
type TextContext = [Id Instruction]
|
||||||
data ContentUnit =
|
data ContentUnit =
|
||||||
GraphicContext [GraphicContextUnit]
|
GraphicContext [GraphicContextUnit]
|
||||||
| TextContext TextContext
|
| TextContext TextContext
|
||||||
deriving Show
|
deriving Show
|
||||||
data Content = Content {
|
data Content = Content {
|
||||||
contentUnits :: [ContentUnit]
|
contentUnits :: [ContentUnit]
|
||||||
, indexedInstructions :: IndexedInstructions
|
, indexedInstructions :: Indexed Instruction
|
||||||
} deriving Show
|
} deriving Show
|
||||||
|
|
||||||
type InstructionParser m = (MonadParser m, MonadState IndexedInstructions m)
|
type InstructionParser m = (MonadParser m, MonadState (Indexed Instruction) m)
|
||||||
|
|
||||||
instance Monad m => Box m Instructions Content IndexedInstructions where
|
instance Monad m => Box m Instructions Content (Indexed Instruction) where
|
||||||
r Instructions = return . indexedInstructions
|
r Instructions = return . indexedInstructions
|
||||||
w Instructions indexedInstructions someContent =
|
w Instructions indexedInstructions someContent =
|
||||||
return $ someContent {indexedInstructions}
|
return $ someContent {indexedInstructions}
|
||||||
|
|
||||||
register :: MonadState IndexedInstructions m => Instruction -> m InstructionId
|
register :: MonadState (Indexed Instruction) m => Instruction -> m (Id Instruction)
|
||||||
register newInstruction = do
|
register newInstruction = do
|
||||||
newInstructionID <- gets (InstructionId . Map.size)
|
newInstructionID <- gets (Id . Map.size)
|
||||||
modify (Map.insert newInstructionID newInstruction)
|
modify (Map.insert newInstructionID newInstruction)
|
||||||
return newInstructionID
|
return newInstructionID
|
||||||
|
|
||||||
|
@ -80,7 +80,7 @@ graphicContextUnit =
|
||||||
(GraphicInstruction <$> instruction)
|
(GraphicInstruction <$> instruction)
|
||||||
<|> (ContentUnit <$> contentUnit)
|
<|> (ContentUnit <$> contentUnit)
|
||||||
|
|
||||||
instruction :: InstructionParser m => m InstructionId
|
instruction :: InstructionParser m => m (Id Instruction)
|
||||||
instruction = evalStateT stackParser [] >>= register
|
instruction = evalStateT stackParser [] >>= register
|
||||||
where
|
where
|
||||||
stackParser = ((directObject <* blank) >>= push) <|> operator
|
stackParser = ((directObject <* blank) >>= push) <|> operator
|
||||||
|
|
|
@ -2,6 +2,9 @@
|
||||||
{-# LANGUAGE OverloadedStrings #-}
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
{-# LANGUAGE ConstraintKinds #-}
|
{-# LANGUAGE ConstraintKinds #-}
|
||||||
{-# LANGUAGE FlexibleContexts #-}
|
{-# LANGUAGE FlexibleContexts #-}
|
||||||
|
{-# LANGUAGE TypeSynonymInstances #-}
|
||||||
|
{-# LANGUAGE FlexibleInstances #-}
|
||||||
|
{-# LANGUAGE MultiParamTypeClasses #-}
|
||||||
module PDF.Content.Text (
|
module PDF.Content.Text (
|
||||||
format
|
format
|
||||||
, renderText
|
, renderText
|
||||||
|
@ -14,19 +17,20 @@ import Control.Monad.State (MonadState(..), evalStateT)
|
||||||
import Data.ByteString.Char8 (pack)
|
import Data.ByteString.Char8 (pack)
|
||||||
import Data.Map ((!))
|
import Data.Map ((!))
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
|
import PDF.Box (Box(..))
|
||||||
import PDF.Content (
|
import PDF.Content (
|
||||||
Content, ContentUnit(..), IndexedInstructions, InstructionId
|
Content, ContentUnit(..), Id(..), Indexed, GraphicContextUnit(..)
|
||||||
, GraphicContextUnit(..), contentUnits
|
, contentUnits
|
||||||
)
|
)
|
||||||
import qualified PDF.Content as Content (Content(..))
|
import qualified PDF.Content as Content (Content(..))
|
||||||
import PDF.Content.Operator (Instruction, Operator(..))
|
import PDF.Content.Operator (Instruction, Operator(..))
|
||||||
import PDF.Content.Operator.Text (Operator(..))
|
import PDF.Content.Operator.Text (Operator(..))
|
||||||
import PDF.Font (Font, FontSet, emptyFont)
|
import PDF.Font (Font(..), FontSet, emptyFont)
|
||||||
import PDF.Object (DirectObject(..), StringObject(..), toByteString)
|
import PDF.Object (DirectObject(..), StringObject(..), toByteString)
|
||||||
import Prelude hiding (fail)
|
import Prelude hiding (fail)
|
||||||
|
|
||||||
data ROContext = ROContext {
|
data ROContext = ROContext {
|
||||||
indexedInstructions :: IndexedInstructions
|
indexedInstructions :: Indexed Instruction
|
||||||
, fontSet :: FontSet
|
, fontSet :: FontSet
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -35,8 +39,12 @@ type FontContext m = (MonadState Font m, TextContent m)
|
||||||
|
|
||||||
decodeString :: FontContext m => StringObject -> m Text
|
decodeString :: FontContext m => StringObject -> m Text
|
||||||
decodeString input = do
|
decodeString input = do
|
||||||
font <- get
|
Font {decode} <- get
|
||||||
either fail return . font $ toByteString input
|
either fail return . decode $ toByteString input
|
||||||
|
|
||||||
|
data Chunks = Chunks
|
||||||
|
|
||||||
|
instance Monad m => Box m Chunks Content (Indexed Text)
|
||||||
|
|
||||||
renderText :: MonadFail m => FontSet -> Content -> m [Text]
|
renderText :: MonadFail m => FontSet -> Content -> m [Text]
|
||||||
renderText fontSet (Content.Content {contentUnits, Content.indexedInstructions}) =
|
renderText fontSet (Content.Content {contentUnits, Content.indexedInstructions}) =
|
||||||
|
@ -55,7 +63,7 @@ renderGraphicContextUnit (GraphicInstruction _) = return []
|
||||||
renderGraphicContextUnit (ContentUnit contentUnit) =
|
renderGraphicContextUnit (ContentUnit contentUnit) =
|
||||||
renderContentUnit contentUnit
|
renderContentUnit contentUnit
|
||||||
|
|
||||||
renderInstructionId :: FontContext m => InstructionId -> m [Text]
|
renderInstructionId :: FontContext m => Id Instruction -> m [Text]
|
||||||
renderInstructionId instructionId =
|
renderInstructionId instructionId =
|
||||||
asks ((! instructionId) . indexedInstructions) >>= renderInstruction
|
asks ((! instructionId) . indexedInstructions) >>= renderInstruction
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue