Add IDs to Instructions so that they can be selected in a given Content (and modified one day…)

This commit is contained in:
Tissevert 2020-02-23 22:21:09 +01:00
parent 160999a7d7
commit 71e62ee732
2 changed files with 73 additions and 27 deletions

View File

@ -1,37 +1,64 @@
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE FlexibleContexts #-}
module PDF.Content ( module PDF.Content (
Content(..) Content(..)
, ContentUnit(..) , ContentUnit(..)
, GraphicContextUnit(..) , GraphicContextUnit(..)
, IndexedInstructions
, InstructionId
, TextContext , TextContext
, content , content
, parse , parse
) where ) where
import Control.Applicative ((<|>)) import Control.Applicative ((<|>))
import Control.Monad.State (evalStateT, modify) import Control.Monad.Reader (asks, runReader)
import Control.Monad.State (MonadState, evalStateT, gets, modify, runStateT)
import Data.Attoparsec.ByteString.Char8 (sepBy) import Data.Attoparsec.ByteString.Char8 (sepBy)
import Data.ByteString (ByteString) import Data.ByteString (ByteString)
import Data.Map (Map, (!))
import qualified Data.Map as Map (empty, insert, size)
import PDF.Content.Operator (Instruction, operator) import PDF.Content.Operator (Instruction, operator)
import PDF.Object (blank, directObject) import PDF.Object (blank, directObject)
import PDF.Output (Output(..), line) import PDF.Output (Output(..), line)
import PDF.Parser (MonadParser, (<?>), evalParser, string) import PDF.Parser (MonadParser, evalParser, string)
newtype InstructionId = InstructionId Int deriving (Eq, Ord, Show)
type IndexedInstructions = Map InstructionId Instruction
data GraphicContextUnit = data GraphicContextUnit =
GraphicInstruction Instruction GraphicInstruction InstructionId
| ContentUnit ContentUnit | ContentUnit ContentUnit
deriving Show deriving Show
type TextContext = [Instruction] type TextContext = [InstructionId]
data ContentUnit = data ContentUnit =
GraphicContext [GraphicContextUnit] GraphicContext [GraphicContextUnit]
| TextContext TextContext | TextContext TextContext
deriving Show deriving Show
newtype Content = Content [ContentUnit] deriving Show data Content = Content {
contentUnits :: [ContentUnit]
, indexedInstructions :: IndexedInstructions
} deriving Show
type InstructionParser m = (MonadParser m, MonadState IndexedInstructions m)
register :: MonadState IndexedInstructions m => Instruction -> m InstructionId
register newInstruction = do
newInstructionID <- gets (InstructionId . Map.size)
modify (Map.insert newInstructionID newInstruction)
return newInstructionID
content :: MonadParser m => m Content content :: MonadParser m => m Content
content = Content <$> contentUnit `sepBy` blank <?> "content" content = do
(contentUnits, indexedInstructions) <- runInstructionParser
return $ Content {contentUnits, indexedInstructions}
where
runInstructionParser = runStateT (contentUnit `sepBy` blank) Map.empty
contentUnit :: MonadParser m => m ContentUnit contentUnit :: InstructionParser m => m ContentUnit
contentUnit = contentUnit =
(GraphicContext <$> graphicContext) (GraphicContext <$> graphicContext)
<|> (TextContext <$> textContext) <|> (TextContext <$> textContext)
@ -39,13 +66,13 @@ contentUnit =
graphicContext = graphicContext =
string "q" *> blank *> graphicContextUnit `sepBy` blank <* blank <* string "Q" string "q" *> blank *> graphicContextUnit `sepBy` blank <* blank <* string "Q"
graphicContextUnit :: MonadParser m => m GraphicContextUnit graphicContextUnit :: InstructionParser m => m GraphicContextUnit
graphicContextUnit = graphicContextUnit =
(GraphicInstruction <$> instruction) (GraphicInstruction <$> instruction)
<|> (ContentUnit <$> contentUnit) <|> (ContentUnit <$> contentUnit)
instruction :: MonadParser m => m Instruction instruction :: InstructionParser m => m InstructionId
instruction = evalStateT stackParser [] instruction = evalStateT stackParser [] >>= register
where where
stackParser = ((directObject <* blank) >>= push) <|> operator stackParser = ((directObject <* blank) >>= push) <|> operator
push arg = modify (arg:) *> stackParser push arg = modify (arg:) *> stackParser
@ -53,17 +80,20 @@ instruction = evalStateT stackParser []
parse :: ByteString -> Either String Content parse :: ByteString -> Either String Content
parse = evalParser content () parse = evalParser content ()
textContext :: MonadParser m => m TextContext textContext :: InstructionParser m => m TextContext
textContext = textContext =
string "BT" *> blank *> instruction `sepBy` blank <* blank <* string "ET" string "BT" *> blank *> instruction `sepBy` blank <* blank <* string "ET"
instance Output Content where instance Output Content where
output (Content contentUnits) = output contentUnits output (Content {contentUnits, indexedInstructions}) =
runReader (mconcat <$> mapM outputCU contentUnits) indexedInstructions
instance Output ContentUnit where where
output (GraphicContext gc) = line "q" `mappend` output gc `mappend` line "Q" outputCU (GraphicContext gc) = do
output (TextContext tc) = line "BT" `mappend` output tc `mappend` line "ET" inside <- mconcat <$> mapM outputGCU gc
return (line "q" `mappend` inside `mappend` line "Q")
instance Output GraphicContextUnit where outputCU (TextContext tc) = do
output (GraphicInstruction gi) = output gi inside <- mconcat <$> mapM outputIId tc
output (ContentUnit cu) = output cu return (line "BT" `mappend` inside `mappend` line "ET")
outputGCU (GraphicInstruction gi) = outputIId gi
outputGCU (ContentUnit cu) = outputCU cu
outputIId instructionId = asks (output . (! instructionId))

View File

@ -1,3 +1,4 @@
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleContexts #-}
@ -13,14 +14,23 @@ import Control.Monad.State (MonadState(..), evalStateT)
import Data.ByteString.Char8 (pack) import Data.ByteString.Char8 (pack)
import Data.Map ((!)) import Data.Map ((!))
import Data.Text (Text) import Data.Text (Text)
import PDF.Content (Content(..), ContentUnit(..), GraphicContextUnit(..)) import PDF.Content (
Content, ContentUnit(..), IndexedInstructions, InstructionId
, GraphicContextUnit(..), contentUnits
)
import qualified PDF.Content as Content (Content(..))
import PDF.Content.Operator (Instruction, Operator(..)) import PDF.Content.Operator (Instruction, Operator(..))
import PDF.Content.Operator.Text (Operator(..)) import PDF.Content.Operator.Text (Operator(..))
import PDF.Font (Font, FontSet, emptyFont) import PDF.Font (Font, FontSet, emptyFont)
import PDF.Object (DirectObject(..), StringObject(..), toByteString) import PDF.Object (DirectObject(..), StringObject(..), toByteString)
import Prelude hiding (fail) import Prelude hiding (fail)
type TextContent m = (MonadReader FontSet m, MonadFail m) data ROContext = ROContext {
indexedInstructions :: IndexedInstructions
, fontSet :: FontSet
}
type TextContent m = (MonadReader ROContext m, MonadFail m)
type FontContext m = (MonadState Font m, TextContent m) type FontContext m = (MonadState Font m, TextContent m)
decodeString :: FontContext m => StringObject -> m Text decodeString :: FontContext m => StringObject -> m Text
@ -29,23 +39,29 @@ decodeString input = do
either fail return . font $ toByteString input either fail return . font $ toByteString input
renderText :: MonadFail m => FontSet -> Content -> m [Text] renderText :: MonadFail m => FontSet -> Content -> m [Text]
renderText fontSet (Content contentUnits) = renderText fontSet (Content.Content {contentUnits, Content.indexedInstructions}) =
runReaderT (concat <$> mapM renderContentUnit contentUnits) fontSet runReaderT (concat <$> mapM renderContentUnit contentUnits) roContext
where
roContext = ROContext {indexedInstructions, fontSet}
renderContentUnit :: TextContent m => ContentUnit -> m [Text] renderContentUnit :: TextContent m => ContentUnit -> m [Text]
renderContentUnit (GraphicContext graphicContextUnits) = renderContentUnit (GraphicContext graphicContextUnits) =
concat <$> mapM renderGraphicContextUnit graphicContextUnits concat <$> mapM renderGraphicContextUnit graphicContextUnits
renderContentUnit (TextContext instructions) = renderContentUnit (TextContext instructionIds) =
evalStateT (concat <$> mapM renderInstruction instructions) emptyFont evalStateT (concat <$> mapM renderInstructionId instructionIds) emptyFont
renderGraphicContextUnit :: TextContent m => GraphicContextUnit -> m [Text] renderGraphicContextUnit :: TextContent m => GraphicContextUnit -> m [Text]
renderGraphicContextUnit (GraphicInstruction _) = return [] renderGraphicContextUnit (GraphicInstruction _) = return []
renderGraphicContextUnit (ContentUnit contentUnit) = renderGraphicContextUnit (ContentUnit contentUnit) =
renderContentUnit contentUnit renderContentUnit contentUnit
renderInstructionId :: FontContext m => InstructionId -> m [Text]
renderInstructionId instructionId =
asks ((! instructionId) . indexedInstructions) >>= renderInstruction
renderInstruction :: FontContext m => Instruction -> m [Text] renderInstruction :: FontContext m => Instruction -> m [Text]
renderInstruction (Text Tf, [NameObject fontName, _]) = renderInstruction (Text Tf, [NameObject fontName, _]) =
asks (! fontName) >>= put >> return [] asks ((! fontName) . fontSet) >>= put >> return []
renderInstruction (Text Tstar, []) = return ["\n"] renderInstruction (Text Tstar, []) = return ["\n"]