Add IDs to Instructions so that they can be selected in a given Content (and modified one day…)

This commit is contained in:
Tissevert 2020-02-23 22:21:09 +01:00
parent 160999a7d7
commit 71e62ee732
2 changed files with 73 additions and 27 deletions

View File

@ -1,37 +1,64 @@
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE FlexibleContexts #-}
module PDF.Content (
Content(..)
, ContentUnit(..)
, GraphicContextUnit(..)
, IndexedInstructions
, InstructionId
, TextContext
, content
, parse
) where
import Control.Applicative ((<|>))
import Control.Monad.State (evalStateT, modify)
import Control.Monad.Reader (asks, runReader)
import Control.Monad.State (MonadState, evalStateT, gets, modify, runStateT)
import Data.Attoparsec.ByteString.Char8 (sepBy)
import Data.ByteString (ByteString)
import Data.Map (Map, (!))
import qualified Data.Map as Map (empty, insert, size)
import PDF.Content.Operator (Instruction, operator)
import PDF.Object (blank, directObject)
import PDF.Output (Output(..), line)
import PDF.Parser (MonadParser, (<?>), evalParser, string)
import PDF.Parser (MonadParser, evalParser, string)
newtype InstructionId = InstructionId Int deriving (Eq, Ord, Show)
type IndexedInstructions = Map InstructionId Instruction
data GraphicContextUnit =
GraphicInstruction Instruction
GraphicInstruction InstructionId
| ContentUnit ContentUnit
deriving Show
type TextContext = [Instruction]
type TextContext = [InstructionId]
data ContentUnit =
GraphicContext [GraphicContextUnit]
| TextContext TextContext
deriving Show
newtype Content = Content [ContentUnit] deriving Show
data Content = Content {
contentUnits :: [ContentUnit]
, indexedInstructions :: IndexedInstructions
} deriving Show
type InstructionParser m = (MonadParser m, MonadState IndexedInstructions m)
register :: MonadState IndexedInstructions m => Instruction -> m InstructionId
register newInstruction = do
newInstructionID <- gets (InstructionId . Map.size)
modify (Map.insert newInstructionID newInstruction)
return newInstructionID
content :: MonadParser m => m Content
content = Content <$> contentUnit `sepBy` blank <?> "content"
content = do
(contentUnits, indexedInstructions) <- runInstructionParser
return $ Content {contentUnits, indexedInstructions}
where
runInstructionParser = runStateT (contentUnit `sepBy` blank) Map.empty
contentUnit :: MonadParser m => m ContentUnit
contentUnit :: InstructionParser m => m ContentUnit
contentUnit =
(GraphicContext <$> graphicContext)
<|> (TextContext <$> textContext)
@ -39,13 +66,13 @@ contentUnit =
graphicContext =
string "q" *> blank *> graphicContextUnit `sepBy` blank <* blank <* string "Q"
graphicContextUnit :: MonadParser m => m GraphicContextUnit
graphicContextUnit :: InstructionParser m => m GraphicContextUnit
graphicContextUnit =
(GraphicInstruction <$> instruction)
<|> (ContentUnit <$> contentUnit)
instruction :: MonadParser m => m Instruction
instruction = evalStateT stackParser []
instruction :: InstructionParser m => m InstructionId
instruction = evalStateT stackParser [] >>= register
where
stackParser = ((directObject <* blank) >>= push) <|> operator
push arg = modify (arg:) *> stackParser
@ -53,17 +80,20 @@ instruction = evalStateT stackParser []
parse :: ByteString -> Either String Content
parse = evalParser content ()
textContext :: MonadParser m => m TextContext
textContext :: InstructionParser m => m TextContext
textContext =
string "BT" *> blank *> instruction `sepBy` blank <* blank <* string "ET"
instance Output Content where
output (Content contentUnits) = output contentUnits
instance Output ContentUnit where
output (GraphicContext gc) = line "q" `mappend` output gc `mappend` line "Q"
output (TextContext tc) = line "BT" `mappend` output tc `mappend` line "ET"
instance Output GraphicContextUnit where
output (GraphicInstruction gi) = output gi
output (ContentUnit cu) = output cu
output (Content {contentUnits, indexedInstructions}) =
runReader (mconcat <$> mapM outputCU contentUnits) indexedInstructions
where
outputCU (GraphicContext gc) = do
inside <- mconcat <$> mapM outputGCU gc
return (line "q" `mappend` inside `mappend` line "Q")
outputCU (TextContext tc) = do
inside <- mconcat <$> mapM outputIId tc
return (line "BT" `mappend` inside `mappend` line "ET")
outputGCU (GraphicInstruction gi) = outputIId gi
outputGCU (ContentUnit cu) = outputCU cu
outputIId instructionId = asks (output . (! instructionId))

View File

@ -1,3 +1,4 @@
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE FlexibleContexts #-}
@ -13,14 +14,23 @@ import Control.Monad.State (MonadState(..), evalStateT)
import Data.ByteString.Char8 (pack)
import Data.Map ((!))
import Data.Text (Text)
import PDF.Content (Content(..), ContentUnit(..), GraphicContextUnit(..))
import PDF.Content (
Content, ContentUnit(..), IndexedInstructions, InstructionId
, GraphicContextUnit(..), contentUnits
)
import qualified PDF.Content as Content (Content(..))
import PDF.Content.Operator (Instruction, Operator(..))
import PDF.Content.Operator.Text (Operator(..))
import PDF.Font (Font, FontSet, emptyFont)
import PDF.Object (DirectObject(..), StringObject(..), toByteString)
import Prelude hiding (fail)
type TextContent m = (MonadReader FontSet m, MonadFail m)
data ROContext = ROContext {
indexedInstructions :: IndexedInstructions
, fontSet :: FontSet
}
type TextContent m = (MonadReader ROContext m, MonadFail m)
type FontContext m = (MonadState Font m, TextContent m)
decodeString :: FontContext m => StringObject -> m Text
@ -29,23 +39,29 @@ decodeString input = do
either fail return . font $ toByteString input
renderText :: MonadFail m => FontSet -> Content -> m [Text]
renderText fontSet (Content contentUnits) =
runReaderT (concat <$> mapM renderContentUnit contentUnits) fontSet
renderText fontSet (Content.Content {contentUnits, Content.indexedInstructions}) =
runReaderT (concat <$> mapM renderContentUnit contentUnits) roContext
where
roContext = ROContext {indexedInstructions, fontSet}
renderContentUnit :: TextContent m => ContentUnit -> m [Text]
renderContentUnit (GraphicContext graphicContextUnits) =
concat <$> mapM renderGraphicContextUnit graphicContextUnits
renderContentUnit (TextContext instructions) =
evalStateT (concat <$> mapM renderInstruction instructions) emptyFont
renderContentUnit (TextContext instructionIds) =
evalStateT (concat <$> mapM renderInstructionId instructionIds) emptyFont
renderGraphicContextUnit :: TextContent m => GraphicContextUnit -> m [Text]
renderGraphicContextUnit (GraphicInstruction _) = return []
renderGraphicContextUnit (ContentUnit contentUnit) =
renderContentUnit contentUnit
renderInstructionId :: FontContext m => InstructionId -> m [Text]
renderInstructionId instructionId =
asks ((! instructionId) . indexedInstructions) >>= renderInstruction
renderInstruction :: FontContext m => Instruction -> m [Text]
renderInstruction (Text Tf, [NameObject fontName, _]) =
asks (! fontName) >>= put >> return []
asks ((! fontName) . fontSet) >>= put >> return []
renderInstruction (Text Tstar, []) = return ["\n"]