Trying to apply the same technique used for directObjects (apparently it ruins the performances)

This commit is contained in:
Tissevert 2020-02-13 16:21:44 +01:00
parent 919f640443
commit e429dbf946
2 changed files with 33 additions and 20 deletions

View file

@ -1,4 +1,5 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleContexts #-}
module PDF.Content (
Content(..)
, ContentUnit(..)
@ -8,14 +9,13 @@ module PDF.Content (
, parse
) where
import Control.Applicative ((<|>))
import Control.Monad.State (evalStateT, modify)
import Data.Attoparsec.ByteString.Char8 (sepBy)
import Control.Applicative ((<|>), many)
import Control.Monad.State (MonadState(..), evalStateT, modify)
import Data.ByteString (ByteString)
import PDF.Content.Operator (Instruction, operator)
import PDF.Object (blank, directObject)
import PDF.Output (Output(..), line)
import PDF.Parser (MonadParser, (<?>), evalParser, string)
import PDF.Parser (MonadParser, endOfInput, evalParser, peek, string)
data GraphicContextUnit =
GraphicInstruction Instruction
@ -29,33 +29,46 @@ data ContentUnit =
newtype Content = Content [ContentUnit] deriving Show
content :: MonadParser m => m Content
content = Content <$> contentUnit `sepBy` blank <?> "content"
content = Content <$> many contentUnit <* blank <* endOfInput
contentUnit :: MonadParser m => m ContentUnit
contentUnit =
(GraphicContext <$> graphicContext)
<|> (TextContext <$> textContext)
contentUnit = next >>= handle
where
graphicContext =
string "q" *> blank *> graphicContextUnit `sepBy` blank <* blank <* string "Q"
handle 'q' = GraphicContext <$> graphicContext
handle 'B' = TextContext <$> textContext
handle _ = fail "content unit"
graphicContextUnit :: MonadParser m => m GraphicContextUnit
graphicContextUnit =
(GraphicInstruction <$> instruction)
<|> (ContentUnit <$> contentUnit)
graphicContext :: MonadParser m => m [GraphicContextUnit]
graphicContext = string "q" *> evalStateT graphicContextUnits []
where
graphicContextUnits = next >>= handle
handle 'Q' = string "Q" *> (reverse <$> get)
handle c
| c `elem` ['q', 'B'] =
(ContentUnit <$> contentUnit >>= push) *> graphicContextUnits
| otherwise =
(GraphicInstruction <$> instruction >>= push) *> graphicContextUnits
instruction :: MonadParser m => m Instruction
instruction = evalStateT stackParser []
where
stackParser = ((directObject <* blank) >>= push) <|> operator
push arg = modify (arg:) *> stackParser
stackParser = ((directObject <* blank >>= push) *> stackParser) <|> operator
next :: MonadParser m => m Char
next = blank *> peek
push :: MonadState [a] m => a -> m ()
push a = modify (a:)
parse :: ByteString -> Either String Content
parse = evalParser content ()
textContext :: MonadParser m => m TextContext
textContext =
string "BT" *> blank *> instruction `sepBy` blank <* blank <* string "ET"
textContext = string "BT" *> evalStateT instructions []
where
instructions = next >>= handle
handle 'E' = string "ET" *> (reverse <$> get)
handle _ = (instruction >>= push) *> instructions
instance Output Content where
output (Content contentUnits) = output contentUnits

View file

@ -19,7 +19,7 @@ import PDF.Content.Operator.Common (Signature)
import qualified PDF.Content.Operator.GraphicState as GraphicState (Operator, signature)
import qualified PDF.Content.Operator.Path as Path (Operator, signature)
import qualified PDF.Content.Operator.Text as Text (Operator, signature)
import PDF.Object (DirectObject, blank, regular)
import PDF.Object (DirectObject, regular)
import PDF.Output (Output(..), join, line)
import PDF.Parser (MonadParser, takeAll1)
import Prelude hiding (fail)
@ -66,7 +66,7 @@ type StackParser m = (MonadState [DirectObject] m, MonadParser m)
operator :: StackParser m => m Instruction
operator = do
chunk <- takeAll1 regular <* blank
chunk <- takeAll1 regular
args <- reverse <$> get
case operatorsTable !? chunk of
Just (op, sig)