Trying to apply the same technique used for directObjects (apparently it ruins the performances)

This commit is contained in:
Tissevert 2020-02-13 16:21:44 +01:00
parent 919f640443
commit e429dbf946
2 changed files with 33 additions and 20 deletions

View file

@ -1,4 +1,5 @@
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleContexts #-}
module PDF.Content ( module PDF.Content (
Content(..) Content(..)
, ContentUnit(..) , ContentUnit(..)
@ -8,14 +9,13 @@ module PDF.Content (
, parse , parse
) where ) where
import Control.Applicative ((<|>)) import Control.Applicative ((<|>), many)
import Control.Monad.State (evalStateT, modify) import Control.Monad.State (MonadState(..), evalStateT, modify)
import Data.Attoparsec.ByteString.Char8 (sepBy)
import Data.ByteString (ByteString) import Data.ByteString (ByteString)
import PDF.Content.Operator (Instruction, operator) import PDF.Content.Operator (Instruction, operator)
import PDF.Object (blank, directObject) import PDF.Object (blank, directObject)
import PDF.Output (Output(..), line) import PDF.Output (Output(..), line)
import PDF.Parser (MonadParser, (<?>), evalParser, string) import PDF.Parser (MonadParser, endOfInput, evalParser, peek, string)
data GraphicContextUnit = data GraphicContextUnit =
GraphicInstruction Instruction GraphicInstruction Instruction
@ -29,33 +29,46 @@ data ContentUnit =
newtype Content = Content [ContentUnit] deriving Show newtype Content = Content [ContentUnit] deriving Show
content :: MonadParser m => m Content content :: MonadParser m => m Content
content = Content <$> contentUnit `sepBy` blank <?> "content" content = Content <$> many contentUnit <* blank <* endOfInput
contentUnit :: MonadParser m => m ContentUnit contentUnit :: MonadParser m => m ContentUnit
contentUnit = contentUnit = next >>= handle
(GraphicContext <$> graphicContext)
<|> (TextContext <$> textContext)
where where
graphicContext = handle 'q' = GraphicContext <$> graphicContext
string "q" *> blank *> graphicContextUnit `sepBy` blank <* blank <* string "Q" handle 'B' = TextContext <$> textContext
handle _ = fail "content unit"
graphicContextUnit :: MonadParser m => m GraphicContextUnit graphicContext :: MonadParser m => m [GraphicContextUnit]
graphicContextUnit = graphicContext = string "q" *> evalStateT graphicContextUnits []
(GraphicInstruction <$> instruction) where
<|> (ContentUnit <$> contentUnit) graphicContextUnits = next >>= handle
handle 'Q' = string "Q" *> (reverse <$> get)
handle c
| c `elem` ['q', 'B'] =
(ContentUnit <$> contentUnit >>= push) *> graphicContextUnits
| otherwise =
(GraphicInstruction <$> instruction >>= push) *> graphicContextUnits
instruction :: MonadParser m => m Instruction instruction :: MonadParser m => m Instruction
instruction = evalStateT stackParser [] instruction = evalStateT stackParser []
where where
stackParser = ((directObject <* blank) >>= push) <|> operator stackParser = ((directObject <* blank >>= push) *> stackParser) <|> operator
push arg = modify (arg:) *> stackParser
next :: MonadParser m => m Char
next = blank *> peek
push :: MonadState [a] m => a -> m ()
push a = modify (a:)
parse :: ByteString -> Either String Content parse :: ByteString -> Either String Content
parse = evalParser content () parse = evalParser content ()
textContext :: MonadParser m => m TextContext textContext :: MonadParser m => m TextContext
textContext = textContext = string "BT" *> evalStateT instructions []
string "BT" *> blank *> instruction `sepBy` blank <* blank <* string "ET" where
instructions = next >>= handle
handle 'E' = string "ET" *> (reverse <$> get)
handle _ = (instruction >>= push) *> instructions
instance Output Content where instance Output Content where
output (Content contentUnits) = output contentUnits output (Content contentUnits) = output contentUnits

View file

@ -19,7 +19,7 @@ import PDF.Content.Operator.Common (Signature)
import qualified PDF.Content.Operator.GraphicState as GraphicState (Operator, signature) import qualified PDF.Content.Operator.GraphicState as GraphicState (Operator, signature)
import qualified PDF.Content.Operator.Path as Path (Operator, signature) import qualified PDF.Content.Operator.Path as Path (Operator, signature)
import qualified PDF.Content.Operator.Text as Text (Operator, signature) import qualified PDF.Content.Operator.Text as Text (Operator, signature)
import PDF.Object (DirectObject, blank, regular) import PDF.Object (DirectObject, regular)
import PDF.Output (Output(..), join, line) import PDF.Output (Output(..), join, line)
import PDF.Parser (MonadParser, takeAll1) import PDF.Parser (MonadParser, takeAll1)
import Prelude hiding (fail) import Prelude hiding (fail)
@ -66,7 +66,7 @@ type StackParser m = (MonadState [DirectObject] m, MonadParser m)
operator :: StackParser m => m Instruction operator :: StackParser m => m Instruction
operator = do operator = do
chunk <- takeAll1 regular <* blank chunk <- takeAll1 regular
args <- reverse <$> get args <- reverse <$> get
case operatorsTable !? chunk of case operatorsTable !? chunk of
Just (op, sig) Just (op, sig)