From cefb08ee50c5b2096770e03cf304f6e7fb98a9f7 Mon Sep 17 00:00:00 2001 From: Tissevert Date: Sat, 30 Nov 2019 21:46:22 +0100 Subject: [PATCH] =?UTF-8?q?Going=20a=20step=20further=20in=20=C2=ABoptimiz?= =?UTF-8?q?ation=C2=BB=20(slowing=20it=20even=20more=E2=80=A6)=20by=20repl?= =?UTF-8?q?acing=20choice=20by=20a=20search=20in=20a=20Map?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/PDF/Text.hs | 78 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 60 insertions(+), 18 deletions(-) diff --git a/src/PDF/Text.hs b/src/PDF/Text.hs index 70b15b9..a2c7d45 100644 --- a/src/PDF/Text.hs +++ b/src/PDF/Text.hs @@ -14,8 +14,8 @@ import Data.Attoparsec.ByteString.Char8 (choice, sepBy) import Data.ByteString (ByteString) import qualified Data.ByteString as BS (drop, null, take) import Data.ByteString.Char8 (pack, unpack) -import Data.Map ((!)) -import qualified Data.Map as Map (lookup, toList) +import Data.Map ((!), (!?), Map) +import qualified Data.Map as Map (fromList, lookup, toList) import PDF.CMap (CMappers, CMap, CRange(..), emptyCMap) import PDF.Object ( DirectObject(..), StringObject(..) @@ -62,7 +62,20 @@ instance Show TextOperator where show Tr = "Tr" show Ts = "Ts" -stateOperator :: TypeChecker StateOperator +stateOperator :: OperatorTable StateOperator +stateOperator = Map.fromList [ + ("cm", (Cm, \l -> case l of [Raw _, Raw _, Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False)) + , ("w", (W, \l -> case l of [Raw _] -> True ; _ -> False)) + , ("J", (J, \l -> case l of [Raw _] -> True ; _ -> False)) + , ("j", (J_, \l -> case l of [Raw _] -> True ; _ -> False)) + , ("M", (M, \l -> case l of [Raw _] -> True ; _ -> False)) + , ("d", (D, \l -> case l of [Raw _, Raw _] -> True ; _ -> False)) + , ("ri", (Ri, \l -> case l of [Raw _] -> True ; _ -> False)) + , ("i", (I, \l -> case l of [Raw _] -> True ; _ -> False)) + , ("gs", (Gs, \l -> case l of [Typed (NameObject _)] -> True ; _ -> False)) + ] + +{- stateOperator (Cm, [Raw _, Raw _, Raw _, Raw _, Raw _, Raw _]) = True stateOperator (W, [Raw _]) = True stateOperator (J, [Raw _]) = True @@ -73,8 +86,28 @@ stateOperator (Ri, [Raw _]) = True stateOperator (I, [Raw _]) = True stateOperator (Gs, [Typed (NameObject _)]) = True stateOperator _ = False +-} -textOperator :: TypeChecker TextOperator +textOperator :: OperatorTable TextOperator +textOperator = Map.fromList $ (\(op, checker) -> (pack $ show op, (op, checker))) <$> [ + (Td, \l -> case l of [Raw _, Raw _] -> True ; _ -> False) + , (TD, \l -> case l of [Raw _, Raw _] -> True ; _ -> False) + , (Tm, \l -> case l of [Raw _, Raw _, Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False) + , (Tstar, \l -> case l of [] -> True ; _ -> False) + , (TJ, \l -> case l of [Typed (Array _)] -> True ; _ -> False) + , (Tj, \l -> case l of [Typed (StringObject _)] -> True ; _ -> False) + , (Quote, \l -> case l of [Typed (StringObject _)] -> True ; _ -> False) + , (DQuote, \l -> case l of [Typed (StringObject _)] -> True ; _ -> False) + , (Tc, \l -> case l of [Raw _] -> True ; _ -> False) + , (Tw, \l -> case l of [Raw _] -> True ; _ -> False) + , (Tz, \l -> case l of [Raw _] -> True ; _ -> False) + , (TL, \l -> case l of [Raw _] -> True ; _ -> False) + , (Tf, \l -> case l of [Typed (NameObject _), Raw _] -> True ; _ -> False) + , (Tr, \l -> case l of [Raw _] -> True ; _ -> False) + , (Ts, \l -> case l of [Raw _] -> True ; _ -> False) + ] + + {- textOperator (Td, [Raw _, Raw _]) = True textOperator (TD, [Raw _, Raw _]) = True textOperator (Tm, [Raw _, Raw _, Raw _, Raw _, Raw _, Raw _]) = True @@ -91,30 +124,39 @@ textOperator (Tf, [Typed (NameObject _), Raw _]) = True textOperator (Tr, [Raw _]) = True textOperator (Ts, [Raw _]) = True textOperator _ = False +-} type ArgumentStackParser m = (MonadState [Argument] m, MonadParser m) type Operator a = (Bounded a, Enum a, Show a) -type TypeChecker a = Call a -> Bool +type OperatorTable a = Map ByteString (TypeChecker a) +type TypeChecker a = (a, [Argument] -> Bool) parseShowable :: (Show a, MonadParser m) => a -> m a parseShowable textOp = string (pack $ show textOp) *> return textOp -callChunk :: (MonadParser m, Operator a) => m (Either a Argument) -callChunk = - Left <$> choice (parseShowable <$> [minBound .. maxBound]) - <|> Right <$> choice [stringArg, nameArg, arrayArg, argument] +callChunk :: MonadParser m => OperatorTable a -> m (Either (TypeChecker a) Argument) +callChunk table = + (Right <$> choice [stringArg, nameArg, arrayArg]) + <|> operatorOrRawArg "call chunk" - -stackParser :: (ArgumentStackParser m, Operator a) => TypeChecker a -> m (Call a) -stackParser typeChecker = either popCall push =<< callChunk where - push arg = modify (arg:) >> stackParser typeChecker - popCall operator = do - call <- (,) operator . reverse <$> get - if typeChecker call then return call else fail (show call) + operatorOrRawArg = do + chunk <- takeAll1 regular <* blank + case table !? chunk of + Nothing -> return . Right $ Raw chunk + Just typeChecker -> return $ Left typeChecker -a :: (Operator a, MonadParser m) => TypeChecker a -> m (Call a) -a typeChecker = evalStateT (stackParser typeChecker) [] +stackParser :: (ArgumentStackParser m, Show a) => OperatorTable a -> m (Call a) +stackParser table = either popCall push =<< (callChunk table) + where + push arg = modify (arg:) >> stackParser table + popCall (operator, predicate) = do + arguments <- reverse <$> get + let call = (operator, arguments) + if predicate arguments then return call else fail (show call) + +a :: (Operator a, MonadParser m) => OperatorTable a -> m (Call a) +a table = evalStateT (stackParser table) [] argument :: MonadParser m => m Argument argument = Raw <$> takeAll1 regular <* blank