Finish implementing the new stack-based call parser

This commit is contained in:
Tissevert 2019-11-30 12:39:40 +01:00
parent 8373bd1ea0
commit afbbcbffc5
1 changed files with 88 additions and 47 deletions

View File

@ -1,17 +1,19 @@
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
module PDF.Text (
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE FlexibleContexts #-}
module PDF.Text {-(
pageContents
) where
)-} where
import Control.Applicative ((<|>))
import Control.Monad (foldM)
import Control.Monad.Reader (ReaderT, runReaderT, asks)
import Control.Monad.State (get, put)
import Data.Attoparsec.ByteString.Char8 (choice, count, sepBy)
import Control.Monad.State (MonadState, evalStateT, get, modify, put)
import Data.Attoparsec.ByteString.Char8 (choice, sepBy)
import Data.ByteString (ByteString)
import qualified Data.ByteString as BS (drop, null, take)
import Data.ByteString.Char8 (unpack)
import Data.ByteString.Char8 (pack, unpack)
import Data.Map ((!))
import qualified Data.Map as Map (lookup, toList)
import PDF.CMap (CMappers, CMap, CRange(..), emptyCMap)
@ -19,64 +21,103 @@ import PDF.Object (
DirectObject(..), StringObject(..)
, array, blank, name, regular, stringObject, toByteString
)
import PDF.Parser (MonadParser, (<?>), Parser, evalParser, string, takeAll)
import PDF.Parser (MonadParser, (<?>), Parser, evalParser, string, takeAll1)
data StateOperator =
Cm | W | J | J_ | M | D | Ri | I | Gs -- general graphic state
deriving (Bounded, Enum, Show)
deriving (Bounded, Enum)
data TextOperator =
Td | TD | Tm | Tstar -- text positioning
| TJ | Tj | Quote | DQuote -- text showing
| Tc | Tw | Tz | TL | Tf | Tr | Ts -- text state
deriving (Bounded, Enum, Show)
deriving (Bounded, Enum)
data Argument = Raw ByteString | Typed DirectObject deriving Show
type Call a = (a, [Argument])
stateOperator :: MonadParser m => StateOperator -> m (Call StateOperator)
stateOperator Cm = (,) Cm <$> count 6 argument <* string "cm"
stateOperator W = (,) W <$> count 1 argument <* string "w"
stateOperator J = (,) J <$> count 1 argument <* string "J"
stateOperator J_ = (,) J_ <$> count 1 argument <* string "j"
stateOperator M = (,) M <$> count 1 argument <* string "M"
stateOperator D = (,) D <$> count 2 argument <* string "d"
stateOperator Ri = (,) Ri <$> count 1 argument <* string "ri"
stateOperator I = (,) I <$> count 1 argument <* string "i"
stateOperator Gs = (,) Gs <$> count 1 nameArg <* string "gs"
instance Show StateOperator where
show Cm = "cm"
show W = "w"
show J = "J"
show J_ = "j"
show M = "M"
show D = "d"
show Ri = "ri"
show I = "i"
show Gs = "gs"
{-
textOperator :: MonadParser m => TextOperator -> m (Call TextOperator)
textOperator Td = (,) Td <$> count 2 argument <* string "Td"
textOperator TD = (,) TD <$> count 2 argument <* string "TD"
textOperator Tm = (,) Tm <$> count 6 argument <* string "Tm"
textOperator Tstar = (,) Td <$> return [] <* string "T*"
textOperator TJ = (,) TJ <$> count 1 arrayArg <* string "TJ"
textOperator Tj = (,) Tj <$> count 1 stringArg <* string "Tj"
textOperator Quote = (,) Quote <$> count 1 stringArg <* string "'"
textOperator DQuote = (,) DQuote <$> count 1 stringArg <* string "\""
textOperator Tc = (,) Tc <$> count 1 argument <* string "Tc"
textOperator Tw = (,) Tw <$> count 1 argument <* string "Tw"
textOperator Tz = (,) Tz <$> count 1 argument <* string "Tz"
textOperator TL = (,) TL <$> count 1 argument <* string "TL"
textOperator Tf = (,) Tf <$> sequence [nameArg, argument] <* string "Tf"
textOperator Tr = (,) Tr <$> count 1 argument <* string "Tr"
textOperator Ts = (,) Ts <$> count 1 argument <* string "Ts"
-}
instance Show TextOperator where
show Td = "Td"
show TD = "TD"
show Tm = "Tm"
show Tstar = "T*"
show TJ = "TJ"
show Tj = "Tj"
show Quote = "'"
show DQuote = "\""
show Tc = "Tc"
show Tw = "Tw"
show Tz = "Tz"
show TL = "TL"
show Tf = "Tf"
show Tr = "Tr"
show Ts = "Ts"
textOperator :: MonadParser m => TextOperator -> m TextOperator
textOperator textOp = string (show textOp) *> return textOp
stateOperator :: TypeChecker StateOperator
stateOperator (Cm, [Raw _, Raw _, Raw _, Raw _, Raw _, Raw _]) = True
stateOperator (W, [Raw _]) = True
stateOperator (J, [Raw _]) = True
stateOperator (J_, [Raw _]) = True
stateOperator (M, [Raw _]) = True
stateOperator (D, [Raw _, Raw _]) = True
stateOperator (Ri, [Raw _]) = True
stateOperator (I, [Raw _]) = True
stateOperator (Gs, [Typed (NameObject _)]) = True
stateOperator _ = False
textChunk :: MonadParser m => m (Either TextOperator Argument)
textChunk =
choice $ Left . textOperator <$> [minBound .. maxBound]
<|> choice $ Right <$> [stringArg, nameArg, arrayArg, argument]
textOperator :: TypeChecker TextOperator
textOperator (Td, [Raw _, Raw _]) = True
textOperator (TD, [Raw _, Raw _]) = True
textOperator (Tm, [Raw _, Raw _, Raw _, Raw _, Raw _, Raw _]) = True
textOperator (Tstar, []) = True
textOperator (TJ, [Typed (Array _)]) = True
textOperator (Tj, [Typed (StringObject _)]) = True
textOperator (Quote, [Typed (StringObject _)]) = True
textOperator (DQuote, [Typed (StringObject _)]) = True
textOperator (Tc, [Raw _]) = True
textOperator (Tw, [Raw _]) = True
textOperator (Tz, [Raw _]) = True
textOperator (TL, [Raw _]) = True
textOperator (Tf, [Typed (NameObject _), Raw _]) = True
textOperator (Tr, [Raw _]) = True
textOperator (Ts, [Raw _]) = True
textOperator _ = False
aTextOperator :: MonadParser m => m (Call TextOperator)
type ArgumentStackParser m = (MonadState [Argument] m, MonadParser m)
type Operator a = (Bounded a, Enum a, Show a)
type TypeChecker a = Call a -> Bool
a :: (Bounded o, Enum o, MonadParser m) => (o -> m (Call o)) -> m (Call o)
a parserGenerator = choice $ parserGenerator <$> [minBound .. maxBound]
parseShowable :: (Show a, MonadParser m) => a -> m a
parseShowable textOp = string (pack $ show textOp) *> return textOp
callChunk :: (MonadParser m, Operator a) => m (Either a Argument)
callChunk =
Left <$> choice (parseShowable <$> [minBound .. maxBound])
<|> Right <$> choice [stringArg, nameArg, arrayArg, argument]
<?> "call chunk"
stackParser :: (ArgumentStackParser m, Operator a) => TypeChecker a -> m (Call a)
stackParser typeChecker = either popCall push =<< callChunk
where
push arg = modify (arg:) >> stackParser typeChecker
popCall operator = do
call <- (,) operator . reverse <$> get
if typeChecker call then return call else fail (show call)
a :: (Operator a, MonadParser m) => TypeChecker a -> m (Call a)
a typeChecker = evalStateT (stackParser typeChecker) []
argument :: MonadParser m => m Argument
argument = Raw <$> takeAll regular <* blank
argument = Raw <$> takeAll1 regular <* blank
arrayArg :: MonadParser m => m Argument
arrayArg = Typed . Array <$> array <* blank
@ -107,7 +148,7 @@ text :: ParserWithFont [ByteString]
text =
string "BT" *> blank *> commands <* blank <* string "ET" <?> "Text operators"
where
commands = concat <$> (aTextOperator >>= runOperator) `sepBy` blank
commands = concat <$> (a textOperator >>= runOperator) `sepBy` blank
runOperator :: Call TextOperator -> ParserWithFont [ByteString]
runOperator (Tf, [Typed (NameObject fontName), _]) =