Finish implementing the new stack-based call parser
This commit is contained in:
parent
8373bd1ea0
commit
afbbcbffc5
135
src/PDF/Text.hs
135
src/PDF/Text.hs
|
@ -1,17 +1,19 @@
|
|||
{-# LANGUAGE NamedFieldPuns #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
module PDF.Text (
|
||||
{-# LANGUAGE ConstraintKinds #-}
|
||||
{-# LANGUAGE FlexibleContexts #-}
|
||||
module PDF.Text {-(
|
||||
pageContents
|
||||
) where
|
||||
)-} where
|
||||
|
||||
import Control.Applicative ((<|>))
|
||||
import Control.Monad (foldM)
|
||||
import Control.Monad.Reader (ReaderT, runReaderT, asks)
|
||||
import Control.Monad.State (get, put)
|
||||
import Data.Attoparsec.ByteString.Char8 (choice, count, sepBy)
|
||||
import Control.Monad.State (MonadState, evalStateT, get, modify, put)
|
||||
import Data.Attoparsec.ByteString.Char8 (choice, sepBy)
|
||||
import Data.ByteString (ByteString)
|
||||
import qualified Data.ByteString as BS (drop, null, take)
|
||||
import Data.ByteString.Char8 (unpack)
|
||||
import Data.ByteString.Char8 (pack, unpack)
|
||||
import Data.Map ((!))
|
||||
import qualified Data.Map as Map (lookup, toList)
|
||||
import PDF.CMap (CMappers, CMap, CRange(..), emptyCMap)
|
||||
|
@ -19,64 +21,103 @@ import PDF.Object (
|
|||
DirectObject(..), StringObject(..)
|
||||
, array, blank, name, regular, stringObject, toByteString
|
||||
)
|
||||
import PDF.Parser (MonadParser, (<?>), Parser, evalParser, string, takeAll)
|
||||
import PDF.Parser (MonadParser, (<?>), Parser, evalParser, string, takeAll1)
|
||||
|
||||
data StateOperator =
|
||||
Cm | W | J | J_ | M | D | Ri | I | Gs -- general graphic state
|
||||
deriving (Bounded, Enum, Show)
|
||||
deriving (Bounded, Enum)
|
||||
data TextOperator =
|
||||
Td | TD | Tm | Tstar -- text positioning
|
||||
| TJ | Tj | Quote | DQuote -- text showing
|
||||
| Tc | Tw | Tz | TL | Tf | Tr | Ts -- text state
|
||||
deriving (Bounded, Enum, Show)
|
||||
deriving (Bounded, Enum)
|
||||
data Argument = Raw ByteString | Typed DirectObject deriving Show
|
||||
type Call a = (a, [Argument])
|
||||
|
||||
stateOperator :: MonadParser m => StateOperator -> m (Call StateOperator)
|
||||
stateOperator Cm = (,) Cm <$> count 6 argument <* string "cm"
|
||||
stateOperator W = (,) W <$> count 1 argument <* string "w"
|
||||
stateOperator J = (,) J <$> count 1 argument <* string "J"
|
||||
stateOperator J_ = (,) J_ <$> count 1 argument <* string "j"
|
||||
stateOperator M = (,) M <$> count 1 argument <* string "M"
|
||||
stateOperator D = (,) D <$> count 2 argument <* string "d"
|
||||
stateOperator Ri = (,) Ri <$> count 1 argument <* string "ri"
|
||||
stateOperator I = (,) I <$> count 1 argument <* string "i"
|
||||
stateOperator Gs = (,) Gs <$> count 1 nameArg <* string "gs"
|
||||
instance Show StateOperator where
|
||||
show Cm = "cm"
|
||||
show W = "w"
|
||||
show J = "J"
|
||||
show J_ = "j"
|
||||
show M = "M"
|
||||
show D = "d"
|
||||
show Ri = "ri"
|
||||
show I = "i"
|
||||
show Gs = "gs"
|
||||
|
||||
{-
|
||||
textOperator :: MonadParser m => TextOperator -> m (Call TextOperator)
|
||||
textOperator Td = (,) Td <$> count 2 argument <* string "Td"
|
||||
textOperator TD = (,) TD <$> count 2 argument <* string "TD"
|
||||
textOperator Tm = (,) Tm <$> count 6 argument <* string "Tm"
|
||||
textOperator Tstar = (,) Td <$> return [] <* string "T*"
|
||||
textOperator TJ = (,) TJ <$> count 1 arrayArg <* string "TJ"
|
||||
textOperator Tj = (,) Tj <$> count 1 stringArg <* string "Tj"
|
||||
textOperator Quote = (,) Quote <$> count 1 stringArg <* string "'"
|
||||
textOperator DQuote = (,) DQuote <$> count 1 stringArg <* string "\""
|
||||
textOperator Tc = (,) Tc <$> count 1 argument <* string "Tc"
|
||||
textOperator Tw = (,) Tw <$> count 1 argument <* string "Tw"
|
||||
textOperator Tz = (,) Tz <$> count 1 argument <* string "Tz"
|
||||
textOperator TL = (,) TL <$> count 1 argument <* string "TL"
|
||||
textOperator Tf = (,) Tf <$> sequence [nameArg, argument] <* string "Tf"
|
||||
textOperator Tr = (,) Tr <$> count 1 argument <* string "Tr"
|
||||
textOperator Ts = (,) Ts <$> count 1 argument <* string "Ts"
|
||||
-}
|
||||
instance Show TextOperator where
|
||||
show Td = "Td"
|
||||
show TD = "TD"
|
||||
show Tm = "Tm"
|
||||
show Tstar = "T*"
|
||||
show TJ = "TJ"
|
||||
show Tj = "Tj"
|
||||
show Quote = "'"
|
||||
show DQuote = "\""
|
||||
show Tc = "Tc"
|
||||
show Tw = "Tw"
|
||||
show Tz = "Tz"
|
||||
show TL = "TL"
|
||||
show Tf = "Tf"
|
||||
show Tr = "Tr"
|
||||
show Ts = "Ts"
|
||||
|
||||
textOperator :: MonadParser m => TextOperator -> m TextOperator
|
||||
textOperator textOp = string (show textOp) *> return textOp
|
||||
stateOperator :: TypeChecker StateOperator
|
||||
stateOperator (Cm, [Raw _, Raw _, Raw _, Raw _, Raw _, Raw _]) = True
|
||||
stateOperator (W, [Raw _]) = True
|
||||
stateOperator (J, [Raw _]) = True
|
||||
stateOperator (J_, [Raw _]) = True
|
||||
stateOperator (M, [Raw _]) = True
|
||||
stateOperator (D, [Raw _, Raw _]) = True
|
||||
stateOperator (Ri, [Raw _]) = True
|
||||
stateOperator (I, [Raw _]) = True
|
||||
stateOperator (Gs, [Typed (NameObject _)]) = True
|
||||
stateOperator _ = False
|
||||
|
||||
textChunk :: MonadParser m => m (Either TextOperator Argument)
|
||||
textChunk =
|
||||
choice $ Left . textOperator <$> [minBound .. maxBound]
|
||||
<|> choice $ Right <$> [stringArg, nameArg, arrayArg, argument]
|
||||
textOperator :: TypeChecker TextOperator
|
||||
textOperator (Td, [Raw _, Raw _]) = True
|
||||
textOperator (TD, [Raw _, Raw _]) = True
|
||||
textOperator (Tm, [Raw _, Raw _, Raw _, Raw _, Raw _, Raw _]) = True
|
||||
textOperator (Tstar, []) = True
|
||||
textOperator (TJ, [Typed (Array _)]) = True
|
||||
textOperator (Tj, [Typed (StringObject _)]) = True
|
||||
textOperator (Quote, [Typed (StringObject _)]) = True
|
||||
textOperator (DQuote, [Typed (StringObject _)]) = True
|
||||
textOperator (Tc, [Raw _]) = True
|
||||
textOperator (Tw, [Raw _]) = True
|
||||
textOperator (Tz, [Raw _]) = True
|
||||
textOperator (TL, [Raw _]) = True
|
||||
textOperator (Tf, [Typed (NameObject _), Raw _]) = True
|
||||
textOperator (Tr, [Raw _]) = True
|
||||
textOperator (Ts, [Raw _]) = True
|
||||
textOperator _ = False
|
||||
|
||||
aTextOperator :: MonadParser m => m (Call TextOperator)
|
||||
type ArgumentStackParser m = (MonadState [Argument] m, MonadParser m)
|
||||
type Operator a = (Bounded a, Enum a, Show a)
|
||||
type TypeChecker a = Call a -> Bool
|
||||
|
||||
a :: (Bounded o, Enum o, MonadParser m) => (o -> m (Call o)) -> m (Call o)
|
||||
a parserGenerator = choice $ parserGenerator <$> [minBound .. maxBound]
|
||||
parseShowable :: (Show a, MonadParser m) => a -> m a
|
||||
parseShowable textOp = string (pack $ show textOp) *> return textOp
|
||||
|
||||
callChunk :: (MonadParser m, Operator a) => m (Either a Argument)
|
||||
callChunk =
|
||||
Left <$> choice (parseShowable <$> [minBound .. maxBound])
|
||||
<|> Right <$> choice [stringArg, nameArg, arrayArg, argument]
|
||||
<?> "call chunk"
|
||||
|
||||
stackParser :: (ArgumentStackParser m, Operator a) => TypeChecker a -> m (Call a)
|
||||
stackParser typeChecker = either popCall push =<< callChunk
|
||||
where
|
||||
push arg = modify (arg:) >> stackParser typeChecker
|
||||
popCall operator = do
|
||||
call <- (,) operator . reverse <$> get
|
||||
if typeChecker call then return call else fail (show call)
|
||||
|
||||
a :: (Operator a, MonadParser m) => TypeChecker a -> m (Call a)
|
||||
a typeChecker = evalStateT (stackParser typeChecker) []
|
||||
|
||||
argument :: MonadParser m => m Argument
|
||||
argument = Raw <$> takeAll regular <* blank
|
||||
argument = Raw <$> takeAll1 regular <* blank
|
||||
|
||||
arrayArg :: MonadParser m => m Argument
|
||||
arrayArg = Typed . Array <$> array <* blank
|
||||
|
@ -107,7 +148,7 @@ text :: ParserWithFont [ByteString]
|
|||
text =
|
||||
string "BT" *> blank *> commands <* blank <* string "ET" <?> "Text operators"
|
||||
where
|
||||
commands = concat <$> (aTextOperator >>= runOperator) `sepBy` blank
|
||||
commands = concat <$> (a textOperator >>= runOperator) `sepBy` blank
|
||||
|
||||
runOperator :: Call TextOperator -> ParserWithFont [ByteString]
|
||||
runOperator (Tf, [Typed (NameObject fontName), _]) =
|
||||
|
|
Loading…
Reference in New Issue