diff --git a/src/PDF/Text.hs b/src/PDF/Text.hs index a880b93..70b15b9 100644 --- a/src/PDF/Text.hs +++ b/src/PDF/Text.hs @@ -1,17 +1,19 @@ {-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE OverloadedStrings #-} -module PDF.Text ( +{-# LANGUAGE ConstraintKinds #-} +{-# LANGUAGE FlexibleContexts #-} +module PDF.Text {-( pageContents - ) where + )-} where import Control.Applicative ((<|>)) import Control.Monad (foldM) import Control.Monad.Reader (ReaderT, runReaderT, asks) -import Control.Monad.State (get, put) -import Data.Attoparsec.ByteString.Char8 (choice, count, sepBy) +import Control.Monad.State (MonadState, evalStateT, get, modify, put) +import Data.Attoparsec.ByteString.Char8 (choice, sepBy) import Data.ByteString (ByteString) import qualified Data.ByteString as BS (drop, null, take) -import Data.ByteString.Char8 (unpack) +import Data.ByteString.Char8 (pack, unpack) import Data.Map ((!)) import qualified Data.Map as Map (lookup, toList) import PDF.CMap (CMappers, CMap, CRange(..), emptyCMap) @@ -19,64 +21,103 @@ import PDF.Object ( DirectObject(..), StringObject(..) , array, blank, name, regular, stringObject, toByteString ) -import PDF.Parser (MonadParser, (), Parser, evalParser, string, takeAll) +import PDF.Parser (MonadParser, (), Parser, evalParser, string, takeAll1) data StateOperator = Cm | W | J | J_ | M | D | Ri | I | Gs -- general graphic state - deriving (Bounded, Enum, Show) + deriving (Bounded, Enum) data TextOperator = Td | TD | Tm | Tstar -- text positioning | TJ | Tj | Quote | DQuote -- text showing | Tc | Tw | Tz | TL | Tf | Tr | Ts -- text state - deriving (Bounded, Enum, Show) + deriving (Bounded, Enum) data Argument = Raw ByteString | Typed DirectObject deriving Show type Call a = (a, [Argument]) -stateOperator :: MonadParser m => StateOperator -> m (Call StateOperator) -stateOperator Cm = (,) Cm <$> count 6 argument <* string "cm" -stateOperator W = (,) W <$> count 1 argument <* string "w" -stateOperator J = (,) J <$> count 1 argument <* string "J" -stateOperator J_ = (,) J_ <$> count 1 argument <* string "j" -stateOperator M = (,) M <$> count 1 argument <* string "M" -stateOperator D = (,) D <$> count 2 argument <* string "d" -stateOperator Ri = (,) Ri <$> count 1 argument <* string "ri" -stateOperator I = (,) I <$> count 1 argument <* string "i" -stateOperator Gs = (,) Gs <$> count 1 nameArg <* string "gs" +instance Show StateOperator where + show Cm = "cm" + show W = "w" + show J = "J" + show J_ = "j" + show M = "M" + show D = "d" + show Ri = "ri" + show I = "i" + show Gs = "gs" -{- -textOperator :: MonadParser m => TextOperator -> m (Call TextOperator) -textOperator Td = (,) Td <$> count 2 argument <* string "Td" -textOperator TD = (,) TD <$> count 2 argument <* string "TD" -textOperator Tm = (,) Tm <$> count 6 argument <* string "Tm" -textOperator Tstar = (,) Td <$> return [] <* string "T*" -textOperator TJ = (,) TJ <$> count 1 arrayArg <* string "TJ" -textOperator Tj = (,) Tj <$> count 1 stringArg <* string "Tj" -textOperator Quote = (,) Quote <$> count 1 stringArg <* string "'" -textOperator DQuote = (,) DQuote <$> count 1 stringArg <* string "\"" -textOperator Tc = (,) Tc <$> count 1 argument <* string "Tc" -textOperator Tw = (,) Tw <$> count 1 argument <* string "Tw" -textOperator Tz = (,) Tz <$> count 1 argument <* string "Tz" -textOperator TL = (,) TL <$> count 1 argument <* string "TL" -textOperator Tf = (,) Tf <$> sequence [nameArg, argument] <* string "Tf" -textOperator Tr = (,) Tr <$> count 1 argument <* string "Tr" -textOperator Ts = (,) Ts <$> count 1 argument <* string "Ts" --} +instance Show TextOperator where + show Td = "Td" + show TD = "TD" + show Tm = "Tm" + show Tstar = "T*" + show TJ = "TJ" + show Tj = "Tj" + show Quote = "'" + show DQuote = "\"" + show Tc = "Tc" + show Tw = "Tw" + show Tz = "Tz" + show TL = "TL" + show Tf = "Tf" + show Tr = "Tr" + show Ts = "Ts" -textOperator :: MonadParser m => TextOperator -> m TextOperator -textOperator textOp = string (show textOp) *> return textOp +stateOperator :: TypeChecker StateOperator +stateOperator (Cm, [Raw _, Raw _, Raw _, Raw _, Raw _, Raw _]) = True +stateOperator (W, [Raw _]) = True +stateOperator (J, [Raw _]) = True +stateOperator (J_, [Raw _]) = True +stateOperator (M, [Raw _]) = True +stateOperator (D, [Raw _, Raw _]) = True +stateOperator (Ri, [Raw _]) = True +stateOperator (I, [Raw _]) = True +stateOperator (Gs, [Typed (NameObject _)]) = True +stateOperator _ = False -textChunk :: MonadParser m => m (Either TextOperator Argument) -textChunk = - choice $ Left . textOperator <$> [minBound .. maxBound] - <|> choice $ Right <$> [stringArg, nameArg, arrayArg, argument] +textOperator :: TypeChecker TextOperator +textOperator (Td, [Raw _, Raw _]) = True +textOperator (TD, [Raw _, Raw _]) = True +textOperator (Tm, [Raw _, Raw _, Raw _, Raw _, Raw _, Raw _]) = True +textOperator (Tstar, []) = True +textOperator (TJ, [Typed (Array _)]) = True +textOperator (Tj, [Typed (StringObject _)]) = True +textOperator (Quote, [Typed (StringObject _)]) = True +textOperator (DQuote, [Typed (StringObject _)]) = True +textOperator (Tc, [Raw _]) = True +textOperator (Tw, [Raw _]) = True +textOperator (Tz, [Raw _]) = True +textOperator (TL, [Raw _]) = True +textOperator (Tf, [Typed (NameObject _), Raw _]) = True +textOperator (Tr, [Raw _]) = True +textOperator (Ts, [Raw _]) = True +textOperator _ = False -aTextOperator :: MonadParser m => m (Call TextOperator) +type ArgumentStackParser m = (MonadState [Argument] m, MonadParser m) +type Operator a = (Bounded a, Enum a, Show a) +type TypeChecker a = Call a -> Bool -a :: (Bounded o, Enum o, MonadParser m) => (o -> m (Call o)) -> m (Call o) -a parserGenerator = choice $ parserGenerator <$> [minBound .. maxBound] +parseShowable :: (Show a, MonadParser m) => a -> m a +parseShowable textOp = string (pack $ show textOp) *> return textOp + +callChunk :: (MonadParser m, Operator a) => m (Either a Argument) +callChunk = + Left <$> choice (parseShowable <$> [minBound .. maxBound]) + <|> Right <$> choice [stringArg, nameArg, arrayArg, argument] + "call chunk" + +stackParser :: (ArgumentStackParser m, Operator a) => TypeChecker a -> m (Call a) +stackParser typeChecker = either popCall push =<< callChunk + where + push arg = modify (arg:) >> stackParser typeChecker + popCall operator = do + call <- (,) operator . reverse <$> get + if typeChecker call then return call else fail (show call) + +a :: (Operator a, MonadParser m) => TypeChecker a -> m (Call a) +a typeChecker = evalStateT (stackParser typeChecker) [] argument :: MonadParser m => m Argument -argument = Raw <$> takeAll regular <* blank +argument = Raw <$> takeAll1 regular <* blank arrayArg :: MonadParser m => m Argument arrayArg = Typed . Array <$> array <* blank @@ -107,7 +148,7 @@ text :: ParserWithFont [ByteString] text = string "BT" *> blank *> commands <* blank <* string "ET" "Text operators" where - commands = concat <$> (aTextOperator >>= runOperator) `sepBy` blank + commands = concat <$> (a textOperator >>= runOperator) `sepBy` blank runOperator :: Call TextOperator -> ParserWithFont [ByteString] runOperator (Tf, [Typed (NameObject fontName), _]) =