diff --git a/src/PDF/Text.hs b/src/PDF/Text.hs deleted file mode 100644 index e956b4e..0000000 --- a/src/PDF/Text.hs +++ /dev/null @@ -1,319 +0,0 @@ -{-# LANGUAGE NamedFieldPuns #-} -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE ConstraintKinds #-} -{-# LANGUAGE FlexibleContexts #-} -module PDF.Text {-( - pageContents - )-} where - -import Control.Applicative ((<|>)) -import Control.Monad (foldM) -import Control.Monad.Fail (MonadFail) -import Control.Monad.Reader (ReaderT, runReaderT, asks) -import Control.Monad.State (MonadState, evalStateT, get, modify, put) -import Data.Attoparsec.ByteString.Char8 (choice, sepBy) -import Data.ByteString (ByteString) -import Data.ByteString.Char8 (pack, unpack) -import Data.Char (toLower) -import Data.Map ((!), (!?), Map) -import qualified Data.Map as Map (fromList) -import Data.Text (Text) -import PDF.Font (Font, FontSet, emptyFont) -import PDF.Object ( - DirectObject(..), StringObject(..) - , array, blank, name, regular, stringObject, toByteString - ) -import PDF.Parser (MonadParser(..), (), Parser, evalParser) - -data StateOperator = - C_m | W_ | J | J_ | M | D_ | R_i | I_ | G_s -- general graphic state - deriving (Bounded, Enum, Show) -data PathOperator = - M_ | L_ | C_ | V_ | Y_ | H_ | R_e -- path construction - | S | S_ | F_ | F | Fstar | B | Bstar | B_ | B_star | N_ -- path painting - | W | Wstar -- clipping path - deriving (Bounded, Enum, Show) -data ColorOperator = - CS | C_s | SC | SCN | S_c | S_cn | G | G_ | RG | R_g | K | K_ - deriving (Bounded, Enum, Show) -data TextOperator = - Td | TD | Tm | Tstar -- text positioning - | TJ | Tj | Quote | DQuote -- text showing - | Tc | Tw | Tz | TL | Tf | Tr | Ts -- text state - deriving (Bounded, Enum, Show) -data Argument = Raw ByteString | Typed DirectObject deriving Show -type Call a = (a, [Argument]) - -type Operator a = (Bounded a, Enum a, Show a) - -code :: Operator a => a -> ByteString -code = pack . expand . show - where - expand "" = "" - expand (c:'_':s) = toLower c : expand s - expand ('s':'t':'a':'r':s) = '*' : expand s - expand ('Q':'u':'o':'t':'e':s) = '\'' : expand s - expand ('D':'Q':'u':'o':'t':'e':s) = '"' : expand s - expand (c:s) = c : expand s - -{- -instance Show StateOperator where - show Cm = "cm" - show W_ = "w" - show J = "J" - show J_ = "j" - show M = "M" - show D = "d" - show Ri = "ri" - show I = "i" - show Gs = "gs" - -instance Show PathOperator where - show M_ = "m" - show L_ = "l" - show C_ = "c" - show V_ = "v" - show Y_ = "y" - show H_ - ("m", (M_, \l -> case l of [Raw _, Raw _] -> True ; _ -> False)) - , ("l", (L_, \l -> case l of [Raw _, Raw _] -> True ; _ -> False)) - , ("c", (L_, \l -> case l of [Raw _, Raw _, Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False)) - , ("v", (L_, \l -> case l of [Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False)) - , ("y", (L_, \l -> case l of [Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False)) - , ("h", (L_, \l -> case l of [] -> True ; _ -> False)) - , ("re", (L_, \l -> case l of [Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False)) - , ("S", (L_, \l -> case l of [] -> True ; _ -> False)) - , ("s", (L_, \l -> case l of [] -> True ; _ -> False)) - , ("f", (L_, \l -> case l of [] -> True ; _ -> False)) - , ("F", (L_, \l -> case l of [] -> True ; _ -> False)) - , ("F*", (L_, \l -> case l of [] -> True ; _ -> False)) - , ("B", (L_, \l -> case l of [] -> True ; _ -> False)) - , ("B*", (L_, \l -> case l of [] -> True ; _ -> False)) - , ("b", (L_, \l -> case l of [] -> True ; _ -> False)) - , ("b*", (L_, \l -> case l of [] -> True ; _ -> False)) - , ("n", (L_, \l -> case l of [] -> True ; _ -> False)) - , ("W", (L_, \l -> case l of [] -> True ; _ -> False)) - , ("W*", (L_, \l -> case l of [] -> True ; _ -> False)) - -instance Show ColorOperator where - -instance Show TextOperator where - show Td = "Td" - show TD = "TD" - show Tm = "Tm" - show Tstar = "T*" - show TJ = "TJ" - show Tj = "Tj" - show Quote = "'" - show DQuote = "\"" - show Tc = "Tc" - show Tw = "Tw" - show Tz = "Tz" - show TL = "TL" - show Tf = "Tf" - show Tr = "Tr" - show Ts = "Ts" --} - -stateOperator :: OperatorTable StateOperator -stateOperator = Map.fromList $ (\(op, checker) -> (code op, (op, checker))) <$> [ - (C_m, \l -> case l of [Raw _, Raw _, Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False) - , (W_, \l -> case l of [Raw _] -> True ; _ -> False) - , (J, \l -> case l of [Raw _] -> True ; _ -> False) - , (J_, \l -> case l of [Raw _] -> True ; _ -> False) - , (M, \l -> case l of [Raw _] -> True ; _ -> False) - , (D_, \l -> case l of [Raw _, Raw _] -> True ; _ -> False) - , (R_i, \l -> case l of [Raw _] -> True ; _ -> False) - , (I_, \l -> case l of [Raw _] -> True ; _ -> False) - , (G_s, \l -> case l of [Typed (NameObject _)] -> True ; _ -> False) - ] - -pathOperator :: OperatorTable PathOperator -pathOperator = Map.fromList $ (\(op, checker) -> (code op, (op, checker))) <$> [ - (M_, \l -> case l of [Raw _, Raw _] -> True ; _ -> False) - , (L_, \l -> case l of [Raw _, Raw _] -> True ; _ -> False) - , (C_, \l -> case l of [Raw _, Raw _, Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False) - , (V_, \l -> case l of [Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False) - , (Y_, \l -> case l of [Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False) - , (H_, \l -> case l of [] -> True ; _ -> False) - , (R_e, \l -> case l of [Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False) - , (S, \l -> case l of [] -> True ; _ -> False) - , (S_, \l -> case l of [] -> True ; _ -> False) - , (F_, \l -> case l of [] -> True ; _ -> False) - , (F, \l -> case l of [] -> True ; _ -> False) - , (Fstar, \l -> case l of [] -> True ; _ -> False) - , (B, \l -> case l of [] -> True ; _ -> False) - , (Bstar, \l -> case l of [] -> True ; _ -> False) - , (B_, \l -> case l of [] -> True ; _ -> False) - , (B_star, \l -> case l of [] -> True ; _ -> False) - , (N_, \l -> case l of [] -> True ; _ -> False) - , (W, \l -> case l of [] -> True ; _ -> False) - , (Wstar, \l -> case l of [] -> True ; _ -> False) - ] - -colorOperator :: OperatorTable ColorOperator -colorOperator = Map.fromList $ (\(op, checker) -> (code op, (op, checker))) <$> [ - (CS, \l -> case l of [Typed (NameObject _)] -> True ; _ -> False) - , (C_s, \l -> case l of [Typed (NameObject _)] -> True ; _ -> False) - , (SC, \_ -> True) - , (SCN, \_ -> True) - , (S_c, \_ -> True) - , (S_cn, \_ -> True) - , (G, \l -> case l of [Raw _] -> True ; _ -> False) - , (G_, \l -> case l of [Raw _] -> True ; _ -> False) - , (RG, \l -> case l of [Raw _, Raw _, Raw _] -> True ; _ -> False) - , (R_g, \l -> case l of [Raw _, Raw _, Raw _] -> True ; _ -> False) - , (K, \l -> case l of [Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False) - , (K_, \l -> case l of [Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False) - ] - -{- -stateOperator (Cm, [Raw _, Raw _, Raw _, Raw _, Raw _, Raw _]) = True -stateOperator (W, [Raw _]) = True -stateOperator (J, [Raw _]) = True -stateOperator (J_, [Raw _]) = True -stateOperator (M, [Raw _]) = True -stateOperator (D, [Raw _, Raw _]) = True -stateOperator (Ri, [Raw _]) = True -stateOperator (I, [Raw _]) = True -stateOperator (Gs, [Typed (NameObject _)]) = True -stateOperator _ = False --} - -textOperator :: OperatorTable TextOperator -textOperator = Map.fromList $ (\(op, checker) -> (code op, (op, checker))) <$> [ - (Td, \l -> case l of [Raw _, Raw _] -> True ; _ -> False) - , (TD, \l -> case l of [Raw _, Raw _] -> True ; _ -> False) - , (Tm, \l -> case l of [Raw _, Raw _, Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False) - , (Tstar, \l -> case l of [] -> True ; _ -> False) - , (TJ, \l -> case l of [Typed (Array _)] -> True ; _ -> False) - , (Tj, \l -> case l of [Typed (StringObject _)] -> True ; _ -> False) - , (Quote, \l -> case l of [Typed (StringObject _)] -> True ; _ -> False) - , (DQuote, \l -> case l of [Typed (StringObject _)] -> True ; _ -> False) - , (Tc, \l -> case l of [Raw _] -> True ; _ -> False) - , (Tw, \l -> case l of [Raw _] -> True ; _ -> False) - , (Tz, \l -> case l of [Raw _] -> True ; _ -> False) - , (TL, \l -> case l of [Raw _] -> True ; _ -> False) - , (Tf, \l -> case l of [Typed (NameObject _), Raw _] -> True ; _ -> False) - , (Tr, \l -> case l of [Raw _] -> True ; _ -> False) - , (Ts, \l -> case l of [Raw _] -> True ; _ -> False) - ] - - {- -textOperator (Td, [Raw _, Raw _]) = True -textOperator (TD, [Raw _, Raw _]) = True -textOperator (Tm, [Raw _, Raw _, Raw _, Raw _, Raw _, Raw _]) = True -textOperator (Tstar, []) = True -textOperator (TJ, [Typed (Array _)]) = True -textOperator (Tj, [Typed (StringObject _)]) = True -textOperator (Quote, [Typed (StringObject _)]) = True -textOperator (DQuote, [Typed (StringObject _)]) = True -textOperator (Tc, [Raw _]) = True -textOperator (Tw, [Raw _]) = True -textOperator (Tz, [Raw _]) = True -textOperator (TL, [Raw _]) = True -textOperator (Tf, [Typed (NameObject _), Raw _]) = True -textOperator (Tr, [Raw _]) = True -textOperator (Ts, [Raw _]) = True -textOperator _ = False --} - -type ArgumentStackParser m = (MonadState [Argument] m, MonadParser m) ---type Operator a = (Bounded a, Enum a, Show a) -type OperatorTable a = Map ByteString (TypeChecker a) -type TypeChecker a = (a, [Argument] -> Bool) - -parseShowable :: (Show a, MonadParser m) => a -> m a -parseShowable textOp = string (pack $ show textOp) *> return textOp - -callChunk :: MonadParser m => OperatorTable a -> m (Either (TypeChecker a) Argument) -callChunk table = - (Right <$> choice [stringArg, nameArg, arrayArg]) - <|> operatorOrRawArg - "call chunk" - where - operatorOrRawArg = do - chunk <- takeAll1 regular <* blank - case table !? chunk of - Nothing -> return . Right $ Raw chunk - Just typeChecker -> return $ Left typeChecker - -stackParser :: (ArgumentStackParser m, Operator a) => OperatorTable a -> m (Call a) -stackParser table = either popCall push =<< (callChunk table) - where - push arg = modify (arg:) >> stackParser table - popCall (operator, predicate) = do - arguments <- reverse <$> get - let call = (operator, arguments) - if predicate arguments then return call else fail (unpack $ code operator) - -a :: (Operator a, MonadParser m) => OperatorTable a -> m (Call a) -a table = evalStateT (stackParser table) [] - -argument :: MonadParser m => m Argument -argument = Raw <$> takeAll1 regular <* blank - -arrayArg :: MonadParser m => m Argument -arrayArg = Typed . Array <$> array <* blank - -nameArg :: MonadParser m => m Argument -nameArg = Typed . NameObject <$> name <* blank - -stringArg :: MonadParser m => m Argument -stringArg = Typed . StringObject <$> stringObject <* blank - -type ParserWithFont = ReaderT FontSet (Parser Font) - -pageContents :: FontSet -> ByteString -> Either String [Text] -pageContents fontSet input = - evalParser (runReaderT (page) fontSet) emptyFont input - -several :: MonadParser m => m [a] -> m [a] -several p = concat <$> (p `sepBy` blank) - -page :: ParserWithFont [Text] -page = several (graphicState <|> text) <* blank <* endOfInput "Text page contents" - -graphicState :: ParserWithFont [Text] -graphicState = - string "q" *> blank *> insideQ <* blank <* string "Q" "Graphic state" - where - insideQ = several (command <|> graphicState <|> text) - ignore x = a x *> return [] - command = - ignore stateOperator <|> ignore pathOperator <|> ignore colorOperator - -text :: ParserWithFont [Text] -text = - string "BT" *> blank *> commands <* blank <* string "ET" "Text operators" - where - commands = several (a textOperator >>= runOperator) - -runOperator :: Call TextOperator -> ParserWithFont [Text] -runOperator (Tf, [Typed (NameObject fontName), _]) = - asks (! fontName) >>= put >> return [] - -runOperator (Tstar, []) = return ["\n"] - -runOperator (TJ, [Typed (Array arrayObject)]) = - replicate 1 <$> foldM appendText "" arrayObject - where - appendText bs (StringObject outputString) = - mappend bs <$> decodeString outputString - appendText bs _ = return bs - -runOperator (Tj, [Typed (StringObject outputString)]) = - replicate 1 <$> decodeString outputString - -runOperator (Quote, [Typed (StringObject outputString)]) = - (\bs -> ["\n", bs]) <$> decodeString outputString - -runOperator (DQuote, [Typed (StringObject outputString)]) = - (\bs -> ["\n", bs]) <$> decodeString outputString - -runOperator _ = return [] - -decodeString :: (MonadFail m, MonadState Font m) => StringObject -> m Text -decodeString input = do - font <- get - either fail return . font $ toByteString input