diff --git a/src/PDF/Text.hs b/src/PDF/Text.hs index bdceb64..9b34b13 100644 --- a/src/PDF/Text.hs +++ b/src/PDF/Text.hs @@ -13,7 +13,8 @@ import Control.Monad.Reader (ReaderT, runReaderT, asks) import Control.Monad.State (MonadState, evalStateT, get, modify, put) import Data.Attoparsec.ByteString.Char8 (choice, sepBy) import Data.ByteString (ByteString) -import Data.ByteString.Char8 (pack) +import Data.ByteString.Char8 (pack, unpack) +import Data.Char (toLower) import Data.Map ((!), (!?), Map) import qualified Data.Map as Map (fromList) import PDF.Font (Font, FontSet, emptyFont) @@ -21,22 +22,43 @@ import PDF.Object ( DirectObject(..), StringObject(..) , array, blank, name, regular, stringObject, toByteString ) -import PDF.Parser (MonadParser, (), Parser, evalParser, string, takeAll1) +import PDF.Parser (MonadParser(..), (), Parser, evalParser) data StateOperator = - Cm | W | J | J_ | M | D | Ri | I | Gs -- general graphic state - deriving (Bounded, Enum) + C_m | W_ | J | J_ | M | D_ | R_i | I_ | G_s -- general graphic state + deriving (Bounded, Enum, Show) +data PathOperator = + M_ | L_ | C_ | V_ | Y_ | H_ | R_e -- path construction + | S | S_ | F_ | F | Fstar | B | Bstar | B_ | B_star | N_ -- path painting + | W | Wstar -- clipping path + deriving (Bounded, Enum, Show) +data ColorOperator = + CS | C_s | SC | SCN | S_c | S_cn | G | G_ | RG | R_g | K | K_ + deriving (Bounded, Enum, Show) data TextOperator = Td | TD | Tm | Tstar -- text positioning | TJ | Tj | Quote | DQuote -- text showing | Tc | Tw | Tz | TL | Tf | Tr | Ts -- text state - deriving (Bounded, Enum) + deriving (Bounded, Enum, Show) data Argument = Raw ByteString | Typed DirectObject deriving Show type Call a = (a, [Argument]) +type Operator a = (Bounded a, Enum a, Show a) + +code :: Operator a => a -> ByteString +code = pack . expand . show + where + expand "" = "" + expand (c:'_':s) = toLower c : expand s + expand ('s':'t':'a':'r':s) = '*' : expand s + expand ('Q':'u':'o':'t':'e':s) = '\'' : expand s + expand ('D':'Q':'u':'o':'t':'e':s) = '"' : expand s + expand (c:s) = c : expand s + +{- instance Show StateOperator where show Cm = "cm" - show W = "w" + show W_ = "w" show J = "J" show J_ = "j" show M = "M" @@ -45,6 +67,35 @@ instance Show StateOperator where show I = "i" show Gs = "gs" +instance Show PathOperator where + show M_ = "m" + show L_ = "l" + show C_ = "c" + show V_ = "v" + show Y_ = "y" + show H_ + ("m", (M_, \l -> case l of [Raw _, Raw _] -> True ; _ -> False)) + , ("l", (L_, \l -> case l of [Raw _, Raw _] -> True ; _ -> False)) + , ("c", (L_, \l -> case l of [Raw _, Raw _, Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False)) + , ("v", (L_, \l -> case l of [Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False)) + , ("y", (L_, \l -> case l of [Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False)) + , ("h", (L_, \l -> case l of [] -> True ; _ -> False)) + , ("re", (L_, \l -> case l of [Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False)) + , ("S", (L_, \l -> case l of [] -> True ; _ -> False)) + , ("s", (L_, \l -> case l of [] -> True ; _ -> False)) + , ("f", (L_, \l -> case l of [] -> True ; _ -> False)) + , ("F", (L_, \l -> case l of [] -> True ; _ -> False)) + , ("F*", (L_, \l -> case l of [] -> True ; _ -> False)) + , ("B", (L_, \l -> case l of [] -> True ; _ -> False)) + , ("B*", (L_, \l -> case l of [] -> True ; _ -> False)) + , ("b", (L_, \l -> case l of [] -> True ; _ -> False)) + , ("b*", (L_, \l -> case l of [] -> True ; _ -> False)) + , ("n", (L_, \l -> case l of [] -> True ; _ -> False)) + , ("W", (L_, \l -> case l of [] -> True ; _ -> False)) + , ("W*", (L_, \l -> case l of [] -> True ; _ -> False)) + +instance Show ColorOperator where + instance Show TextOperator where show Td = "Td" show TD = "TD" @@ -61,18 +112,58 @@ instance Show TextOperator where show Tf = "Tf" show Tr = "Tr" show Ts = "Ts" +-} stateOperator :: OperatorTable StateOperator -stateOperator = Map.fromList [ - ("cm", (Cm, \l -> case l of [Raw _, Raw _, Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False)) - , ("w", (W, \l -> case l of [Raw _] -> True ; _ -> False)) - , ("J", (J, \l -> case l of [Raw _] -> True ; _ -> False)) - , ("j", (J_, \l -> case l of [Raw _] -> True ; _ -> False)) - , ("M", (M, \l -> case l of [Raw _] -> True ; _ -> False)) - , ("d", (D, \l -> case l of [Raw _, Raw _] -> True ; _ -> False)) - , ("ri", (Ri, \l -> case l of [Raw _] -> True ; _ -> False)) - , ("i", (I, \l -> case l of [Raw _] -> True ; _ -> False)) - , ("gs", (Gs, \l -> case l of [Typed (NameObject _)] -> True ; _ -> False)) +stateOperator = Map.fromList $ (\(op, checker) -> (code op, (op, checker))) <$> [ + (C_m, \l -> case l of [Raw _, Raw _, Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False) + , (W_, \l -> case l of [Raw _] -> True ; _ -> False) + , (J, \l -> case l of [Raw _] -> True ; _ -> False) + , (J_, \l -> case l of [Raw _] -> True ; _ -> False) + , (M, \l -> case l of [Raw _] -> True ; _ -> False) + , (D_, \l -> case l of [Raw _, Raw _] -> True ; _ -> False) + , (R_i, \l -> case l of [Raw _] -> True ; _ -> False) + , (I_, \l -> case l of [Raw _] -> True ; _ -> False) + , (G_s, \l -> case l of [Typed (NameObject _)] -> True ; _ -> False) + ] + +pathOperator :: OperatorTable PathOperator +pathOperator = Map.fromList $ (\(op, checker) -> (code op, (op, checker))) <$> [ + (M_, \l -> case l of [Raw _, Raw _] -> True ; _ -> False) + , (L_, \l -> case l of [Raw _, Raw _] -> True ; _ -> False) + , (C_, \l -> case l of [Raw _, Raw _, Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False) + , (V_, \l -> case l of [Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False) + , (Y_, \l -> case l of [Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False) + , (H_, \l -> case l of [] -> True ; _ -> False) + , (R_e, \l -> case l of [Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False) + , (S, \l -> case l of [] -> True ; _ -> False) + , (S_, \l -> case l of [] -> True ; _ -> False) + , (F_, \l -> case l of [] -> True ; _ -> False) + , (F, \l -> case l of [] -> True ; _ -> False) + , (Fstar, \l -> case l of [] -> True ; _ -> False) + , (B, \l -> case l of [] -> True ; _ -> False) + , (Bstar, \l -> case l of [] -> True ; _ -> False) + , (B_, \l -> case l of [] -> True ; _ -> False) + , (B_star, \l -> case l of [] -> True ; _ -> False) + , (N_, \l -> case l of [] -> True ; _ -> False) + , (W, \l -> case l of [] -> True ; _ -> False) + , (Wstar, \l -> case l of [] -> True ; _ -> False) + ] + +colorOperator :: OperatorTable ColorOperator +colorOperator = Map.fromList $ (\(op, checker) -> (code op, (op, checker))) <$> [ + (CS, \l -> case l of [Typed (NameObject _)] -> True ; _ -> False) + , (C_s, \l -> case l of [Typed (NameObject _)] -> True ; _ -> False) + , (SC, \_ -> True) + , (SCN, \_ -> True) + , (S_c, \_ -> True) + , (S_cn, \_ -> True) + , (G, \l -> case l of [Raw _] -> True ; _ -> False) + , (G_, \l -> case l of [Raw _] -> True ; _ -> False) + , (RG, \l -> case l of [Raw _, Raw _, Raw _] -> True ; _ -> False) + , (R_g, \l -> case l of [Raw _, Raw _, Raw _] -> True ; _ -> False) + , (K, \l -> case l of [Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False) + , (K_, \l -> case l of [Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False) ] {- @@ -89,7 +180,7 @@ stateOperator _ = False -} textOperator :: OperatorTable TextOperator -textOperator = Map.fromList $ (\(op, checker) -> (pack $ show op, (op, checker))) <$> [ +textOperator = Map.fromList $ (\(op, checker) -> (code op, (op, checker))) <$> [ (Td, \l -> case l of [Raw _, Raw _] -> True ; _ -> False) , (TD, \l -> case l of [Raw _, Raw _] -> True ; _ -> False) , (Tm, \l -> case l of [Raw _, Raw _, Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False) @@ -127,7 +218,7 @@ textOperator _ = False -} type ArgumentStackParser m = (MonadState [Argument] m, MonadParser m) -type Operator a = (Bounded a, Enum a, Show a) +--type Operator a = (Bounded a, Enum a, Show a) type OperatorTable a = Map ByteString (TypeChecker a) type TypeChecker a = (a, [Argument] -> Bool) @@ -146,14 +237,14 @@ callChunk table = Nothing -> return . Right $ Raw chunk Just typeChecker -> return $ Left typeChecker -stackParser :: (ArgumentStackParser m, Show a) => OperatorTable a -> m (Call a) +stackParser :: (ArgumentStackParser m, Operator a) => OperatorTable a -> m (Call a) stackParser table = either popCall push =<< (callChunk table) where push arg = modify (arg:) >> stackParser table popCall (operator, predicate) = do arguments <- reverse <$> get let call = (operator, arguments) - if predicate arguments then return call else fail (show call) + if predicate arguments then return call else fail (unpack $ code operator) a :: (Operator a, MonadParser m) => OperatorTable a -> m (Call a) a table = evalStateT (stackParser table) [] @@ -173,24 +264,29 @@ stringArg = Typed . StringObject <$> stringObject <* blank type ParserWithFont = ReaderT FontSet (Parser Font) pageContents :: FontSet -> ByteString -> Either String [ByteString] -pageContents font input = - evalParser (runReaderT page font) emptyFont input +pageContents fontSet input = + evalParser (runReaderT (page) fontSet) emptyFont input + +several :: MonadParser m => m [a] -> m [a] +several p = concat <$> (p `sepBy` blank) page :: ParserWithFont [ByteString] -page = graphicState <|> text "Text page contents" +page = several (graphicState <|> text) <* blank <* endOfInput "Text page contents" graphicState :: ParserWithFont [ByteString] graphicState = - (string "q" *> blank *> insideQ <* blank <* string "Q") "Graphic state" + string "q" *> blank *> insideQ <* blank <* string "Q" "Graphic state" where - insideQ = concat <$> ((command <|> page) `sepBy` blank ) - command = a stateOperator *> return [] + insideQ = several (command <|> graphicState <|> text) + ignore x = a x *> return [] + command = + ignore stateOperator <|> ignore pathOperator <|> ignore colorOperator text :: ParserWithFont [ByteString] text = string "BT" *> blank *> commands <* blank <* string "ET" "Text operators" where - commands = concat <$> (a textOperator >>= runOperator) `sepBy` blank + commands = several (a textOperator >>= runOperator) runOperator :: Call TextOperator -> ParserWithFont [ByteString] runOperator (Tf, [Typed (NameObject fontName), _]) =