Forgot to remove deprecated source file
This commit is contained in:
parent
09bd706748
commit
c491e8a70c
1 changed files with 0 additions and 319 deletions
319
src/PDF/Text.hs
319
src/PDF/Text.hs
|
@ -1,319 +0,0 @@
|
|||
{-# LANGUAGE NamedFieldPuns #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE ConstraintKinds #-}
|
||||
{-# LANGUAGE FlexibleContexts #-}
|
||||
module PDF.Text {-(
|
||||
pageContents
|
||||
)-} where
|
||||
|
||||
import Control.Applicative ((<|>))
|
||||
import Control.Monad (foldM)
|
||||
import Control.Monad.Fail (MonadFail)
|
||||
import Control.Monad.Reader (ReaderT, runReaderT, asks)
|
||||
import Control.Monad.State (MonadState, evalStateT, get, modify, put)
|
||||
import Data.Attoparsec.ByteString.Char8 (choice, sepBy)
|
||||
import Data.ByteString (ByteString)
|
||||
import Data.ByteString.Char8 (pack, unpack)
|
||||
import Data.Char (toLower)
|
||||
import Data.Map ((!), (!?), Map)
|
||||
import qualified Data.Map as Map (fromList)
|
||||
import Data.Text (Text)
|
||||
import PDF.Font (Font, FontSet, emptyFont)
|
||||
import PDF.Object (
|
||||
DirectObject(..), StringObject(..)
|
||||
, array, blank, name, regular, stringObject, toByteString
|
||||
)
|
||||
import PDF.Parser (MonadParser(..), (<?>), Parser, evalParser)
|
||||
|
||||
data StateOperator =
|
||||
C_m | W_ | J | J_ | M | D_ | R_i | I_ | G_s -- general graphic state
|
||||
deriving (Bounded, Enum, Show)
|
||||
data PathOperator =
|
||||
M_ | L_ | C_ | V_ | Y_ | H_ | R_e -- path construction
|
||||
| S | S_ | F_ | F | Fstar | B | Bstar | B_ | B_star | N_ -- path painting
|
||||
| W | Wstar -- clipping path
|
||||
deriving (Bounded, Enum, Show)
|
||||
data ColorOperator =
|
||||
CS | C_s | SC | SCN | S_c | S_cn | G | G_ | RG | R_g | K | K_
|
||||
deriving (Bounded, Enum, Show)
|
||||
data TextOperator =
|
||||
Td | TD | Tm | Tstar -- text positioning
|
||||
| TJ | Tj | Quote | DQuote -- text showing
|
||||
| Tc | Tw | Tz | TL | Tf | Tr | Ts -- text state
|
||||
deriving (Bounded, Enum, Show)
|
||||
data Argument = Raw ByteString | Typed DirectObject deriving Show
|
||||
type Call a = (a, [Argument])
|
||||
|
||||
type Operator a = (Bounded a, Enum a, Show a)
|
||||
|
||||
code :: Operator a => a -> ByteString
|
||||
code = pack . expand . show
|
||||
where
|
||||
expand "" = ""
|
||||
expand (c:'_':s) = toLower c : expand s
|
||||
expand ('s':'t':'a':'r':s) = '*' : expand s
|
||||
expand ('Q':'u':'o':'t':'e':s) = '\'' : expand s
|
||||
expand ('D':'Q':'u':'o':'t':'e':s) = '"' : expand s
|
||||
expand (c:s) = c : expand s
|
||||
|
||||
{-
|
||||
instance Show StateOperator where
|
||||
show Cm = "cm"
|
||||
show W_ = "w"
|
||||
show J = "J"
|
||||
show J_ = "j"
|
||||
show M = "M"
|
||||
show D = "d"
|
||||
show Ri = "ri"
|
||||
show I = "i"
|
||||
show Gs = "gs"
|
||||
|
||||
instance Show PathOperator where
|
||||
show M_ = "m"
|
||||
show L_ = "l"
|
||||
show C_ = "c"
|
||||
show V_ = "v"
|
||||
show Y_ = "y"
|
||||
show H_
|
||||
("m", (M_, \l -> case l of [Raw _, Raw _] -> True ; _ -> False))
|
||||
, ("l", (L_, \l -> case l of [Raw _, Raw _] -> True ; _ -> False))
|
||||
, ("c", (L_, \l -> case l of [Raw _, Raw _, Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False))
|
||||
, ("v", (L_, \l -> case l of [Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False))
|
||||
, ("y", (L_, \l -> case l of [Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False))
|
||||
, ("h", (L_, \l -> case l of [] -> True ; _ -> False))
|
||||
, ("re", (L_, \l -> case l of [Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False))
|
||||
, ("S", (L_, \l -> case l of [] -> True ; _ -> False))
|
||||
, ("s", (L_, \l -> case l of [] -> True ; _ -> False))
|
||||
, ("f", (L_, \l -> case l of [] -> True ; _ -> False))
|
||||
, ("F", (L_, \l -> case l of [] -> True ; _ -> False))
|
||||
, ("F*", (L_, \l -> case l of [] -> True ; _ -> False))
|
||||
, ("B", (L_, \l -> case l of [] -> True ; _ -> False))
|
||||
, ("B*", (L_, \l -> case l of [] -> True ; _ -> False))
|
||||
, ("b", (L_, \l -> case l of [] -> True ; _ -> False))
|
||||
, ("b*", (L_, \l -> case l of [] -> True ; _ -> False))
|
||||
, ("n", (L_, \l -> case l of [] -> True ; _ -> False))
|
||||
, ("W", (L_, \l -> case l of [] -> True ; _ -> False))
|
||||
, ("W*", (L_, \l -> case l of [] -> True ; _ -> False))
|
||||
|
||||
instance Show ColorOperator where
|
||||
|
||||
instance Show TextOperator where
|
||||
show Td = "Td"
|
||||
show TD = "TD"
|
||||
show Tm = "Tm"
|
||||
show Tstar = "T*"
|
||||
show TJ = "TJ"
|
||||
show Tj = "Tj"
|
||||
show Quote = "'"
|
||||
show DQuote = "\""
|
||||
show Tc = "Tc"
|
||||
show Tw = "Tw"
|
||||
show Tz = "Tz"
|
||||
show TL = "TL"
|
||||
show Tf = "Tf"
|
||||
show Tr = "Tr"
|
||||
show Ts = "Ts"
|
||||
-}
|
||||
|
||||
stateOperator :: OperatorTable StateOperator
|
||||
stateOperator = Map.fromList $ (\(op, checker) -> (code op, (op, checker))) <$> [
|
||||
(C_m, \l -> case l of [Raw _, Raw _, Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False)
|
||||
, (W_, \l -> case l of [Raw _] -> True ; _ -> False)
|
||||
, (J, \l -> case l of [Raw _] -> True ; _ -> False)
|
||||
, (J_, \l -> case l of [Raw _] -> True ; _ -> False)
|
||||
, (M, \l -> case l of [Raw _] -> True ; _ -> False)
|
||||
, (D_, \l -> case l of [Raw _, Raw _] -> True ; _ -> False)
|
||||
, (R_i, \l -> case l of [Raw _] -> True ; _ -> False)
|
||||
, (I_, \l -> case l of [Raw _] -> True ; _ -> False)
|
||||
, (G_s, \l -> case l of [Typed (NameObject _)] -> True ; _ -> False)
|
||||
]
|
||||
|
||||
pathOperator :: OperatorTable PathOperator
|
||||
pathOperator = Map.fromList $ (\(op, checker) -> (code op, (op, checker))) <$> [
|
||||
(M_, \l -> case l of [Raw _, Raw _] -> True ; _ -> False)
|
||||
, (L_, \l -> case l of [Raw _, Raw _] -> True ; _ -> False)
|
||||
, (C_, \l -> case l of [Raw _, Raw _, Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False)
|
||||
, (V_, \l -> case l of [Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False)
|
||||
, (Y_, \l -> case l of [Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False)
|
||||
, (H_, \l -> case l of [] -> True ; _ -> False)
|
||||
, (R_e, \l -> case l of [Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False)
|
||||
, (S, \l -> case l of [] -> True ; _ -> False)
|
||||
, (S_, \l -> case l of [] -> True ; _ -> False)
|
||||
, (F_, \l -> case l of [] -> True ; _ -> False)
|
||||
, (F, \l -> case l of [] -> True ; _ -> False)
|
||||
, (Fstar, \l -> case l of [] -> True ; _ -> False)
|
||||
, (B, \l -> case l of [] -> True ; _ -> False)
|
||||
, (Bstar, \l -> case l of [] -> True ; _ -> False)
|
||||
, (B_, \l -> case l of [] -> True ; _ -> False)
|
||||
, (B_star, \l -> case l of [] -> True ; _ -> False)
|
||||
, (N_, \l -> case l of [] -> True ; _ -> False)
|
||||
, (W, \l -> case l of [] -> True ; _ -> False)
|
||||
, (Wstar, \l -> case l of [] -> True ; _ -> False)
|
||||
]
|
||||
|
||||
colorOperator :: OperatorTable ColorOperator
|
||||
colorOperator = Map.fromList $ (\(op, checker) -> (code op, (op, checker))) <$> [
|
||||
(CS, \l -> case l of [Typed (NameObject _)] -> True ; _ -> False)
|
||||
, (C_s, \l -> case l of [Typed (NameObject _)] -> True ; _ -> False)
|
||||
, (SC, \_ -> True)
|
||||
, (SCN, \_ -> True)
|
||||
, (S_c, \_ -> True)
|
||||
, (S_cn, \_ -> True)
|
||||
, (G, \l -> case l of [Raw _] -> True ; _ -> False)
|
||||
, (G_, \l -> case l of [Raw _] -> True ; _ -> False)
|
||||
, (RG, \l -> case l of [Raw _, Raw _, Raw _] -> True ; _ -> False)
|
||||
, (R_g, \l -> case l of [Raw _, Raw _, Raw _] -> True ; _ -> False)
|
||||
, (K, \l -> case l of [Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False)
|
||||
, (K_, \l -> case l of [Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False)
|
||||
]
|
||||
|
||||
{-
|
||||
stateOperator (Cm, [Raw _, Raw _, Raw _, Raw _, Raw _, Raw _]) = True
|
||||
stateOperator (W, [Raw _]) = True
|
||||
stateOperator (J, [Raw _]) = True
|
||||
stateOperator (J_, [Raw _]) = True
|
||||
stateOperator (M, [Raw _]) = True
|
||||
stateOperator (D, [Raw _, Raw _]) = True
|
||||
stateOperator (Ri, [Raw _]) = True
|
||||
stateOperator (I, [Raw _]) = True
|
||||
stateOperator (Gs, [Typed (NameObject _)]) = True
|
||||
stateOperator _ = False
|
||||
-}
|
||||
|
||||
textOperator :: OperatorTable TextOperator
|
||||
textOperator = Map.fromList $ (\(op, checker) -> (code op, (op, checker))) <$> [
|
||||
(Td, \l -> case l of [Raw _, Raw _] -> True ; _ -> False)
|
||||
, (TD, \l -> case l of [Raw _, Raw _] -> True ; _ -> False)
|
||||
, (Tm, \l -> case l of [Raw _, Raw _, Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False)
|
||||
, (Tstar, \l -> case l of [] -> True ; _ -> False)
|
||||
, (TJ, \l -> case l of [Typed (Array _)] -> True ; _ -> False)
|
||||
, (Tj, \l -> case l of [Typed (StringObject _)] -> True ; _ -> False)
|
||||
, (Quote, \l -> case l of [Typed (StringObject _)] -> True ; _ -> False)
|
||||
, (DQuote, \l -> case l of [Typed (StringObject _)] -> True ; _ -> False)
|
||||
, (Tc, \l -> case l of [Raw _] -> True ; _ -> False)
|
||||
, (Tw, \l -> case l of [Raw _] -> True ; _ -> False)
|
||||
, (Tz, \l -> case l of [Raw _] -> True ; _ -> False)
|
||||
, (TL, \l -> case l of [Raw _] -> True ; _ -> False)
|
||||
, (Tf, \l -> case l of [Typed (NameObject _), Raw _] -> True ; _ -> False)
|
||||
, (Tr, \l -> case l of [Raw _] -> True ; _ -> False)
|
||||
, (Ts, \l -> case l of [Raw _] -> True ; _ -> False)
|
||||
]
|
||||
|
||||
{-
|
||||
textOperator (Td, [Raw _, Raw _]) = True
|
||||
textOperator (TD, [Raw _, Raw _]) = True
|
||||
textOperator (Tm, [Raw _, Raw _, Raw _, Raw _, Raw _, Raw _]) = True
|
||||
textOperator (Tstar, []) = True
|
||||
textOperator (TJ, [Typed (Array _)]) = True
|
||||
textOperator (Tj, [Typed (StringObject _)]) = True
|
||||
textOperator (Quote, [Typed (StringObject _)]) = True
|
||||
textOperator (DQuote, [Typed (StringObject _)]) = True
|
||||
textOperator (Tc, [Raw _]) = True
|
||||
textOperator (Tw, [Raw _]) = True
|
||||
textOperator (Tz, [Raw _]) = True
|
||||
textOperator (TL, [Raw _]) = True
|
||||
textOperator (Tf, [Typed (NameObject _), Raw _]) = True
|
||||
textOperator (Tr, [Raw _]) = True
|
||||
textOperator (Ts, [Raw _]) = True
|
||||
textOperator _ = False
|
||||
-}
|
||||
|
||||
type ArgumentStackParser m = (MonadState [Argument] m, MonadParser m)
|
||||
--type Operator a = (Bounded a, Enum a, Show a)
|
||||
type OperatorTable a = Map ByteString (TypeChecker a)
|
||||
type TypeChecker a = (a, [Argument] -> Bool)
|
||||
|
||||
parseShowable :: (Show a, MonadParser m) => a -> m a
|
||||
parseShowable textOp = string (pack $ show textOp) *> return textOp
|
||||
|
||||
callChunk :: MonadParser m => OperatorTable a -> m (Either (TypeChecker a) Argument)
|
||||
callChunk table =
|
||||
(Right <$> choice [stringArg, nameArg, arrayArg])
|
||||
<|> operatorOrRawArg
|
||||
<?> "call chunk"
|
||||
where
|
||||
operatorOrRawArg = do
|
||||
chunk <- takeAll1 regular <* blank
|
||||
case table !? chunk of
|
||||
Nothing -> return . Right $ Raw chunk
|
||||
Just typeChecker -> return $ Left typeChecker
|
||||
|
||||
stackParser :: (ArgumentStackParser m, Operator a) => OperatorTable a -> m (Call a)
|
||||
stackParser table = either popCall push =<< (callChunk table)
|
||||
where
|
||||
push arg = modify (arg:) >> stackParser table
|
||||
popCall (operator, predicate) = do
|
||||
arguments <- reverse <$> get
|
||||
let call = (operator, arguments)
|
||||
if predicate arguments then return call else fail (unpack $ code operator)
|
||||
|
||||
a :: (Operator a, MonadParser m) => OperatorTable a -> m (Call a)
|
||||
a table = evalStateT (stackParser table) []
|
||||
|
||||
argument :: MonadParser m => m Argument
|
||||
argument = Raw <$> takeAll1 regular <* blank
|
||||
|
||||
arrayArg :: MonadParser m => m Argument
|
||||
arrayArg = Typed . Array <$> array <* blank
|
||||
|
||||
nameArg :: MonadParser m => m Argument
|
||||
nameArg = Typed . NameObject <$> name <* blank
|
||||
|
||||
stringArg :: MonadParser m => m Argument
|
||||
stringArg = Typed . StringObject <$> stringObject <* blank
|
||||
|
||||
type ParserWithFont = ReaderT FontSet (Parser Font)
|
||||
|
||||
pageContents :: FontSet -> ByteString -> Either String [Text]
|
||||
pageContents fontSet input =
|
||||
evalParser (runReaderT (page) fontSet) emptyFont input
|
||||
|
||||
several :: MonadParser m => m [a] -> m [a]
|
||||
several p = concat <$> (p `sepBy` blank)
|
||||
|
||||
page :: ParserWithFont [Text]
|
||||
page = several (graphicState <|> text) <* blank <* endOfInput <?> "Text page contents"
|
||||
|
||||
graphicState :: ParserWithFont [Text]
|
||||
graphicState =
|
||||
string "q" *> blank *> insideQ <* blank <* string "Q" <?> "Graphic state"
|
||||
where
|
||||
insideQ = several (command <|> graphicState <|> text)
|
||||
ignore x = a x *> return []
|
||||
command =
|
||||
ignore stateOperator <|> ignore pathOperator <|> ignore colorOperator
|
||||
|
||||
text :: ParserWithFont [Text]
|
||||
text =
|
||||
string "BT" *> blank *> commands <* blank <* string "ET" <?> "Text operators"
|
||||
where
|
||||
commands = several (a textOperator >>= runOperator)
|
||||
|
||||
runOperator :: Call TextOperator -> ParserWithFont [Text]
|
||||
runOperator (Tf, [Typed (NameObject fontName), _]) =
|
||||
asks (! fontName) >>= put >> return []
|
||||
|
||||
runOperator (Tstar, []) = return ["\n"]
|
||||
|
||||
runOperator (TJ, [Typed (Array arrayObject)]) =
|
||||
replicate 1 <$> foldM appendText "" arrayObject
|
||||
where
|
||||
appendText bs (StringObject outputString) =
|
||||
mappend bs <$> decodeString outputString
|
||||
appendText bs _ = return bs
|
||||
|
||||
runOperator (Tj, [Typed (StringObject outputString)]) =
|
||||
replicate 1 <$> decodeString outputString
|
||||
|
||||
runOperator (Quote, [Typed (StringObject outputString)]) =
|
||||
(\bs -> ["\n", bs]) <$> decodeString outputString
|
||||
|
||||
runOperator (DQuote, [Typed (StringObject outputString)]) =
|
||||
(\bs -> ["\n", bs]) <$> decodeString outputString
|
||||
|
||||
runOperator _ = return []
|
||||
|
||||
decodeString :: (MonadFail m, MonadState Font m) => StringObject -> m Text
|
||||
decodeString input = do
|
||||
font <- get
|
||||
either fail return . font $ toByteString input
|
Loading…
Reference in a new issue