diff --git a/src/PDF/Text.hs b/src/PDF/Text.hs index ec40769..150edc2 100644 --- a/src/PDF/Text.hs +++ b/src/PDF/Text.hs @@ -5,7 +5,7 @@ module PDF.Text {-( )-} where import Control.Applicative ((<|>)) -import Control.Monad (foldM, join) +import Control.Monad (foldM) import Control.Monad.Reader (ReaderT, runReaderT, asks) import Control.Monad.State (get, put) import Data.Attoparsec.ByteString.Char8 (choice, count, sepBy) @@ -18,7 +18,7 @@ import PDF.Object ( DirectObject(..), StringObject(..) , array, blank, name, parseBytes, regular, stringObject ) -import PDF.Parser (MonadParser, Parser, evalParser, string, takeAll) +import PDF.Parser (MonadParser, (), Parser, evalParser, string, takeAll) data StateOperator = Cm | W | J | J_ | M | D | Ri | I | Gs -- general graphic state @@ -40,24 +40,22 @@ stateOperator M = (,) M <$> count 1 argument <* string "M" stateOperator D = (,) D <$> count 2 argument <* string "d" stateOperator Ri = (,) Ri <$> count 1 argument <* string "ri" stateOperator I = (,) I <$> count 1 argument <* string "i" -stateOperator Gs = (,) Gs <$> count 1 argument <* string "gs" +stateOperator Gs = (,) Gs <$> count 1 nameArg <* string "gs" textOperator :: MonadParser m => TextOperator -> m (Call TextOperator) textOperator Td = (,) Td <$> count 2 argument <* string "Td" textOperator TD = (,) TD <$> count 2 argument <* string "TD" textOperator Tm = (,) Tm <$> count 6 argument <* string "Tm" textOperator Tstar = (,) Td <$> return [] <* string "T*" -textOperator TJ = - (,) TJ <$> sequence [Typed . Array <$> array] <* blank <* string "TJ" -textOperator Tj = - (,) Tj <$> sequence [Typed . StringObject <$> stringObject] <* blank <* string "Tj" -textOperator Quote = (,) Quote <$> count 1 argument <* string "'" -textOperator DQuote = (,) DQuote <$> count 1 argument <* string "\"" +textOperator TJ = (,) TJ <$> count 1 arrayArg <* string "TJ" +textOperator Tj = (,) Tj <$> count 1 stringArg <* string "Tj" +textOperator Quote = (,) Quote <$> count 1 stringArg <* string "'" +textOperator DQuote = (,) DQuote <$> count 1 stringArg <* string "\"" textOperator Tc = (,) Tc <$> count 1 argument <* string "Tc" textOperator Tw = (,) Tw <$> count 1 argument <* string "Tw" textOperator Tz = (,) Tz <$> count 1 argument <* string "Tz" textOperator TL = (,) TL <$> count 1 argument <* string "TL" -textOperator Tf = (,) Tf <$> sequence [Typed . NameObject <$> name, argument] <* string "Tf" +textOperator Tf = (,) Tf <$> sequence [nameArg, argument] <* string "Tf" textOperator Tr = (,) Tr <$> count 1 argument <* string "Tr" textOperator Ts = (,) Ts <$> count 1 argument <* string "Ts" @@ -67,6 +65,15 @@ a parserGenerator = choice $ parserGenerator <$> [minBound .. maxBound] argument :: MonadParser m => m Argument argument = Raw <$> takeAll regular <* blank +arrayArg :: MonadParser m => m Argument +arrayArg = Typed . Array <$> array <* blank + +nameArg :: MonadParser m => m Argument +nameArg = Typed . NameObject <$> name <* blank + +stringArg :: MonadParser m => m Argument +stringArg = Typed . StringObject <$> stringObject <* blank + data PageContents = PageContents { chunks :: [ByteString] } @@ -78,20 +85,20 @@ pageContents font input = evalParser (runReaderT (PageContents <$> page) font) emptyCMap input page :: ParserWithFont [ByteString] -page = graphicState <|> text +page = graphicState <|> text "Text page contents" graphicState :: ParserWithFont [ByteString] graphicState = - string "q" *> blank *> insideQ <* string "Q" + (string "q" *> blank *> insideQ <* blank <* string "Q") "Graphic state" where - insideQ = join <$> ((command <|> page) `sepBy` blank ) + insideQ = concat <$> ((command <|> page) `sepBy` blank ) command = a stateOperator *> return [] text :: ParserWithFont [ByteString] text = - string "BT" *> blank *> commands <* blank <* string "ET" + string "BT" *> blank *> commands <* blank <* string "ET" "Text operators" where - commands = join <$> (a textOperator >>= runOperator) `sepBy` blank + commands = concat <$> (a textOperator >>= runOperator) `sepBy` blank runOperator :: Call TextOperator -> ParserWithFont [ByteString] runOperator (Tf, [Typed (NameObject fontName), _]) =