{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
-- | Parsers for the contents of a PDF page, plus the character-map types
-- used alongside them.
module PDF.Text
  ( CMap
  , CMappers
  , PageContents(..)
  , cMap
  , pageContents
  ) where

import Control.Applicative ((<|>))
import Control.Monad.State (MonadState)
import Data.ByteString.Lazy.Char8 (ByteString)
import Data.Map (Map)
import PDF.Object (Dictionary, blank, name, regular)
import PDF.Output (ObjectId)
import PDF.Parser (Parser, count, runParser, sepBy, string, takeAll)

-- | One 'CMap' per object, keyed by the object's 'ObjectId'.
type CMappers = Map ObjectId CMap

-- | A mapping from 'Int' keys to 'ByteString' values.
-- NOTE(review): presumably character code -> decoded text; confirm once
-- 'cMap' is implemented.
type CMap = Map Int ByteString

-- | Operators recognised in a content stream. Not exported and not referenced
-- by any other definition visible in this module.
-- NOTE(review): 'TJ', 'Tj' and 'Tf' look like the PDF text-showing and
-- font-selection operators; confirm against the intended use.
data TextOperator = TJ | Tj | Tf | Other

-- | Build a 'CMap' from its serialized form.
--
-- Not implemented yet: the definition is 'undefined', so forcing the result
-- fails at runtime.
cMap :: ByteString -> CMap
cMap = undefined

-- | The result of parsing a page.
data PageContents = PageContents
  { chunks :: [ByteString] -- ^ the byte strings produced by the page parser
  }

-- | Run the 'page' parser over the given input.
--
-- The first argument (the font 'Dictionary') is accepted but not used by this
-- definition. The outcome is whatever 'runParser' yields in @m@: a 'Left'
-- carrying a 'String', or the parsed 'PageContents'.
pageContents :: MonadState CMappers m => Dictionary -> ByteString -> m (Either String PageContents)
pageContents font input = runParser page input

-- | A page is either a @q@ ... @Q@ group ('graphicState') or @text@, wrapped
-- in 'PageContents'.
--
-- NOTE(review): @text@ is neither defined nor imported in the part of the
-- module visible here; confirm where it is meant to come from.
page :: Parser u PageContents
page = fmap PageContents (graphicState <|> text)

-- | Parser for a group opened by the literal @q@ and closed by the literal
-- @Q@, containing a sequence of operations or nested pages.
--
-- The operator chain is kept exactly as written: how it groups depends on the
-- fixities of @*>@, @<*@ and @\<|\>@ and on the fixity of 'sepBy' as declared
-- in "PDF.Parser", which is not visible from this module.
graphicState =
  string "q" *> blank *> (operation <|> page) `sepBy` blank <* string "Q"
  where
    -- A run of 'regular' input followed by 'blank'.
    operand = takeAll regular <* blank
    -- Either six operands followed by the literal "cm", or a name, a blank
    -- and the literal "gs".
    operation =
          count 6 operand *> string "cm"
      <|> name *> blank *> string "gs"