-- Hufflepdf/src/PDF/Text.hs
-- 74 lines, 1.7 KiB, Haskell

{-# LANGUAGE OverloadedStrings #-}
module PDF.Text (
CMap
, CMappers
, PageContents(..)
, cMap
, pageContents
) where
import Control.Applicative ((<|>))
import Control.Monad (join)
import Control.Monad.Reader (ReaderT, runReaderT)
import Control.Monad.State (state)
import Data.Attoparsec.ByteString.Char8 (count, sepBy)
import Data.ByteString.Char8 (ByteString)
import Data.Map (Map)
import qualified Data.Map as Map (empty)
import PDF.Object (Content, Name, blank, name, regular)
import PDF.Output (ObjectId)
import PDF.Parser (Parser, evalParser, string, takeAll)
-- | Table associating each 'Name' with a 'CMap'.
-- NOTE(review): the keys are presumably font resource names — confirm.
type CMappers = Map Name CMap
-- | Lookup table from 'Int' keys to 'ByteString' values.
-- NOTE(review): presumably character codes mapped to their decoded text —
-- confirm against the code that fills the table.
type CMap = Map Int ByteString
-- | A 'CMap' without any entry; 'pageContents' hands it to 'evalParser' as the
-- 'CMap' parsing starts with.
emptyCMap :: CMap
emptyCMap = Map.empty
-- | Classification of the operators met while reading text.
-- NOTE(review): 'TJ', 'Tj' and 'Tf' presumably mirror the PDF operators of the
-- same names — confirm once 'textOperator' is implemented.
data TextOperator
  = TJ
  | Tj
  | Tf
  | Other -- ^ Anything that is none of the three operators above.
-- | Build a 'CMap' from a 'ByteString'.
-- NOTE(review): the input is presumably the raw contents of a CMap stream —
-- confirm when writing the implementation.
--
-- Not implemented yet: evaluating 'cMap' raises an error. The message names
-- this function so the failure can be traced back to the stub immediately.
cMap :: ByteString -> CMap
cMap = error "PDF.Text.cMap: not implemented"
-- | Result produced by 'pageContents'.
data PageContents = PageContents
  { chunks :: [ByteString]
    -- ^ The byte strings collected by the page parser, in the order it
    -- produced them.
  }
-- | Parser stack used for page contents: a 'ReaderT' layer giving access to
-- the 'CMappers', on top of a 'Parser' parameterised by a 'CMap'.
-- NOTE(review): that 'CMap' is presumably the currently selected font —
-- confirm against "PDF.Parser".
type ParserWithFont = ReaderT CMappers (Parser CMap)
{-
data FontContext = FontContext {
cMappers :: CMappers
, currentFont :: CMap
}
initFontContext cMappers = FontContext {
cMappers
, currentFont = emptyCMap
}
-}
-- | Run the 'page' parser over a 'ByteString'.
--
-- The 'CMappers' argument becomes the 'ReaderT' environment of the parser and
-- parsing starts from 'emptyCMap'. The result is whatever 'evalParser'
-- yields: a 'String' on the 'Left' (presumably an error message) or the
-- parsed 'PageContents' on the 'Right'.
pageContents :: CMappers -> ByteString -> Either String PageContents
pageContents fonts input = evalParser contentsParser emptyCMap input
  where
    -- 'page' wrapped into 'PageContents', with its environment supplied.
    contentsParser = runReaderT (fmap PageContents page) fonts
-- | Parse page contents: try a 'graphicState' group first and fall back to
-- 'text' when that alternative fails.
page :: ParserWithFont [ByteString]
page = graphicState <|> text
-- | Parse a group delimited by the @q@ and @Q@ operators.
--
-- After @q@ and a blank comes a sequence of blank-separated items, each being
-- either a skipped command or a nested 'page'. The chunks of all items are
-- concatenated in order.
--
-- NOTE(review): 'sepBy' does not consume a trailing separator, so whether
-- whitespace right before @Q@ is accepted depends on what 'blank' matches —
-- confirm against "PDF.Object".
graphicState :: ParserWithFont [ByteString]
graphicState =
  string "q" *> blank *> insideQ <* string "Q"
  where
    -- The alternation has to be parenthesised: a backquoted operator without
    -- a fixity declaration is infixl 9 and so binds tighter than '<|>'
    -- (infixl 3). Unparenthesised, the group would hold either one single
    -- command or a list of pages, never several mixed items.
    insideQ = join <$> ((command <|> page) `sepBy` blank)
    -- Commands that are recognised and skipped; they contribute no chunk.
    command =
      count 6 argument *> string "cm" *> return []
        <|> name *> blank *> string "gs" *> return []
    -- One operand together with the blank that follows it.
    argument = takeAll regular <* blank
-- | Parser used by 'page' as the alternative to 'graphicState'.
-- NOTE(review): presumably meant to read a text object and return its
-- strings — confirm when writing the implementation.
--
-- Not implemented yet: evaluating 'text' raises an error. The message names
-- this parser so a failure reached through 'pageContents' is easy to trace.
text :: ParserWithFont [ByteString]
text = error "PDF.Text.text: not implemented"
-- | Parser yielding a 'TextOperator'.
--
-- Not implemented yet: evaluating 'textOperator' raises an error. The message
-- names this parser so the failure can be traced back to the stub.
textOperator :: ParserWithFont TextOperator
textOperator = error "PDF.Text.textOperator: not implemented"