Hufflepdf/src/PDF/Text.hs

46 lines
1.1 KiB
Haskell

{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
-- | Exposes the character-map aliases 'CMap' and 'CMappers', the
-- 'PageContents' record (with its field), and the 'cMap' and 'pageContents'
-- functions.
module PDF.Text (
CMap
, CMappers
, PageContents(..)
, cMap
, pageContents
) where
import Control.Applicative ((<|>))
import Control.Monad.State (MonadState)
import Data.ByteString.Lazy.Char8 (ByteString)
import Data.Map (Map)
import PDF.Object (Dictionary, blank, name, regular)
import PDF.Output (ObjectId)
import PDF.Parser (Parser, count, runParser, sepBy, string, takeAll)
-- | Table of 'CMap's keyed by 'ObjectId'.
-- NOTE(review): presumably one entry per font object — confirm against the
-- code that fills this map.
type CMappers = Map ObjectId CMap
-- | Lookup table from an 'Int' key to a 'ByteString' value.
-- NOTE(review): presumably character code -> decoded text, as in a PDF
-- ToUnicode CMap — confirm once a real 'cMap' implementation exists.
type CMap = Map Int ByteString
-- | Enumeration of four operator tags.
-- NOTE(review): 'TJ', 'Tj' and 'Tf' share their names with the PDF
-- text-showing and font-selection operators, and 'Other' presumably stands for
-- any other operator — confirm. No other definition in the reviewed part of
-- this file refers to this type, and it is not exported.
data TextOperator = TJ | Tj | Tf | Other
-- | Intended to build a 'CMap' from a 'ByteString' (per the signature).
--
-- Not implemented yet: the value is bottom, so forcing it throws an
-- 'ErrorCall'. The message names this function so the failure can be traced,
-- rather than surfacing as an anonymous @Prelude.undefined@.
cMap :: ByteString -> CMap
cMap = error "PDF.Text.cMap: not implemented"
-- | Wrapper around the list of 'ByteString' chunks that the 'page' parser
-- produces.
data PageContents = PageContents
  { chunks :: [ByteString] -- ^ the wrapped list of chunks
  }
-- | Run the 'page' parser over an input 'ByteString' through 'runParser'.
--
-- The first argument (@font@, a 'Dictionary') is accepted but not consulted by
-- this definition. Per the signature, the outcome is either a 'Left' carrying
-- a 'String' or a 'Right' carrying the parsed 'PageContents', produced in a
-- monad that holds 'CMappers' state.
pageContents :: MonadState CMappers m => Dictionary -> ByteString -> m (Either String PageContents)
pageContents font input = runParser page input
-- | Parser for page contents: the alternative ('<|>') of 'graphicState' on the
-- left and 'text' on the right, with the resulting list wrapped in
-- 'PageContents'.
--
-- NOTE(review): 'text' is neither defined nor imported in the reviewed part of
-- this file — confirm where it is meant to come from.
page :: Parser u PageContents
page = fmap PageContents chunkList
  where
    chunkList = graphicState <|> text
-- | Parse a graphic-state block: @string "q"@, a 'blank', a sequence of items
-- separated by 'blank' (via 'sepBy'), then @string "Q"@.
--
-- Each item is either a command or a nested 'page'. A command contributes no
-- chunks and a nested page contributes its 'chunks'; the contributions are
-- concatenated, giving the @['ByteString']@ that 'page' wraps in
-- 'PageContents'. The items used to combine the result of 'string' with a
-- 'PageContents' under '<|>', which cannot share a single result type.
--
-- NOTE(review): nothing consumes a 'blank' between the last item and the
-- closing @Q@; whether one is required depends on how 'sepBy' and 'blank'
-- behave — confirm against "PDF.Parser" and "PDF.Object".
graphicState :: Parser u [ByteString]
graphicState =
  string "q" *> blank *> items <* string "Q"
  where
    -- Flatten the per-item chunk lists into one list.
    items = concat <$> (command <|> nested) `sepBy` blank
    -- A nested page is only kept for the chunks it carries.
    nested = chunks <$> page
    -- Recognised and discarded: six arguments followed by @cm@, or a 'name',
    -- a 'blank' and @gs@.
    command =
      [] <$ (count 6 argument *> string "cm" <|> name *> blank *> string "gs")
    -- One argument: whatever @takeAll regular@ matches, then a 'blank'.
    argument = takeAll regular <* blank