74 lines
1.7 KiB
Haskell
74 lines
1.7 KiB
Haskell
{-# LANGUAGE OverloadedStrings #-}
|
|
module PDF.Text (
|
|
CMap
|
|
, CMappers
|
|
, PageContents(..)
|
|
, cMap
|
|
, pageContents
|
|
) where
|
|
|
|
import Control.Applicative ((<|>))
|
|
import Control.Monad (join)
|
|
import Control.Monad.Reader (ReaderT, runReaderT)
|
|
import Control.Monad.State (state)
|
|
import Data.Attoparsec.ByteString.Char8 (count, sepBy)
|
|
import Data.ByteString.Char8 (ByteString)
|
|
import Data.Map (Map)
|
|
import qualified Data.Map as Map (empty)
|
|
import PDF.Object (Content, Name, blank, name, regular)
|
|
import PDF.Output (ObjectId)
|
|
import PDF.Parser (Parser, evalParser, string, takeAll)
|
|
|
|
-- | A collection of 'CMap's keyed by 'Name'.
-- NOTE(review): presumably the key is the font resource name used in the page
-- content — confirm against the caller that builds this map.
type CMappers = Map Name CMap
|
|
-- | A lookup table from 'Int' keys to 'ByteString' values.
-- NOTE(review): presumably character codes mapped to their decoded text —
-- confirm once 'cMap' is implemented.
type CMap = Map Int ByteString
|
|
|
|
-- | The 'CMap' with no entries; 'pageContents' hands it to 'evalParser' as
-- the starting value.
emptyCMap :: CMap
emptyCMap = Map.empty
|
|
|
|
-- | Tags for the operators recognised by 'textOperator'.
-- NOTE(review): 'TJ', 'Tj' and 'Tf' presumably stand for the PDF text
-- operators of the same spelling, and 'Other' for anything else — confirm.
data TextOperator
  = TJ
  | Tj
  | Tf
  | Other
|
|
|
|
-- | Build a 'CMap' from a 'ByteString'.
-- NOTE(review): presumably the input is the raw content of a character-map
-- definition — confirm when implementing.
--
-- Not implemented yet. Evaluating the result fails with a message naming this
-- function, rather than with an anonymous @undefined@, so that a stray call
-- can be traced back to the missing implementation.
cMap :: ByteString -> CMap
cMap = error "PDF.Text.cMap: not implemented"
|
|
|
|
-- | Result of 'pageContents': the list of 'ByteString' chunks produced by the
-- 'page' parser, in the order the parser returned them.
data PageContents = PageContents
  { chunks :: [ByteString]
  }
|
|
|
|
-- | The parser stack used for page content: a 'ReaderT' that carries the
-- 'CMappers' as its environment, layered over a 'Parser' parameterised by a
-- 'CMap'.
-- NOTE(review): the 'CMap' parameter is presumably the parser's state holding
-- the currently selected font's map — confirm against "PDF.Parser".
type ParserWithFont = ReaderT CMappers (Parser CMap)
|
|
|
|
{-
|
|
data FontContext = FontContext {
|
|
cMappers :: CMappers
|
|
, currentFont :: CMap
|
|
}
|
|
|
|
initFontContext cMappers = FontContext {
|
|
cMappers
|
|
, currentFont = emptyCMap
|
|
}
|
|
-}
|
|
|
|
-- | Run the 'page' parser over a 'ByteString' and wrap the chunks it yields in
-- a 'PageContents'.
--
-- The first argument becomes the 'ReaderT' environment of the parser;
-- 'emptyCMap' is passed to 'evalParser' as the starting 'CMap'. The result is
-- whatever 'evalParser' returns.
-- NOTE(review): 'Left' presumably carries a parse error message — confirm
-- against "PDF.Parser".
pageContents :: CMappers -> ByteString -> Either String PageContents
pageContents fonts = evalParser contentsParser emptyCMap
  where
    contentsParser = runReaderT (PageContents <$> page) fonts
|
|
|
|
-- | Parse page content as either a 'graphicState' group or, as the
-- alternative, a 'text' section, returning the chunks of whichever matched.
page :: ParserWithFont [ByteString]
page = graphicState <|> text
|
|
|
|
-- | Parse a group delimited by the literals @q@ and @Q@: the opening @q@, a
-- 'blank', the group body, then the closing @Q@.
--
-- The body is either a single command, which contributes no chunks, or a run
-- of 'page' parses separated by 'blank' whose chunk lists are concatenated.
-- A command is six operands followed by @cm@, or a 'name', a 'blank' and @gs@.
graphicState :: ParserWithFont [ByteString]
graphicState = opening *> body <* closing
  where
    opening = string "q" *> blank
    closing = string "Q"
    -- The parentheses spell out how the unparenthesised original groups: a
    -- backticked `sepBy` binds tighter than the alternative operator.
    -- NOTE(review): if the intent was "commands or pages, separated by
    -- blanks", the grouping should be (command <|> page) `sepBy` blank —
    -- confirm before changing, since that alters what is accepted.
    body = join <$> (command <|> (page `sepBy` blank))
    command = ([] <$ cmCommand) <|> ([] <$ gsCommand)
    cmCommand = count 6 operand *> string "cm"
    gsCommand = name *> blank *> string "gs"
    -- One operand: a run of regular characters and the blank that follows it.
    operand = takeAll regular <* blank
|
|
|
|
-- | Parser for the text part of page content; the second alternative tried by
-- 'page'.
--
-- Not implemented yet. Running it fails with a message naming this function,
-- rather than with an anonymous @undefined@, so that the missing piece is
-- identifiable from the error alone.
text :: ParserWithFont [ByteString]
text = error "PDF.Text.text: not implemented"
|
|
|
|
-- | Parser yielding a 'TextOperator'.
--
-- Not implemented yet. Running it fails with a message naming this function,
-- rather than with an anonymous @undefined@, so that the missing piece is
-- identifiable from the error alone.
textOperator :: ParserWithFont TextOperator
textOperator = error "PDF.Text.textOperator: not implemented"
|
|
|