2019-09-23 18:00:47 +02:00
|
|
|
{-# LANGUAGE FlexibleContexts #-}
|
2019-09-23 23:19:27 +02:00
|
|
|
{-# LANGUAGE OverloadedStrings #-}
|
2019-09-23 18:00:47 +02:00
|
|
|
module PDF.Text (
|
|
|
|
CMap
|
|
|
|
, CMappers
|
|
|
|
, PageContents(..)
|
|
|
|
, cMap
|
|
|
|
, pageContents
|
|
|
|
) where
|
|
|
|
|
2019-09-23 23:19:27 +02:00
|
|
|
import Control.Applicative ((<|>))
|
2019-09-23 18:00:47 +02:00
|
|
|
import Control.Monad.State (MonadState)
|
|
|
|
import Data.ByteString.Lazy.Char8 (ByteString)
|
|
|
|
import Data.Map (Map)
|
2019-09-23 23:19:27 +02:00
|
|
|
import PDF.Object (Dictionary, blank, name, regular)
|
2019-09-23 18:00:47 +02:00
|
|
|
import PDF.Output (ObjectId)
|
2019-09-23 23:19:27 +02:00
|
|
|
import PDF.Parser (Parser, count, runParser, sepBy, string, takeAll)
|
2019-09-23 18:00:47 +02:00
|
|
|
|
|
|
|
type CMappers = Map ObjectId CMap
|
|
|
|
type CMap = Map Int ByteString
|
|
|
|
|
2019-09-23 23:19:27 +02:00
|
|
|
data TextOperator = TJ | Tj | Tf | Other
|
|
|
|
|
2019-09-23 18:00:47 +02:00
|
|
|
cMap :: ByteString -> CMap
|
|
|
|
cMap = undefined
|
|
|
|
|
|
|
|
data PageContents = PageContents {
|
|
|
|
chunks :: [ByteString]
|
|
|
|
}
|
|
|
|
|
2019-09-23 23:19:27 +02:00
|
|
|
pageContents :: MonadState CMappers m => Dictionary -> ByteString -> m (Either String PageContents)
|
|
|
|
pageContents font = runParser page
|
|
|
|
|
|
|
|
page :: Parser u PageContents
|
|
|
|
page = PageContents <$> (graphicState <|> text)
|
|
|
|
|
|
|
|
graphicState =
|
|
|
|
string "q" *> blank *> (command <|> page) `sepBy` blank <* string "Q"
|
|
|
|
where
|
|
|
|
command =
|
|
|
|
count 6 argument *> string "cm"
|
|
|
|
<|> name *> blank *> string "gs"
|
|
|
|
argument = takeAll regular <* blank
|
|
|
|
|
|
|
|
|