Ugly commit, breaks everything, still trying to figure a grammar for text
This commit is contained in:
parent
6f3c159ea7
commit
51db57ec67
2 changed files with 23 additions and 4 deletions
|
@ -23,6 +23,8 @@ module PDF.Object (
|
|||
, integer
|
||||
, line
|
||||
, magicNumber
|
||||
, name
|
||||
, regular
|
||||
, structure
|
||||
) where
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
{-# LANGUAGE FlexibleContexts #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
module PDF.Text (
|
||||
CMap
|
||||
, CMappers
|
||||
|
@ -7,16 +8,19 @@ module PDF.Text (
|
|||
, pageContents
|
||||
) where
|
||||
|
||||
import Control.Applicative ((<|>))
|
||||
import Control.Monad.State (MonadState)
|
||||
import Data.ByteString.Lazy.Char8 (ByteString)
|
||||
import Data.Map (Map)
|
||||
import PDF.Object (Dictionary)
|
||||
import PDF.Object (Dictionary, blank, name, regular)
|
||||
import PDF.Output (ObjectId)
|
||||
import PDF.Parser (Parser)
|
||||
import PDF.Parser (Parser, count, runParser, sepBy, string, takeAll)
|
||||
|
||||
type CMappers = Map ObjectId CMap
|
||||
type CMap = Map Int ByteString
|
||||
|
||||
data TextOperator = TJ | Tj | Tf | Other
|
||||
|
||||
cMap :: ByteString -> CMap
|
||||
cMap = undefined
|
||||
|
||||
|
@ -24,5 +28,18 @@ data PageContents = PageContents {
|
|||
chunks :: [ByteString]
|
||||
}
|
||||
|
||||
pageContents :: MonadState CMappers m => Dictionary -> ByteString -> m PageContents
|
||||
pageContents font page = undefined
|
||||
pageContents :: MonadState CMappers m => Dictionary -> ByteString -> m (Either String PageContents)
|
||||
pageContents font = runParser page
|
||||
|
||||
page :: Parser u PageContents
|
||||
page = PageContents <$> (graphicState <|> text)
|
||||
|
||||
graphicState =
|
||||
string "q" *> blank *> (command <|> page) `sepBy` blank <* string "Q"
|
||||
where
|
||||
command =
|
||||
count 6 argument *> string "cm"
|
||||
<|> name *> blank *> string "gs"
|
||||
argument = takeAll regular <* blank
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue