Ugly commit, breaks everything, still trying to figure a grammar for text

This commit is contained in:
Tissevert 2019-09-23 23:19:27 +02:00
parent 6f3c159ea7
commit 51db57ec67
2 changed files with 23 additions and 4 deletions

View file

@ -23,6 +23,8 @@ module PDF.Object (
, integer , integer
, line , line
, magicNumber , magicNumber
, name
, regular
, structure , structure
) where ) where

View file

@ -1,4 +1,5 @@
{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
module PDF.Text ( module PDF.Text (
CMap CMap
, CMappers , CMappers
@ -7,16 +8,19 @@ module PDF.Text (
, pageContents , pageContents
) where ) where
import Control.Applicative ((<|>))
import Control.Monad.State (MonadState) import Control.Monad.State (MonadState)
import Data.ByteString.Lazy.Char8 (ByteString) import Data.ByteString.Lazy.Char8 (ByteString)
import Data.Map (Map) import Data.Map (Map)
import PDF.Object (Dictionary) import PDF.Object (Dictionary, blank, name, regular)
import PDF.Output (ObjectId) import PDF.Output (ObjectId)
import PDF.Parser (Parser) import PDF.Parser (Parser, count, runParser, sepBy, string, takeAll)
type CMappers = Map ObjectId CMap type CMappers = Map ObjectId CMap
type CMap = Map Int ByteString type CMap = Map Int ByteString
data TextOperator = TJ | Tj | Tf | Other
cMap :: ByteString -> CMap cMap :: ByteString -> CMap
cMap = undefined cMap = undefined
@ -24,5 +28,18 @@ data PageContents = PageContents {
chunks :: [ByteString] chunks :: [ByteString]
} }
pageContents :: MonadState CMappers m => Dictionary -> ByteString -> m PageContents pageContents :: MonadState CMappers m => Dictionary -> ByteString -> m (Either String PageContents)
pageContents font page = undefined pageContents font = runParser page
page :: Parser u PageContents
page = PageContents <$> (graphicState <|> text)
graphicState =
string "q" *> blank *> (command <|> page) `sepBy` blank <* string "Q"
where
command =
count 6 argument *> string "cm"
<|> name *> blank *> string "gs"
argument = takeAll regular <* blank