Ugly commit, breaks everything, still trying to figure a grammar for text
This commit is contained in:
parent
6f3c159ea7
commit
51db57ec67
2 changed files with 23 additions and 4 deletions
|
@ -23,6 +23,8 @@ module PDF.Object (
|
||||||
, integer
|
, integer
|
||||||
, line
|
, line
|
||||||
, magicNumber
|
, magicNumber
|
||||||
|
, name
|
||||||
|
, regular
|
||||||
, structure
|
, structure
|
||||||
) where
|
) where
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
{-# LANGUAGE FlexibleContexts #-}
|
{-# LANGUAGE FlexibleContexts #-}
|
||||||
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
module PDF.Text (
|
module PDF.Text (
|
||||||
CMap
|
CMap
|
||||||
, CMappers
|
, CMappers
|
||||||
|
@ -7,16 +8,19 @@ module PDF.Text (
|
||||||
, pageContents
|
, pageContents
|
||||||
) where
|
) where
|
||||||
|
|
||||||
|
import Control.Applicative ((<|>))
|
||||||
import Control.Monad.State (MonadState)
|
import Control.Monad.State (MonadState)
|
||||||
import Data.ByteString.Lazy.Char8 (ByteString)
|
import Data.ByteString.Lazy.Char8 (ByteString)
|
||||||
import Data.Map (Map)
|
import Data.Map (Map)
|
||||||
import PDF.Object (Dictionary)
|
import PDF.Object (Dictionary, blank, name, regular)
|
||||||
import PDF.Output (ObjectId)
|
import PDF.Output (ObjectId)
|
||||||
import PDF.Parser (Parser)
|
import PDF.Parser (Parser, count, runParser, sepBy, string, takeAll)
|
||||||
|
|
||||||
type CMappers = Map ObjectId CMap
|
type CMappers = Map ObjectId CMap
|
||||||
type CMap = Map Int ByteString
|
type CMap = Map Int ByteString
|
||||||
|
|
||||||
|
data TextOperator = TJ | Tj | Tf | Other
|
||||||
|
|
||||||
cMap :: ByteString -> CMap
|
cMap :: ByteString -> CMap
|
||||||
cMap = undefined
|
cMap = undefined
|
||||||
|
|
||||||
|
@ -24,5 +28,18 @@ data PageContents = PageContents {
|
||||||
chunks :: [ByteString]
|
chunks :: [ByteString]
|
||||||
}
|
}
|
||||||
|
|
||||||
pageContents :: MonadState CMappers m => Dictionary -> ByteString -> m PageContents
|
pageContents :: MonadState CMappers m => Dictionary -> ByteString -> m (Either String PageContents)
|
||||||
pageContents font page = undefined
|
pageContents font = runParser page
|
||||||
|
|
||||||
|
page :: Parser u PageContents
|
||||||
|
page = PageContents <$> (graphicState <|> text)
|
||||||
|
|
||||||
|
graphicState =
|
||||||
|
string "q" *> blank *> (command <|> page) `sepBy` blank <* string "Q"
|
||||||
|
where
|
||||||
|
command =
|
||||||
|
count 6 argument *> string "cm"
|
||||||
|
<|> name *> blank *> string "gs"
|
||||||
|
argument = takeAll regular <* blank
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue