diff --git a/src/PDF/Object.hs b/src/PDF/Object.hs index 8c69849..1828718 100644 --- a/src/PDF/Object.hs +++ b/src/PDF/Object.hs @@ -23,6 +23,8 @@ module PDF.Object ( , integer , line , magicNumber + , name + , regular , structure ) where diff --git a/src/PDF/Text.hs b/src/PDF/Text.hs index 7b2852a..022e628 100644 --- a/src/PDF/Text.hs +++ b/src/PDF/Text.hs @@ -1,4 +1,5 @@ {-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE OverloadedStrings #-} module PDF.Text ( CMap , CMappers @@ -7,16 +8,19 @@ module PDF.Text ( , pageContents ) where +import Control.Applicative ((<|>)) import Control.Monad.State (MonadState) import Data.ByteString.Lazy.Char8 (ByteString) import Data.Map (Map) -import PDF.Object (Dictionary) +import PDF.Object (Dictionary, blank, name, regular) import PDF.Output (ObjectId) -import PDF.Parser (Parser) +import PDF.Parser (Parser, count, runParser, sepBy, string, takeAll) type CMappers = Map ObjectId CMap type CMap = Map Int ByteString +data TextOperator = TJ | Tj | Tf | Other + cMap :: ByteString -> CMap cMap = undefined @@ -24,5 +28,18 @@ data PageContents = PageContents { chunks :: [ByteString] } -pageContents :: MonadState CMappers m => Dictionary -> ByteString -> m PageContents -pageContents font page = undefined +pageContents :: MonadState CMappers m => Dictionary -> ByteString -> m (Either String PageContents) +pageContents font = runParser page + +page :: Parser u PageContents +page = PageContents <$> (graphicState <|> text) + +graphicState = + string "q" *> blank *> (command <|> page) `sepBy` blank <* string "Q" + where + command = + count 6 argument *> string "cm" + <|> name *> blank *> string "gs" + argument = takeAll regular <* blank + +