Hufflepdf/src/PDF/Content.hs

83 lines
2.6 KiB
Haskell

{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleContexts #-}
module PDF.Content (
Content(..)
, ContentUnit(..)
, GraphicContextUnit(..)
, TextContext
, content
, parse
) where
import Control.Applicative ((<|>), many)
import Control.Monad.State (MonadState(..), evalStateT, modify)
import Data.ByteString (ByteString)
import PDF.Content.Operator (Instruction, operator)
import PDF.Object (blank, directObject)
import PDF.Output (Output(..), line)
import PDF.Parser (MonadParser, endOfInput, evalParser, peek, string)
data GraphicContextUnit =
GraphicInstruction Instruction
| ContentUnit ContentUnit
deriving Show
type TextContext = [Instruction]
data ContentUnit =
GraphicContext [GraphicContextUnit]
| TextContext TextContext
deriving Show
newtype Content = Content [ContentUnit] deriving Show
content :: MonadParser m => m Content
content = Content <$> many contentUnit <* blank <* endOfInput
contentUnit :: MonadParser m => m ContentUnit
contentUnit = next >>= handle
where
handle 'q' = GraphicContext <$> graphicContext
handle 'B' = TextContext <$> textContext
handle _ = fail "content unit"
graphicContext :: MonadParser m => m [GraphicContextUnit]
graphicContext = string "q" *> evalStateT graphicContextUnits []
where
graphicContextUnits = next >>= handle
handle 'Q' = string "Q" *> (reverse <$> get)
handle c
| c `elem` ['q', 'B'] =
(ContentUnit <$> contentUnit >>= push) *> graphicContextUnits
| otherwise =
(GraphicInstruction <$> instruction >>= push) *> graphicContextUnits
instruction :: MonadParser m => m Instruction
instruction = evalStateT stackParser []
where
stackParser = ((directObject <* blank >>= push) *> stackParser) <|> operator
next :: MonadParser m => m Char
next = blank *> peek
push :: MonadState [a] m => a -> m ()
push a = modify (a:)
parse :: ByteString -> Either String Content
parse = evalParser content ()
textContext :: MonadParser m => m TextContext
textContext = string "BT" *> evalStateT instructions []
where
instructions = next >>= handle
handle 'E' = string "ET" *> (reverse <$> get)
handle _ = (instruction >>= push) *> instructions
instance Output Content where
output (Content contentUnits) = output contentUnits
instance Output ContentUnit where
output (GraphicContext gc) = line "q" `mappend` output gc `mappend` line "Q"
output (TextContext tc) = line "BT" `mappend` output tc `mappend` line "ET"
instance Output GraphicContextUnit where
output (GraphicInstruction gi) = output gi
output (ContentUnit cu) = output cu