99 lines
3.3 KiB
Haskell
99 lines
3.3 KiB
Haskell
{-# LANGUAGE NamedFieldPuns #-}
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
{-# LANGUAGE ConstraintKinds #-}
|
|
{-# LANGUAGE FlexibleContexts #-}
|
|
module PDF.Content (
|
|
Content(..)
|
|
, ContentUnit(..)
|
|
, GraphicContextUnit(..)
|
|
, IndexedInstructions
|
|
, InstructionId
|
|
, TextContext
|
|
, content
|
|
, parse
|
|
) where
|
|
|
|
import Control.Applicative ((<|>))
|
|
import Control.Monad.Reader (asks, runReader)
|
|
import Control.Monad.State (MonadState, evalStateT, gets, modify, runStateT)
|
|
import Data.Attoparsec.ByteString.Char8 (sepBy)
|
|
import Data.ByteString (ByteString)
|
|
import Data.Map (Map, (!))
|
|
import qualified Data.Map as Map (empty, insert, size)
|
|
import PDF.Content.Operator (Instruction, operator)
|
|
import PDF.Object (blank, directObject)
|
|
import PDF.Output (Output(..), line)
|
|
import PDF.Parser (MonadParser, evalParser, string)
|
|
|
|
newtype InstructionId = InstructionId Int deriving (Eq, Ord, Show)
|
|
|
|
type IndexedInstructions = Map InstructionId Instruction
|
|
|
|
data GraphicContextUnit =
|
|
GraphicInstruction InstructionId
|
|
| ContentUnit ContentUnit
|
|
deriving Show
|
|
type TextContext = [InstructionId]
|
|
data ContentUnit =
|
|
GraphicContext [GraphicContextUnit]
|
|
| TextContext TextContext
|
|
deriving Show
|
|
data Content = Content {
|
|
contentUnits :: [ContentUnit]
|
|
, indexedInstructions :: IndexedInstructions
|
|
} deriving Show
|
|
|
|
type InstructionParser m = (MonadParser m, MonadState IndexedInstructions m)
|
|
|
|
register :: MonadState IndexedInstructions m => Instruction -> m InstructionId
|
|
register newInstruction = do
|
|
newInstructionID <- gets (InstructionId . Map.size)
|
|
modify (Map.insert newInstructionID newInstruction)
|
|
return newInstructionID
|
|
|
|
content :: MonadParser m => m Content
|
|
content = do
|
|
(contentUnits, indexedInstructions) <- runInstructionParser
|
|
return $ Content {contentUnits, indexedInstructions}
|
|
where
|
|
runInstructionParser = runStateT (contentUnit `sepBy` blank) Map.empty
|
|
|
|
contentUnit :: InstructionParser m => m ContentUnit
|
|
contentUnit =
|
|
(GraphicContext <$> graphicContext)
|
|
<|> (TextContext <$> textContext)
|
|
where
|
|
graphicContext =
|
|
string "q" *> blank *> graphicContextUnit `sepBy` blank <* blank <* string "Q"
|
|
|
|
graphicContextUnit :: InstructionParser m => m GraphicContextUnit
|
|
graphicContextUnit =
|
|
(GraphicInstruction <$> instruction)
|
|
<|> (ContentUnit <$> contentUnit)
|
|
|
|
instruction :: InstructionParser m => m InstructionId
|
|
instruction = evalStateT stackParser [] >>= register
|
|
where
|
|
stackParser = ((directObject <* blank) >>= push) <|> operator
|
|
push arg = modify (arg:) *> stackParser
|
|
|
|
parse :: ByteString -> Either String Content
|
|
parse = evalParser content ()
|
|
|
|
textContext :: InstructionParser m => m TextContext
|
|
textContext =
|
|
string "BT" *> blank *> instruction `sepBy` blank <* blank <* string "ET"
|
|
|
|
instance Output Content where
|
|
output (Content {contentUnits, indexedInstructions}) =
|
|
runReader (mconcat <$> mapM outputCU contentUnits) indexedInstructions
|
|
where
|
|
outputCU (GraphicContext gc) = do
|
|
inside <- mconcat <$> mapM outputGCU gc
|
|
return (line "q" `mappend` inside `mappend` line "Q")
|
|
outputCU (TextContext tc) = do
|
|
inside <- mconcat <$> mapM outputIId tc
|
|
return (line "BT" `mappend` inside `mappend` line "ET")
|
|
outputGCU (GraphicInstruction gi) = outputIId gi
|
|
outputGCU (ContentUnit cu) = outputCU cu
|
|
outputIId instructionId = asks (output . (! instructionId))
|