Hufflepdf/src/PDF/Content.hs

100 lines
3.3 KiB
Haskell
Raw Normal View History

{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module PDF.Content (
Content(..)
, ContentUnit(..)
, GraphicContextUnit(..)
, TextContext
, parse
) where
import Control.Applicative ((<|>))
import Control.Monad.Fail (MonadFail)
import Control.Monad.Reader (asks, runReader)
import Control.Monad.State (evalStateT, gets, modify)
import Data.Attoparsec.ByteString.Char8 (sepBy)
import Data.ByteString (ByteString)
2020-03-15 15:13:00 +01:00
import Data.Id (Id(..), Indexed, at, empty, insert, size)
import PDF.Box (Box(..))
2020-02-11 17:26:47 +01:00
import PDF.Content.Operator (Instruction, operator)
import PDF.Object (blank, directObject)
2020-02-11 17:26:47 +01:00
import PDF.Output (Output(..), line)
import PDF.Parser (Parser, runParser, string)
data Instructions = Instructions
data GraphicContextUnit =
GraphicInstruction (Id Instruction)
| ContentUnit ContentUnit
deriving Show
type TextContext = [Id Instruction]
data ContentUnit =
GraphicContext [GraphicContextUnit]
| TextContext TextContext
deriving Show
data Content = Content {
contentUnits :: [ContentUnit]
, indexedInstructions :: Indexed Instruction
} deriving Show
type InstructionParser = Parser (Indexed Instruction)
instance Monad m => Box m Instructions Content (Indexed Instruction) where
r Instructions = return . indexedInstructions
w Instructions indexedInstructions someContent =
return $ someContent {indexedInstructions}
register :: Instruction -> InstructionParser (Id Instruction)
register newInstruction = do
2020-03-15 15:13:00 +01:00
newInstructionID <- gets (Id . size)
modify (insert newInstructionID newInstruction)
return newInstructionID
parse :: MonadFail m => ByteString -> m Content
parse =
2020-03-15 15:13:00 +01:00
either fail (return . uncurry Content) . runParser contentUnits empty
where
contentUnits = contentUnit `sepBy` blank
contentUnit :: InstructionParser ContentUnit
contentUnit =
(GraphicContext <$> graphicContext)
<|> (TextContext <$> textContext)
where
graphicContext =
string "q" *> blank *> graphicContextUnit `sepBy` blank <* blank <* string "Q"
graphicContextUnit :: InstructionParser GraphicContextUnit
graphicContextUnit =
(GraphicInstruction <$> instruction)
<|> (ContentUnit <$> contentUnit)
instruction :: InstructionParser (Id Instruction)
instruction = evalStateT stackParser [] >>= register
where
stackParser = ((directObject <* blank) >>= push) <|> operator
push arg = modify (arg:) *> stackParser
textContext :: InstructionParser TextContext
textContext =
string "BT" *> blank *> instruction `sepBy` blank <* blank <* string "ET"
2020-02-11 17:26:47 +01:00
instance Output Content where
output (Content {contentUnits, indexedInstructions}) =
runReader (mconcat <$> mapM outputCU contentUnits) indexedInstructions
where
outputCU (GraphicContext gc) = do
inside <- mconcat <$> mapM outputGCU gc
return (line "q" `mappend` inside `mappend` line "Q")
outputCU (TextContext tc) = do
inside <- mconcat <$> mapM outputIId tc
return (line "BT" `mappend` inside `mappend` line "ET")
outputGCU (GraphicInstruction gi) = outputIId gi
outputGCU (ContentUnit cu) = outputCU cu
2020-03-15 15:13:00 +01:00
outputIId instructionId = asks (output . (`at` instructionId))