Hufflepdf/src/PDF/Content.hs

118 lines
4.1 KiB
Haskell

{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module PDF.Content (
Content(..)
, ContentUnit(..)
, GraphicContextUnit(..)
, IdContentUnit
, IdGraphicContextUnit
, IdTextContext
, Instructions(..)
, TextContext
, parse
) where
import Control.Applicative ((<|>))
import Control.Monad.Reader (asks, runReader)
import Control.Monad.State.Strict (runState, evalStateT, modify)
import Data.Attoparsec.ByteString.Char8 (sepBy)
import qualified Data.Attoparsec.ByteString.Char8 as Atto (Parser, parseOnly)
import Data.ByteString (ByteString)
import Data.Id (Id(..), Indexed, at, empty, register)
import PDF.Box (Box(..))
import PDF.Content.Operator (Instruction, operator)
import PDF.Object (blank, directObject)
import PDF.Output (Output(..), line)
import PDF.Parser (string)
data Instructions = Instructions
data GraphicContextUnit a =
GraphicInstruction a
| ContentUnit (ContentUnit a)
deriving Show
type TextContext a = [a]
data ContentUnit a =
GraphicContext [GraphicContextUnit a]
| TextContext (TextContext a)
deriving Show
data Content = Content {
contentUnits :: [IdContentUnit]
, indexedInstructions :: Indexed Instruction
, firstError :: Maybe String
} deriving Show
type TmpContentUnit = ContentUnit Instruction
type TmpGraphicContextUnit = GraphicContextUnit Instruction
type TmpTextContext = TextContext Instruction
type IdContentUnit = ContentUnit (Id Instruction)
type IdGraphicContextUnit = GraphicContextUnit (Id Instruction)
type IdTextContext = TextContext (Id Instruction)
instance Monad m => Box m Instructions Content (Indexed Instruction) where
r Instructions = return . indexedInstructions
w Instructions indexedInstructions someContent =
return $ someContent {indexedInstructions}
parse :: ByteString -> Content
parse input =
let result = Atto.parseOnly (contentUnit `sepBy` blank) input in
let (contentUnits, indexedInstructions) = either (const ([], empty)) buildContent result in
let firstError = either Just (const Nothing) result in
Content {contentUnits, indexedInstructions, firstError}
buildContent :: [TmpContentUnit] -> ([IdContentUnit], Indexed Instruction)
buildContent instructionContentUnits =
runState (mapM registerContentUnit instructionContentUnits) empty
where
registerContentUnit (GraphicContext gc) =
GraphicContext <$> (mapM registerGraphicContext gc)
registerContentUnit (TextContext tc) = TextContext <$> (mapM register tc)
registerGraphicContext (GraphicInstruction gi) =
GraphicInstruction <$> (register gi)
registerGraphicContext (ContentUnit cu) =
ContentUnit <$> (registerContentUnit cu)
contentUnit :: Atto.Parser TmpContentUnit
contentUnit =
(GraphicContext <$> graphicContext)
<|> (TextContext <$> textContext)
where
graphicContext =
string "q" *> blank *> graphicContextUnit `sepBy` blank <* blank <* string "Q"
graphicContextUnit :: Atto.Parser TmpGraphicContextUnit
graphicContextUnit =
(GraphicInstruction <$> instruction)
<|> (ContentUnit <$> contentUnit)
instruction :: Atto.Parser Instruction
instruction = evalStateT stackParser []
where
stackParser = ((directObject <* blank) >>= push) <|> operator
push arg = modify (arg:) *> stackParser
textContext :: Atto.Parser TmpTextContext
textContext =
string "BT" *> blank *> instruction `sepBy` blank <* blank <* string "ET"
instance Output Content where
output (Content {contentUnits, indexedInstructions}) =
runReader (mconcat <$> mapM outputCU contentUnits) indexedInstructions
where
outputCU (GraphicContext gc) = do
inside <- mconcat <$> mapM outputGCU gc
return (line "q" `mappend` inside `mappend` line "Q")
outputCU (TextContext tc) = do
inside <- mconcat <$> mapM outputIId tc
return (line "BT" `mappend` inside `mappend` line "ET")
outputGCU (GraphicInstruction gi) = outputIId gi
outputGCU (ContentUnit cu) = outputCU cu
outputIId instructionId = asks (output . (`at` instructionId))