118 lines
4.1 KiB
Haskell
118 lines
4.1 KiB
Haskell
{-# LANGUAGE NamedFieldPuns #-}
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
{-# LANGUAGE ConstraintKinds #-}
|
|
{-# LANGUAGE FlexibleContexts #-}
|
|
{-# LANGUAGE TypeSynonymInstances #-}
|
|
{-# LANGUAGE FlexibleInstances #-}
|
|
{-# LANGUAGE MultiParamTypeClasses #-}
|
|
module PDF.Content (
|
|
Content(..)
|
|
, ContentUnit(..)
|
|
, GraphicContextUnit(..)
|
|
, IdContentUnit
|
|
, IdGraphicContextUnit
|
|
, IdTextContext
|
|
, Instructions(..)
|
|
, TextContext
|
|
, parse
|
|
) where
|
|
|
|
import Control.Applicative ((<|>))
|
|
import Control.Monad.Reader (asks, runReader)
|
|
import Control.Monad.State.Strict (runState, evalStateT, modify)
|
|
import Data.Attoparsec.ByteString.Char8 (sepBy)
|
|
import qualified Data.Attoparsec.ByteString.Char8 as Atto (Parser, parseOnly)
|
|
import Data.ByteString (ByteString)
|
|
import Data.Id (Id(..), Indexed, at, empty, register)
|
|
import PDF.Box (Box(..))
|
|
import PDF.Content.Operator (Instruction, operator)
|
|
import PDF.Object (blank, directObject)
|
|
import PDF.Output (Output(..), line)
|
|
import PDF.Parser (string)
|
|
|
|
data Instructions = Instructions
|
|
|
|
data GraphicContextUnit a =
|
|
GraphicInstruction a
|
|
| ContentUnit (ContentUnit a)
|
|
deriving Show
|
|
type TextContext a = [a]
|
|
data ContentUnit a =
|
|
GraphicContext [GraphicContextUnit a]
|
|
| TextContext (TextContext a)
|
|
deriving Show
|
|
data Content = Content {
|
|
contentUnits :: [IdContentUnit]
|
|
, indexedInstructions :: Indexed Instruction
|
|
, firstError :: Maybe String
|
|
} deriving Show
|
|
|
|
type TmpContentUnit = ContentUnit Instruction
|
|
type TmpGraphicContextUnit = GraphicContextUnit Instruction
|
|
type TmpTextContext = TextContext Instruction
|
|
|
|
type IdContentUnit = ContentUnit (Id Instruction)
|
|
type IdGraphicContextUnit = GraphicContextUnit (Id Instruction)
|
|
type IdTextContext = TextContext (Id Instruction)
|
|
|
|
instance Monad m => Box m Instructions Content (Indexed Instruction) where
|
|
r Instructions = return . indexedInstructions
|
|
w Instructions indexedInstructions someContent =
|
|
return $ someContent {indexedInstructions}
|
|
|
|
parse :: ByteString -> Content
|
|
parse input =
|
|
let result = Atto.parseOnly (contentUnit `sepBy` blank) input in
|
|
let (contentUnits, indexedInstructions) = either (const ([], empty)) buildContent result in
|
|
let firstError = either Just (const Nothing) result in
|
|
Content {contentUnits, indexedInstructions, firstError}
|
|
|
|
buildContent :: [TmpContentUnit] -> ([IdContentUnit], Indexed Instruction)
|
|
buildContent instructionContentUnits =
|
|
runState (mapM registerContentUnit instructionContentUnits) empty
|
|
where
|
|
registerContentUnit (GraphicContext gc) =
|
|
GraphicContext <$> (mapM registerGraphicContext gc)
|
|
registerContentUnit (TextContext tc) = TextContext <$> (mapM register tc)
|
|
registerGraphicContext (GraphicInstruction gi) =
|
|
GraphicInstruction <$> (register gi)
|
|
registerGraphicContext (ContentUnit cu) =
|
|
ContentUnit <$> (registerContentUnit cu)
|
|
|
|
contentUnit :: Atto.Parser TmpContentUnit
|
|
contentUnit =
|
|
(GraphicContext <$> graphicContext)
|
|
<|> (TextContext <$> textContext)
|
|
where
|
|
graphicContext =
|
|
string "q" *> blank *> graphicContextUnit `sepBy` blank <* blank <* string "Q"
|
|
|
|
graphicContextUnit :: Atto.Parser TmpGraphicContextUnit
|
|
graphicContextUnit =
|
|
(GraphicInstruction <$> instruction)
|
|
<|> (ContentUnit <$> contentUnit)
|
|
|
|
instruction :: Atto.Parser Instruction
|
|
instruction = evalStateT stackParser []
|
|
where
|
|
stackParser = ((directObject <* blank) >>= push) <|> operator
|
|
push arg = modify (arg:) *> stackParser
|
|
|
|
textContext :: Atto.Parser TmpTextContext
|
|
textContext =
|
|
string "BT" *> blank *> instruction `sepBy` blank <* blank <* string "ET"
|
|
|
|
instance Output Content where
|
|
output (Content {contentUnits, indexedInstructions}) =
|
|
runReader (mconcat <$> mapM outputCU contentUnits) indexedInstructions
|
|
where
|
|
outputCU (GraphicContext gc) = do
|
|
inside <- mconcat <$> mapM outputGCU gc
|
|
return (line "q" `mappend` inside `mappend` line "Q")
|
|
outputCU (TextContext tc) = do
|
|
inside <- mconcat <$> mapM outputIId tc
|
|
return (line "BT" `mappend` inside `mappend` line "ET")
|
|
outputGCU (GraphicInstruction gi) = outputIId gi
|
|
outputGCU (ContentUnit cu) = outputCU cu
|
|
outputIId instructionId = asks (output . (`at` instructionId))
|