diff --git a/src/PDF/Content.hs b/src/PDF/Content.hs index c9e4a7a..7ebca2f 100644 --- a/src/PDF/Content.hs +++ b/src/PDF/Content.hs @@ -1,37 +1,64 @@ +{-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ConstraintKinds #-} +{-# LANGUAGE FlexibleContexts #-} module PDF.Content ( Content(..) , ContentUnit(..) , GraphicContextUnit(..) + , IndexedInstructions + , InstructionId , TextContext , content , parse ) where import Control.Applicative ((<|>)) -import Control.Monad.State (evalStateT, modify) +import Control.Monad.Reader (asks, runReader) +import Control.Monad.State (MonadState, evalStateT, gets, modify, runStateT) import Data.Attoparsec.ByteString.Char8 (sepBy) import Data.ByteString (ByteString) +import Data.Map (Map, (!)) +import qualified Data.Map as Map (empty, insert, size) import PDF.Content.Operator (Instruction, operator) import PDF.Object (blank, directObject) import PDF.Output (Output(..), line) -import PDF.Parser (MonadParser, (), evalParser, string) +import PDF.Parser (MonadParser, evalParser, string) + +newtype InstructionId = InstructionId Int deriving (Eq, Ord, Show) + +type IndexedInstructions = Map InstructionId Instruction data GraphicContextUnit = - GraphicInstruction Instruction + GraphicInstruction InstructionId | ContentUnit ContentUnit deriving Show -type TextContext = [Instruction] +type TextContext = [InstructionId] data ContentUnit = GraphicContext [GraphicContextUnit] | TextContext TextContext deriving Show -newtype Content = Content [ContentUnit] deriving Show +data Content = Content { + contentUnits :: [ContentUnit] + , indexedInstructions :: IndexedInstructions + } deriving Show + +type InstructionParser m = (MonadParser m, MonadState IndexedInstructions m) + +register :: MonadState IndexedInstructions m => Instruction -> m InstructionId +register newInstruction = do + newInstructionID <- gets (InstructionId . Map.size) + modify (Map.insert newInstructionID newInstruction) + return newInstructionID content :: MonadParser m => m Content -content = Content <$> contentUnit `sepBy` blank "content" +content = do + (contentUnits, indexedInstructions) <- runInstructionParser + return $ Content {contentUnits, indexedInstructions} + where + runInstructionParser = runStateT (contentUnit `sepBy` blank) Map.empty -contentUnit :: MonadParser m => m ContentUnit +contentUnit :: InstructionParser m => m ContentUnit contentUnit = (GraphicContext <$> graphicContext) <|> (TextContext <$> textContext) @@ -39,13 +66,13 @@ contentUnit = graphicContext = string "q" *> blank *> graphicContextUnit `sepBy` blank <* blank <* string "Q" -graphicContextUnit :: MonadParser m => m GraphicContextUnit +graphicContextUnit :: InstructionParser m => m GraphicContextUnit graphicContextUnit = (GraphicInstruction <$> instruction) <|> (ContentUnit <$> contentUnit) -instruction :: MonadParser m => m Instruction -instruction = evalStateT stackParser [] +instruction :: InstructionParser m => m InstructionId +instruction = evalStateT stackParser [] >>= register where stackParser = ((directObject <* blank) >>= push) <|> operator push arg = modify (arg:) *> stackParser @@ -53,17 +80,20 @@ instruction = evalStateT stackParser [] parse :: ByteString -> Either String Content parse = evalParser content () -textContext :: MonadParser m => m TextContext +textContext :: InstructionParser m => m TextContext textContext = string "BT" *> blank *> instruction `sepBy` blank <* blank <* string "ET" instance Output Content where - output (Content contentUnits) = output contentUnits - -instance Output ContentUnit where - output (GraphicContext gc) = line "q" `mappend` output gc `mappend` line "Q" - output (TextContext tc) = line "BT" `mappend` output tc `mappend` line "ET" - -instance Output GraphicContextUnit where - output (GraphicInstruction gi) = output gi - output (ContentUnit cu) = output cu + output (Content {contentUnits, indexedInstructions}) = + runReader (mconcat <$> mapM outputCU contentUnits) indexedInstructions + where + outputCU (GraphicContext gc) = do + inside <- mconcat <$> mapM outputGCU gc + return (line "q" `mappend` inside `mappend` line "Q") + outputCU (TextContext tc) = do + inside <- mconcat <$> mapM outputIId tc + return (line "BT" `mappend` inside `mappend` line "ET") + outputGCU (GraphicInstruction gi) = outputIId gi + outputGCU (ContentUnit cu) = outputCU cu + outputIId instructionId = asks (output . (! instructionId)) diff --git a/src/PDF/Content/Text.hs b/src/PDF/Content/Text.hs index 65bc664..403f386 100644 --- a/src/PDF/Content/Text.hs +++ b/src/PDF/Content/Text.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE FlexibleContexts #-} @@ -13,14 +14,23 @@ import Control.Monad.State (MonadState(..), evalStateT) import Data.ByteString.Char8 (pack) import Data.Map ((!)) import Data.Text (Text) -import PDF.Content (Content(..), ContentUnit(..), GraphicContextUnit(..)) +import PDF.Content ( + Content, ContentUnit(..), IndexedInstructions, InstructionId + , GraphicContextUnit(..), contentUnits + ) +import qualified PDF.Content as Content (Content(..)) import PDF.Content.Operator (Instruction, Operator(..)) import PDF.Content.Operator.Text (Operator(..)) import PDF.Font (Font, FontSet, emptyFont) import PDF.Object (DirectObject(..), StringObject(..), toByteString) import Prelude hiding (fail) -type TextContent m = (MonadReader FontSet m, MonadFail m) +data ROContext = ROContext { + indexedInstructions :: IndexedInstructions + , fontSet :: FontSet + } + +type TextContent m = (MonadReader ROContext m, MonadFail m) type FontContext m = (MonadState Font m, TextContent m) decodeString :: FontContext m => StringObject -> m Text @@ -29,23 +39,29 @@ decodeString input = do either fail return . font $ toByteString input renderText :: MonadFail m => FontSet -> Content -> m [Text] -renderText fontSet (Content contentUnits) = - runReaderT (concat <$> mapM renderContentUnit contentUnits) fontSet +renderText fontSet (Content.Content {contentUnits, Content.indexedInstructions}) = + runReaderT (concat <$> mapM renderContentUnit contentUnits) roContext + where + roContext = ROContext {indexedInstructions, fontSet} renderContentUnit :: TextContent m => ContentUnit -> m [Text] renderContentUnit (GraphicContext graphicContextUnits) = concat <$> mapM renderGraphicContextUnit graphicContextUnits -renderContentUnit (TextContext instructions) = - evalStateT (concat <$> mapM renderInstruction instructions) emptyFont +renderContentUnit (TextContext instructionIds) = + evalStateT (concat <$> mapM renderInstructionId instructionIds) emptyFont renderGraphicContextUnit :: TextContent m => GraphicContextUnit -> m [Text] renderGraphicContextUnit (GraphicInstruction _) = return [] renderGraphicContextUnit (ContentUnit contentUnit) = renderContentUnit contentUnit +renderInstructionId :: FontContext m => InstructionId -> m [Text] +renderInstructionId instructionId = + asks ((! instructionId) . indexedInstructions) >>= renderInstruction + renderInstruction :: FontContext m => Instruction -> m [Text] renderInstruction (Text Tf, [NameObject fontName, _]) = - asks (! fontName) >>= put >> return [] + asks ((! fontName) . fontSet) >>= put >> return [] renderInstruction (Text Tstar, []) = return ["\n"]