From eb4d76002ca91228fc0eb5e2eb4e1553f1a99888 Mon Sep 17 00:00:00 2001 From: Tissevert Date: Tue, 11 Feb 2020 22:41:46 +0100 Subject: [PATCH] Finish the split of Navigation out of Page, generalize the use of MonadFail with a custom Error monad (~= Either String) --- src/PDF/CMap.hs | 2 -- src/PDF/Content/Text.hs | 22 +++++++----- src/PDF/Encoding.hs | 8 +++-- src/PDF/Object/Navigation.hs | 9 ++--- src/PDF/Pages.hs | 66 ++++++++++++++---------------------- 5 files changed, 47 insertions(+), 60 deletions(-) diff --git a/src/PDF/CMap.hs b/src/PDF/CMap.hs index 8f55b22..a194113 100644 --- a/src/PDF/CMap.hs +++ b/src/PDF/CMap.hs @@ -1,7 +1,5 @@ {-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE TypeSynonymInstances #-} module PDF.CMap ( CMap , CMappers diff --git a/src/PDF/Content/Text.hs b/src/PDF/Content/Text.hs index 7b6bdb6..65bc664 100644 --- a/src/PDF/Content/Text.hs +++ b/src/PDF/Content/Text.hs @@ -1,12 +1,15 @@ {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ConstraintKinds #-} +{-# LANGUAGE FlexibleContexts #-} module PDF.Content.Text ( format , renderText ) where import Control.Monad (foldM) -import Control.Monad.Reader (ReaderT, asks, runReaderT) -import Control.Monad.State (MonadState(..), StateT, evalStateT) +import Control.Monad.Fail (MonadFail(..)) +import Control.Monad.Reader (MonadReader(..), asks, runReaderT) +import Control.Monad.State (MonadState(..), evalStateT) import Data.ByteString.Char8 (pack) import Data.Map ((!)) import Data.Text (Text) @@ -15,31 +18,32 @@ import PDF.Content.Operator (Instruction, Operator(..)) import PDF.Content.Operator.Text (Operator(..)) import PDF.Font (Font, FontSet, emptyFont) import PDF.Object (DirectObject(..), StringObject(..), toByteString) +import Prelude hiding (fail) -type RenderingContext = ReaderT FontSet (Either String) -type TextRenderingContext = StateT Font RenderingContext +type TextContent m = (MonadReader FontSet m, MonadFail m) +type FontContext m = (MonadState Font m, TextContent m) -decodeString :: StringObject -> TextRenderingContext Text +decodeString :: FontContext m => StringObject -> m Text decodeString input = do font <- get either fail return . font $ toByteString input -renderText :: FontSet -> Content -> Either String [Text] +renderText :: MonadFail m => FontSet -> Content -> m [Text] renderText fontSet (Content contentUnits) = runReaderT (concat <$> mapM renderContentUnit contentUnits) fontSet -renderContentUnit :: ContentUnit -> RenderingContext [Text] +renderContentUnit :: TextContent m => ContentUnit -> m [Text] renderContentUnit (GraphicContext graphicContextUnits) = concat <$> mapM renderGraphicContextUnit graphicContextUnits renderContentUnit (TextContext instructions) = evalStateT (concat <$> mapM renderInstruction instructions) emptyFont -renderGraphicContextUnit :: GraphicContextUnit -> RenderingContext [Text] +renderGraphicContextUnit :: TextContent m => GraphicContextUnit -> m [Text] renderGraphicContextUnit (GraphicInstruction _) = return [] renderGraphicContextUnit (ContentUnit contentUnit) = renderContentUnit contentUnit -renderInstruction :: Instruction -> StateT Font RenderingContext [Text] +renderInstruction :: FontContext m => Instruction -> m [Text] renderInstruction (Text Tf, [NameObject fontName, _]) = asks (! fontName) >>= put >> return [] diff --git a/src/PDF/Encoding.hs b/src/PDF/Encoding.hs index f0c0a31..8d14e6a 100644 --- a/src/PDF/Encoding.hs +++ b/src/PDF/Encoding.hs @@ -2,9 +2,11 @@ module PDF.Encoding ( encoding ) where +import Control.Monad.Fail (MonadFail(..)) import PDF.Encoding.MacRoman (macRomanEncoding) import PDF.Font (Font) +import Prelude hiding (fail) -encoding :: String -> Either String Font -encoding "MacRomanEncoding" = Right macRomanEncoding -encoding s = Left $ "Unknown encoding " ++ s +encoding :: MonadFail m => String -> m Font +encoding "MacRomanEncoding" = return macRomanEncoding +encoding s = fail $ "Unknown encoding " ++ s diff --git a/src/PDF/Object/Navigation.hs b/src/PDF/Object/Navigation.hs index b95b3cc..96d8230 100644 --- a/src/PDF/Object/Navigation.hs +++ b/src/PDF/Object/Navigation.hs @@ -43,16 +43,13 @@ dictionaryById objectId = objectById objectId >>= castDictionary expected :: (MonadFail m, Show a) => String -> a -> m b expected name = fail . printf "Not a %s: %s" name . show -lookupField :: String -> Dictionary -> Either String DirectObject -lookupField key aDictionary = - maybe (Left errorMessage) Right (Map.lookup (Name key) aDictionary) +getField :: MonadFail m => String -> Dictionary -> m DirectObject +getField key aDictionary = + maybe (fail errorMessage) return (Map.lookup (Name key) aDictionary) where errorMessage = printf "Key %s not found in dictionary %s" key (show aDictionary) -getField :: MonadFail m => String -> Dictionary -> m DirectObject -getField key = either fail return . lookupField key - follow :: PDFContent m => DirectObject -> m Object follow directObject = castObjectId directObject >>= objectById diff --git a/src/PDF/Pages.hs b/src/PDF/Pages.hs index 28f6733..e24281e 100755 --- a/src/PDF/Pages.hs +++ b/src/PDF/Pages.hs @@ -1,18 +1,17 @@ {-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} module PDF.Pages ( Page(..) , get , getAll ) where -import Control.Applicative ((<|>)) -import Control.Monad (foldM) -import Control.Monad.Except (Except, runExcept) +import Control.Applicative (Alternative(..), (<|>)) +import Control.Monad (MonadPlus(..), foldM) import Control.Monad.Fail (MonadFail(..)) -import Control.Monad.RWS (RWST(..), ask, evalRWST, lift, modify) +import Control.Monad.RWS (RWST(..), evalRWST, modify) import qualified Control.Monad.RWS as RWS (get) -import Data.ByteString (ByteString) -import Data.Map (Map, (!)) +import Data.Map (Map) import qualified Data.Map as Map (empty, fromList, insert, lookup, toList) import Data.Text (Text) import PDF.CMap (cMap) @@ -22,7 +21,7 @@ import PDF.Encoding (encoding) import PDF.Font (Font, FontSet) import PDF.Object ( Content(..), Dictionary, DirectObject(..), IndirectObjCoordinates(..) - , Object(..), Name(..), Structure(..) + , Name(..) ,) import PDF.Object.Navigation ( (//), dictionaryById, getDictionary, getField, follow, openStream, origin @@ -32,12 +31,18 @@ import Prelude hiding (fail) import Text.Printf (printf) type CachedFonts = Map ObjectId Font -type T = RWST Content () CachedFonts (Except String) +newtype Error a = Error { + runError :: Either String a + } deriving (Alternative, Functor, Applicative, Monad, MonadPlus) +type T = RWST Content () CachedFonts Error data Page = Page { contents :: [Text] , source :: ObjectId } +instance MonadFail Error where + fail = Error . Left + getFontDictionary :: Dictionary -> T Dictionary getFontDictionary pageDict = ((pageDict // ["Resources", "Font"]) >>= getDictionary) @@ -60,11 +65,13 @@ loadFont objectId = dictionaryById objectId >>= tryMappings <|> (getField "Encoding" dictionary >>= loadEncoding) <|> (fail $ unknownFormat (show objectId) (show dictionary)) unknownFormat = printf "Unknown font format for object #%s : %s" + loadCMap :: Dictionary -> T Font loadCMap dictionary = getField "ToUnicode" dictionary >>= follow >>= openStream >>= cMap - loadEncoding (NameObject (Name name)) = return $ encoding name + loadEncoding :: DirectObject -> T Font + loadEncoding (NameObject (Name name)) = encoding name loadEncoding directObject = - fail $ "Encoding must be a name, not that : %s" $ show directObject + fail $ printf "Encoding must be a name, not that : %s" $ show directObject loadFonts :: Dictionary -> T FontSet loadFonts = foldM addFont Map.empty . Map.toList @@ -74,34 +81,13 @@ loadFonts = foldM addFont Map.empty . Map.toList flip (Map.insert name) output <$> cache loadFont objectId addFont output _ = return output -getObject :: ObjectId -> T Object -getObject objectId = do - content <- ask - return (objects content ! objectId) - -key :: String -> Dictionary -> Either String DirectObject -key keyName dictionary = - maybe (Left errorMessage) Right (Map.lookup (Name keyName) dictionary) - where - errorMessage = - printf "Key %s not found in dictionary %s" keyName (show dictionary) - -many :: DirectObject -> [DirectObject] -many (Array l) = l -many directObject = [directObject] - -{- -dict :: Object -> Either String Dictionary -dict (Direct (Dictionary dictionary)) = Right dictionary -dict obj = expected "dictionary" obj - -dictObject :: String -> Dictionary -> T Dictionary -dictObject keyName dictionary = key keyName dictionary //= follow \\= dict --} +several :: DirectObject -> [DirectObject] +several (Array l) = l +several directObject = [directObject] pagesList :: T [ObjectId] pagesList = do - pages <- origin >>= (// ["Root", "Pages"]) >>= getDictionary + pages <- (origin :: T Dictionary) >>= (// ["Root", "Pages"]) >>= getDictionary case Map.lookup (Name "Kids") pages of Just (Array kids) -> return $ getReferences kids _ -> return [] @@ -116,14 +102,14 @@ getReferences objects = do extractText :: Dictionary -> T [Text] extractText pageDict = do fonts <- loadFonts =<< getFontDictionary pageDict - let objects = ((many <$> (key "Contents" pageDict)) :: Either String [DirectObject]) - concat <$> (objects //= (mapM $ loadContent fonts)) + objects <- several <$> getField "Contents" pageDict + concat <$> mapM (loadContent fonts) objects where loadContent :: FontSet -> DirectObject -> T [Text] loadContent fonts directObject = follow directObject >>= openStream - >>= either fail return . Content.parse + >>= (either fail return . Content.parse) >>= renderText fonts loadPage :: ObjectId -> T Page @@ -132,7 +118,7 @@ loadPage source = do return $ Page {contents, source} getAll :: Content -> Either String (Map Int Page) -getAll content = runExcept $ fst <$> evalRWST getPages content Map.empty +getAll content = runError $ fst <$> evalRWST getPages content Map.empty where numbered = Map.fromList . zip [1..] getPages = numbered <$> (mapM loadPage =<< pagesList) @@ -140,7 +126,7 @@ getAll content = runExcept $ fst <$> evalRWST getPages content Map.empty get :: Content -> Int -> Either String Page get content pageNumber | pageNumber < 1 = Left "Pages start at 1" - | otherwise = runExcept $ fst <$> evalRWST getPage content Map.empty + | otherwise = runError $ fst <$> evalRWST getPage content Map.empty where firstPage [] = fail "Page is out of bounds" firstPage (p:_) = loadPage p