Finish the split of Navigation out of Page, generalize the use of MonadFail with a custom Error monad (~= Either String)

This commit is contained in:
Tissevert 2020-02-11 22:41:46 +01:00
parent af994cb50c
commit eb4d76002c
5 changed files with 47 additions and 60 deletions

View File

@ -1,7 +1,5 @@
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE TypeSynonymInstances #-}
module PDF.CMap (
CMap
, CMappers

View File

@ -1,12 +1,15 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE FlexibleContexts #-}
module PDF.Content.Text (
format
, renderText
) where
import Control.Monad (foldM)
import Control.Monad.Reader (ReaderT, asks, runReaderT)
import Control.Monad.State (MonadState(..), StateT, evalStateT)
import Control.Monad.Fail (MonadFail(..))
import Control.Monad.Reader (MonadReader(..), asks, runReaderT)
import Control.Monad.State (MonadState(..), evalStateT)
import Data.ByteString.Char8 (pack)
import Data.Map ((!))
import Data.Text (Text)
@ -15,31 +18,32 @@ import PDF.Content.Operator (Instruction, Operator(..))
import PDF.Content.Operator.Text (Operator(..))
import PDF.Font (Font, FontSet, emptyFont)
import PDF.Object (DirectObject(..), StringObject(..), toByteString)
import Prelude hiding (fail)
type RenderingContext = ReaderT FontSet (Either String)
type TextRenderingContext = StateT Font RenderingContext
type TextContent m = (MonadReader FontSet m, MonadFail m)
type FontContext m = (MonadState Font m, TextContent m)
decodeString :: StringObject -> TextRenderingContext Text
decodeString :: FontContext m => StringObject -> m Text
decodeString input = do
font <- get
either fail return . font $ toByteString input
renderText :: FontSet -> Content -> Either String [Text]
renderText :: MonadFail m => FontSet -> Content -> m [Text]
renderText fontSet (Content contentUnits) =
runReaderT (concat <$> mapM renderContentUnit contentUnits) fontSet
renderContentUnit :: ContentUnit -> RenderingContext [Text]
renderContentUnit :: TextContent m => ContentUnit -> m [Text]
renderContentUnit (GraphicContext graphicContextUnits) =
concat <$> mapM renderGraphicContextUnit graphicContextUnits
renderContentUnit (TextContext instructions) =
evalStateT (concat <$> mapM renderInstruction instructions) emptyFont
renderGraphicContextUnit :: GraphicContextUnit -> RenderingContext [Text]
renderGraphicContextUnit :: TextContent m => GraphicContextUnit -> m [Text]
renderGraphicContextUnit (GraphicInstruction _) = return []
renderGraphicContextUnit (ContentUnit contentUnit) =
renderContentUnit contentUnit
renderInstruction :: Instruction -> StateT Font RenderingContext [Text]
renderInstruction :: FontContext m => Instruction -> m [Text]
renderInstruction (Text Tf, [NameObject fontName, _]) =
asks (! fontName) >>= put >> return []

View File

@ -2,9 +2,11 @@ module PDF.Encoding (
encoding
) where
import Control.Monad.Fail (MonadFail(..))
import PDF.Encoding.MacRoman (macRomanEncoding)
import PDF.Font (Font)
import Prelude hiding (fail)
encoding :: String -> Either String Font
encoding "MacRomanEncoding" = Right macRomanEncoding
encoding s = Left $ "Unknown encoding " ++ s
encoding :: MonadFail m => String -> m Font
encoding "MacRomanEncoding" = return macRomanEncoding
encoding s = fail $ "Unknown encoding " ++ s

View File

@ -43,16 +43,13 @@ dictionaryById objectId = objectById objectId >>= castDictionary
expected :: (MonadFail m, Show a) => String -> a -> m b
expected name = fail . printf "Not a %s: %s" name . show
lookupField :: String -> Dictionary -> Either String DirectObject
lookupField key aDictionary =
maybe (Left errorMessage) Right (Map.lookup (Name key) aDictionary)
getField :: MonadFail m => String -> Dictionary -> m DirectObject
getField key aDictionary =
maybe (fail errorMessage) return (Map.lookup (Name key) aDictionary)
where
errorMessage =
printf "Key %s not found in dictionary %s" key (show aDictionary)
getField :: MonadFail m => String -> Dictionary -> m DirectObject
getField key = either fail return . lookupField key
follow :: PDFContent m => DirectObject -> m Object
follow directObject = castObjectId directObject >>= objectById

View File

@ -1,18 +1,17 @@
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
module PDF.Pages (
Page(..)
, get
, getAll
) where
import Control.Applicative ((<|>))
import Control.Monad (foldM)
import Control.Monad.Except (Except, runExcept)
import Control.Applicative (Alternative(..), (<|>))
import Control.Monad (MonadPlus(..), foldM)
import Control.Monad.Fail (MonadFail(..))
import Control.Monad.RWS (RWST(..), ask, evalRWST, lift, modify)
import Control.Monad.RWS (RWST(..), evalRWST, modify)
import qualified Control.Monad.RWS as RWS (get)
import Data.ByteString (ByteString)
import Data.Map (Map, (!))
import Data.Map (Map)
import qualified Data.Map as Map (empty, fromList, insert, lookup, toList)
import Data.Text (Text)
import PDF.CMap (cMap)
@ -22,7 +21,7 @@ import PDF.Encoding (encoding)
import PDF.Font (Font, FontSet)
import PDF.Object (
Content(..), Dictionary, DirectObject(..), IndirectObjCoordinates(..)
, Object(..), Name(..), Structure(..)
, Name(..)
,)
import PDF.Object.Navigation (
(//), dictionaryById, getDictionary, getField, follow, openStream, origin
@ -32,12 +31,18 @@ import Prelude hiding (fail)
import Text.Printf (printf)
type CachedFonts = Map ObjectId Font
type T = RWST Content () CachedFonts (Except String)
newtype Error a = Error {
runError :: Either String a
} deriving (Alternative, Functor, Applicative, Monad, MonadPlus)
type T = RWST Content () CachedFonts Error
data Page = Page {
contents :: [Text]
, source :: ObjectId
}
instance MonadFail Error where
fail = Error . Left
getFontDictionary :: Dictionary -> T Dictionary
getFontDictionary pageDict =
((pageDict // ["Resources", "Font"]) >>= getDictionary)
@ -60,11 +65,13 @@ loadFont objectId = dictionaryById objectId >>= tryMappings
<|> (getField "Encoding" dictionary >>= loadEncoding)
<|> (fail $ unknownFormat (show objectId) (show dictionary))
unknownFormat = printf "Unknown font format for object #%s : %s"
loadCMap :: Dictionary -> T Font
loadCMap dictionary =
getField "ToUnicode" dictionary >>= follow >>= openStream >>= cMap
loadEncoding (NameObject (Name name)) = return $ encoding name
loadEncoding :: DirectObject -> T Font
loadEncoding (NameObject (Name name)) = encoding name
loadEncoding directObject =
fail $ "Encoding must be a name, not that : %s" $ show directObject
fail $ printf "Encoding must be a name, not that : %s" $ show directObject
loadFonts :: Dictionary -> T FontSet
loadFonts = foldM addFont Map.empty . Map.toList
@ -74,34 +81,13 @@ loadFonts = foldM addFont Map.empty . Map.toList
flip (Map.insert name) output <$> cache loadFont objectId
addFont output _ = return output
getObject :: ObjectId -> T Object
getObject objectId = do
content <- ask
return (objects content ! objectId)
key :: String -> Dictionary -> Either String DirectObject
key keyName dictionary =
maybe (Left errorMessage) Right (Map.lookup (Name keyName) dictionary)
where
errorMessage =
printf "Key %s not found in dictionary %s" keyName (show dictionary)
many :: DirectObject -> [DirectObject]
many (Array l) = l
many directObject = [directObject]
{-
dict :: Object -> Either String Dictionary
dict (Direct (Dictionary dictionary)) = Right dictionary
dict obj = expected "dictionary" obj
dictObject :: String -> Dictionary -> T Dictionary
dictObject keyName dictionary = key keyName dictionary //= follow \\= dict
-}
several :: DirectObject -> [DirectObject]
several (Array l) = l
several directObject = [directObject]
pagesList :: T [ObjectId]
pagesList = do
pages <- origin >>= (// ["Root", "Pages"]) >>= getDictionary
pages <- (origin :: T Dictionary) >>= (// ["Root", "Pages"]) >>= getDictionary
case Map.lookup (Name "Kids") pages of
Just (Array kids) -> return $ getReferences kids
_ -> return []
@ -116,14 +102,14 @@ getReferences objects = do
extractText :: Dictionary -> T [Text]
extractText pageDict = do
fonts <- loadFonts =<< getFontDictionary pageDict
let objects = ((many <$> (key "Contents" pageDict)) :: Either String [DirectObject])
concat <$> (objects //= (mapM $ loadContent fonts))
objects <- several <$> getField "Contents" pageDict
concat <$> mapM (loadContent fonts) objects
where
loadContent :: FontSet -> DirectObject -> T [Text]
loadContent fonts directObject =
follow directObject
>>= openStream
>>= either fail return . Content.parse
>>= (either fail return . Content.parse)
>>= renderText fonts
loadPage :: ObjectId -> T Page
@ -132,7 +118,7 @@ loadPage source = do
return $ Page {contents, source}
getAll :: Content -> Either String (Map Int Page)
getAll content = runExcept $ fst <$> evalRWST getPages content Map.empty
getAll content = runError $ fst <$> evalRWST getPages content Map.empty
where
numbered = Map.fromList . zip [1..]
getPages = numbered <$> (mapM loadPage =<< pagesList)
@ -140,7 +126,7 @@ getAll content = runExcept $ fst <$> evalRWST getPages content Map.empty
get :: Content -> Int -> Either String Page
get content pageNumber
| pageNumber < 1 = Left "Pages start at 1"
| otherwise = runExcept $ fst <$> evalRWST getPage content Map.empty
| otherwise = runError $ fst <$> evalRWST getPage content Map.empty
where
firstPage [] = fail "Page is out of bounds"
firstPage (p:_) = loadPage p