Finish the split of Navigation out of Page, generalize the use of MonadFail with a custom Error monad (~= Either String)
This commit is contained in:
parent
af994cb50c
commit
eb4d76002c
5 changed files with 47 additions and 60 deletions
|
@ -1,7 +1,5 @@
|
|||
{-# LANGUAGE NamedFieldPuns #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE FlexibleInstances #-}
|
||||
{-# LANGUAGE TypeSynonymInstances #-}
|
||||
module PDF.CMap (
|
||||
CMap
|
||||
, CMappers
|
||||
|
|
|
@ -1,12 +1,15 @@
|
|||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE ConstraintKinds #-}
|
||||
{-# LANGUAGE FlexibleContexts #-}
|
||||
module PDF.Content.Text (
|
||||
format
|
||||
, renderText
|
||||
) where
|
||||
|
||||
import Control.Monad (foldM)
|
||||
import Control.Monad.Reader (ReaderT, asks, runReaderT)
|
||||
import Control.Monad.State (MonadState(..), StateT, evalStateT)
|
||||
import Control.Monad.Fail (MonadFail(..))
|
||||
import Control.Monad.Reader (MonadReader(..), asks, runReaderT)
|
||||
import Control.Monad.State (MonadState(..), evalStateT)
|
||||
import Data.ByteString.Char8 (pack)
|
||||
import Data.Map ((!))
|
||||
import Data.Text (Text)
|
||||
|
@ -15,31 +18,32 @@ import PDF.Content.Operator (Instruction, Operator(..))
|
|||
import PDF.Content.Operator.Text (Operator(..))
|
||||
import PDF.Font (Font, FontSet, emptyFont)
|
||||
import PDF.Object (DirectObject(..), StringObject(..), toByteString)
|
||||
import Prelude hiding (fail)
|
||||
|
||||
type RenderingContext = ReaderT FontSet (Either String)
|
||||
type TextRenderingContext = StateT Font RenderingContext
|
||||
type TextContent m = (MonadReader FontSet m, MonadFail m)
|
||||
type FontContext m = (MonadState Font m, TextContent m)
|
||||
|
||||
decodeString :: StringObject -> TextRenderingContext Text
|
||||
decodeString :: FontContext m => StringObject -> m Text
|
||||
decodeString input = do
|
||||
font <- get
|
||||
either fail return . font $ toByteString input
|
||||
|
||||
renderText :: FontSet -> Content -> Either String [Text]
|
||||
renderText :: MonadFail m => FontSet -> Content -> m [Text]
|
||||
renderText fontSet (Content contentUnits) =
|
||||
runReaderT (concat <$> mapM renderContentUnit contentUnits) fontSet
|
||||
|
||||
renderContentUnit :: ContentUnit -> RenderingContext [Text]
|
||||
renderContentUnit :: TextContent m => ContentUnit -> m [Text]
|
||||
renderContentUnit (GraphicContext graphicContextUnits) =
|
||||
concat <$> mapM renderGraphicContextUnit graphicContextUnits
|
||||
renderContentUnit (TextContext instructions) =
|
||||
evalStateT (concat <$> mapM renderInstruction instructions) emptyFont
|
||||
|
||||
renderGraphicContextUnit :: GraphicContextUnit -> RenderingContext [Text]
|
||||
renderGraphicContextUnit :: TextContent m => GraphicContextUnit -> m [Text]
|
||||
renderGraphicContextUnit (GraphicInstruction _) = return []
|
||||
renderGraphicContextUnit (ContentUnit contentUnit) =
|
||||
renderContentUnit contentUnit
|
||||
|
||||
renderInstruction :: Instruction -> StateT Font RenderingContext [Text]
|
||||
renderInstruction :: FontContext m => Instruction -> m [Text]
|
||||
renderInstruction (Text Tf, [NameObject fontName, _]) =
|
||||
asks (! fontName) >>= put >> return []
|
||||
|
||||
|
|
|
@ -2,9 +2,11 @@ module PDF.Encoding (
|
|||
encoding
|
||||
) where
|
||||
|
||||
import Control.Monad.Fail (MonadFail(..))
|
||||
import PDF.Encoding.MacRoman (macRomanEncoding)
|
||||
import PDF.Font (Font)
|
||||
import Prelude hiding (fail)
|
||||
|
||||
encoding :: String -> Either String Font
|
||||
encoding "MacRomanEncoding" = Right macRomanEncoding
|
||||
encoding s = Left $ "Unknown encoding " ++ s
|
||||
encoding :: MonadFail m => String -> m Font
|
||||
encoding "MacRomanEncoding" = return macRomanEncoding
|
||||
encoding s = fail $ "Unknown encoding " ++ s
|
||||
|
|
|
@ -43,16 +43,13 @@ dictionaryById objectId = objectById objectId >>= castDictionary
|
|||
expected :: (MonadFail m, Show a) => String -> a -> m b
|
||||
expected name = fail . printf "Not a %s: %s" name . show
|
||||
|
||||
lookupField :: String -> Dictionary -> Either String DirectObject
|
||||
lookupField key aDictionary =
|
||||
maybe (Left errorMessage) Right (Map.lookup (Name key) aDictionary)
|
||||
getField :: MonadFail m => String -> Dictionary -> m DirectObject
|
||||
getField key aDictionary =
|
||||
maybe (fail errorMessage) return (Map.lookup (Name key) aDictionary)
|
||||
where
|
||||
errorMessage =
|
||||
printf "Key %s not found in dictionary %s" key (show aDictionary)
|
||||
|
||||
getField :: MonadFail m => String -> Dictionary -> m DirectObject
|
||||
getField key = either fail return . lookupField key
|
||||
|
||||
follow :: PDFContent m => DirectObject -> m Object
|
||||
follow directObject = castObjectId directObject >>= objectById
|
||||
|
||||
|
|
|
@ -1,18 +1,17 @@
|
|||
{-# LANGUAGE NamedFieldPuns #-}
|
||||
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
||||
module PDF.Pages (
|
||||
Page(..)
|
||||
, get
|
||||
, getAll
|
||||
) where
|
||||
|
||||
import Control.Applicative ((<|>))
|
||||
import Control.Monad (foldM)
|
||||
import Control.Monad.Except (Except, runExcept)
|
||||
import Control.Applicative (Alternative(..), (<|>))
|
||||
import Control.Monad (MonadPlus(..), foldM)
|
||||
import Control.Monad.Fail (MonadFail(..))
|
||||
import Control.Monad.RWS (RWST(..), ask, evalRWST, lift, modify)
|
||||
import Control.Monad.RWS (RWST(..), evalRWST, modify)
|
||||
import qualified Control.Monad.RWS as RWS (get)
|
||||
import Data.ByteString (ByteString)
|
||||
import Data.Map (Map, (!))
|
||||
import Data.Map (Map)
|
||||
import qualified Data.Map as Map (empty, fromList, insert, lookup, toList)
|
||||
import Data.Text (Text)
|
||||
import PDF.CMap (cMap)
|
||||
|
@ -22,7 +21,7 @@ import PDF.Encoding (encoding)
|
|||
import PDF.Font (Font, FontSet)
|
||||
import PDF.Object (
|
||||
Content(..), Dictionary, DirectObject(..), IndirectObjCoordinates(..)
|
||||
, Object(..), Name(..), Structure(..)
|
||||
, Name(..)
|
||||
,)
|
||||
import PDF.Object.Navigation (
|
||||
(//), dictionaryById, getDictionary, getField, follow, openStream, origin
|
||||
|
@ -32,12 +31,18 @@ import Prelude hiding (fail)
|
|||
import Text.Printf (printf)
|
||||
|
||||
type CachedFonts = Map ObjectId Font
|
||||
type T = RWST Content () CachedFonts (Except String)
|
||||
newtype Error a = Error {
|
||||
runError :: Either String a
|
||||
} deriving (Alternative, Functor, Applicative, Monad, MonadPlus)
|
||||
type T = RWST Content () CachedFonts Error
|
||||
data Page = Page {
|
||||
contents :: [Text]
|
||||
, source :: ObjectId
|
||||
}
|
||||
|
||||
instance MonadFail Error where
|
||||
fail = Error . Left
|
||||
|
||||
getFontDictionary :: Dictionary -> T Dictionary
|
||||
getFontDictionary pageDict =
|
||||
((pageDict // ["Resources", "Font"]) >>= getDictionary)
|
||||
|
@ -60,11 +65,13 @@ loadFont objectId = dictionaryById objectId >>= tryMappings
|
|||
<|> (getField "Encoding" dictionary >>= loadEncoding)
|
||||
<|> (fail $ unknownFormat (show objectId) (show dictionary))
|
||||
unknownFormat = printf "Unknown font format for object #%s : %s"
|
||||
loadCMap :: Dictionary -> T Font
|
||||
loadCMap dictionary =
|
||||
getField "ToUnicode" dictionary >>= follow >>= openStream >>= cMap
|
||||
loadEncoding (NameObject (Name name)) = return $ encoding name
|
||||
loadEncoding :: DirectObject -> T Font
|
||||
loadEncoding (NameObject (Name name)) = encoding name
|
||||
loadEncoding directObject =
|
||||
fail $ "Encoding must be a name, not that : %s" $ show directObject
|
||||
fail $ printf "Encoding must be a name, not that : %s" $ show directObject
|
||||
|
||||
loadFonts :: Dictionary -> T FontSet
|
||||
loadFonts = foldM addFont Map.empty . Map.toList
|
||||
|
@ -74,34 +81,13 @@ loadFonts = foldM addFont Map.empty . Map.toList
|
|||
flip (Map.insert name) output <$> cache loadFont objectId
|
||||
addFont output _ = return output
|
||||
|
||||
getObject :: ObjectId -> T Object
|
||||
getObject objectId = do
|
||||
content <- ask
|
||||
return (objects content ! objectId)
|
||||
|
||||
key :: String -> Dictionary -> Either String DirectObject
|
||||
key keyName dictionary =
|
||||
maybe (Left errorMessage) Right (Map.lookup (Name keyName) dictionary)
|
||||
where
|
||||
errorMessage =
|
||||
printf "Key %s not found in dictionary %s" keyName (show dictionary)
|
||||
|
||||
many :: DirectObject -> [DirectObject]
|
||||
many (Array l) = l
|
||||
many directObject = [directObject]
|
||||
|
||||
{-
|
||||
dict :: Object -> Either String Dictionary
|
||||
dict (Direct (Dictionary dictionary)) = Right dictionary
|
||||
dict obj = expected "dictionary" obj
|
||||
|
||||
dictObject :: String -> Dictionary -> T Dictionary
|
||||
dictObject keyName dictionary = key keyName dictionary //= follow \\= dict
|
||||
-}
|
||||
several :: DirectObject -> [DirectObject]
|
||||
several (Array l) = l
|
||||
several directObject = [directObject]
|
||||
|
||||
pagesList :: T [ObjectId]
|
||||
pagesList = do
|
||||
pages <- origin >>= (// ["Root", "Pages"]) >>= getDictionary
|
||||
pages <- (origin :: T Dictionary) >>= (// ["Root", "Pages"]) >>= getDictionary
|
||||
case Map.lookup (Name "Kids") pages of
|
||||
Just (Array kids) -> return $ getReferences kids
|
||||
_ -> return []
|
||||
|
@ -116,14 +102,14 @@ getReferences objects = do
|
|||
extractText :: Dictionary -> T [Text]
|
||||
extractText pageDict = do
|
||||
fonts <- loadFonts =<< getFontDictionary pageDict
|
||||
let objects = ((many <$> (key "Contents" pageDict)) :: Either String [DirectObject])
|
||||
concat <$> (objects //= (mapM $ loadContent fonts))
|
||||
objects <- several <$> getField "Contents" pageDict
|
||||
concat <$> mapM (loadContent fonts) objects
|
||||
where
|
||||
loadContent :: FontSet -> DirectObject -> T [Text]
|
||||
loadContent fonts directObject =
|
||||
follow directObject
|
||||
>>= openStream
|
||||
>>= either fail return . Content.parse
|
||||
>>= (either fail return . Content.parse)
|
||||
>>= renderText fonts
|
||||
|
||||
loadPage :: ObjectId -> T Page
|
||||
|
@ -132,7 +118,7 @@ loadPage source = do
|
|||
return $ Page {contents, source}
|
||||
|
||||
getAll :: Content -> Either String (Map Int Page)
|
||||
getAll content = runExcept $ fst <$> evalRWST getPages content Map.empty
|
||||
getAll content = runError $ fst <$> evalRWST getPages content Map.empty
|
||||
where
|
||||
numbered = Map.fromList . zip [1..]
|
||||
getPages = numbered <$> (mapM loadPage =<< pagesList)
|
||||
|
@ -140,7 +126,7 @@ getAll content = runExcept $ fst <$> evalRWST getPages content Map.empty
|
|||
get :: Content -> Int -> Either String Page
|
||||
get content pageNumber
|
||||
| pageNumber < 1 = Left "Pages start at 1"
|
||||
| otherwise = runExcept $ fst <$> evalRWST getPage content Map.empty
|
||||
| otherwise = runError $ fst <$> evalRWST getPage content Map.empty
|
||||
where
|
||||
firstPage [] = fail "Page is out of bounds"
|
||||
firstPage (p:_) = loadPage p
|
||||
|
|
Loading…
Reference in a new issue