Disable text rendering and font loading from the Page abstraction, this code will have to be moved into a separate Box instance

This commit is contained in:
Tissevert 2020-03-05 17:40:58 +01:00
parent 50ac0692b2
commit 90348c57d6

View file

@ -23,6 +23,7 @@ import qualified Data.Map as Map (empty, fromList, insert, lookup, toList)
import Data.Text (Text)
import PDF.Box (Box(..))
import PDF.CMap (cMap)
import PDF.Content (Content(..))
import qualified PDF.Content as Content (parse)
import PDF.Content.Text (renderText)
import PDF.Encoding (encoding)
@ -43,10 +44,11 @@ import Text.Printf (printf)
type CachedFonts = Map ObjectId Font
type FontCache m = (MonadState CachedFonts m, PDFContent m)
data Page = Page {
contents :: [Text]
contents :: [Content]
, source :: ObjectId
}
{-
getFontDictionary :: PDFContent m => Object -> m Dictionary
getFontDictionary pageObj =
(pageObj >// ["Resources", "Font"] >>= getDictionary)
@ -80,10 +82,7 @@ loadFonts = foldM addFont Map.empty . Map.toList
addFont output (name, Reference (IndirectObjCoordinates {objectId})) =
flip (Map.insert name) output <$> cache loadFont objectId
addFont output _ = return output
several :: PDFContent m => Object -> m [Object]
several (Direct (Array l)) = mapM castObject l
several object = return [object]
-}
pagesList :: PDFContent m => m [ObjectId]
pagesList = do
@ -99,20 +98,19 @@ getReferences objects = do
Reference (IndirectObjCoordinates {objectId}) -> [objectId]
_ -> []
extractText :: FontCache m => Object -> m [Text]
extractText pageObj = do
fonts <- loadFonts =<< getFontDictionary pageObj
objects <- pageObj >./ "Contents" >>= several
concat <$> mapM (loadContent fonts) objects
extractContent :: FontCache m => Object -> m [Content]
extractContent pageObj = do
--fonts <- loadFonts =<< getFontDictionary pageObj
pageObj >./ "Contents" >>= several >>= mapM loadContent
where
loadContent fonts object =
r Clear object
>>= (either fail return . Content.parse)
>>= renderText fonts
loadContent object = r Clear object >>= (either fail return . Content.parse)
several (Direct (Array l)) = mapM castObject l
several object = return [object]
-- >>= renderText fonts
loadPage :: FontCache m => ObjectId -> m Page
loadPage source = do
contents <- extractText =<< objectById source
contents <- extractContent =<< objectById source
return $ Page {contents, source}
data Pages = Pages