Disable text rendering and font loading from the Page abstraction, this code will have to be moved into a separate Box instance
This commit is contained in:
parent
50ac0692b2
commit
90348c57d6
1 changed files with 13 additions and 15 deletions
|
@ -23,6 +23,7 @@ import qualified Data.Map as Map (empty, fromList, insert, lookup, toList)
|
|||
import Data.Text (Text)
|
||||
import PDF.Box (Box(..))
|
||||
import PDF.CMap (cMap)
|
||||
import PDF.Content (Content(..))
|
||||
import qualified PDF.Content as Content (parse)
|
||||
import PDF.Content.Text (renderText)
|
||||
import PDF.Encoding (encoding)
|
||||
|
@ -43,10 +44,11 @@ import Text.Printf (printf)
|
|||
type CachedFonts = Map ObjectId Font
|
||||
type FontCache m = (MonadState CachedFonts m, PDFContent m)
|
||||
data Page = Page {
|
||||
contents :: [Text]
|
||||
contents :: [Content]
|
||||
, source :: ObjectId
|
||||
}
|
||||
|
||||
{-
|
||||
getFontDictionary :: PDFContent m => Object -> m Dictionary
|
||||
getFontDictionary pageObj =
|
||||
(pageObj >// ["Resources", "Font"] >>= getDictionary)
|
||||
|
@ -80,10 +82,7 @@ loadFonts = foldM addFont Map.empty . Map.toList
|
|||
addFont output (name, Reference (IndirectObjCoordinates {objectId})) =
|
||||
flip (Map.insert name) output <$> cache loadFont objectId
|
||||
addFont output _ = return output
|
||||
|
||||
several :: PDFContent m => Object -> m [Object]
|
||||
several (Direct (Array l)) = mapM castObject l
|
||||
several object = return [object]
|
||||
-}
|
||||
|
||||
pagesList :: PDFContent m => m [ObjectId]
|
||||
pagesList = do
|
||||
|
@ -99,20 +98,19 @@ getReferences objects = do
|
|||
Reference (IndirectObjCoordinates {objectId}) -> [objectId]
|
||||
_ -> []
|
||||
|
||||
extractText :: FontCache m => Object -> m [Text]
|
||||
extractText pageObj = do
|
||||
fonts <- loadFonts =<< getFontDictionary pageObj
|
||||
objects <- pageObj >./ "Contents" >>= several
|
||||
concat <$> mapM (loadContent fonts) objects
|
||||
extractContent :: FontCache m => Object -> m [Content]
|
||||
extractContent pageObj = do
|
||||
--fonts <- loadFonts =<< getFontDictionary pageObj
|
||||
pageObj >./ "Contents" >>= several >>= mapM loadContent
|
||||
where
|
||||
loadContent fonts object =
|
||||
r Clear object
|
||||
>>= (either fail return . Content.parse)
|
||||
>>= renderText fonts
|
||||
loadContent object = r Clear object >>= (either fail return . Content.parse)
|
||||
several (Direct (Array l)) = mapM castObject l
|
||||
several object = return [object]
|
||||
-- >>= renderText fonts
|
||||
|
||||
loadPage :: FontCache m => ObjectId -> m Page
|
||||
loadPage source = do
|
||||
contents <- extractText =<< objectById source
|
||||
contents <- extractContent =<< objectById source
|
||||
return $ Page {contents, source}
|
||||
|
||||
data Pages = Pages
|
||||
|
|
Loading…
Reference in a new issue