Disable text rendering and font loading from the Page abstraction, this code will have to be moved into a separate Box instance
This commit is contained in:
parent
50ac0692b2
commit
90348c57d6
1 changed files with 13 additions and 15 deletions
|
@ -23,6 +23,7 @@ import qualified Data.Map as Map (empty, fromList, insert, lookup, toList)
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
import PDF.Box (Box(..))
|
import PDF.Box (Box(..))
|
||||||
import PDF.CMap (cMap)
|
import PDF.CMap (cMap)
|
||||||
|
import PDF.Content (Content(..))
|
||||||
import qualified PDF.Content as Content (parse)
|
import qualified PDF.Content as Content (parse)
|
||||||
import PDF.Content.Text (renderText)
|
import PDF.Content.Text (renderText)
|
||||||
import PDF.Encoding (encoding)
|
import PDF.Encoding (encoding)
|
||||||
|
@ -43,10 +44,11 @@ import Text.Printf (printf)
|
||||||
type CachedFonts = Map ObjectId Font
|
type CachedFonts = Map ObjectId Font
|
||||||
type FontCache m = (MonadState CachedFonts m, PDFContent m)
|
type FontCache m = (MonadState CachedFonts m, PDFContent m)
|
||||||
data Page = Page {
|
data Page = Page {
|
||||||
contents :: [Text]
|
contents :: [Content]
|
||||||
, source :: ObjectId
|
, source :: ObjectId
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{-
|
||||||
getFontDictionary :: PDFContent m => Object -> m Dictionary
|
getFontDictionary :: PDFContent m => Object -> m Dictionary
|
||||||
getFontDictionary pageObj =
|
getFontDictionary pageObj =
|
||||||
(pageObj >// ["Resources", "Font"] >>= getDictionary)
|
(pageObj >// ["Resources", "Font"] >>= getDictionary)
|
||||||
|
@ -80,10 +82,7 @@ loadFonts = foldM addFont Map.empty . Map.toList
|
||||||
addFont output (name, Reference (IndirectObjCoordinates {objectId})) =
|
addFont output (name, Reference (IndirectObjCoordinates {objectId})) =
|
||||||
flip (Map.insert name) output <$> cache loadFont objectId
|
flip (Map.insert name) output <$> cache loadFont objectId
|
||||||
addFont output _ = return output
|
addFont output _ = return output
|
||||||
|
-}
|
||||||
several :: PDFContent m => Object -> m [Object]
|
|
||||||
several (Direct (Array l)) = mapM castObject l
|
|
||||||
several object = return [object]
|
|
||||||
|
|
||||||
pagesList :: PDFContent m => m [ObjectId]
|
pagesList :: PDFContent m => m [ObjectId]
|
||||||
pagesList = do
|
pagesList = do
|
||||||
|
@ -99,20 +98,19 @@ getReferences objects = do
|
||||||
Reference (IndirectObjCoordinates {objectId}) -> [objectId]
|
Reference (IndirectObjCoordinates {objectId}) -> [objectId]
|
||||||
_ -> []
|
_ -> []
|
||||||
|
|
||||||
extractText :: FontCache m => Object -> m [Text]
|
extractContent :: FontCache m => Object -> m [Content]
|
||||||
extractText pageObj = do
|
extractContent pageObj = do
|
||||||
fonts <- loadFonts =<< getFontDictionary pageObj
|
--fonts <- loadFonts =<< getFontDictionary pageObj
|
||||||
objects <- pageObj >./ "Contents" >>= several
|
pageObj >./ "Contents" >>= several >>= mapM loadContent
|
||||||
concat <$> mapM (loadContent fonts) objects
|
|
||||||
where
|
where
|
||||||
loadContent fonts object =
|
loadContent object = r Clear object >>= (either fail return . Content.parse)
|
||||||
r Clear object
|
several (Direct (Array l)) = mapM castObject l
|
||||||
>>= (either fail return . Content.parse)
|
several object = return [object]
|
||||||
>>= renderText fonts
|
-- >>= renderText fonts
|
||||||
|
|
||||||
loadPage :: FontCache m => ObjectId -> m Page
|
loadPage :: FontCache m => ObjectId -> m Page
|
||||||
loadPage source = do
|
loadPage source = do
|
||||||
contents <- extractText =<< objectById source
|
contents <- extractContent =<< objectById source
|
||||||
return $ Page {contents, source}
|
return $ Page {contents, source}
|
||||||
|
|
||||||
data Pages = Pages
|
data Pages = Pages
|
||||||
|
|
Loading…
Reference in a new issue