diff --git a/src/PDF/Pages.hs b/src/PDF/Pages.hs index 6701fa0..92a76d2 100755 --- a/src/PDF/Pages.hs +++ b/src/PDF/Pages.hs @@ -23,6 +23,7 @@ import qualified Data.Map as Map (empty, fromList, insert, lookup, toList) import Data.Text (Text) import PDF.Box (Box(..)) import PDF.CMap (cMap) +import PDF.Content (Content(..)) import qualified PDF.Content as Content (parse) import PDF.Content.Text (renderText) import PDF.Encoding (encoding) @@ -43,10 +44,11 @@ import Text.Printf (printf) type CachedFonts = Map ObjectId Font type FontCache m = (MonadState CachedFonts m, PDFContent m) data Page = Page { - contents :: [Text] + contents :: [Content] , source :: ObjectId } +{- getFontDictionary :: PDFContent m => Object -> m Dictionary getFontDictionary pageObj = (pageObj >// ["Resources", "Font"] >>= getDictionary) @@ -80,10 +82,7 @@ loadFonts = foldM addFont Map.empty . Map.toList addFont output (name, Reference (IndirectObjCoordinates {objectId})) = flip (Map.insert name) output <$> cache loadFont objectId addFont output _ = return output - -several :: PDFContent m => Object -> m [Object] -several (Direct (Array l)) = mapM castObject l -several object = return [object] +-} pagesList :: PDFContent m => m [ObjectId] pagesList = do @@ -99,20 +98,19 @@ getReferences objects = do Reference (IndirectObjCoordinates {objectId}) -> [objectId] _ -> [] -extractText :: FontCache m => Object -> m [Text] -extractText pageObj = do - fonts <- loadFonts =<< getFontDictionary pageObj - objects <- pageObj >./ "Contents" >>= several - concat <$> mapM (loadContent fonts) objects +extractContent :: FontCache m => Object -> m [Content] +extractContent pageObj = do + --fonts <- loadFonts =<< getFontDictionary pageObj + pageObj >./ "Contents" >>= several >>= mapM loadContent where - loadContent fonts object = - r Clear object - >>= (either fail return . Content.parse) - >>= renderText fonts + loadContent object = r Clear object >>= (either fail return . Content.parse) + several (Direct (Array l)) = mapM castObject l + several object = return [object] + -- >>= renderText fonts loadPage :: FontCache m => ObjectId -> m Page loadPage source = do - contents <- extractText =<< objectById source + contents <- extractContent =<< objectById source return $ Page {contents, source} data Pages = Pages