EPUB Reader: Improved robustness of image extraction
We now maintain the invariant that when fetchImages is called, all images have absolute paths. This patch fixes several bugs relating to this as there are three places where images can be introduced. (1) During the HTML parse (2) As spine elements (3) As a cover image For (1), the paths are corrected by the transformation renameImages For (2) and (3), we need to append the "root" to the path we parse from the spine
This commit is contained in:
parent
40ae8efddc
commit
cfd8c0214c
1 changed files with 9 additions and 7 deletions
|
@ -52,10 +52,12 @@ runEPUB = either error id . runExcept
|
|||
--
|
||||
archiveToEPUB :: (MonadError String m) => ReaderOptions -> Archive -> m (Pandoc, MediaBag)
|
||||
archiveToEPUB (setEPUBOptions -> os) archive = do
|
||||
-- root is path to folder with manifest file in
|
||||
(root, content) <- getManifest archive
|
||||
meta <- parseMeta content
|
||||
(cover, items) <- parseManifest content
|
||||
let coverDoc = fromMaybe mempty (imageToPandoc <$> cover)
|
||||
-- No need to collapse here as the image path is from the manifest file
|
||||
let coverDoc = fromMaybe mempty (imageToPandoc . (root </>) <$> cover)
|
||||
spine <- parseSpine items content
|
||||
let escapedSpine = map (escapeURI . takeFileName . fst) spine
|
||||
Pandoc _ bs <-
|
||||
|
@ -68,17 +70,17 @@ archiveToEPUB (setEPUBOptions -> os) archive = do
|
|||
parseSpineElem :: MonadError String m => FilePath -> (FilePath, MIME) -> m Pandoc
|
||||
parseSpineElem (normalise -> r) (normalise -> path, mime) = do
|
||||
when (readerTrace os) (traceM path)
|
||||
doc <- mimeToReader mime r path
|
||||
doc <- mimeToReader mime (r </> path)
|
||||
let docSpan = B.doc $ B.para $ B.spanWith (takeFileName path, [], []) mempty
|
||||
return $ docSpan <> doc
|
||||
mimeToReader :: MonadError String m => MIME -> FilePath -> FilePath -> m Pandoc
|
||||
mimeToReader "application/xhtml+xml" r path = do
|
||||
fname <- findEntryByPathE (r </> path) archive
|
||||
return $ fixInternalReferences (r </> path) .
|
||||
mimeToReader :: MonadError String m => MIME -> FilePath -> m Pandoc
|
||||
mimeToReader "application/xhtml+xml" (normalise -> path) = do
|
||||
fname <- findEntryByPathE path archive
|
||||
return $ fixInternalReferences path .
|
||||
readHtml os .
|
||||
UTF8.toStringLazy $
|
||||
fromEntry fname
|
||||
mimeToReader s _ path
|
||||
mimeToReader s path
|
||||
| s `elem` imageMimes = return $ imageToPandoc path
|
||||
| otherwise = return $ mempty
|
||||
|
||||
|
|
Loading…
Reference in a new issue