From 847167804aada52bc1af32920c83582b426ef9eb Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 17 May 2016 09:38:52 -0700 Subject: [PATCH] EPUB reader: unescape URIs in spine. This should fix #2924. Testing on the epub that caused the problem originally would be welcome. --- src/Text/Pandoc/Readers/EPUB.hs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index 144ba9ca2..149a7c00b 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -14,6 +14,7 @@ import Text.Pandoc.Walk (walk, query) import Text.Pandoc.Readers.HTML (readHtml) import Text.Pandoc.Options ( ReaderOptions(..), readerTrace) import Text.Pandoc.Shared (escapeURI, collapseFilePath, addMetaField) +import Network.URI (unEscapeString) import Text.Pandoc.MediaBag (MediaBag, insertMedia) import Text.Pandoc.Compat.Except (MonadError, throwError, runExcept, Except) import Text.Pandoc.Compat.Monoid ((<>)) @@ -74,14 +75,15 @@ archiveToEPUB os archive = do let docSpan = B.doc $ B.para $ B.spanWith (takeFileName path, [], []) mempty return $ docSpan <> doc mimeToReader :: MonadError PandocError m => MimeType -> FilePath -> FilePath -> m Pandoc - mimeToReader "application/xhtml+xml" (normalise -> root) (normalise -> path) = do + mimeToReader "application/xhtml+xml" (unEscapeString -> root) + (unEscapeString -> path) = do fname <- findEntryByPathE (root </> path) archive html <- either throwError return . readHtml os' . UTF8.toStringLazy $ fromEntry fname return $ fixInternalReferences path html - mimeToReader s _ path + mimeToReader s _ (unEscapeString -> path) | s `elem` imageMimes = return $ imageToPandoc path | otherwise = return $ mempty