From b0b90aba6260998579a1ee28a657614901865ba1 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 7 Nov 2017 12:24:37 -0800
Subject: [PATCH] EPUB writer: fixed EPUB OCF structure.

The structure of the EPUBs was messed up, and #3720 was
improperly implemented.  This commit fixes things.
---
 src/Text/Pandoc/Writers/EPUB.hs | 147 ++++++++++++++++++--------------
 1 file changed, 82 insertions(+), 65 deletions(-)

diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 0dcef1d63..23df046d0 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -48,7 +48,7 @@ import qualified Data.Set as Set
 import qualified Data.Text as TS
 import qualified Data.Text.Lazy as TL
 import Network.HTTP (urlEncode)
-import System.FilePath (takeExtension, takeFileName)
+import System.FilePath (takeExtension, takeFileName, makeRelative)
 import Text.HTML.TagSoup (Tag (TagOpen), fromAttrib, parseTags)
 import Text.Pandoc.Builder (fromList, setMeta)
 import Text.Pandoc.Class (PandocMonad, report)
@@ -81,6 +81,7 @@ data Chapter = Chapter (Maybe [Int]) [Block]
 
 data EPUBState = EPUBState {
         stMediaPaths  :: [(FilePath, (FilePath, Maybe Entry))]
+      , stEpubSubdir  :: String
       }
 
 type E m = StateT EPUBState m
@@ -149,6 +150,20 @@ removeNote :: Inline -> Inline
 removeNote (Note _) = Str ""
 removeNote x        = x
 
+mkEntry :: PandocMonad m => FilePath -> B.ByteString -> E m Entry
+mkEntry path content = do
+  epubSubdir <- gets stEpubSubdir
+  let addEpubSubdir :: Entry -> Entry
+      addEpubSubdir e = e{ eRelativePath =
+          (if null epubSubdir
+              then ""
+              else epubSubdir ++ "/") ++ eRelativePath e }
+  epochtime <- floor <$> lift P.getPOSIXTime
+  return $
+       (if path == "mimetype" || "META-INF" `isPrefixOf` path
+           then id
+           else addEpubSubdir) $ toEntry path epochtime content
+
 getEPUBMetadata :: PandocMonad m => WriterOptions -> Meta -> E m EPUBMetadata
 getEPUBMetadata opts meta = do
   let md = metadataFromMeta opts meta
@@ -366,11 +381,13 @@ writeEPUB :: PandocMonad m
           -> WriterOptions  -- ^ Writer options
           -> Pandoc         -- ^ Document to convert
           -> m B.ByteString
-writeEPUB epubVersion opts doc =
-  let initState = EPUBState { stMediaPaths = [] }
-  in
-    evalStateT (pandocToEPUB epubVersion opts doc)
-      initState
+writeEPUB epubVersion opts doc = do
+  let epubSubdir = writerEpubSubdirectory opts
+  -- sanity check on epubSubdir
+  unless (all (\c -> isAscii c && isAlphaNum c) epubSubdir) $
+    throwError $ PandocEpubSubdirectoryError epubSubdir
+  let initState = EPUBState { stMediaPaths = [], stEpubSubdir = epubSubdir }
+  evalStateT (pandocToEPUB epubVersion opts doc) initState
 
 pandocToEPUB :: PandocMonad m
              => EPUBVersion
@@ -378,27 +395,18 @@ pandocToEPUB :: PandocMonad m
              -> Pandoc
              -> E m B.ByteString
 pandocToEPUB version opts doc@(Pandoc meta _) = do
-  let epubSubdir = writerEpubSubdirectory opts
-  -- sanity check on epubSubdir
-  unless (all (\c -> isAscii c && isAlphaNum c) epubSubdir) $
-    throwError $ PandocEpubSubdirectoryError epubSubdir
-  let inSubdir f = if null epubSubdir
-                      then f
-                      else epubSubdir ++ "/" ++ f
-
+  epubSubdir <- gets stEpubSubdir
   let epub3 = version == EPUB3
   let writeHtml o = fmap (UTF8.fromTextLazy . TL.fromStrict) .
                       writeHtmlStringForEPUB version o
-  epochtime <- floor <$> lift P.getPOSIXTime
   metadata <- getEPUBMetadata opts meta
-  let mkEntry path content = toEntry path epochtime content
 
   -- stylesheet
   stylesheets <- case epubStylesheets metadata of
                       [] -> (\x -> [B.fromChunks [x]]) <$>
                              P.readDataFile "epub.css"
                       fs -> mapM P.readFileLazy fs
-  let stylesheetEntries = zipWith
+  stylesheetEntries <- zipWithM
         (\bs n -> mkEntry ("styles/stylesheet" ++ show n ++ ".css") bs)
         stylesheets [(1 :: Int)..]
 
@@ -406,10 +414,10 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do
              : [(x,y) | (x,y) <- writerVariables opts, x /= "css"]
 
   let cssvars useprefix = map (\e -> ("css",
-                               (if useprefix && not (null epubSubdir)
+                               (if useprefix
                                    then "../"
                                    else "")
-                               ++ eRelativePath e))
+                               ++ makeRelative epubSubdir (eRelativePath e)))
                           stylesheetEntries
 
   let opts' = opts{ writerEmailObfuscation = NoObfuscation
@@ -430,18 +438,21 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do
                        cpContent <- lift $ writeHtml
                             opts'{ writerVariables =
                                     ("coverpage","true"):
-                                     cssvars False ++ vars }
-                            (Pandoc meta [RawBlock (Format "html") $ "<div id=\"cover-image\">\n<img src=\"" ++ coverImage ++ "\" alt=\"cover image\" />\n</div>"])
+                                     cssvars True ++ vars }
+                            (Pandoc meta [RawBlock (Format "html") $ "<div id=\"cover-image\">\n<img src=\"../media/" ++ coverImage ++ "\" alt=\"cover image\" />\n</div>"])
                        imgContent <- lift $ P.readFileLazy img
-                       return ( [mkEntry "cover.xhtml" cpContent]
-                              , [mkEntry coverImage imgContent] )
+                       coverEntry <- mkEntry "text/cover.xhtml" cpContent
+                       coverImageEntry <- mkEntry ("media/" ++ coverImage)
+                                             imgContent
+                       return ( [ coverEntry ]
+                              , [ coverImageEntry ] )
 
   -- title page
   tpContent <- lift $ writeHtml opts'{
                                   writerVariables = ("titlepage","true"):
                                   cssvars True ++ vars }
                                (Pandoc meta [])
-  let tpEntry = mkEntry (inSubdir "title_page.xhtml") tpContent
+  tpEntry <- mkEntry "text/title_page.xhtml" tpContent
 
   -- handle pictures
   -- mediaRef <- P.newIORef []
@@ -454,7 +465,7 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do
         when (null xs) $
           report $ CouldNotFetchResource f "glob did not match any font files"
         return xs
-  let mkFontEntry f = mkEntry ("fonts/" ++ takeFileName f) <$>
+  let mkFontEntry f = mkEntry ("fonts/" ++ takeFileName f) =<<
                         lift (P.readFileLazy f)
   fontFiles <- concat <$> mapM matchingGlob (writerEpubFonts opts')
   fontEntries <- mapM mkFontEntry fontFiles
@@ -540,7 +551,7 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do
                  chapters'
 
   let chapToEntry num (Chapter mbnum bs) =
-       mkEntry (inSubdir (showChapter num)) <$>
+        mkEntry ("text/" ++ showChapter num) =<<
         writeHtml opts'{ writerNumberOffset = fromMaybe [] mbnum
                        , writerVariables = cssvars True ++ vars }
                  (case bs of
@@ -550,7 +561,7 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do
                                  nullMeta) bs
                      _                   -> Pandoc nullMeta bs)
 
-  chapterEntries <- lift $ zipWithM chapToEntry [1..] chapters
+  chapterEntries <- zipWithM chapToEntry [1..] chapters
 
   -- incredibly inefficient (TODO):
   let containsMathML ent = epub3 &&
@@ -563,24 +574,34 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do
 
   -- contents.opf
   let chapterNode ent = unode "item" !
-                           ([("id", toId $ eRelativePath ent),
-                             ("href", eRelativePath ent),
+                           ([("id", toId $ makeRelative epubSubdir
+                                         $ eRelativePath ent),
+                             ("href", makeRelative epubSubdir
+                                      $ eRelativePath ent),
                              ("media-type", "application/xhtml+xml")]
                             ++ case props ent of
                                     [] -> []
                                     xs -> [("properties", unwords xs)])
                         $ ()
+
   let chapterRefNode ent = unode "itemref" !
-                             [("idref", toId $ eRelativePath ent)] $ ()
+                             [("idref", toId $ makeRelative epubSubdir
+                                             $ eRelativePath ent)] $ ()
   let pictureNode ent = unode "item" !
-                           [("id", toId $ eRelativePath ent),
-                            ("href", eRelativePath ent),
-                            ("media-type", fromMaybe "application/octet-stream"
+                           [("id", toId $ makeRelative epubSubdir
+                                        $ eRelativePath ent),
+                            ("href", makeRelative epubSubdir
+                                     $ eRelativePath ent),
+                            ("media-type",
+                               fromMaybe "application/octet-stream"
                                $ mediaTypeOf $ eRelativePath ent)] $ ()
   let fontNode ent = unode "item" !
-                           [("id", toId $ eRelativePath ent),
-                            ("href", eRelativePath ent),
-                            ("media-type", fromMaybe "" $ getMimeType $ eRelativePath ent)] $ ()
+                           [("id", toId $ makeRelative epubSubdir
+                                        $ eRelativePath ent),
+                            ("href", makeRelative epubSubdir
+                                     $ eRelativePath ent),
+                            ("media-type", fromMaybe "" $
+                                  getMimeType $ eRelativePath ent)] $ ()
   let plainTitle = case docTitle' meta of
                         [] -> case epubTitle metadata of
                                    []    -> "UNTITLED"
@@ -613,7 +634,9 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do
              ] ++
              [ unode "item" ! [("id","style"), ("href",fp)
                               ,("media-type","text/css")] $ () |
-                          fp <- map eRelativePath stylesheetEntries ] ++
+                             fp <- map
+                               (makeRelative epubSubdir . eRelativePath)
+                               stylesheetEntries ] ++
              map chapterNode (cpgEntry ++ (tpEntry : chapterEntries)) ++
              (case cpicEntry of
                     []    -> []
@@ -648,7 +671,7 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do
                | isJust (epubCoverImage metadata)
              ]
           ]
-  let contentsEntry = mkEntry "content.opf" contentsData
+  contentsEntry <- mkEntry "content.opf" contentsData
 
   -- toc.ncx
   let secs = hierarchicalize blocks'
@@ -681,12 +704,12 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do
       navMapFormatter n tit src subs = unode "navPoint" !
                [("id", "navPoint-" ++ show n)] $
                   [ unode "navLabel" $ unode "text" $ stringify tit
-                  , unode "content" ! [("src", inSubdir src)] $ ()
+                  , unode "content" ! [("src", "text/" ++ src)] $ ()
                   ] ++ subs
 
   let tpNode = unode "navPoint" !  [("id", "navPoint-0")] $
                   [ unode "navLabel" $ unode "text" (stringify $ docTitle' meta)
-                  , unode "content" ! [("src", inSubdir "title_page.xhtml")]
+                  , unode "content" ! [("src", "text/title_page.xhtml")]
                   $ () ]
 
   navMap <- lift $ evalStateT (mapM (navPointNode navMapFormatter) secs) 1
@@ -710,13 +733,13 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do
           , unode "navMap" $
               tpNode : navMap
           ]
-  let tocEntry = mkEntry "toc.ncx" tocData
+  tocEntry <- mkEntry "toc.ncx" tocData
 
   let navXhtmlFormatter :: Int -> [Inline] -> String -> [Element] -> Element
       navXhtmlFormatter n tit src subs = unode "li" !
                                        [("id", "toc-li-" ++ show n)] $
                                             (unode "a" !
-                                                [("href", inSubdir src)]
+                                                [("href", "text/" ++ src)]
                                              $ titElements)
                                             : case subs of
                                                  []    -> []
@@ -766,36 +789,37 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do
             (Pandoc (setMeta "title"
                      (walk removeNote $ fromList $ docTitle' meta) nullMeta)
                (navBlocks ++ landmarks))
-  let navEntry = mkEntry "nav.xhtml" navData
+  navEntry <- mkEntry "nav.xhtml" navData
 
   -- mimetype
-  let mimetypeEntry = mkEntry "mimetype" $ UTF8.fromStringLazy "application/epub+zip"
+  mimetypeEntry <- mkEntry "mimetype" $
+                        UTF8.fromStringLazy "application/epub+zip"
 
   -- container.xml
   let containerData = UTF8.fromStringLazy $ ppTopElement $
        unode "container" ! [("version","1.0")
               ,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $
          unode "rootfiles" $
-           unode "rootfile" ! [("full-path", inSubdir "content.opf")
+           unode "rootfile" ! [("full-path",
+                    (if null epubSubdir
+                        then ""
+                        else epubSubdir ++ "/") ++ "content.opf")
                ,("media-type","application/oebps-package+xml")] $ ()
-  let containerEntry = mkEntry "META-INF/container.xml" containerData
+  containerEntry <- mkEntry "META-INF/container.xml" containerData
 
   -- com.apple.ibooks.display-options.xml
   let apple = UTF8.fromStringLazy $ ppTopElement $
         unode "display_options" $
           unode "platform" ! [("name","*")] $
             unode "option" ! [("name","specified-fonts")] $ "true"
-  let appleEntry = mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple
+  appleEntry <- mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple
 
-  let addEpubSubdir :: Entry -> Entry
-      addEpubSubdir e = e{ eRelativePath = inSubdir (eRelativePath e) }
   -- construct archive
   let archive = foldr addEntryToArchive emptyArchive $
-                 [mimetypeEntry, containerEntry, appleEntry] ++
-                 map addEpubSubdir
-                 (tpEntry : contentsEntry : tocEntry : navEntry :
-                  (stylesheetEntries ++ picEntries ++ cpicEntry ++
-                   cpgEntry ++ chapterEntries ++ fontEntries))
+                 [mimetypeEntry, containerEntry, appleEntry,
+                  contentsEntry, tocEntry, navEntry, tpEntry] ++
+                  stylesheetEntries ++ picEntries ++ cpicEntry ++
+                  cpgEntry ++ chapterEntries ++ fontEntries
   return $ fromArchive archive
 
 metadataElement :: EPUBVersion -> EPUBMetadata -> UTCTime -> Element
@@ -936,8 +960,7 @@ modifyMediaRef oldsrc = do
                let new = "media/file" ++ show (length media) ++
                           fromMaybe (takeExtension (takeWhile (/='?') oldsrc))
                           (('.':) <$> (mbMime >>= extensionFromMimeType))
-               epochtime <- floor `fmap` lift P.getPOSIXTime
-               let entry = toEntry new epochtime (B.fromChunks . (:[]) $ img)
+               entry <- mkEntry new (B.fromChunks . (:[]) $ img)
                modify $ \st -> st{ stMediaPaths =
                             (oldsrc, (new, Just entry)):media}
                return new)
@@ -959,21 +982,15 @@ transformInline  :: PandocMonad m
                  => WriterOptions
                  -> Inline
                  -> E m Inline
-transformInline opts (Image attr lab (src,tit)) = do
+transformInline _opts (Image attr lab (src,tit)) = do
     newsrc <- modifyMediaRef src
-    let pref = if null (writerEpubSubdirectory opts)
-                  then ""
-                  else "../"
-    return $ Image attr lab (pref ++ newsrc, tit)
+    return $ Image attr lab ("../" ++ newsrc, tit)
 transformInline opts (x@(Math t m))
   | WebTeX url <- writerHTMLMathMethod opts = do
     newsrc <- modifyMediaRef (url ++ urlEncode m)
     let mathclass = if t == DisplayMath then "display" else "inline"
-    let pref = if null (writerEpubSubdirectory opts)
-                  then ""
-                  else "../"
     return $ Span ("",["math",mathclass],[])
-                [Image nullAttr [x] (pref ++ newsrc, "")]
+                [Image nullAttr [x] ("../" ++ newsrc, "")]
 transformInline _opts (RawInline fmt raw)
   | fmt == Format "html" = do
   let tags = parseTags raw