From ef4efa5373a419edbb99355808ddc63d35ddef20 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 19 Aug 2021 10:49:20 -0700
Subject: [PATCH] Improve docx reader's robustness in extracting images.

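The docx reader made a couple of assumptions about how docx
containers are laid out that do not always hold, with the
result that some images in documents were not found or
extracted: it expected media files to live directly in
`word/media/`, and it treated every relationship target as
relative to `word/`, even when the target was an absolute
path (beginning with `/`) within the container.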

Closes #7511.
---
 src/Text/Pandoc/Readers/Docx/Parse.hs | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index dbb16a821..eb048ab14 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -507,9 +507,7 @@ archiveToRelationships archive docXmlPath =
 
 filePathIsMedia :: FilePath -> Bool
 filePathIsMedia fp =
-  let (dir, _) = splitFileName fp
-  in
-   (dir == "word/media/")
+  "media" `elem` splitPath (takeDirectory fp)
 
 lookupLevel :: T.Text -> T.Text -> Numbering -> Maybe Level
 lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do
@@ -774,8 +772,11 @@ expandDrawingId s = do
   target <- asks (fmap T.unpack . lookupRelationship location s . envRelationships)
   case target of
     Just filepath -> do
-      bytes <- asks (lookup ("word/" ++ filepath) . envMedia)
-      case bytes of
+      media <- asks envMedia
+      let filepath' = case filepath of
+                        ('/':rest) -> rest
+                        _ -> "word/" ++ filepath
+      case lookup filepath' media of
         Just bs -> return (filepath, bs)
         Nothing -> throwError DocxError
     Nothing -> throwError DocxError