From 0f6458c0c13380969ccac82d54a0e68a3ec76200 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 18 May 2017 13:38:19 +0200
Subject: [PATCH] Don't double extract images from docx.

This fixes a regression that was introduced when `--extract-media`
was generalized to work with any input format.  We were getting
two copies of each image extracted from a docx: one named by the
SHA1 hash of its contents and one with the original filename,
though only the hash-named copy was used.  This patch restores the
original behavior (using the original filename).

Pointed out in comments on #3674. Thanks to @laperouse.
---
 src/Text/Pandoc/Class.hs | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs
index 4ef56ec33..8b2adc507 100644
--- a/src/Text/Pandoc/Class.hs
+++ b/src/Text/Pandoc/Class.hs
@@ -345,19 +345,24 @@ withPaths (p:ps) action fp =
   catchError (action (p </> fp))
              (\_ -> withPaths ps action fp)
 
--- | Traverse tree, filling media bag.
+-- | Traverse tree, filling media bag for any images that
+-- aren't already in the media bag.
 fillMediaBag :: PandocMonad m => Maybe String -> Pandoc -> m Pandoc
 fillMediaBag sourceURL d = walkM handleImage d
   where handleImage :: PandocMonad m => Inline -> m Inline
         handleImage (Image attr lab (src, tit)) = catchError
-          (do (bs, mt) <- fetchItem sourceURL src
-              let ext = fromMaybe (takeExtension src)
-                          (mt >>= extensionFromMimeType)
-              let bs' = BL.fromChunks [bs]
-              let basename = showDigest $ sha1 bs'
-              let fname = basename <.> ext
-              insertMedia fname mt bs'
-              return $ Image attr lab (fname, tit))
+          (do mediabag <- getMediaBag
+              case lookupMedia src mediabag of
+                Just (_, _) -> return $ Image attr lab (src, tit)
+                Nothing -> do
+                  (bs, mt) <- downloadOrRead sourceURL src
+                  let ext = fromMaybe (takeExtension src)
+                              (mt >>= extensionFromMimeType)
+                  let bs' = BL.fromChunks [bs]
+                  let basename = showDigest $ sha1 bs'
+                  let fname = basename <.> ext
+                  insertMedia fname mt bs'
+                  return $ Image attr lab (fname, tit))
           (\e -> do
               case e of
                 PandocResourceNotFound _ -> do