Don't double extract images from docx.

This fixes a regression that was introduced when `--extract-media`
was generalized to work with any input format.  We were getting
two versions of each image extracted from a docx, one with a hash,
one with the original filename, though only the hash one was used.
This patch restores the original behavior (using the original
filename).

Pointed out in comments on #3674. Thanks to @laperouse.
This commit is contained in:
John MacFarlane 2017-05-18 13:38:19 +02:00
parent 818d5c2f35
commit 0f6458c0c1

View file

@ -345,19 +345,24 @@ withPaths (p:ps) action fp =
catchError (action (p </> fp))
(\_ -> withPaths ps action fp)
-- | Traverse tree, filling media bag.
-- | Traverse tree, filling media bag for any images that
-- aren't already in the media bag.
fillMediaBag :: PandocMonad m => Maybe String -> Pandoc -> m Pandoc
fillMediaBag sourceURL d = walkM handleImage d
where handleImage :: PandocMonad m => Inline -> m Inline
handleImage (Image attr lab (src, tit)) = catchError
(do (bs, mt) <- fetchItem sourceURL src
let ext = fromMaybe (takeExtension src)
(mt >>= extensionFromMimeType)
let bs' = BL.fromChunks [bs]
let basename = showDigest $ sha1 bs'
let fname = basename <.> ext
insertMedia fname mt bs'
return $ Image attr lab (fname, tit))
(do mediabag <- getMediaBag
case lookupMedia src mediabag of
Just (_, _) -> return $ Image attr lab (src, tit)
Nothing -> do
(bs, mt) <- downloadOrRead sourceURL src
let ext = fromMaybe (takeExtension src)
(mt >>= extensionFromMimeType)
let bs' = BL.fromChunks [bs]
let basename = showDigest $ sha1 bs'
let fname = basename <.> ext
insertMedia fname mt bs'
return $ Image attr lab (fname, tit))
(\e -> do
case e of
PandocResourceNotFound _ -> do