diff --git a/MANUAL.txt b/MANUAL.txt index b3a1f95e2..ef569433a 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -675,12 +675,12 @@ header when requesting a document from a URL: : Extract images and other media contained in or linked from the source document to the path *DIR*, creating it if necessary, and adjust the images references in the document - so they point to the extracted files. If the source format is - a binary container (docx, epub, or odt), the media is - extracted from the container and the original - filenames are used. Otherwise the media is read from the - file system or downloaded, and new filenames are constructed - based on SHA1 hashes of the contents. + so they point to the extracted files. Media are downloaded, + read from the file system, or extracted from a binary + container (e.g. docx), as needed. The original file paths + are used if they are relative paths not containing `..`. + Otherwise filenames are constructed from the SHA1 hash of + the contents. `--abbreviations=`*FILE* diff --git a/src/Text/Pandoc/Class/IO.hs b/src/Text/Pandoc/Class/IO.hs index 6df39d4d0..169074860 100644 --- a/src/Text/Pandoc/Class/IO.hs +++ b/src/Text/Pandoc/Class/IO.hs @@ -62,7 +62,7 @@ import Text.Pandoc.Definition (Pandoc, Inline (Image)) import Text.Pandoc.Error (PandocError (..)) import Text.Pandoc.Logging (LogMessage (..), messageVerbosity, showLogMessage) import Text.Pandoc.MIME (MimeType) -import Text.Pandoc.MediaBag (MediaBag, MediaItem(..), lookupMedia, mediaDirectory) +import Text.Pandoc.MediaBag (MediaBag, MediaItem(..), lookupMedia, mediaItems) import Text.Pandoc.Walk (walk) import qualified Control.Exception as E import qualified Data.ByteString as B @@ -200,31 +200,32 @@ alertIndent (l:ls) = do extractMedia :: (PandocMonad m, MonadIO m) => FilePath -> Pandoc -> m Pandoc extractMedia dir d = do media <- getMediaBag - case [fp | (fp, _, _) <- mediaDirectory media] of - [] -> return d - fps -> do - mapM_ (writeMedia dir media) fps - return $ walk (adjustImagePath dir fps) d + let items = mediaItems media + if null items + then return d + else do + mapM_ (writeMedia dir) items + return $ walk (adjustImagePath dir media) d -- | Write the contents of a media bag to a path. writeMedia :: (PandocMonad m, MonadIO m) - => FilePath -> MediaBag -> FilePath + => FilePath + -> (FilePath, MimeType, BL.ByteString) -> m () -writeMedia dir mediabag subpath = do - let mbcontents = lookupMedia subpath mediabag - case mbcontents of - Nothing -> throwError $ PandocResourceNotFound $ pack subpath - Just item -> do - -- we normalize to get proper path separators for the platform - let fullpath = dir </> normalise (mediaPath item) - liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath) - logIOError $ BL.writeFile fullpath $ mediaContents item +writeMedia dir (fp, _mt, bs) = do + -- we normalize to get proper path separators for the platform + let fullpath = normalise $ dir </> fp + liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath) + logIOError $ BL.writeFile fullpath bs -- | If the given Inline element is an image with a @src@ path equal to -- one in the list of @paths@, then prepends @dir@ to the image source; -- returns the element unchanged otherwise. -adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline -adjustImagePath dir paths (Image attr lab (src, tit)) - | unpack src `elem` paths - = Image attr lab (pack (normalise $ dir </> unpack src), tit) +adjustImagePath :: FilePath -> MediaBag -> Inline -> Inline +adjustImagePath dir mediabag (Image attr lab (src, tit)) = + case lookupMedia (T.unpack src) mediabag of + Nothing -> Image attr lab (src, tit) + Just item -> + let fullpath = dir </> mediaPath item + in Image attr lab (T.pack fullpath, tit) adjustImagePath _ _ x = x diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs index 4eb80df29..439aec071 100644 --- a/src/Text/Pandoc/Class/PandocMonad.hs +++ b/src/Text/Pandoc/Class/PandocMonad.hs @@ -638,17 +638,12 @@ fillMediaBag d = walkM handleImage d handleImage (Image attr lab (src, tit)) = catchError (do mediabag <- getMediaBag let fp = T.unpack src - src' <- T.pack <$> case lookupMedia fp mediabag of - Just item -> return $ mediaPath item - Nothing -> do - (bs, mt) <- fetchItem src - insertMedia fp mt (BL.fromStrict bs) - mediabag' <- getMediaBag - case lookupMedia fp mediabag' of - Just item -> return $ mediaPath item - Nothing -> throwError $ PandocSomeError $ - src <> " not successfully inserted into MediaBag" - return $ Image attr lab (src', tit)) + case lookupMedia fp mediabag of + Just _ -> return () + Nothing -> do + (bs, mt) <- fetchItem src + insertMedia fp mt (BL.fromStrict bs) + return $ Image attr lab (src, tit)) (\e -> case e of PandocResourceNotFound _ -> do diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs index a65f315fc..06fba5632 100644 --- a/src/Text/Pandoc/MediaBag.hs +++ b/src/Text/Pandoc/MediaBag.hs @@ -71,16 +71,21 @@ insertMedia :: FilePath -- ^ relative path and canonical name of resource -> MediaBag -> MediaBag insertMedia fp mbMime contents (MediaBag mediamap) = - MediaBag (M.insert (canonicalize fp) mediaItem mediamap) - where mediaItem = MediaItem{ mediaPath = showDigest (sha1 contents) <> - "." <> ext + MediaBag (M.insert fp' mediaItem mediamap) + where mediaItem = MediaItem{ mediaPath = newpath , mediaContents = contents , mediaMimeType = mt } + fp' = canonicalize fp + newpath = if isRelative fp && ".." `notElem` splitPath fp + then T.unpack fp' + else showDigest (sha1 contents) <> "." <> ext fallback = case takeExtension fp of ".gz" -> getMimeTypeDef $ dropExtension fp _ -> getMimeTypeDef fp mt = fromMaybe fallback mbMime - ext = maybe (takeExtension fp) T.unpack $ extensionFromMimeType mt + ext = case takeExtension fp of + '.':e -> e + _ -> maybe "" T.unpack $ extensionFromMimeType mt -- | Lookup a media item in a 'MediaBag', returning mime type and contents. @@ -92,13 +97,11 @@ lookupMedia fp (MediaBag mediamap) = M.lookup (canonicalize fp) mediamap -- | Get a list of the file paths stored in a 'MediaBag', with -- their corresponding mime types and the lengths in bytes of the contents. mediaDirectory :: MediaBag -> [(FilePath, MimeType, Int)] -mediaDirectory (MediaBag mediamap) = - M.foldrWithKey (\fp item -> - ((T.unpack fp, mediaMimeType item, - fromIntegral (BL.length (mediaContents item))):)) [] mediamap +mediaDirectory mediabag = + map (\(fp, mt, bs) -> (fp, mt, fromIntegral (BL.length bs))) + (mediaItems mediabag) mediaItems :: MediaBag -> [(FilePath, MimeType, BL.ByteString)] mediaItems (MediaBag mediamap) = - M.foldrWithKey (\fp item -> - ((T.unpack fp, mediaMimeType item, mediaContents item):)) - [] mediamap + map (\item -> (mediaPath item, mediaMimeType item, mediaContents item)) + (M.elems mediamap)