From 3fb5499dd638dae7156fba63ba5c1522bed5e46d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 18 Jun 2021 12:06:20 -0700 Subject: [PATCH] insertMediaBag: ensure we get sane mediaPath for URLs. Long URLs cannot be treated as mediaPaths, but System.FilePath's `isRelative` often returns True for them. So we add a check for an absolute URL. We also ensure that extensions are derived only from the path portion of URLs (previously a following query was being included). Closes #7391. --- src/Text/Pandoc/MediaBag.hs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs index 06fba5632..098e484ee 100644 --- a/src/Text/Pandoc/MediaBag.hs +++ b/src/Text/Pandoc/MediaBag.hs @@ -26,13 +26,14 @@ module Text.Pandoc.MediaBag ( import qualified Data.ByteString.Lazy as BL import Data.Data (Data) import qualified Data.Map as M -import Data.Maybe (fromMaybe) +import Data.Maybe (fromMaybe, isNothing) import Data.Typeable (Typeable) import System.FilePath import Text.Pandoc.MIME (MimeType, getMimeTypeDef, extensionFromMimeType) import Data.Text (Text) import qualified Data.Text as T import Data.Digest.Pure.SHA (sha1, showDigest) +import Network.URI (URI (..), parseURI) data MediaItem = MediaItem @@ -76,16 +77,20 @@ insertMedia fp mbMime contents (MediaBag mediamap) = , mediaContents = contents , mediaMimeType = mt } fp' = canonicalize fp - newpath = if isRelative fp && ".." `notElem` splitPath fp + uri = parseURI fp + newpath = if isRelative fp + && isNothing uri + && ".." `notElem` splitPath fp then T.unpack fp' else showDigest (sha1 contents) <> "." <> ext fallback = case takeExtension fp of ".gz" -> getMimeTypeDef $ dropExtension fp _ -> getMimeTypeDef fp mt = fromMaybe fallback mbMime - ext = case takeExtension fp of - '.':e -> e - _ -> maybe "" T.unpack $ extensionFromMimeType mt + path = maybe fp uriPath uri + ext = case takeExtension path of + '.':e -> e + _ -> maybe "" T.unpack $ extensionFromMimeType mt -- | Lookup a media item in a 'MediaBag', returning mime type and contents.