insertMediaBag: ensure we get sane mediaPath for URLs.

Long URLs cannot be treated as mediaPaths, but System.FilePath's
`isRelative` often returns True for them.  So we add a check
for an absolute URL.  We also ensure that extensions are derived
only from the path portion of URLs (previously a following query
was being included).

Closes #7391.
This commit is contained in:
John MacFarlane 2021-06-18 12:06:20 -07:00
parent 961268446c
commit 3fb5499dd6

View file

@ -26,13 +26,14 @@ module Text.Pandoc.MediaBag (
import qualified Data.ByteString.Lazy as BL
import Data.Data (Data)
import qualified Data.Map as M
import Data.Maybe (fromMaybe)
import Data.Maybe (fromMaybe, isNothing)
import Data.Typeable (Typeable)
import System.FilePath
import Text.Pandoc.MIME (MimeType, getMimeTypeDef, extensionFromMimeType)
import Data.Text (Text)
import qualified Data.Text as T
import Data.Digest.Pure.SHA (sha1, showDigest)
import Network.URI (URI (..), parseURI)
data MediaItem =
MediaItem
@ -76,14 +77,18 @@ insertMedia fp mbMime contents (MediaBag mediamap) =
, mediaContents = contents
, mediaMimeType = mt }
fp' = canonicalize fp
newpath = if isRelative fp && ".." `notElem` splitPath fp
uri = parseURI fp
newpath = if isRelative fp
&& isNothing uri
&& ".." `notElem` splitPath fp
then T.unpack fp'
else showDigest (sha1 contents) <> "." <> ext
fallback = case takeExtension fp of
".gz" -> getMimeTypeDef $ dropExtension fp
_ -> getMimeTypeDef fp
mt = fromMaybe fallback mbMime
ext = case takeExtension fp of
path = maybe fp uriPath uri
ext = case takeExtension path of
'.':e -> e
_ -> maybe "" T.unpack $ extensionFromMimeType mt