Shared: fetchItem improvements.

* More consistent logic:  absolute URIs are fetched from the net;
  other things are treated as relative URIs if sourceURL is a Just,
  otherwise as file paths.
* We escape characters that are not allowed in URIs before trying
  to parse them (e.g. '|', which often occurs in the wild).
* When treating relative paths as local file paths, we drop
  any fragment or query.  This is useful e.g. when you've downloaded
  web fonts locally, but your source still contains the original
  relative URLs.

Together with the previous commit, this should close #1477.
This commit is contained in:
John MacFarlane 2014-08-02 16:09:17 -07:00
parent ce8922437d
commit 1d137fbed6

View file

@ -101,7 +101,7 @@ import Data.Char ( toLower, isLower, isUpper, isAlpha,
import Data.List ( find, isPrefixOf, intercalate ) import Data.List ( find, isPrefixOf, intercalate )
import qualified Data.Map as M import qualified Data.Map as M
import Network.URI ( escapeURIString, isURI, nonStrictRelativeTo, import Network.URI ( escapeURIString, isURI, nonStrictRelativeTo,
unEscapeString, parseURIReference ) unEscapeString, parseURIReference, isAllowedInURI )
import qualified Data.Set as Set import qualified Data.Set as Set
import System.Directory import System.Directory
import Text.Pandoc.MIME (getMimeType) import Text.Pandoc.MIME (getMimeType)
@ -766,21 +766,22 @@ readDataFileUTF8 userDir fname =
-- Returns raw content and maybe mime type. -- Returns raw content and maybe mime type.
fetchItem :: Maybe String -> String fetchItem :: Maybe String -> String
-> IO (Either E.SomeException (BS.ByteString, Maybe String)) -> IO (Either E.SomeException (BS.ByteString, Maybe String))
fetchItem sourceURL s fetchItem sourceURL s =
| isURI s = openURL s case (sourceURL >>= parseURIReference . ensureEscaped, ensureEscaped s) of
| otherwise = (_, s') | isURI s' -> openURL s'
case sourceURL >>= parseURIReference of (Just u, s') -> -- try fetching from relative path at source
Just u -> case parseURIReference s of case parseURIReference s' of
Just s' -> openURL $ show $ Just u' -> openURL $ show $ u' `nonStrictRelativeTo` u
s' `nonStrictRelativeTo` u Nothing -> openURL s' -- will throw error
Nothing -> openURL $ show u ++ "/" ++ s (Nothing, _) -> E.try readLocalFile -- get from local file system
Nothing -> E.try readLocalFile
where readLocalFile = do where readLocalFile = do
let mime = case takeExtension s of let mime = case takeExtension s of
".gz" -> getMimeType $ dropExtension s ".gz" -> getMimeType $ dropExtension s
x -> getMimeType x x -> getMimeType x
cont <- BS.readFile $ unEscapeString s cont <- BS.readFile $ unEscapeString $ dropFragmentAndQuery s
return (cont, mime) return (cont, mime)
dropFragmentAndQuery = takeWhile (\c -> c /= '?' && c /= '#')
ensureEscaped = escapeURIString isAllowedInURI
-- | Like 'fetchItem', but also looks for items in a 'MediaBag'. -- | Like 'fetchItem', but also looks for items in a 'MediaBag'.
fetchItem' :: MediaBag -> Maybe String -> String fetchItem' :: MediaBag -> Maybe String -> String