Improved fetching of external resources.

* In Shared, openURL and fetchItem now return an Either, for
  better error handling. (API change.)
* Better error message when fetching a URL fails with
  `--self-contained`.
* EPUB writer: If resource not found, skip it, as in Docx writer.
* Closes #916.
This commit is contained in:
John MacFarlane 2013-07-18 20:58:14 -07:00
parent 6c2e76ac61
commit 7c980f39bf
5 changed files with 24 additions and 17 deletions

View file

@ -40,7 +40,7 @@ import System.FilePath (takeExtension, dropExtension, takeDirectory, (</>))
import Data.Char (toLower, isAscii, isAlphaNum)
import Codec.Compression.GZip as Gzip
import qualified Data.ByteString.Lazy as L
import Text.Pandoc.Shared (renderTags', openURL, readDataFile)
import Text.Pandoc.Shared (renderTags', openURL, readDataFile, err)
import Text.Pandoc.UTF8 (toString, fromString)
import Text.Pandoc.MIME (getMimeType)
import System.Directory (doesFileExist)
@ -98,7 +98,7 @@ cssURLs userdata d orig =
getItem :: Maybe FilePath -> String -> IO (ByteString, Maybe String)
getItem userdata f =
if isAbsoluteURI f
then openURL f
then openURL f >>= either handleErr return
else do
-- strip off trailing query or fragment part, if relative URL.
-- this is needed for things like cmunrm.eot?#iefix,
@ -110,6 +110,7 @@ getItem userdata f =
exists <- doesFileExist f'
cont <- if exists then B.readFile f' else readDataFile userdata f'
return (cont, mime)
where handleErr e = err 61 $ "Failed to retrieve " ++ f ++ "\n" ++ show e
getRaw :: Maybe FilePath -> String -> String -> IO (ByteString, String)
getRaw userdata mimetype src = do

View file

@ -95,6 +95,7 @@ import Text.Pandoc.MIME (getMimeType)
import System.FilePath ( (</>), takeExtension, dropExtension )
import Data.Generics (Typeable, Data)
import qualified Control.Monad.State as S
import qualified Control.Exception as E
import Control.Monad (msum, unless)
import Text.Pandoc.Pretty (charWidth)
import System.Locale (defaultTimeLocale)
@ -586,12 +587,13 @@ readDataFileUTF8 userDir fname =
-- | Fetch an image or other item from the local filesystem or the net.
-- Returns raw content and maybe mime type.
fetchItem :: String -> String -> IO (BS.ByteString, Maybe String)
fetchItem :: String -> String
-> IO (Either E.SomeException (BS.ByteString, Maybe String))
fetchItem sourceDir s =
case s of
_ | isAbsoluteURI s -> openURL s
| isAbsoluteURI sourceDir -> openURL $ sourceDir ++ "/" ++ s
| otherwise -> do
| otherwise -> E.try $ do
let mime = case takeExtension s of
".gz" -> getMimeType $ dropExtension s
x -> getMimeType x
@ -600,21 +602,21 @@ fetchItem sourceDir s =
return (cont, mime)
-- | Read from a URL and return raw data and maybe mime type.
openURL :: String -> IO (BS.ByteString, Maybe String)
openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe String))
openURL u
| "data:" `isPrefixOf` u =
let mime = takeWhile (/=',') $ drop 5 u
contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u
in return (contents, Just mime)
in return $ Right (contents, Just mime)
#ifdef HTTP_CONDUIT
| otherwise = do
| otherwise = E.try $ do
req <- parseUrl u
resp <- withManager $ httpLbs req
return (BS.concat $ toChunks $ responseBody resp,
UTF8.toString `fmap` lookup hContentType (responseHeaders resp))
#else
| otherwise = getBodyAndMimeType `fmap` browse
(do S.liftIO $ UTF8.hPutStrLn stderr $ "Fetching " ++ u ++ "..."
| otherwise = E.try $ getBodyAndMimeType `fmap` browse
(do UTF8.hPutStrLn stderr $ "Fetching " ++ u ++ "..."
setOutHandler $ const (return ())
setAllowRedirects True
request (getRequest' u'))

View file

@ -726,7 +726,7 @@ inlineToOpenXML opts (Image alt (src, tit)) = do
Just (_,_,_,elt,_) -> return [elt]
Nothing -> do
let sourceDir = writerSourceDirectory opts
res <- liftIO $ E.try $ fetchItem sourceDir src
res <- liftIO $ fetchItem sourceDir src
case res of
Left (_ :: E.SomeException) -> do
liftIO $ warn $ "Could not find image `" ++ src ++ "', skipping..."

View file

@ -123,10 +123,15 @@ writeEPUB opts doc@(Pandoc meta _) = do
Pandoc _ blocks <- bottomUpM
(transformInline opts' sourceDir picsRef) doc
pics <- readIORef picsRef
let readPicEntry (oldsrc, newsrc) = do
(img,_) <- fetchItem sourceDir oldsrc
return $ toEntry newsrc epochtime $ B.fromChunks . (:[]) $ img
picEntries <- mapM readPicEntry pics
let readPicEntry entries (oldsrc, newsrc) = do
res <- fetchItem sourceDir oldsrc
case res of
Left e -> do
warn $ "Could not find image `" ++ oldsrc ++ "', skipping..."
return entries
Right (img,_) -> return $
(toEntry newsrc epochtime $ B.fromChunks . (:[]) $ img) : entries
picEntries <- foldM readPicEntry [] pics
-- handle fonts
let mkFontEntry f = mkEntry (takeFileName f) `fmap` B.readFile f

View file

@ -42,7 +42,6 @@ import Text.Pandoc.Definition
import Text.Pandoc.Generic
import Text.Pandoc.Writers.OpenDocument ( writeOpenDocument )
import Control.Monad (liftM)
import Control.Monad.Trans (liftIO)
import Text.Pandoc.XML
import Text.Pandoc.Pretty
import qualified Control.Exception as E
@ -114,10 +113,10 @@ writeODT opts doc@(Pandoc meta _) = do
transformPic :: FilePath -> IORef [Entry] -> Inline -> IO Inline
transformPic sourceDir entriesRef (Image lab (src,_)) = do
res <- liftIO $ E.try $ fetchItem sourceDir src
res <- fetchItem sourceDir src
case res of
Left (_ :: E.SomeException) -> do
liftIO $ warn $ "Could not find image `" ++ src ++ "', skipping..."
warn $ "Could not find image `" ++ src ++ "', skipping..."
return $ Emph lab
Right (img, _) -> do
let size = imageSize img