HTML reader: fix bad handling of empty src attribute in iframe.

- If src is empty, we skip the iframe (logging it as skipped)
  without attempting to fetch anything.
- If src is invalid or cannot be fetched, we issue a warning
  and skip the iframe instead of failing with an error.
- Closes #7099.
This commit is contained in:
John MacFarlane 2021-02-13 13:06:22 -08:00
parent 6e73273916
commit d84a6041e1
2 changed files with 24 additions and 8 deletions

View file

@ -26,7 +26,7 @@ module Text.Pandoc.Readers.HTML ( readHtml
import Control.Applicative ((<|>)) import Control.Applicative ((<|>))
import Control.Monad (guard, msum, mzero, unless, void) import Control.Monad (guard, msum, mzero, unless, void)
import Control.Monad.Except (throwError) import Control.Monad.Except (throwError, catchError)
import Control.Monad.Reader (ask, asks, lift, local, runReaderT) import Control.Monad.Reader (ask, asks, lift, local, runReaderT)
import Data.ByteString.Base64 (encode) import Data.ByteString.Base64 (encode)
import Data.Char (isAlphaNum, isLetter) import Data.Char (isAlphaNum, isLetter)
@ -393,11 +393,17 @@ pIframe = try $ do
tag <- pSatisfy (tagOpen (=="iframe") (isJust . lookup "src")) tag <- pSatisfy (tagOpen (=="iframe") (isJust . lookup "src"))
pCloses "iframe" <|> eof pCloses "iframe" <|> eof
url <- canonicalizeUrl $ fromAttrib "src" tag url <- canonicalizeUrl $ fromAttrib "src" tag
(bs, _) <- openURL url if T.null url
let inp = UTF8.toText bs then ignore $ renderTags' [tag, TagClose "iframe"]
opts <- readerOpts <$> getState else catchError
Pandoc _ contents <- readHtml opts inp (do (bs, _) <- openURL url
return $ B.divWith ("",["iframe"],[]) $ B.fromList contents let inp = UTF8.toText bs
opts <- readerOpts <$> getState
Pandoc _ contents <- readHtml opts inp
return $ B.divWith ("",["iframe"],[]) $ B.fromList contents)
(\e -> do
logMessage $ CouldNotFetchResource url (renderError e)
ignore $ renderTags' [tag, TagClose "iframe"])
pRawHtmlBlock :: PandocMonad m => TagParser m Blocks pRawHtmlBlock :: PandocMonad m => TagParser m Blocks
pRawHtmlBlock = do pRawHtmlBlock = do

View file

@ -2,7 +2,17 @@
% pandoc -f html -t native --verbose % pandoc -f html -t native --verbose
<iframe src=""></iframe> <iframe src=""></iframe>
^D ^D
[INFO] Fetching ... [INFO] Skipped '<iframe src></iframe>' at input line 1 column 16
[INFO] Skipped '<iframe src></iframe>' at input line 1 column 1 []
```
```
% pandoc -f html -t native --verbose
<iframe src="h:invalid@url"></iframe>
^D
[INFO] Fetching h:invalid@url...
[WARNING] Could not fetch resource 'h:invalid@url': Could not fetch h:invalid@url
InvalidUrlException "h:invalid@url" "Invalid scheme"
[INFO] Skipped '<iframe src="h:invalid@url"></iframe>' at input line 1 column 29
[] []
``` ```