HTML reader: fix bad handling of empty src attribute in iframe.

- If src is empty, we simply skip the iframe.
- If src is invalid or cannot be fetched, we issue a warning
  and skip the iframe instead of failing with an error.
- Closes #7099.
This commit is contained in:
John MacFarlane 2021-02-13 13:06:22 -08:00
parent 6e73273916
commit d84a6041e1
2 changed files with 24 additions and 8 deletions

View file

@ -26,7 +26,7 @@ module Text.Pandoc.Readers.HTML ( readHtml
import Control.Applicative ((<|>))
import Control.Monad (guard, msum, mzero, unless, void)
import Control.Monad.Except (throwError)
import Control.Monad.Except (throwError, catchError)
import Control.Monad.Reader (ask, asks, lift, local, runReaderT)
import Data.ByteString.Base64 (encode)
import Data.Char (isAlphaNum, isLetter)
@ -393,11 +393,17 @@ pIframe = try $ do
tag <- pSatisfy (tagOpen (=="iframe") (isJust . lookup "src"))
pCloses "iframe" <|> eof
url <- canonicalizeUrl $ fromAttrib "src" tag
(bs, _) <- openURL url
if T.null url
then ignore $ renderTags' [tag, TagClose "iframe"]
else catchError
(do (bs, _) <- openURL url
let inp = UTF8.toText bs
opts <- readerOpts <$> getState
Pandoc _ contents <- readHtml opts inp
return $ B.divWith ("",["iframe"],[]) $ B.fromList contents
return $ B.divWith ("",["iframe"],[]) $ B.fromList contents)
(\e -> do
logMessage $ CouldNotFetchResource url (renderError e)
ignore $ renderTags' [tag, TagClose "iframe"])
pRawHtmlBlock :: PandocMonad m => TagParser m Blocks
pRawHtmlBlock = do

View file

@ -2,7 +2,17 @@
% pandoc -f html -t native --verbose
<iframe src=""></iframe>
^D
[INFO] Fetching ...
[INFO] Skipped '<iframe src></iframe>' at input line 1 column 1
[INFO] Skipped '<iframe src></iframe>' at input line 1 column 16
[]
```
```
% pandoc -f html -t native --verbose
<iframe src="h:invalid@url"></iframe>
^D
[INFO] Fetching h:invalid@url...
[WARNING] Could not fetch resource 'h:invalid@url': Could not fetch h:invalid@url
InvalidUrlException "h:invalid@url" "Invalid scheme"
[INFO] Skipped '<iframe src="h:invalid@url"></iframe>' at input line 1 column 29
[]
```