HTML reader: fix bad handling of empty src attribute in iframe.

- If src is empty, we simply skip the iframe.
- If src is invalid or cannot be fetched, we issue a warning
  and skip the iframe instead of failing with an error.
- Closes #7099.
This commit is contained in:
John MacFarlane 2021-02-13 13:06:22 -08:00
parent 6e73273916
commit d84a6041e1
2 changed files with 24 additions and 8 deletions

View file

@ -26,7 +26,7 @@ module Text.Pandoc.Readers.HTML ( readHtml
import Control.Applicative ((<|>)) import Control.Applicative ((<|>))
import Control.Monad (guard, msum, mzero, unless, void) import Control.Monad (guard, msum, mzero, unless, void)
import Control.Monad.Except (throwError) import Control.Monad.Except (throwError, catchError)
import Control.Monad.Reader (ask, asks, lift, local, runReaderT) import Control.Monad.Reader (ask, asks, lift, local, runReaderT)
import Data.ByteString.Base64 (encode) import Data.ByteString.Base64 (encode)
import Data.Char (isAlphaNum, isLetter) import Data.Char (isAlphaNum, isLetter)
@ -393,11 +393,17 @@ pIframe = try $ do
tag <- pSatisfy (tagOpen (=="iframe") (isJust . lookup "src")) tag <- pSatisfy (tagOpen (=="iframe") (isJust . lookup "src"))
pCloses "iframe" <|> eof pCloses "iframe" <|> eof
url <- canonicalizeUrl $ fromAttrib "src" tag url <- canonicalizeUrl $ fromAttrib "src" tag
(bs, _) <- openURL url if T.null url
then ignore $ renderTags' [tag, TagClose "iframe"]
else catchError
(do (bs, _) <- openURL url
let inp = UTF8.toText bs let inp = UTF8.toText bs
opts <- readerOpts <$> getState opts <- readerOpts <$> getState
Pandoc _ contents <- readHtml opts inp Pandoc _ contents <- readHtml opts inp
return $ B.divWith ("",["iframe"],[]) $ B.fromList contents return $ B.divWith ("",["iframe"],[]) $ B.fromList contents)
(\e -> do
logMessage $ CouldNotFetchResource url (renderError e)
ignore $ renderTags' [tag, TagClose "iframe"])
pRawHtmlBlock :: PandocMonad m => TagParser m Blocks pRawHtmlBlock :: PandocMonad m => TagParser m Blocks
pRawHtmlBlock = do pRawHtmlBlock = do

View file

@ -2,7 +2,17 @@
% pandoc -f html -t native --verbose % pandoc -f html -t native --verbose
<iframe src=""></iframe> <iframe src=""></iframe>
^D ^D
[INFO] Fetching ... [INFO] Skipped '<iframe src></iframe>' at input line 1 column 16
[INFO] Skipped '<iframe src></iframe>' at input line 1 column 1 []
```
```
% pandoc -f html -t native --verbose
<iframe src="h:invalid@url"></iframe>
^D
[INFO] Fetching h:invalid@url...
[WARNING] Could not fetch resource 'h:invalid@url': Could not fetch h:invalid@url
InvalidUrlException "h:invalid@url" "Invalid scheme"
[INFO] Skipped '<iframe src="h:invalid@url"></iframe>' at input line 1 column 29
[] []
``` ```