HTML reader: fix bad handling of empty src attribute in iframe.
- If src is empty, we simply skip the iframe.
- If src is invalid or cannot be fetched, we issue a warning and skip the iframe instead of failing with an error.
- Closes #7099.
This commit is contained in:
parent
6e73273916
commit
d84a6041e1
2 changed files with 24 additions and 8 deletions
|
@@ -26,7 +26,7 @@ module Text.Pandoc.Readers.HTML ( readHtml
|
|||
|
||||
import Control.Applicative ((<|>))
|
||||
import Control.Monad (guard, msum, mzero, unless, void)
|
||||
import Control.Monad.Except (throwError)
|
||||
import Control.Monad.Except (throwError, catchError)
|
||||
import Control.Monad.Reader (ask, asks, lift, local, runReaderT)
|
||||
import Data.ByteString.Base64 (encode)
|
||||
import Data.Char (isAlphaNum, isLetter)
|
||||
|
@@ -393,11 +393,17 @@ pIframe = try $ do
|
|||
tag <- pSatisfy (tagOpen (=="iframe") (isJust . lookup "src"))
|
||||
pCloses "iframe" <|> eof
|
||||
url <- canonicalizeUrl $ fromAttrib "src" tag
|
||||
(bs, _) <- openURL url
|
||||
if T.null url
|
||||
then ignore $ renderTags' [tag, TagClose "iframe"]
|
||||
else catchError
|
||||
(do (bs, _) <- openURL url
|
||||
let inp = UTF8.toText bs
|
||||
opts <- readerOpts <$> getState
|
||||
Pandoc _ contents <- readHtml opts inp
|
||||
return $ B.divWith ("",["iframe"],[]) $ B.fromList contents
|
||||
return $ B.divWith ("",["iframe"],[]) $ B.fromList contents)
|
||||
(\e -> do
|
||||
logMessage $ CouldNotFetchResource url (renderError e)
|
||||
ignore $ renderTags' [tag, TagClose "iframe"])
|
||||
|
||||
pRawHtmlBlock :: PandocMonad m => TagParser m Blocks
|
||||
pRawHtmlBlock = do
|
||||
|
|
|
@@ -2,7 +2,17 @@
|
|||
% pandoc -f html -t native --verbose
|
||||
<iframe src=""></iframe>
|
||||
^D
|
||||
[INFO] Fetching ...
|
||||
[INFO] Skipped '<iframe src></iframe>' at input line 1 column 1
|
||||
[INFO] Skipped '<iframe src></iframe>' at input line 1 column 16
|
||||
[]
|
||||
```
|
||||
|
||||
```
|
||||
% pandoc -f html -t native --verbose
|
||||
<iframe src="h:invalid@url"></iframe>
|
||||
^D
|
||||
[INFO] Fetching h:invalid@url...
|
||||
[WARNING] Could not fetch resource 'h:invalid@url': Could not fetch h:invalid@url
|
||||
InvalidUrlException "h:invalid@url" "Invalid scheme"
|
||||
[INFO] Skipped '<iframe src="h:invalid@url"></iframe>' at input line 1 column 29
|
||||
[]
|
||||
```
|
||||
|
|
Loading…
Reference in a new issue