HTML Reader: be more forgiving about figcaption

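Fixes #4183: the contents of `<figcaption>` are now parsed as blocks and flattened to inlines, so captions wrapped in block-level elements such as `<div>` or `<p>` are accepted.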
This commit is contained in:
mb21 2017-12-21 09:56:14 +01:00
parent e3c1449ae6
commit 9b54b94612
2 changed files with 39 additions and 4 deletions

View file

@ -74,8 +74,8 @@ import Text.Pandoc.Options (
ReaderOptions (readerExtensions, readerStripComments),
extensionEnabled)
import Text.Pandoc.Parsing hiding ((<|>))
import Text.Pandoc.Shared (addMetaField, crFilter, escapeURI, extractSpaces,
safeRead, underlineSpan)
import Text.Pandoc.Shared (addMetaField, blocksToInlines', crFilter, escapeURI,
extractSpaces, safeRead, underlineSpan)
import Text.Pandoc.Walk
import Text.Parsec.Error
import Text.TeXMath (readMathML, writeTeX)
@ -588,8 +588,11 @@ pFigure = try $ do
skipMany pBlank
let pImg = (\x -> (Just x, Nothing)) <$>
(pOptInTag "p" pImage <* skipMany pBlank)
pCapt = (\x -> (Nothing, Just x)) <$>
(pInTags "figcaption" inline <* skipMany pBlank)
pCapt = (\x -> (Nothing, Just x)) <$> do
skipMany pBlank
bs <- pInTags "figcaption" block
skipMany pBlank
return $ blocksToInlines' $ B.toList bs
pSkip = (Nothing, Nothing) <$ pSatisfy (not . matchTagClose "figure")
res <- many (pImg <|> pCapt <|> pSkip)
let mbimg = msum $ map fst res

32
test/command/4183.md Normal file
View file

@ -0,0 +1,32 @@
```
% pandoc -f html -t native
<figure>
<img src="foo" alt="bar">
</figure>
^D
[Para [Image ("",[],[]) [] ("foo","fig:")]]
```
```
% pandoc -f html -t native
<figure>
<img src="foo" alt="bar">
<figcaption>
<div>
baz
</div>
</figcaption>
</figure>
^D
[Para [Image ("",[],[]) [Str "baz"] ("foo","fig:")]]
```
```
% pandoc -f html -t native
<figure>
<img src="foo">
<figcaption><p><em>baz</em></p></figcaption>
</figure>
^D
[Para [Image ("",[],[]) [Emph [Str "baz"]] ("foo","fig:")]]
```