From 7d9b782f73edfc49fbe6f0c3d6ce61328811cbc7 Mon Sep 17 00:00:00 2001
From: Mauro Bieg <mb21@users.noreply.github.com>
Date: Sat, 22 Jul 2017 19:22:56 +0200
Subject: [PATCH] HTML Reader: parse figure and figcaption (#3813)

---
 src/Text/Pandoc/Readers/HTML.hs  | 33 +++++++++++++++++++++++
 test/command/html-read-figure.md | 45 ++++++++++++++++++++++++++++++++
 2 files changed, 78 insertions(+)
 create mode 100644 test/command/html-read-figure.md

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 734973e33..3a0d6eb14 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -188,6 +188,7 @@ block = do
             , pBody
             , pDiv
             , pPlain
+            , pFigure
             , pRawHtmlBlock
             ]
   trace (take 60 $ show $ B.toList res)
@@ -553,6 +554,38 @@ pPara = do
   contents <- trimInlines <$> pInTags "p" inline
   return $ B.para contents
 
+-- | Parse a @\<figure\>@ element holding one image (optionally wrapped in
+-- @\<p\>@) and an optional @\<figcaption\>@ placed before or after it.
+-- The result is a paragraph with a single image whose title is prefixed
+-- with @fig:@ and whose description is the caption; the image's own
+-- description is discarded.
+--
+-- Wrapped in 'try', like 'pCodeBlock', so that a @\<figure\>@ that does not
+-- fit this shape fails without consuming input and is left for the block
+-- parsers listed after 'pFigure'.
+pFigure :: PandocMonad m => TagParser m Blocks
+pFigure = try $ do
+  TagOpen _ _ <- pSatisfy (matchTagOpen "figure" [])
+  skipMany pBlank
+  let pImg  = pOptInTag "p" pImage <* skipMany pBlank
+      pCapt = option mempty $ pInTags "figcaption" inline <* skipMany pBlank
+      pImgCapt = do
+        img <- pImg
+        cap <- pCapt
+        return (img, cap)
+      pCaptImg = do
+        cap <- pCapt
+        img <- pImg
+        return (img, cap)
+  (imgMany, caption) <- pImgCapt <|> pCaptImg
+  TagClose _ <- pSatisfy (matchTagClose "figure")
+  -- Match totally: a lazy @let@ pattern here would only blow up later,
+  -- when the image fields are forced, if the list held no image.
+  case B.toList imgMany of
+    Image attr _ (url, tit) : _ ->
+      return $ B.para $ B.imageWith attr url ("fig:" ++ tit) caption
+    _ -> fail "figure element without an image"
+
 pCodeBlock :: PandocMonad m => TagParser m Blocks
 pCodeBlock = try $ do
   TagOpen _ attr' <- pSatisfy (matchTagOpen "pre" [])
diff --git a/test/command/html-read-figure.md b/test/command/html-read-figure.md
new file mode 100644
index 000000000..9c604c706
--- /dev/null
+++ b/test/command/html-read-figure.md
@@ -0,0 +1,45 @@
+```
+% pandoc -f html -t native
+<figure>
+  <img src="foo.png" title="voyage">
+  <figcaption>bar</figcaption>
+</figure>
+^D
+[Para [Image ("",[],[]) [Str "bar"] ("foo.png","fig:voyage")]]
+```
+
+```
+% pandoc -f html -t native
+<figure>
+  <figcaption>bar</figcaption>
+  <img src="foo.png" title="voyage">
+</figure>
+^D
+[Para [Image ("",[],[]) [Str "bar"] ("foo.png","fig:voyage")]]
+```
+
+```
+% pandoc -f html -t native
+<figure>
+  <img src="foo.png" title="voyage">
+</figure>
+^D
+[Para [Image ("",[],[]) [] ("foo.png","fig:voyage")]]
+```
+
+```
+% pandoc -f html -t native
+<figure>
+  <p><img src="foo.png" title="voyage"></p>
+  <figcaption>bar</figcaption>
+</figure>
+^D
+[Para [Image ("",[],[]) [Str "bar"] ("foo.png","fig:voyage")]]
+```
+
+```
+% pandoc -f html -t native
+<figure><img src="foo.png" title="voyage" alt="this is ignored"><figcaption>bar <strong>baz</strong></figcaption></figure>
+^D
+[Para [Image ("",[],[]) [Str "bar",Space,Strong [Str "baz"]] ("foo.png","fig:voyage")]]
+```