From bf2eb4f288df5d5b5e054cdc4ffa3b3c4dd187fa Mon Sep 17 00:00:00 2001
From: Florian Beeres <yuuki@protonmail.com>
Date: Mon, 11 Nov 2019 17:55:58 +0100
Subject: [PATCH] Change the implementation of `htmlSpanLikeElements` and
 implement `<dfn>` (#5882)

* Add HTML Reader support for `<dfn>`, parsing this as a Span with class `dfn`.
* Change how the HTML reader and writer handle `htmlSpanLikeElements`
  so that the element's id, classes, other attributes and inline
  content are retained.
---
 src/Text/Pandoc/Readers/HTML.hs | 15 +++++++++++----
 src/Text/Pandoc/Shared.hs       |  2 +-
 src/Text/Pandoc/Writers/HTML.hs |  6 ++++--
 test/command/5795.md            | 20 ++++++++++++++++++++
 4 files changed, 36 insertions(+), 7 deletions(-)
 create mode 100644 test/command/5795.md

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 697a3aa9d..e03ac6a97 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -719,10 +719,17 @@ pSubscript :: PandocMonad m => TagParser m Inlines
 pSubscript = pInlinesInTags "sub" B.subscript
 
 pSpanLike :: PandocMonad m => TagParser m Inlines
-pSpanLike = Set.foldr
-  (\tag acc -> acc <|> pInlinesInTags tag (B.spanWith ("",[T.unpack tag],[])))
-  mzero
-  htmlSpanLikeElements
+pSpanLike =
+  Set.foldr
+    (\tagName acc -> acc <|> parseTag tagName)
+    mzero
+    htmlSpanLikeElements
+  where
+    parseTag tagName = do
+      TagOpen _ attrs <- pSatisfy $ tagOpenLit tagName (const True)
+      let (ids, cs, kvs) = mkAttr . toStringAttr $ attrs
+      content <- mconcat <$> manyTill inline (pCloses tagName <|> eof)
+      return $ B.spanWith (ids, T.unpack tagName : cs, kvs) content
 
 pSmall :: PandocMonad m => TagParser m Inlines
 pSmall = pInlinesInTags "small" (B.spanWith ("",["small"],[]))
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index ff97d16fb..797a0a0b0 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -702,7 +702,7 @@ underlineSpan = B.spanWith ("", ["underline"], [])
 -- | Set of HTML elements that are represented as Span with a class equal as
 -- the element tag itself.
 htmlSpanLikeElements :: Set.Set T.Text
-htmlSpanLikeElements = Set.fromList [T.pack "kbd", T.pack "mark"]
+htmlSpanLikeElements = Set.fromList [T.pack "kbd", T.pack "mark", T.pack "dfn"]
 
 -- | Returns the first sentence in a list of inlines, and the rest.
 breakSentence :: [Inline] -> ([Inline], [Inline])
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 3971b7740..f042bda21 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -998,13 +998,15 @@ inlineToHtml opts inline = do
 
     (Span (id',classes,kvs) ils) ->
                         let spanLikeTag = case classes of
-                                [c] -> do
+                                (c:_) -> do
                                   let c' = T.pack c
                                   guard (c' `Set.member` htmlSpanLikeElements)
                                   pure $ customParent (textTag c')
                                 _   -> Nothing
                         in case spanLikeTag of
-                            Just tag -> tag <$> inlineListToHtml opts ils
+                            Just tag -> do
+                              h <- inlineListToHtml opts ils
+                              addAttrs opts (id',tail classes',kvs') $ tag h
                             Nothing -> do
                               h <- inlineListToHtml opts ils
                               addAttrs opts (id',classes',kvs') (H.span h)
diff --git a/test/command/5795.md b/test/command/5795.md
new file mode 100644
index 000000000..0d5154b4d
--- /dev/null
+++ b/test/command/5795.md
@@ -0,0 +1,20 @@
+```
+% pandoc -f html -t html
+<dfn class="dfn" id="foo" title="bax"><span>foo</span></dfn>
+^D
+<dfn id="foo" class="dfn" title="bax"><span>foo</span></dfn>
+```
+
+```
+% pandoc -f html -t native
+<dfn class="dfn" id="foo" title="bax"><span>foo</span></dfn>
+^D
+[Plain [Span ("foo",["dfn","dfn"],[("title","bax")]) [Span ("",[],[]) [Str "foo"]]]]
+```
+
+```
+% pandoc -f native -t html
+[Plain [Span ("foo",["dfn","dfn"],[("title","bax")]) [Span ("",[],[]) [Str "foo"]]]]
+^D
+<dfn id="foo" class="dfn" title="bax"><span>foo</span></dfn>
+```