From f317ec41a1948e35330364c3120d937cc9934888 Mon Sep 17 00:00:00 2001
From: Elliot Bobrow <77182873+ebobrow@users.noreply.github.com>
Date: Mon, 20 Jun 2022 12:18:05 -0700
Subject: [PATCH] LaTeX: Handle formatted text inside code inline (#8129)

Add `formatCode` function to Text.Pandoc.Shared [API change].

Use this in the LaTeX reader so that e.g.
`\texttt{\textbf{bold code}}` is parsed as `Strong [Code ("",[],[]) "bold code"]`.
---
 src/Text/Pandoc/Readers/LaTeX.hs | 11 ++++-------
 src/Text/Pandoc/Shared.hs        | 23 ++++++++++++++++++++++-
 test/command/7525.md             | 19 +++++++++++++++++++
 3 files changed, 45 insertions(+), 8 deletions(-)
 create mode 100644 test/command/7525.md

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index f5d82b93c..988cc3055 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -347,7 +347,7 @@ inlineCommands = M.unions
     , ("textmd", extractSpaces (spanWith ("",["medium"],[])) <$> tok)
     , ("textrm", extractSpaces (spanWith ("",["roman"],[])) <$> tok)
     , ("textup", extractSpaces (spanWith ("",["upright"],[])) <$> tok)
-    , ("texttt", ttfamily)
+    , ("texttt", formatCode nullAttr <$> tok)
     , ("sout", extractSpaces strikeout <$> tok)
     , ("alert", skipopts >> spanWith ("",["alert"],[]) <$> tok) -- beamer
     , ("textsuperscript", extractSpaces superscript <$> tok)
@@ -368,7 +368,7 @@ inlineCommands = M.unions
     , ("it", extractSpaces emph <$> inlines)
     , ("sl", extractSpaces emph <$> inlines)
     , ("bf", extractSpaces strong <$> inlines)
-    , ("tt", code . stringify . toList <$> inlines)
+    , ("tt", formatCode nullAttr <$> inlines)
     , ("rm", inlines)
     , ("itshape", extractSpaces emph <$> inlines)
     , ("slshape", extractSpaces emph <$> inlines)
@@ -407,8 +407,8 @@ inlineCommands = M.unions
     , ("hypertarget", hypertargetInline)
     -- hyphenat
     , ("nohyphens", tok)
-    , ("textnhtt", ttfamily)
-    , ("nhttfamily", ttfamily)
+    , ("textnhtt", formatCode nullAttr <$> tok)
+    , ("nhttfamily", formatCode nullAttr <$> tok)
     -- LaTeX colors
     , ("textcolor", coloredInline "color")
     , ("colorbox", coloredInline "background-color")
@@ -547,9 +547,6 @@ coloredInline stylename = do
   color <- braced
   spanWith ("",[],[("style",stylename <> ": " <> untokenize color)]) <$> tok
 
-ttfamily :: PandocMonad m => LP m Inlines
-ttfamily = code . stringify . toList <$> tok
-
 processHBox :: Inlines -> Inlines
 processHBox = walk convert
   where
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 141a0e82a..cc7893511 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -70,6 +70,7 @@ module Text.Pandoc.Shared (
                      eastAsianLineBreakFilter,
                      htmlSpanLikeElements,
                      filterIpynbOutput,
+                     formatCode,
                      -- * TagSoup HTML handling
                      renderTags',
                      -- * File handling
@@ -106,7 +107,7 @@ import Data.Char (isAlpha, isLower, isSpace, isUpper, toLower, isAlphaNum,
                   generalCategory, GeneralCategory(NonSpacingMark,
                   SpacingCombiningMark, EnclosingMark, ConnectorPunctuation))
 import Data.Containers.ListUtils (nubOrd)
-import Data.List (find, intercalate, intersperse, sortOn, foldl')
+import Data.List (find, intercalate, intersperse, sortOn, foldl', groupBy)
 import qualified Data.Map as M
 import Data.Maybe (mapMaybe, fromMaybe)
 import Data.Monoid (Any (..))
@@ -779,6 +780,26 @@ filterIpynbOutput mode = walk go
                     | otherwise = ""
         go x = x
 
+-- | Render 'Inlines' as code while keeping nested inline formatting:
+-- each run of stringlike inlines, at any depth, becomes one 'Code' element
+-- with the given attributes.  This works around 'Code' only holding string
+-- content (see issue #7525).  For example, @[Str "a",Space,Strong [Str "b"]]@
+-- turns into @[Code ("",[],[]) "a ", Strong [Code ("",[],[]) "b"]]@.
+formatCode :: Attr -> Inlines -> Inlines
+formatCode attr = B.fromList . walk codifyRuns . B.toList
+  where
+    codifyRuns = concatMap codify . groupBy bothStringlike
+    bothStringlike x y = stringlike x && stringlike y
+    codify run
+      | all stringlike run = B.toList (B.codeWith attr (stringify run))
+      | otherwise = run
+    stringlike il = case il of
+      Str _ -> True
+      Space -> True
+      SoftBreak -> True
+      Quoted _ _ -> True
+      _ -> False
+
 --
 -- TagSoup HTML handling
 --
diff --git a/test/command/7525.md b/test/command/7525.md
new file mode 100644
index 000000000..788c749a6
--- /dev/null
+++ b/test/command/7525.md
@@ -0,0 +1,19 @@
+```
+% pandoc -f latex -t native
+\texttt{Normal code. \emph{Emph and code.} \textsc{\textbf{Bold small caps.}} \sout{Strikeout. \underline{Strikeout and underline.}}}
+^D
+[ Para
+    [ Code ( "" , [] , [] ) "Normal code. "
+    , Emph [ Code ( "" , [] , [] ) "Emph and code." ]
+    , Code ( "" , [] , [] ) " "
+    , SmallCaps
+        [ Strong [ Code ( "" , [] , [] ) "Bold small caps." ] ]
+    , Code ( "" , [] , [] ) " "
+    , Strikeout
+        [ Code ( "" , [] , [] ) "Strikeout. "
+        , Underline
+            [ Code ( "" , [] , [] ) "Strikeout and underline." ]
+        ]
+    ]
+]
+```