From f317ec41a1948e35330364c3120d937cc9934888 Mon Sep 17 00:00:00 2001 From: Elliot Bobrow <77182873+ebobrow@users.noreply.github.com> Date: Mon, 20 Jun 2022 12:18:05 -0700 Subject: [PATCH] LaTeX: Handle formatted text inside code inline (#8129) Add `formatCode` function to Text.Pandoc.Shared [API change]. Use this in the LaTeX reader so that e.g. `\texttt{\textbf{bold code}}` is parsed as `Strong [Code ("",[],[]) "bold code"]`. --- src/Text/Pandoc/Readers/LaTeX.hs | 11 ++++------- src/Text/Pandoc/Shared.hs | 23 ++++++++++++++++++++++- test/command/7525.md | 19 +++++++++++++++++++ 3 files changed, 45 insertions(+), 8 deletions(-) create mode 100644 test/command/7525.md diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index f5d82b93c..988cc3055 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -347,7 +347,7 @@ inlineCommands = M.unions , ("textmd", extractSpaces (spanWith ("",["medium"],[])) <$> tok) , ("textrm", extractSpaces (spanWith ("",["roman"],[])) <$> tok) , ("textup", extractSpaces (spanWith ("",["upright"],[])) <$> tok) - , ("texttt", ttfamily) + , ("texttt", formatCode nullAttr <$> tok) , ("sout", extractSpaces strikeout <$> tok) , ("alert", skipopts >> spanWith ("",["alert"],[]) <$> tok) -- beamer , ("textsuperscript", extractSpaces superscript <$> tok) @@ -368,7 +368,7 @@ inlineCommands = M.unions , ("it", extractSpaces emph <$> inlines) , ("sl", extractSpaces emph <$> inlines) , ("bf", extractSpaces strong <$> inlines) - , ("tt", code . stringify . toList <$> inlines) + , ("tt", formatCode nullAttr <$> inlines) , ("rm", inlines) , ("itshape", extractSpaces emph <$> inlines) , ("slshape", extractSpaces emph <$> inlines) @@ -407,8 +407,8 @@ inlineCommands = M.unions , ("hypertarget", hypertargetInline) -- hyphenat , ("nohyphens", tok) - , ("textnhtt", ttfamily) - , ("nhttfamily", ttfamily) + , ("textnhtt", formatCode nullAttr <$> tok) + , ("nhttfamily", formatCode nullAttr <$> tok) -- LaTeX colors , ("textcolor", coloredInline "color") , ("colorbox", coloredInline "background-color") @@ -547,9 +547,6 @@ coloredInline stylename = do color <- braced spanWith ("",[],[("style",stylename <> ": " <> untokenize color)]) <$> tok -ttfamily :: PandocMonad m => LP m Inlines -ttfamily = code . stringify . toList <$> tok - processHBox :: Inlines -> Inlines processHBox = walk convert where diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 141a0e82a..cc7893511 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -70,6 +70,7 @@ module Text.Pandoc.Shared ( eastAsianLineBreakFilter, htmlSpanLikeElements, filterIpynbOutput, + formatCode, -- * TagSoup HTML handling renderTags', -- * File handling @@ -106,7 +107,7 @@ import Data.Char (isAlpha, isLower, isSpace, isUpper, toLower, isAlphaNum, generalCategory, GeneralCategory(NonSpacingMark, SpacingCombiningMark, EnclosingMark, ConnectorPunctuation)) import Data.Containers.ListUtils (nubOrd) -import Data.List (find, intercalate, intersperse, sortOn, foldl') +import Data.List (find, intercalate, intersperse, sortOn, foldl', groupBy) import qualified Data.Map as M import Data.Maybe (mapMaybe, fromMaybe) import Data.Monoid (Any (..)) @@ -779,6 +780,26 @@ filterIpynbOutput mode = walk go | otherwise = "" go x = x +-- | Reformat 'Inlines' as code, putting the stringlike parts in 'Code' +-- elements while bringing other inline formatting outside. +-- The idea is that e.g. `[Str "a",Space,Strong [Str "b"]]` should turn +-- into `[Code ("",[],[]) "a ", Strong [Code ("",[],[]) "b"]]`. +-- This helps work around the limitation that pandoc's Code element can +-- only contain string content (see issue #7525). +formatCode :: Attr -> Inlines -> Inlines +formatCode attr = B.fromList . walk fmt . B.toList + where + isPlaintext (Str _) = True + isPlaintext Space = True + isPlaintext SoftBreak = True + isPlaintext (Quoted _ _) = True + isPlaintext _ = False + fmt = concatMap go . groupBy (\a b -> isPlaintext a && isPlaintext b) + where + go xs + | all isPlaintext xs = B.toList $ B.codeWith attr $ stringify xs + | otherwise = xs + -- -- TagSoup HTML handling -- diff --git a/test/command/7525.md b/test/command/7525.md new file mode 100644 index 000000000..788c749a6 --- /dev/null +++ b/test/command/7525.md @@ -0,0 +1,19 @@ +``` +% pandoc -f latex -t native +\texttt{Normal code. \emph{Emph and code.} \textsc{\textbf{Bold small caps.}} \sout{Strikeout. \underline{Strikeout and underline.}}} +^D +[ Para + [ Code ( "" , [] , [] ) "Normal code. " + , Emph [ Code ( "" , [] , [] ) "Emph and code." ] + , Code ( "" , [] , [] ) " " + , SmallCaps + [ Strong [ Code ( "" , [] , [] ) "Bold small caps." ] ] + , Code ( "" , [] , [] ) " " + , Strikeout + [ Code ( "" , [] , [] ) "Strikeout. " + , Underline + [ Code ( "" , [] , [] ) "Strikeout and underline." ] + ] + ] +] +```