From a0e44b1ff651ba59fe23b9e1b5d534d324dc0808 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 11 Aug 2021 10:32:52 -0700
Subject: [PATCH] LaTeX reader: improve handling of plain TeX macro primitives.

- Fixed semantics for `\let`.
- Implement `\edef`, `\gdef`, and `\xdef`.
- Add comment noting that currently `\def` and `\edef` set global
  macros (so are equivalent to `\gdef` and `\xdef`).  This should be
  fixed by scoping macro definitions to groups, in a future commit.

Closes #7474.
---
 src/Text/Pandoc/Readers/LaTeX/Macro.hs   | 31 ++++++++++++++++---
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs |  4 +--
 test/command/macros.md                   | 38 +++++++++++++++++++++++-
 3 files changed, 66 insertions(+), 7 deletions(-)

diff --git a/src/Text/Pandoc/Readers/LaTeX/Macro.hs b/src/Text/Pandoc/Readers/LaTeX/Macro.hs
index 5495a8e74..6a7b0eed3 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Macro.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Macro.hs
@@ -23,7 +23,8 @@ macroDef constructor = do
       guardDisabled Ext_latex_macros)
      <|> return mempty
   where commandDef = do
-          nameMacroPairs <- newcommand <|> letmacro <|> defmacro <|> newif
+          nameMacroPairs <- newcommand <|> letmacro <|>
+            edefmacro <|> defmacro <|> newif
           guardDisabled Ext_latex_macros <|>
            mapM_ (\(name, macro') ->
                    updateState (\s -> s{ sMacros = M.insert name macro'
@@ -46,23 +47,45 @@ macroDef constructor = do
 letmacro :: PandocMonad m => LP m [(Text, Macro)]
 letmacro = do
   controlSeq "let"
-  (name, contents) <- withVerbatimMode $ do
+  withVerbatimMode $ do
     Tok _ (CtrlSeq name) _ <- anyControlSeq
     optional $ symbol '='
     spaces
     -- we first parse in verbatim mode, and then expand macros,
     -- because we don't want \let\foo\bar to turn into
     -- \let\foo hello if we have previously \def\bar{hello}
+    macros <- sMacros <$> getState
+    target <- anyControlSeq <|> singleChar
+    case target of
+      (Tok _ (CtrlSeq name') _) ->
+        case M.lookup name' macros of
+          Just m -> return [(name, m)]
+          Nothing -> return [(name, Macro ExpandWhenDefined [] Nothing [target])]
+      _ -> return [(name, Macro ExpandWhenDefined [] Nothing [target])]
+
+edefmacro :: PandocMonad m => LP m [(Text, Macro)]
+edefmacro = do
+  controlSeq "edef" <|> controlSeq "xdef"
+  -- TODO Currently we don't distinguish these.  \edef should only
+  -- affect its own group, while \xdef sets a global macro.
+  (name, contents) <- withVerbatimMode $ do
+    Tok _ (CtrlSeq name) _ <- anyControlSeq
+    -- we first parse in verbatim mode, and then expand macros,
+    -- because we don't want \let\foo\bar to turn into
+    -- \let\foo hello if we have previously \def\bar{hello}
     contents <- bracedOrToken
     return (name, contents)
-  contents' <- doMacros' 0 contents
+  -- expand macros
+  contents' <- parseFromToks (many anyTok) contents
   return [(name, Macro ExpandWhenDefined [] Nothing contents')]
 
 defmacro :: PandocMonad m => LP m [(Text, Macro)]
 defmacro = do
   -- we use withVerbatimMode, because macros are to be expanded
   -- at point of use, not point of definition
-  controlSeq "def"
+  controlSeq "def" <|> controlSeq "gdef"
+  -- TODO Currently we don't distinguish these.  \def should only
+  -- affect its own group, while \gdef sets a global macro.
   withVerbatimMode $ do
     Tok _ (CtrlSeq name) _ <- anyControlSeq
     argspecs <- many (argspecArg <|> argspecPattern)
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 9dac4d6ef..8d6791abb 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -267,7 +267,7 @@ rawLaTeXParser toks retokenize parser valParser = do
        Right (endpos, toks') -> do
          res <- lift $ runParserT (do when retokenize $ do
                                         -- retokenize, applying macros
-                                        ts <- many (satisfyTok (const True))
+                                        ts <- many anyTok
                                         setInput ts
                                       rawparser)
                         lstate' "chunk" toks'
@@ -296,7 +296,7 @@ rawLaTeXParser toks retokenize parser valParser = do
 applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s)
             => Text -> ParserT Sources s m Text
 applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
-   do let retokenize = untokenize <$> many (satisfyTok (const True))
+   do let retokenize = untokenize <$> many anyTok
       pstate <- getState
       let lstate = def{ sOptions = extractReaderOptions pstate
                       , sMacros  = extractMacros pstate }
diff --git a/test/command/macros.md b/test/command/macros.md
index e3c07e661..9f7a0f9d3 100644
--- a/test/command/macros.md
+++ b/test/command/macros.md
@@ -25,7 +25,7 @@ expanded at point of use:
 ```
 % pandoc -f latex -t latex
 \let\a\b
-\newcommand{\b}{\emph{ouk}}
+\def\b{\emph{ouk}}
 \a a
 ^D
 a̱
@@ -123,3 +123,39 @@ hello+hello
 hello+goodbye
 ```
 
+```
+% pandoc -f latex -t plain
+\def\txt{a}
+\def\foo{\txt}
+\let\bar\foo
+\bar % -> a
+\def\txt{b}
+\bar % -> b
+\def\foo{OH}
+\bar % -> b
+^D
+a b b
+```
+
+```
+% pandoc -f latex -t plain
+\def\aaa{aaa}
+\def\bbb{x\aaa}
+\edef\ccc{y\aaa}
+\def\aaa{AAA}
+\bbb \ccc
+^D
+xAAAyaaa
+```
+
+```
+% pandoc -f latex -t plain
+\gdef\aaa{aaa}
+\gdef\bbb{x\aaa}
+\xdef\ccc{y\aaa}
+\gdef\aaa{AAA}
+\bbb \ccc
+^D
+xAAAyaaa
+```
+