From e3a263df4669450049c49043412f791261e049a4 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 11 Aug 2021 16:14:34 -0700
Subject: [PATCH] Fix scope for LaTeX macros.

They should by default scope over the group in which they
are defined (except `\gdef` and `\xdef`, which are global).
In addition, environments must be treated as groups.

We handle this by making sMacros in the LaTeX parser state
a STACK of macro tables. Opening a group adds a table to
the stack, closing one removes one.  Only the top of the stack
is queried.

This commit adds a parameter for scope to the Macro constructor
(not exported).

Closes #7494.
---
 src/Text/Pandoc/Readers/LaTeX/Macro.hs   | 100 +++++++++++++----------
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs |  49 ++++++++---
 src/Text/Pandoc/Readers/LaTeX/Types.hs   |   6 +-
 test/command/7494.md                     |  50 ++++++++++++
 4 files changed, 150 insertions(+), 55 deletions(-)
 create mode 100644 test/command/7494.md

diff --git a/src/Text/Pandoc/Readers/LaTeX/Macro.hs b/src/Text/Pandoc/Readers/LaTeX/Macro.hs
index 6a7b0eed3..5709cbb8c 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Macro.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Macro.hs
@@ -15,6 +15,8 @@ import Control.Applicative ((<|>), optional)
 import qualified Data.Map as M
 import Data.Text (Text)
 import qualified Data.Text as T
+import qualified Data.List.NonEmpty as NonEmpty
+import Data.List.NonEmpty (NonEmpty(..))
 
 macroDef :: (PandocMonad m, Monoid a) => (Text -> a) -> LP m a
 macroDef constructor = do
@@ -26,24 +28,36 @@ macroDef constructor = do
           nameMacroPairs <- newcommand <|> letmacro <|>
             edefmacro <|> defmacro <|> newif
           guardDisabled Ext_latex_macros <|>
-           mapM_ (\(name, macro') ->
-                   updateState (\s -> s{ sMacros = M.insert name macro'
-                                          (sMacros s) })) nameMacroPairs
+            mapM_ insertMacro nameMacroPairs
         environmentDef = do
           mbenv <- newenvironment
           case mbenv of
             Nothing -> return ()
             Just (name, macro1, macro2) ->
               guardDisabled Ext_latex_macros <|>
-                do updateState $ \s -> s{ sMacros =
-                    M.insert name macro1 (sMacros s) }
-                   updateState $ \s -> s{ sMacros =
-                    M.insert ("end" <> name) macro2 (sMacros s) }
+                do insertMacro (name, macro1)
+                   insertMacro ("end" <> name, macro2)
         -- @\newenvironment{envname}[n-args][default]{begin}{end}@
         -- is equivalent to
         -- @\newcommand{\envname}[n-args][default]{begin}@
         -- @\newcommand{\endenvname}@
 
+insertMacro :: PandocMonad m => (Text, Macro) -> LP m ()
+insertMacro (name, macro'@(Macro GlobalScope _ _ _ _)) =
+  updateState $ \s ->
+     s{ sMacros = NonEmpty.map (M.insert name macro') (sMacros s) }
+insertMacro (name, macro'@(Macro GroupScope _ _ _ _)) =
+  updateState $ \s ->
+     s{ sMacros = M.insert name macro' (NonEmpty.head (sMacros s)) :|
+                      NonEmpty.tail (sMacros s) }
+
+lookupMacro :: PandocMonad m => Text -> LP m Macro
+lookupMacro name = do
+   macros :| _ <- sMacros <$> getState
+   case M.lookup name macros of
+     Just m -> return m
+     Nothing -> fail "Macro not found"
+
 letmacro :: PandocMonad m => LP m [(Text, Macro)]
 letmacro = do
   controlSeq "let"
@@ -54,20 +68,19 @@ letmacro = do
     -- we first parse in verbatim mode, and then expand macros,
     -- because we don't want \let\foo\bar to turn into
     -- \let\foo hello if we have previously \def\bar{hello}
-    macros <- sMacros <$> getState
     target <- anyControlSeq <|> singleChar
     case target of
       (Tok _ (CtrlSeq name') _) ->
-        case M.lookup name' macros of
-          Just m -> return [(name, m)]
-          Nothing -> return [(name, Macro ExpandWhenDefined [] Nothing [target])]
-      _ -> return [(name, Macro ExpandWhenDefined [] Nothing [target])]
+         (do m <- lookupMacro name'
+             pure [(name, m)])
+         <|> pure [(name,
+                    Macro GroupScope ExpandWhenDefined [] Nothing [target])]
+      _ -> pure [(name, Macro GroupScope ExpandWhenDefined [] Nothing [target])]
 
 edefmacro :: PandocMonad m => LP m [(Text, Macro)]
 edefmacro = do
-  controlSeq "edef" <|> controlSeq "xdef"
-  -- TODO Currently we don't distinguish these.  \edef should only
-  -- affect its own group, while \xdef sets a global macro.
+  scope <- (GroupScope <$ controlSeq "edef")
+       <|> (GlobalScope <$ controlSeq "xdef")
   (name, contents) <- withVerbatimMode $ do
     Tok _ (CtrlSeq name) _ <- anyControlSeq
     -- we first parse in verbatim mode, and then expand macros,
@@ -77,20 +90,19 @@ edefmacro = do
     return (name, contents)
   -- expand macros
   contents' <- parseFromToks (many anyTok) contents
-  return [(name, Macro ExpandWhenDefined [] Nothing contents')]
+  return [(name, Macro scope ExpandWhenDefined [] Nothing contents')]
 
 defmacro :: PandocMonad m => LP m [(Text, Macro)]
 defmacro = do
   -- we use withVerbatimMode, because macros are to be expanded
   -- at point of use, not point of definition
-  controlSeq "def" <|> controlSeq "gdef"
-  -- TODO Currently we don't distinguish these.  \def should only
-  -- affect its own group, while \gdef sets a global macro.
+  scope <- (GroupScope <$ controlSeq "def")
+       <|> (GlobalScope <$ controlSeq "gdef")
   withVerbatimMode $ do
     Tok _ (CtrlSeq name) _ <- anyControlSeq
     argspecs <- many (argspecArg <|> argspecPattern)
     contents <- bracedOrToken
-    return [(name, Macro ExpandWhenUsed argspecs Nothing contents)]
+    return [(name, Macro scope ExpandWhenUsed argspecs Nothing contents)]
 
 -- \newif\iffoo' defines:
 -- \iffoo to be \iffalse
@@ -105,16 +117,16 @@ newif = do
     -- \def\footrue{\def\iffoo\iftrue}
     -- \def\foofalse{\def\iffoo\iffalse}
     let base = T.drop 2 name
-    return [ (name, Macro ExpandWhenUsed [] Nothing
+    return [ (name, Macro GroupScope ExpandWhenUsed [] Nothing
                     [Tok pos (CtrlSeq "iffalse") "\\iffalse"])
            , (base <> "true",
-                   Macro ExpandWhenUsed [] Nothing
+                   Macro GroupScope ExpandWhenUsed [] Nothing
                    [ Tok pos (CtrlSeq "def") "\\def"
                    , Tok pos (CtrlSeq name) ("\\" <> name)
                    , Tok pos (CtrlSeq "iftrue") "\\iftrue"
                    ])
            , (base <> "false",
-                   Macro ExpandWhenUsed [] Nothing
+                   Macro GroupScope ExpandWhenUsed [] Nothing
                    [ Tok pos (CtrlSeq "def") "\\def"
                    , Tok pos (CtrlSeq name) ("\\" <> name)
                    , Tok pos (CtrlSeq "iffalse") "\\iffalse"
@@ -161,14 +173,13 @@ newcommand = do
                  : (contents' ++
                    [ Tok pos Symbol "}", Tok pos Symbol "}" ])
               _                     -> contents'
-    macros <- sMacros <$> getState
-    case M.lookup name macros of
-        Just macro
-          | mtype == "newcommand" -> do
-              report $ MacroAlreadyDefined txt pos
-              return [(name, macro)]
-          | mtype == "providecommand" -> return [(name, macro)]
-        _ -> return [(name, Macro ExpandWhenUsed argspecs optarg contents)]
+    let macro = Macro GroupScope ExpandWhenUsed argspecs optarg contents
+    (do lookupMacro name
+        case mtype of
+          "providecommand" -> return []
+          "renewcommand" -> return [(name, macro)]
+          _ -> [] <$ report (MacroAlreadyDefined txt pos))
+      <|> pure [(name, macro)]
 
 newenvironment :: PandocMonad m => LP m (Maybe (Text, Macro, Macro))
 newenvironment = do
@@ -187,17 +198,22 @@ newenvironment = do
     let argspecs = map (\i -> ArgNum i) [1..numargs]
     startcontents <- spaces >> bracedOrToken
     endcontents <- spaces >> bracedOrToken
-    macros <- sMacros <$> getState
-    case M.lookup name macros of
-         Just _
-           | mtype == "newenvironment" -> do
-               report $ MacroAlreadyDefined name pos
-               return Nothing
-           | mtype == "provideenvironment" ->
-               return Nothing
-         _ -> return $ Just (name,
-                      Macro ExpandWhenUsed argspecs optarg startcontents,
-                      Macro ExpandWhenUsed [] Nothing endcontents)
+    -- we need the environment to be in a group so macros defined
+    -- inside behave correctly:
+    let bg = Tok pos (CtrlSeq "bgroup") "\\bgroup "
+    let eg = Tok pos (CtrlSeq "egroup") "\\egroup "
+    let result = (name,
+                    Macro GroupScope ExpandWhenUsed argspecs optarg
+                      (bg:startcontents),
+                    Macro GroupScope ExpandWhenUsed [] Nothing (eg:endcontents))
+    (do lookupMacro name
+        case mtype of
+          "provideenvironment" -> return Nothing
+          "renewenvironment" -> return (Just result)
+          _ -> do
+             report $ MacroAlreadyDefined name pos
+             return Nothing)
+      <|> return (Just result)
 
 bracketedNum :: PandocMonad m => LP m Int
 bracketedNum = do
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 8d6791abb..e1b72926f 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -102,6 +102,9 @@ import qualified Data.IntMap as IntMap
 import qualified Data.Map as M
 import qualified Data.Set as Set
 import Data.Text (Text)
+import Data.Maybe (fromMaybe)
+import Data.List.NonEmpty (NonEmpty(..))
+import qualified Data.List.NonEmpty as NonEmpty
 import qualified Data.Text as T
 import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
@@ -146,7 +149,7 @@ data TheoremSpec =
 data LaTeXState = LaTeXState{ sOptions       :: ReaderOptions
                             , sMeta          :: Meta
                             , sQuoteContext  :: QuoteContext
-                            , sMacros        :: M.Map Text Macro
+                            , sMacros        :: NonEmpty (M.Map Text Macro)
                             , sContainers    :: [Text]
                             , sLogMessages   :: [LogMessage]
                             , sIdentifiers   :: Set.Set Text
@@ -173,7 +176,7 @@ defaultLaTeXState :: LaTeXState
 defaultLaTeXState = LaTeXState{ sOptions       = def
                               , sMeta          = nullMeta
                               , sQuoteContext  = NoQuote
-                              , sMacros        = M.empty
+                              , sMacros        = M.empty :| []
                               , sContainers    = []
                               , sLogMessages   = []
                               , sIdentifiers   = Set.empty
@@ -220,8 +223,9 @@ instance HasIncludeFiles LaTeXState where
   dropLatestIncludeFile s = s { sContainers = drop 1 $ sContainers s }
 
 instance HasMacros LaTeXState where
-  extractMacros  st  = sMacros st
-  updateMacros f st  = st{ sMacros = f (sMacros st) }
+  extractMacros  st  = NonEmpty.head $ sMacros st
+  updateMacros f st  = st{ sMacros = f (NonEmpty.head $ sMacros st)
+                                     :| (NonEmpty.tail $ sMacros st) }
 
 instance HasReaderOptions LaTeXState where
   extractReaderOptions = sOptions
@@ -254,7 +258,7 @@ rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s, Show a)
 rawLaTeXParser toks retokenize parser valParser = do
   pstate <- getState
   let lstate = def{ sOptions = extractReaderOptions pstate }
-  let lstate' = lstate { sMacros = extractMacros pstate }
+  let lstate' = lstate { sMacros = extractMacros pstate :| [] }
   let setStartPos = case toks of
                       Tok pos _ _ : _ -> setPosition pos
                       _ -> return ()
@@ -274,7 +278,7 @@ rawLaTeXParser toks retokenize parser valParser = do
          case res of
               Left _    -> mzero
               Right ((val, raw), st) -> do
-                updateState (updateMacros (sMacros st <>))
+                updateState (updateMacros ((NonEmpty.head (sMacros st)) <>))
                 let skipTilPos stopPos = do
                       anyChar
                       pos <- getPosition
@@ -299,7 +303,7 @@ applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
    do let retokenize = untokenize <$> many anyTok
       pstate <- getState
       let lstate = def{ sOptions = extractReaderOptions pstate
-                      , sMacros  = extractMacros pstate }
+                      , sMacros  = extractMacros pstate :| [] }
       res <- runParserT retokenize lstate "math" (tokenize "math" s)
       case res of
            Left e   -> Prelude.fail (show e)
@@ -552,10 +556,10 @@ doMacros' n inp =
     handleMacros n' spos name ts = do
       when (n' > 20)  -- detect macro expansion loops
         $ throwError $ PandocMacroLoop name
-      macros <- sMacros <$> getState
+      (macros :| _ ) <- sMacros <$> getState
       case M.lookup name macros of
            Nothing -> trySpecialMacro name ts
-           Just (Macro expansionPoint argspecs optarg newtoks) -> do
+           Just (Macro _scope expansionPoint argspecs optarg newtoks) -> do
              let getargs' = do
                    args <-
                      (case expansionPoint of
@@ -745,10 +749,22 @@ primEscape = do
 bgroup :: PandocMonad m => LP m Tok
 bgroup = try $ do
   optional sp
-  symbol '{' <|> controlSeq "bgroup" <|> controlSeq "begingroup"
+  t <- symbol '{' <|> controlSeq "bgroup" <|> controlSeq "begingroup"
+  -- Add a copy of the macro table to the top of the macro stack,
+  -- private for this group. We inherit all the macros defined in
+  -- the parent group.
+  updateState $ \s -> s{ sMacros = NonEmpty.cons (NonEmpty.head (sMacros s))
+                                                 (sMacros s) }
+  return t
+
 
 egroup :: PandocMonad m => LP m Tok
-egroup = symbol '}' <|> controlSeq "egroup" <|> controlSeq "endgroup"
+egroup = do
+  t <- symbol '}' <|> controlSeq "egroup" <|> controlSeq "endgroup"
+  -- remove the group's macro table from the stack
+  updateState $ \s -> s{ sMacros = fromMaybe (sMacros s) $
+      NonEmpty.nonEmpty (NonEmpty.tail (sMacros s)) }
+  return t
 
 grouped :: (PandocMonad m,  Monoid a) => LP m a -> LP m a
 grouped parser = try $ do
@@ -1017,7 +1033,16 @@ resetCaption = updateState $ \st -> st{ sCaption   = Nothing
                                       , sLastLabel = Nothing }
 
 env :: PandocMonad m => Text -> LP m a -> LP m a
-env name p = p <* end_ name
+env name p = do
+  -- environments are groups as far as macros are concerned,
+  -- so we need a local copy of the macro table (see above, bgroup, egroup):
+  updateState $ \s -> s{ sMacros = NonEmpty.cons (NonEmpty.head (sMacros s))
+                                                 (sMacros s) }
+  result <- p
+  updateState $ \s -> s{ sMacros = fromMaybe (sMacros s) $
+      NonEmpty.nonEmpty (NonEmpty.tail (sMacros s)) }
+  end_ name
+  return result
 
 tokWith :: PandocMonad m => LP m Inlines -> LP m Inlines
 tokWith inlineParser = try $ spaces >>
diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/Readers/LaTeX/Types.hs
index c20b72bc5..a4eae56db 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Types.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Types.hs
@@ -15,6 +15,7 @@ module Text.Pandoc.Readers.LaTeX.Types ( Tok(..)
                                        , Macro(..)
                                        , ArgSpec(..)
                                        , ExpansionPoint(..)
+                                       , MacroScope(..)
                                        , SourcePos
                                        )
 where
@@ -43,7 +44,10 @@ tokToText (Tok _ _ t) = t
 data ExpansionPoint = ExpandWhenDefined | ExpandWhenUsed
      deriving (Eq, Ord, Show)
 
-data Macro = Macro ExpansionPoint [ArgSpec] (Maybe [Tok]) [Tok]
+data MacroScope = GlobalScope | GroupScope
+  deriving (Eq, Ord, Show)
+
+data Macro = Macro MacroScope ExpansionPoint [ArgSpec] (Maybe [Tok]) [Tok]
      deriving Show
 
 data ArgSpec = ArgNum Int | Pattern [Tok]
diff --git a/test/command/7494.md b/test/command/7494.md
new file mode 100644
index 000000000..d8a2ff7a5
--- /dev/null
+++ b/test/command/7494.md
@@ -0,0 +1,50 @@
+```
+% pandoc -f latex -t plain
+\def\foo{BAR}
+{\foo
+\def\foo{BAZ}
+\foo
+}
+\foo
+^D
+BAR BAZ BAR
+```
+
+```
+% pandoc -f latex -t plain
+\def\foo{BAR}
+{\foo
+\gdef\foo{BAZ}
+\foo
+}
+\foo
+^D
+BAR BAZ BAZ
+```
+
+```
+% pandoc -f latex -t plain
+\newcommand{\aaa}{BBB}
+{
+\renewcommand{\aaa}{AAA}
+\aaa
+}
+\aaa
+^D
+AAA BBB
+```
+
+```
+% pandoc -f latex -t markdown
+\newcommand{\aaa}{BBB}
+\begin{quote}
+\renewcommand{\aaa}{AAA}
+\aaa
+\end{quote}
+\aaa
+^D
+> AAA
+
+BBB
+```
+