LaTeX reader: tokenize before pulling tokens,

rather than after.  This has some performance penalty
but is more reliable.

Closes #4408.
This commit is contained in:
John MacFarlane 2018-10-15 14:52:34 -07:00
parent 7e9e24b8bc
commit 1db585689a
2 changed files with 16 additions and 15 deletions

View file

@ -1480,7 +1480,7 @@ authors = try $ do
macroDef :: (Monoid a, PandocMonad m) => LP m a
macroDef =
mempty <$ ((commandDef <|> environmentDef) <* doMacros)
mempty <$ (commandDef <|> environmentDef)
where commandDef = do
(name, macro') <- newcommand <|> letmacro <|> defmacro
guardDisabled Ext_latex_macros <|>
@ -1501,7 +1501,7 @@ letmacro :: PandocMonad m => LP m (Text, Macro)
letmacro = do
controlSeq "let"
(name, contents) <- withVerbatimMode $ do
Tok _ (CtrlSeq name) _ <- withVerbatimMode anyControlSeq
Tok _ (CtrlSeq name) _ <- anyControlSeq
optional $ symbol '='
spaces
-- we first parse in verbatim mode, and then expand macros,
@ -1521,7 +1521,6 @@ defmacro = try $
Tok _ (CtrlSeq name) _ <- anyControlSeq
argspecs <- many (argspecArg <|> argspecPattern)
contents <- bracedOrToken
doMacros -- after all this verbatim mode
return (name, Macro ExpandWhenUsed argspecs Nothing contents)
argspecArg :: PandocMonad m => LP m ArgSpec
@ -1559,7 +1558,6 @@ newcommand = do
case M.lookup name macros of
Just _ -> report $ MacroAlreadyDefined (T.unpack txt) pos
Nothing -> return ()
doMacros -- after all this verbatim mode
return (name, Macro ExpandWhenUsed argspecs optarg contents)
newenvironment :: PandocMonad m => LP m (Text, Macro, Macro)

View file

@ -143,6 +143,7 @@ data LaTeXState = LaTeXState{ sOptions :: ReaderOptions
, sLabels :: M.Map String [Inline]
, sHasChapters :: Bool
, sToggles :: M.Map String Bool
, sExpanded :: Bool
}
deriving Show
@ -164,6 +165,7 @@ defaultLaTeXState = LaTeXState{ sOptions = def
, sLabels = M.empty
, sHasChapters = False
, sToggles = M.empty
, sExpanded = False
}
instance PandocMonad m => HasQuoteContext LaTeXState m where
@ -249,8 +251,7 @@ rawLaTeXParser retokenize parser valParser = do
applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s)
=> String -> ParserT String s m String
applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
do let retokenize = doMacros *>
(toksToString <$> many (satisfyTok (const True)))
do let retokenize = toksToString <$> many (satisfyTok (const True))
pstate <- getState
let lstate = def{ sOptions = extractReaderOptions pstate
, sMacros = extractMacros pstate }
@ -258,6 +259,7 @@ applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
case res of
Left e -> fail (show e)
Right s' -> return s'
tokenize :: SourceName -> Text -> [Tok]
tokenize sourcename = totoks (initialPos sourcename)
@ -371,11 +373,10 @@ toksToString :: [Tok] -> String
toksToString = T.unpack . untokenize
satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok
satisfyTok f =
try $ do
res <- tokenPrim (T.unpack . untoken) updatePos matcher
satisfyTok f = do
doMacros -- apply macros on remaining input stream
return res
updateState $ \st -> st{ sExpanded = False }
tokenPrim (T.unpack . untoken) updatePos matcher
where matcher t | f t = Just t
| otherwise = Nothing
updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos
@ -384,12 +385,14 @@ satisfyTok f =
doMacros :: PandocMonad m => LP m ()
doMacros = do
expanded <- sExpanded <$> getState
verbatimMode <- sVerbatimMode <$> getState
unless verbatimMode $ do
mbNewInp <- getInput >>= doMacros' 1
case mbNewInp of
Nothing -> return ()
Just inp -> setInput inp
unless (expanded || verbatimMode) $ do
mbNewInp <- getInput >>= doMacros' 1
case mbNewInp of
Nothing -> return ()
Just inp -> setInput inp
updateState $ \st -> st{ sExpanded = True }
doMacros' :: PandocMonad m => Int -> [Tok] -> LP m (Maybe [Tok])
doMacros' n inp = do