LaTeX reader efficiency improvements.

In conjunction with other changes this makes the reader
almost twice as fast on our benchmark as it was on Feb. 10.
This commit is contained in:
John MacFarlane 2021-02-28 12:52:41 -08:00
parent 564c39beef
commit f6cf03857b

View file

@@ -191,12 +191,6 @@ inlineCommand = do
word :: PandocMonad m => LP m Inlines word :: PandocMonad m => LP m Inlines
word = str . untoken <$> satisfyTok isWordTok word = str . untoken <$> satisfyTok isWordTok
regularSymbol :: PandocMonad m => LP m Inlines
regularSymbol = str . untoken <$> satisfyTok isRegularSymbol
where isRegularSymbol (Tok _ Symbol t) = not $ T.any isSpecial t
isRegularSymbol _ = False
isSpecial c = c `Set.member` specialChars
inlineGroup :: PandocMonad m => LP m Inlines inlineGroup :: PandocMonad m => LP m Inlines
inlineGroup = do inlineGroup = do
ils <- grouped inline ils <- grouped inline
@@ -961,31 +955,48 @@ lookupListDefault d = (fromMaybe d .) . lookupList
where lookupList l m = msum $ map (`M.lookup` m) l where lookupList l m = msum $ map (`M.lookup` m) l
inline :: PandocMonad m => LP m Inlines inline :: PandocMonad m => LP m Inlines
inline = (mempty <$ comment) inline = do
<|> (space <$ whitespace) Tok pos toktype t <- lookAhead anyTok
<|> (softbreak <$ endline) let symbolAsString = str . untoken <$> anySymbol
<|> word let unescapedSymbolAsString =
<|> macroDef (rawInline "latex") do s <- untoken <$> anySymbol
<|> inlineCommand' report $ ParsingUnescaped s pos
<|> inlineEnvironment return $ str s
<|> inlineGroup case toktype of
<|> (symbol '-' *> Comment -> mempty <$ comment
option (str "-") (symbol '-' *> Spaces -> space <$ whitespace
option (str "–") (str "—" <$ symbol '-'))) Newline -> softbreak <$ endline
<|> doubleQuote Word -> word
<|> singleQuote Esc1 -> str . T.singleton <$> primEscape
<|> (str "”" <$ try (symbol '\'' >> symbol '\'')) Esc2 -> str . T.singleton <$> primEscape
<|> (str "’" <$ symbol '\'') Symbol ->
<|> (str "\160" <$ symbol '~') case t of
<|> dollarsMath "-" -> symbol '-' *>
<|> (guardEnabled Ext_literate_haskell *> symbol '|' *> doLHSverb) option (str "-") (symbol '-' *>
<|> (str . T.singleton <$> primEscape) option (str "") (str "" <$ symbol '-'))
<|> regularSymbol "'" -> symbol '\'' *>
<|> (do res <- symbolIn "#^'`\"[]&" option (str "") (str "" <$ symbol '\'')
pos <- getPosition "~" -> str "\160" <$ symbol '~'
let s = untoken res "`" -> doubleQuote <|> singleQuote <|> symbolAsString
report $ ParsingUnescaped s pos "\"" -> doubleQuote <|> singleQuote <|> symbolAsString
return $ str s) "" -> doubleQuote <|> symbolAsString
"" -> singleQuote <|> symbolAsString
"$" -> dollarsMath <|> unescapedSymbolAsString
"|" -> (guardEnabled Ext_literate_haskell *>
symbol '|' *> doLHSverb) <|> symbolAsString
"{" -> inlineGroup
"#" -> unescapedSymbolAsString
"&" -> unescapedSymbolAsString
"_" -> unescapedSymbolAsString
"^" -> unescapedSymbolAsString
"\\" -> mzero
"}" -> mzero
_ -> symbolAsString
CtrlSeq _ -> macroDef (rawInline "latex")
<|> inlineCommand'
<|> inlineEnvironment
<|> inlineGroup
_ -> mzero
inlines :: PandocMonad m => LP m Inlines inlines :: PandocMonad m => LP m Inlines
inlines = mconcat <$> many inline inlines = mconcat <$> many inline