LaTeX reader: use custom TokStream that keeps track of whether macros are expanded.
This allows us to improve performance a bit by avoiding unnecessary runs of the macro expansion code (e.g. from 24 ms to 20 ms on our standard benchmark).
This commit is contained in:
parent
d523eca3d3
commit
b423c17100
5 changed files with 67 additions and 36 deletions
|
@ -84,7 +84,7 @@ readLaTeX :: (PandocMonad m, ToSources a)
|
|||
readLaTeX opts ltx = do
|
||||
let sources = toSources ltx
|
||||
parsed <- runParserT parseLaTeX def{ sOptions = opts } "source"
|
||||
(tokenizeSources sources)
|
||||
(TokStream False (tokenizeSources sources))
|
||||
case parsed of
|
||||
Right result -> return result
|
||||
Left e -> throwError $ PandocParsecError sources e
|
||||
|
@ -516,11 +516,11 @@ ifToggle = do
|
|||
spaces
|
||||
no <- braced
|
||||
toggles <- sToggles <$> getState
|
||||
inp <- getInput
|
||||
TokStream _ inp <- getInput
|
||||
let name' = untokenize name
|
||||
case M.lookup name' toggles of
|
||||
Just True -> setInput (yes ++ inp)
|
||||
Just False -> setInput (no ++ inp)
|
||||
Just True -> setInput $ TokStream False (yes ++ inp)
|
||||
Just False -> setInput $ TokStream False (no ++ inp)
|
||||
Nothing -> do
|
||||
pos <- getPosition
|
||||
report $ UndefinedToggle name' pos
|
||||
|
@ -532,9 +532,10 @@ ifstrequal = do
|
|||
str2 <- tok
|
||||
ifequal <- braced
|
||||
ifnotequal <- braced
|
||||
TokStream _ ts <- getInput
|
||||
if str1 == str2
|
||||
then getInput >>= setInput . (ifequal ++)
|
||||
else getInput >>= setInput . (ifnotequal ++)
|
||||
then setInput $ TokStream False (ifequal ++ ts)
|
||||
else setInput $ TokStream False (ifnotequal ++ ts)
|
||||
return mempty
|
||||
|
||||
coloredInline :: PandocMonad m => Text -> LP m Inlines
|
||||
|
@ -602,7 +603,7 @@ lookupListDefault d = (fromMaybe d .) . lookupList
|
|||
|
||||
inline :: PandocMonad m => LP m Inlines
|
||||
inline = do
|
||||
Tok pos toktype t <- lookAhead anyTok
|
||||
Tok pos toktype t <- peekTok
|
||||
let symbolAsString = str . untoken <$> anySymbol
|
||||
let unescapedSymbolAsString =
|
||||
do s <- untoken <$> anySymbol
|
||||
|
@ -652,7 +653,8 @@ opt = do
|
|||
toks <- try (sp *> bracketedToks <* sp)
|
||||
-- now parse the toks as inlines
|
||||
st <- getState
|
||||
parsed <- runParserT (mconcat <$> many inline) st "bracketed option" toks
|
||||
parsed <- runParserT (mconcat <$> many inline) st "bracketed option"
|
||||
(TokStream False toks)
|
||||
case parsed of
|
||||
Right result -> return result
|
||||
Left e -> throwError $ PandocParsecError (toSources toks) e
|
||||
|
@ -700,7 +702,7 @@ doSubfile = do
|
|||
skipMany opt
|
||||
f <- T.unpack . removeDoubleQuotes . T.strip . untokenize <$> braced
|
||||
oldToks <- getInput
|
||||
setInput []
|
||||
setInput $ TokStream False []
|
||||
insertIncluded ".tex" f
|
||||
bs <- blocks
|
||||
eof
|
||||
|
@ -747,7 +749,8 @@ insertIncluded defaultExtension f' = do
|
|||
Nothing -> do
|
||||
report $ CouldNotLoadIncludeFile (T.pack f) pos
|
||||
return ""
|
||||
getInput >>= setInput . (tokenize (initialPos f) contents ++)
|
||||
TokStream _ ts <- getInput
|
||||
setInput $ TokStream False (tokenize (initialPos f) contents ++ ts)
|
||||
updateState dropLatestIncludeFile
|
||||
|
||||
authors :: PandocMonad m => LP m ()
|
||||
|
@ -1265,7 +1268,7 @@ orderedList' = try $ do
|
|||
|
||||
block :: PandocMonad m => LP m Blocks
|
||||
block = do
|
||||
Tok _ toktype _ <- lookAhead anyTok
|
||||
Tok _ toktype _ <- peekTok
|
||||
res <- (case toktype of
|
||||
Newline -> mempty <$ spaces1
|
||||
Spaces -> mempty <$ spaces1
|
||||
|
|
|
@ -117,7 +117,8 @@ simpleCiteArgs inline = try $ do
|
|||
-- now parse the toks as inlines
|
||||
st <- getState
|
||||
parsed <- lift $
|
||||
runParserT (mconcat <$> many inline) st "bracketed option" toks
|
||||
runParserT (mconcat <$> many inline) st "bracketed option"
|
||||
(TokStream False toks)
|
||||
case parsed of
|
||||
Right result -> return result
|
||||
Left e -> throwError $ PandocParsecError (toSources toks) e
|
||||
|
|
|
@ -94,8 +94,9 @@ verbTok stopchar = do
|
|||
Nothing -> return t
|
||||
Just i -> do
|
||||
let (t1, t2) = T.splitAt i txt
|
||||
inp <- getInput
|
||||
setInput $ Tok (incSourceColumn pos i) Symbol (T.singleton stopchar)
|
||||
TokStream macrosExpanded inp <- getInput
|
||||
setInput $ TokStream macrosExpanded
|
||||
$ Tok (incSourceColumn pos i) Symbol (T.singleton stopchar)
|
||||
: tokenize (incSourceColumn pos (i + 1)) (T.drop 1 t2) ++ inp
|
||||
return $ Tok pos toktype t1
|
||||
|
||||
|
|
|
@ -24,6 +24,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing
|
|||
, LaTeXState(..)
|
||||
, defaultLaTeXState
|
||||
, LP
|
||||
, TokStream(..)
|
||||
, withVerbatimMode
|
||||
, rawLaTeXParser
|
||||
, applyMacros
|
||||
|
@ -34,6 +35,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing
|
|||
, untoken
|
||||
, toksToString
|
||||
, satisfyTok
|
||||
, peekTok
|
||||
, parseFromToks
|
||||
, disablingWithRaw
|
||||
, doMacros
|
||||
|
@ -119,6 +121,7 @@ import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
|
|||
ArgSpec (..), Tok (..), TokType (..))
|
||||
import Text.Pandoc.Shared
|
||||
import Text.Parsec.Pos
|
||||
import Text.Parsec (Stream(uncons))
|
||||
import Text.Pandoc.Walk
|
||||
|
||||
newtype DottedNum = DottedNum [Int]
|
||||
|
@ -243,7 +246,16 @@ instance HasMeta LaTeXState where
|
|||
instance Default LaTeXState where
|
||||
def = defaultLaTeXState
|
||||
|
||||
type LP m = ParserT [Tok] LaTeXState m
|
||||
-- The Boolean is True if macros have already been expanded,
|
||||
-- False if they need expanding.
|
||||
data TokStream = TokStream !Bool [Tok]
|
||||
deriving (Show)
|
||||
|
||||
instance Monad m => Stream TokStream m Tok where
|
||||
uncons (TokStream _ []) = return Nothing
|
||||
uncons (TokStream _ (t:ts)) = return $ Just (t, TokStream False ts)
|
||||
|
||||
type LP m = ParserT TokStream LaTeXState m
|
||||
|
||||
withVerbatimMode :: PandocMonad m => LP m a -> LP m a
|
||||
withVerbatimMode parser = do
|
||||
|
@ -269,11 +281,12 @@ rawLaTeXParser toks parser valParser = do
|
|||
let preparser = setStartPos >> parser
|
||||
let rawparser = (,) <$> withRaw valParser <*> getState
|
||||
res' <- lift $ runParserT (withRaw (preparser >> getPosition))
|
||||
lstate "chunk" toks
|
||||
lstate "chunk" $ TokStream False toks
|
||||
case res' of
|
||||
Left _ -> mzero
|
||||
Right (endpos, toks') -> do
|
||||
res <- lift $ runParserT rawparser lstate' "chunk" toks'
|
||||
res <- lift $ runParserT rawparser lstate' "chunk"
|
||||
$ TokStream False toks'
|
||||
case res of
|
||||
Left _ -> mzero
|
||||
Right ((val, raw), st) -> do
|
||||
|
@ -303,7 +316,8 @@ applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
|
|||
pstate <- getState
|
||||
let lstate = def{ sOptions = extractReaderOptions pstate
|
||||
, sMacros = extractMacros pstate :| [] }
|
||||
res <- runParserT retokenize lstate "math" (tokenize (initialPos "math") s)
|
||||
res <- runParserT retokenize lstate "math" $
|
||||
TokStream False (tokenize (initialPos "math") s)
|
||||
case res of
|
||||
Left e -> Prelude.fail (show e)
|
||||
Right s' -> return s'
|
||||
|
@ -458,7 +472,7 @@ toksToString = T.unpack . untokenize
|
|||
parseFromToks :: PandocMonad m => LP m a -> [Tok] -> LP m a
|
||||
parseFromToks parser toks = do
|
||||
oldInput <- getInput
|
||||
setInput toks
|
||||
setInput $ TokStream False toks
|
||||
oldpos <- getPosition
|
||||
case toks of
|
||||
Tok pos _ _ : _ -> setPosition pos
|
||||
|
@ -487,15 +501,22 @@ satisfyTok f = do
|
|||
return $! res
|
||||
where matcher t | f t = Just t
|
||||
| otherwise = Nothing
|
||||
updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos
|
||||
updatePos _spos _ (Tok pos _ _ : _) = pos
|
||||
updatePos spos (Tok _ _ t) [] = incSourceColumn spos (T.length t)
|
||||
updatePos :: SourcePos -> Tok -> TokStream -> SourcePos
|
||||
updatePos _spos _ (TokStream _ (Tok pos _ _ : _)) = pos
|
||||
updatePos spos (Tok _ _ t) _ = incSourceColumn spos (T.length t)
|
||||
|
||||
peekTok :: PandocMonad m => LP m Tok
|
||||
peekTok = do
|
||||
doMacros
|
||||
lookAhead (satisfyTok (const True))
|
||||
|
||||
doMacros :: PandocMonad m => LP m ()
|
||||
doMacros = do
|
||||
st <- getState
|
||||
unless (sVerbatimMode st) $
|
||||
getInput >>= doMacros' 1 >>= setInput
|
||||
TokStream macrosExpanded toks <- getInput
|
||||
unless macrosExpanded $ do
|
||||
st <- getState
|
||||
unless (sVerbatimMode st) $
|
||||
doMacros' 1 toks >>= setInput . TokStream True
|
||||
|
||||
doMacros' :: PandocMonad m => Int -> [Tok] -> LP m [Tok]
|
||||
doMacros' n inp =
|
||||
|
@ -568,10 +589,10 @@ doMacros' n inp =
|
|||
Just o -> do
|
||||
x <- option o bracketedToks
|
||||
getargs (M.singleton 1 x) $ drop 1 argspecs
|
||||
rest <- getInput
|
||||
TokStream _ rest <- getInput
|
||||
return (args, rest)
|
||||
lstate <- getState
|
||||
res <- lift $ runParserT getargs' lstate "args" ts
|
||||
res <- lift $ runParserT getargs' lstate "args" $ TokStream False ts
|
||||
case res of
|
||||
Left _ -> Prelude.fail $ "Could not parse arguments for " ++
|
||||
T.unpack name
|
||||
|
@ -599,7 +620,8 @@ trySpecialMacro _ _ = mzero
|
|||
|
||||
handleIf :: PandocMonad m => Bool -> [Tok] -> LP m [Tok]
|
||||
handleIf b ts = do
|
||||
res' <- lift $ runParserT (ifParser b) defaultLaTeXState "tokens" ts
|
||||
res' <- lift $ runParserT (ifParser b) defaultLaTeXState "tokens"
|
||||
$ TokStream False ts
|
||||
case res' of
|
||||
Left _ -> Prelude.fail "Could not parse conditional"
|
||||
Right ts' -> return ts'
|
||||
|
@ -610,7 +632,7 @@ ifParser b = do
|
|||
*> anyTok)
|
||||
elseToks <- (controlSeq "else" >> manyTill anyTok (controlSeq "fi"))
|
||||
<|> ([] <$ controlSeq "fi")
|
||||
rest <- getInput
|
||||
TokStream _ rest <- getInput
|
||||
return $ (if b then ifToks else elseToks) ++ rest
|
||||
|
||||
startsWithAlphaNum :: Text -> Bool
|
||||
|
@ -717,8 +739,9 @@ singleChar = singleCharTok <|> singleCharFromWord
|
|||
singleCharFromWord = do
|
||||
Tok pos toktype t <- disablingWithRaw $ satisfyTok isWordTok
|
||||
let (t1, t2) = (T.take 1 t, T.drop 1 t)
|
||||
inp <- getInput
|
||||
setInput $ Tok pos toktype t1 : Tok (incSourceColumn pos 1) toktype t2 : inp
|
||||
TokStream macrosExpanded inp <- getInput
|
||||
setInput $ TokStream macrosExpanded
|
||||
$ Tok pos toktype t1 : Tok (incSourceColumn pos 1) toktype t2 : inp
|
||||
anyTok
|
||||
|
||||
specialChars :: Set.Set Char
|
||||
|
@ -802,7 +825,8 @@ retokenizeComment = (do
|
|||
Tok (incSourceColumn (incSourceLine pos' (sourceLine pos - 1))
|
||||
(sourceColumn pos)) toktype' txt'
|
||||
let newtoks = map updPos $ tokenize pos $ T.tail txt
|
||||
getInput >>= setInput . ((Tok pos Symbol "%" : newtoks) ++))
|
||||
TokStream macrosExpanded ts <- getInput
|
||||
setInput $ TokStream macrosExpanded ((Tok pos Symbol "%" : newtoks) ++ ts))
|
||||
<|> return ()
|
||||
|
||||
bracedOrToken :: PandocMonad m => LP m [Tok]
|
||||
|
|
|
@ -62,10 +62,11 @@ amp = symbol '&'
|
|||
-- Split a Word into individual Symbols (for parseAligns)
|
||||
splitWordTok :: PandocMonad m => LP m ()
|
||||
splitWordTok = do
|
||||
inp <- getInput
|
||||
TokStream macrosExpanded inp <- getInput
|
||||
case inp of
|
||||
(Tok spos Word t : rest) ->
|
||||
setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) <> rest
|
||||
setInput $ TokStream macrosExpanded
|
||||
$ map (Tok spos Symbol . T.singleton) (T.unpack t) <> rest
|
||||
_ -> return ()
|
||||
|
||||
parseAligns :: PandocMonad m => LP m [(Alignment, ColWidth, ([Tok], [Tok]))]
|
||||
|
@ -108,8 +109,9 @@ parseAligns = try $ do
|
|||
spaces
|
||||
spec <- braced
|
||||
case safeRead ds of
|
||||
Just n ->
|
||||
getInput >>= setInput . (mconcat (replicate n spec) ++)
|
||||
Just n -> do
|
||||
TokStream _ ts <- getInput
|
||||
setInput $ TokStream False (mconcat (replicate n spec) ++ ts)
|
||||
Nothing -> Prelude.fail $ "Could not parse " <> T.unpack ds <> " as number"
|
||||
bgroup
|
||||
spaces
|
||||
|
|
Loading…
Reference in a new issue