Markdown reader: parse refs and notes in the same pass.

Previously the markdown reader made one pass for references,
a second pass for notes (which it parsed and stored in the
parser state), and a third pass for the rest.  This patch
achieves a 10% speed improvement by storing the raw notes
on the first (reference) pass, then parsing them when the
notes are inserted into the AST. This eliminates the need
for a second pass to parse notes.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1629 788f1e2b-df1e-0410-8736-df70ead52e1b
This commit is contained in:
fiddlosopher 2009-11-28 03:22:18 +00:00
parent 1d440130c4
commit d1b80f8f35
2 changed files with 14 additions and 21 deletions

View file

@ -164,23 +164,18 @@ parseMarkdown = do
-- markdown allows raw HTML
updateState (\state -> state { stateParseRaw = True })
startPos <- getPosition
-- go through once just to get list of reference keys
-- docMinusKeys is the raw document with blanks where the keys were...
docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>=
return . concat
-- go through once just to get list of reference keys and notes
-- docMinusKeys is the raw document with blanks where the keys/notes were...
st <- getState
let firstPassParser = referenceKey
<|> (if stateStrict st then pzero else noteBlock)
<|> lineClump
docMinusKeys <- liftM concat $ manyTill firstPassParser eof
setInput docMinusKeys
setPosition startPos
st <- getState
-- go through again for notes unless strict...
if stateStrict st
then return ()
else do docMinusNotes <- manyTill (noteBlock <|> lineClump) eof >>=
return . concat
st' <- getState
let reversedNotes = stateNotes st'
updateState $ \s -> s { stateNotes = reverse reversedNotes }
setInput docMinusNotes
setPosition startPos
st' <- getState
let reversedNotes = stateNotes st'
updateState $ \s -> s { stateNotes = reverse reversedNotes }
-- now parse it for real...
(title, author, date) <- option ([],[],"") titleBlock
blocks <- parseBlocks
@ -243,9 +238,7 @@ noteBlock = try $ do
raw <- sepBy rawLines (try (blankline >> indentSpaces))
optional blanklines
endPos <- getPosition
-- parse the extracted text, which may contain various block elements:
contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n"
let newnote = (ref, contents)
let newnote = (ref, (intercalate "\n" raw) ++ "\n\n")
st <- getState
let oldnotes = stateNotes st
updateState $ \s -> s { stateNotes = newnote : oldnotes }
@ -1174,8 +1167,8 @@ note = try $ do
state <- getState
let notes = stateNotes state
case lookup ref notes of
Nothing -> fail "note not found"
Just contents -> return $ Note contents
Nothing -> fail "note not found"
Just raw -> liftM Note $ parseFromString parseBlocks raw
inlineNote :: GenParser Char ParserState Inline
inlineNote = try $ do

View file

@ -713,7 +713,7 @@ data QuoteContext
| NoQuote -- ^ Used when not parsing inside quotes
deriving (Eq, Show)
type NoteTable = [(String, [Block])]
type NoteTable = [(String, String)]
type KeyTable = [([Inline], Target)]