From d1b80f8f350c7588ba2c95f2c4a646f7af5a5cb3 Mon Sep 17 00:00:00 2001 From: fiddlosopher Date: Sat, 28 Nov 2009 03:22:18 +0000 Subject: [PATCH] Markdown reader: parse refs and notes in the same pass. Previously the markdown reader made one pass for references, a second pass for notes (which it parsed and stored in the parser state), and a third pass for the rest. This patch achieves a 10% speed improvement by storing the raw notes on the first (reference) pass, then parsing them when the notes are inserted into the AST. This eliminates the need for a second pass to parse notes. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1629 788f1e2b-df1e-0410-8736-df70ead52e1b --- src/Text/Pandoc/Readers/Markdown.hs | 33 ++++++++++++----------------- src/Text/Pandoc/Shared.hs | 2 +- 2 files changed, 14 insertions(+), 21 deletions(-) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 226252381..7a16f1578 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -164,23 +164,18 @@ parseMarkdown = do -- markdown allows raw HTML updateState (\state -> state { stateParseRaw = True }) startPos <- getPosition - -- go through once just to get list of reference keys - -- docMinusKeys is the raw document with blanks where the keys were... - docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= - return . concat + -- go through once just to get list of reference keys and notes + -- docMinusKeys is the raw document with blanks where the keys/notes were... + st <- getState + let firstPassParser = referenceKey + <|> (if stateStrict st then pzero else noteBlock) + <|> lineClump + docMinusKeys <- liftM concat $ manyTill firstPassParser eof setInput docMinusKeys setPosition startPos - st <- getState - -- go through again for notes unless strict... - if stateStrict st - then return () - else do docMinusNotes <- manyTill (noteBlock <|> lineClump) eof >>= - return . concat - st' <- getState - let reversedNotes = stateNotes st' - updateState $ \s -> s { stateNotes = reverse reversedNotes } - setInput docMinusNotes - setPosition startPos + st' <- getState + let reversedNotes = stateNotes st' + updateState $ \s -> s { stateNotes = reverse reversedNotes } -- now parse it for real... (title, author, date) <- option ([],[],"") titleBlock blocks <- parseBlocks @@ -243,9 +238,7 @@ noteBlock = try $ do raw <- sepBy rawLines (try (blankline >> indentSpaces)) optional blanklines endPos <- getPosition - -- parse the extracted text, which may contain various block elements: - contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n" - let newnote = (ref, contents) + let newnote = (ref, (intercalate "\n" raw) ++ "\n\n") st <- getState let oldnotes = stateNotes st updateState $ \s -> s { stateNotes = newnote : oldnotes } @@ -1174,8 +1167,8 @@ note = try $ do state <- getState let notes = stateNotes state case lookup ref notes of - Nothing -> fail "note not found" - Just contents -> return $ Note contents + Nothing -> fail "note not found" + Just raw -> liftM Note $ parseFromString parseBlocks raw inlineNote :: GenParser Char ParserState Inline inlineNote = try $ do diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index f920c79aa..810911165 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -713,7 +713,7 @@ data QuoteContext | NoQuote -- ^ Used when not parsing inside quotes deriving (Eq, Show) -type NoteTable = [(String, [Block])] +type NoteTable = [(String, String)] type KeyTable = [([Inline], Target)]