Textile reader: Improved speed of hyphenedWords.

This speeds up the Textile reader by about a factor of 4.
But the reader is still very slow compared to other readers.
This commit is contained in:
John MacFarlane 2012-09-06 13:49:43 -07:00
parent c2b520fb36
commit 37c29bfa3d
2 changed files with 7 additions and 5 deletions

View file

@ -7,7 +7,7 @@ all:
cabal-dev configure --enable-tests --enable-benchmarks && cabal-dev build
# prof: configure with library and executable profiling enabled, then build.
# The second configure line is the same command with --enable-tests added.
prof:
cabal-dev configure --enable-library-profiling --enable-executable-profiling && cabal-dev build
cabal-dev configure --enable-tests --enable-library-profiling --enable-executable-profiling && cabal-dev build
prep: pandoc-types citeproc-hs
cabal-dev install-deps --enable-library-profiling --enable-tests --enable-benchmarks

View file

@ -61,6 +61,7 @@ import Text.Pandoc.Parsing
import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag )
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
import Text.HTML.TagSoup.Match
import Data.List ( intercalate )
import Data.Char ( digitToInt, isUpper )
import Control.Monad ( guard, liftM )
import Control.Applicative ((<$>), (*>), (<*))
@ -426,14 +427,15 @@ wordBoundaries = markupChars ++ stringBreakers
-- | Parse a hyphenated sequence of words: one or more 'wordChunk's
-- separated by single @-@ characters, rejoined with @-@ into one 'String'.
-- NOTE(review): the @try $ do@ line below is the pre-change header; the
-- 'intercalate' / 'sepBy1' line is the definition this commit introduces.
hyphenedWords :: Parser [Char] ParserState String
hyphenedWords = try $ do
hyphenedWords = intercalate "-" <$> sepBy1 wordChunk (char '-')
-- | Parse one hyphen-free piece of a word: a first character outside
-- 'wordBoundaries', then any run of characters that are either outside
-- 'wordBoundaries' or markup characters embedded inside the word.
wordChunk :: Parser [Char] ParserState String
wordChunk = try $ do
hd <- noneOf wordBoundaries
-- A markup character is accepted only when the next character is a
-- non-boundary one ('lookAhead' checks this without consuming it) and the
-- @notFollowedBy' note@ guard passes (presumably: no note starts here --
-- confirm against the definition of 'note').
tl <- many ( (noneOf wordBoundaries) <|>
try (notFollowedBy' note *> oneOf markupChars
<* lookAhead (noneOf wordBoundaries) ) )
-- NOTE(review): the next three lines are the pre-change recursive tail
-- (optionally consume @-@ and recurse into 'hyphenedWords'); the final
-- @return@ line is its replacement, returning just this chunk.
let wd = hd:tl
option wd $ try $
(\r -> concat [wd, "-", r]) <$> (char '-' *> hyphenedWords)
return $ hd:tl
-- | Any string
str :: Parser [Char] ParserState Inline