Optimized oneOfStringsCI.
The call to toLower in ciMatch was very expensive (and made very often), because toLower from Data.Char calls a fully Unicode-aware function. This optimization avoids the call to toLower in the most common case, ASCII characters. This dramatically reduces the speed penalty that comes from enabling the `autolink_bare_uris` extension. The penalty is still substantial (in one test, run time went from 0.33s to 0.44s), but nowhere near what it used to be.
This commit is contained in:
parent
92acb24486
commit
a6c167125f
1 changed files with 9 additions and 3 deletions
|
@@ -152,8 +152,8 @@ import Text.Pandoc.Builder (Blocks, Inlines, rawBlock)
|
||||||
import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
|
import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
|
||||||
import Text.Parsec
|
import Text.Parsec
|
||||||
import Text.Parsec.Pos (newPos)
|
import Text.Parsec.Pos (newPos)
|
||||||
import Data.Char ( toLower, toUpper, ord, isAscii, isAlphaNum, isDigit, isHexDigit,
|
import Data.Char ( toLower, toUpper, ord, chr, isAscii, isAlphaNum, isDigit,
|
||||||
isSpace )
|
isHexDigit, isSpace )
|
||||||
import Data.List ( intercalate, transpose )
|
import Data.List ( intercalate, transpose )
|
||||||
import Text.Pandoc.Shared
|
import Text.Pandoc.Shared
|
||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
|
@@ -244,7 +244,13 @@ oneOfStrings = oneOfStrings' (==)
|
||||||
-- | Parses one of a list of strings (tried in order), case insensitive.
|
-- | Parses one of a list of strings (tried in order), case insensitive.
|
||||||
oneOfStringsCI :: [String] -> Parser [Char] st String
|
oneOfStringsCI :: [String] -> Parser [Char] st String
|
||||||
oneOfStringsCI = oneOfStrings' ciMatch
|
oneOfStringsCI = oneOfStrings' ciMatch
|
||||||
where ciMatch x y = toLower x == toLower y
|
where ciMatch x y = toLower' x == toLower' y
|
||||||
|
-- this optimizes toLower by checking common ASCII case
|
||||||
|
-- first, before calling the expensive unicode-aware
|
||||||
|
-- function:
|
||||||
|
toLower' c | c >= 'A' && c <= 'Z' = chr (ord c + 32)
|
||||||
|
| isAscii c = c
|
||||||
|
| otherwise = toLower c
|
||||||
|
|
||||||
-- | Parses a space or tab.
|
-- | Parses a space or tab.
|
||||||
spaceChar :: Parser [Char] st Char
|
spaceChar :: Parser [Char] st Char
|
||||||
|
|
Loading…
Reference in a new issue