T.P.Shared: remove backslashEscapes, escapeStringUsing.

[API change]

These are inefficient association list lookups.
Replace with more efficient functions in the writers that
used them (with 10-25% performance improvements in
haddock, org, rtf, texinfo writers).
This commit is contained in:
John MacFarlane 2021-03-20 00:02:24 -07:00
parent ceadf33246
commit a1a57bce4e
8 changed files with 77 additions and 47 deletions

View file

@ -26,8 +26,6 @@ module Text.Pandoc.Shared (
findM,
-- * Text processing
tshow,
backslashEscapes,
escapeStringUsing,
elemText,
notElemText,
stripTrailingNewlines,
@ -184,17 +182,6 @@ findM p = foldr go (pure Nothing)
tshow :: Show a => a -> T.Text
tshow = T.pack . show
-- | Returns an association list of backslash escapes for the
-- designated characters.
backslashEscapes :: [Char] -- ^ list of special characters to escape
-> [(Char, T.Text)]
backslashEscapes = map (\ch -> (ch, T.pack ['\\',ch]))
-- | Escape a string of characters, using an association list of
-- characters and strings.
escapeStringUsing :: [(Char, T.Text)] -> T.Text -> T.Text
escapeStringUsing tbl = T.concatMap $ \c -> fromMaybe (T.singleton c) $ lookup c tbl
-- | @True@ exactly when the @Char@ appears in the @Text@.
elemText :: Char -> T.Text -> Bool
elemText c = T.any (== c)

View file

@ -105,8 +105,11 @@ pandocToAsciiDoc opts (Pandoc meta blocks) = do
-- | Escape special characters for AsciiDoc.
escapeString :: Text -> Text
escapeString = escapeStringUsing escs
where escs = backslashEscapes "{"
escapeString t
| T.any (== '{') t = T.concatMap escChar t
| otherwise = t
where escChar '{' = "\\{"
escChar c = T.singleton c
-- | Ordered list start parser for use in Para below.
olMarker :: Parser Text ParserState Char
@ -496,7 +499,9 @@ inlineToAsciiDoc opts (Quoted qt lst) = do
| otherwise -> [Str "``"] ++ lst ++ [Str "''"]
inlineToAsciiDoc _ (Code _ str) = do
isAsciidoctor <- gets asciidoctorVariant
let contents = literal (escapeStringUsing (backslashEscapes "`") str)
let escChar '`' = "\\'"
escChar c = T.singleton c
let contents = literal (T.concatMap escChar str)
return $
if isAsciidoctor
then text "`+" <> contents <> "+`"

View file

@ -434,9 +434,13 @@ inlineToConTeXt (Link _ txt (src, _)) = do
put $ st {stNextRef = next + 1}
let ref = "url" <> tshow next
contents <- inlineListToConTeXt txt
let escChar '#' = "\\#"
escChar '%' = "\\%"
escChar c = T.singleton c
let escContextURL = T.concatMap escChar
return $ "\\useURL"
<> brackets (literal ref)
<> brackets (literal $ escapeStringUsing [('#',"\\#"),('%',"\\%")] src)
<> brackets (literal $ escContextURL src)
<> (if isAutolink
then empty
else brackets empty <> brackets contents)

View file

@ -15,6 +15,7 @@ Haddock: <http://www.haskell.org/haddock/doc/html/>
-}
module Text.Pandoc.Writers.Haddock (writeHaddock) where
import Control.Monad.State.Strict
import Data.Char (isAlphaNum)
import Data.Default
import Data.Text (Text)
import qualified Data.Text as T
@ -71,8 +72,18 @@ notesToHaddock opts notes =
-- | Escape special characters for Haddock.
escapeString :: Text -> Text
escapeString = escapeStringUsing haddockEscapes
where haddockEscapes = backslashEscapes "\\/'`\"@<"
escapeString t
| T.all isAlphaNum t = t
| otherwise = T.concatMap escChar t
where
escChar '\\' = "\\\\"
escChar '/' = "\\/"
escChar '\'' = "\\'"
escChar '`' = "\\`"
escChar '"' = "\\\""
escChar '@' = "\\@"
escChar '<' = "\\<"
escChar c = T.singleton c
-- | Convert Pandoc block element to haddock.
blockToHaddock :: PandocMonad m

View file

@ -825,7 +825,19 @@ inlineToLaTeX (Code (_,classes,kvs) str) = do
let chr = case "!\"'()*,-./:;?@" \\ T.unpack str of
(c:_) -> c
[] -> '!'
let str' = escapeStringUsing (backslashEscapes "\\{}%~_&#^") str
let isEscapable '\\' = True
isEscapable '{' = True
isEscapable '}' = True
isEscapable '%' = True
isEscapable '~' = True
isEscapable '_' = True
isEscapable '&' = True
isEscapable '#' = True
isEscapable '^' = True
isEscapable _ = False
let escChar c | isEscapable c = T.pack ['\\',c]
| otherwise = T.singleton c
let str' = T.concatMap escChar str
-- we always put lstinline in a dummy 'passthrough' command
-- (defined in the default template) so that we don't have
-- to change the way we escape characters depending on whether

View file

@ -84,12 +84,15 @@ noteToOrg num note = do
-- | Escape special characters for Org.
escapeString :: Text -> Text
escapeString = escapeStringUsing
[ ('\x2014',"---")
, ('\x2013',"--")
, ('\x2019',"'")
, ('\x2026',"...")
]
escapeString t
| T.all (\c -> c < '\x2013' || c > '\x2026') t = t
| otherwise = T.concatMap escChar t
where
escChar '\x2013' = "--"
escChar '\x2014' = "---"
escChar '\x2019' = "'"
escChar '\x2026' = "..."
escChar c = T.singleton c
isRawFormat :: Format -> Bool
isRawFormat f =

View file

@ -16,7 +16,7 @@ module Text.Pandoc.Writers.RTF ( writeRTF
import Control.Monad.Except (catchError, throwError)
import Control.Monad
import qualified Data.ByteString as B
import Data.Char (chr, isDigit, ord)
import Data.Char (chr, isDigit, ord, isAlphaNum)
import qualified Data.Map as M
import Data.Text (Text)
import qualified Data.Text as T
@ -137,15 +137,21 @@ handleUnicode = T.concatMap $ \c ->
-- | Escape special characters.
escapeSpecial :: Text -> Text
escapeSpecial = escapeStringUsing $
[ ('\t',"\\tab ")
, ('\8216',"\\u8216'")
, ('\8217',"\\u8217'")
, ('\8220',"\\u8220\"")
, ('\8221',"\\u8221\"")
, ('\8211',"\\u8211-")
, ('\8212',"\\u8212-")
] <> backslashEscapes "{\\}"
escapeSpecial t
| T.all isAlphaNum t = t
| otherwise = T.concatMap escChar t
where
escChar '\t' = "\\tab "
escChar '\8216' = "\\u8216'"
escChar '\8217' = "\\u8217'"
escChar '\8220' = "\\u8220\""
escChar '\8221' = "\\u8221\""
escChar '\8211' = "\\u8211-"
escChar '\8212' = "\\u8212-"
escChar '{' = "\\{"
escChar '}' = "\\}"
escChar '\\' = "\\\\"
escChar c = T.singleton c
-- | Escape strings as needed for rich text format.
stringToRTF :: Text -> Text

View file

@ -14,7 +14,7 @@ Conversion of 'Pandoc' format into Texinfo.
module Text.Pandoc.Writers.Texinfo ( writeTexinfo ) where
import Control.Monad.Except (throwError)
import Control.Monad.State.Strict
import Data.Char (chr, ord)
import Data.Char (chr, ord, isAlphaNum)
import Data.List (maximumBy, transpose, foldl')
import Data.List.NonEmpty (nonEmpty)
import Data.Ord (comparing)
@ -85,16 +85,18 @@ pandocToTexinfo options (Pandoc meta blocks) = do
-- | Escape things as needed for Texinfo.
stringToTexinfo :: Text -> Text
stringToTexinfo = escapeStringUsing texinfoEscapes
where texinfoEscapes = [ ('{', "@{")
, ('}', "@}")
, ('@', "@@")
, ('\160', "@ ")
, ('\x2014', "---")
, ('\x2013', "--")
, ('\x2026', "@dots{}")
, ('\x2019', "'")
]
stringToTexinfo t
| T.all isAlphaNum t = t
| otherwise = T.concatMap escChar t
where escChar '{' = "@{"
escChar '}' = "@}"
escChar '@' = "@@"
escChar '\160' = "@ "
escChar '\x2014' = "---"
escChar '\x2013' = "--"
escChar '\x2026' = "@dots{}"
escChar '\x2019' = "'"
escChar c = T.singleton c
escapeCommas :: PandocMonad m => TI m (Doc Text) -> TI m (Doc Text)
escapeCommas parser = do