T.P.Shared: remove backslashEscapes, escapeStringUsing.

[API change]

These are inefficient association list lookups.
Replace with more efficient functions in the writers that
used them (with 10-25% performance improvements in
haddock, org, rtf, texinfo writers).
This commit is contained in:
John MacFarlane 2021-03-20 00:02:24 -07:00
parent ceadf33246
commit a1a57bce4e
8 changed files with 77 additions and 47 deletions

View file

@ -26,8 +26,6 @@ module Text.Pandoc.Shared (
findM, findM,
-- * Text processing -- * Text processing
tshow, tshow,
backslashEscapes,
escapeStringUsing,
elemText, elemText,
notElemText, notElemText,
stripTrailingNewlines, stripTrailingNewlines,
@ -184,17 +182,6 @@ findM p = foldr go (pure Nothing)
tshow :: Show a => a -> T.Text tshow :: Show a => a -> T.Text
tshow = T.pack . show tshow = T.pack . show
-- | Returns an association list of backslash escapes for the
-- designated characters.
backslashEscapes :: [Char] -- ^ list of special characters to escape
-> [(Char, T.Text)]
backslashEscapes = map (\ch -> (ch, T.pack ['\\',ch]))
-- | Escape a string of characters, using an association list of
-- characters and strings.
escapeStringUsing :: [(Char, T.Text)] -> T.Text -> T.Text
escapeStringUsing tbl = T.concatMap $ \c -> fromMaybe (T.singleton c) $ lookup c tbl
-- | @True@ exactly when the @Char@ appears in the @Text@. -- | @True@ exactly when the @Char@ appears in the @Text@.
elemText :: Char -> T.Text -> Bool elemText :: Char -> T.Text -> Bool
elemText c = T.any (== c) elemText c = T.any (== c)

View file

@ -105,8 +105,11 @@ pandocToAsciiDoc opts (Pandoc meta blocks) = do
-- | Escape special characters for AsciiDoc. -- | Escape special characters for AsciiDoc.
escapeString :: Text -> Text escapeString :: Text -> Text
escapeString = escapeStringUsing escs escapeString t
where escs = backslashEscapes "{" | T.any (== '{') t = T.concatMap escChar t
| otherwise = t
where escChar '{' = "\\{"
escChar c = T.singleton c
-- | Ordered list start parser for use in Para below. -- | Ordered list start parser for use in Para below.
olMarker :: Parser Text ParserState Char olMarker :: Parser Text ParserState Char
@ -496,7 +499,9 @@ inlineToAsciiDoc opts (Quoted qt lst) = do
| otherwise -> [Str "``"] ++ lst ++ [Str "''"] | otherwise -> [Str "``"] ++ lst ++ [Str "''"]
inlineToAsciiDoc _ (Code _ str) = do inlineToAsciiDoc _ (Code _ str) = do
isAsciidoctor <- gets asciidoctorVariant isAsciidoctor <- gets asciidoctorVariant
let contents = literal (escapeStringUsing (backslashEscapes "`") str) let escChar '`' = "\\'"
escChar c = T.singleton c
let contents = literal (T.concatMap escChar str)
return $ return $
if isAsciidoctor if isAsciidoctor
then text "`+" <> contents <> "+`" then text "`+" <> contents <> "+`"

View file

@ -434,9 +434,13 @@ inlineToConTeXt (Link _ txt (src, _)) = do
put $ st {stNextRef = next + 1} put $ st {stNextRef = next + 1}
let ref = "url" <> tshow next let ref = "url" <> tshow next
contents <- inlineListToConTeXt txt contents <- inlineListToConTeXt txt
let escChar '#' = "\\#"
escChar '%' = "\\%"
escChar c = T.singleton c
let escContextURL = T.concatMap escChar
return $ "\\useURL" return $ "\\useURL"
<> brackets (literal ref) <> brackets (literal ref)
<> brackets (literal $ escapeStringUsing [('#',"\\#"),('%',"\\%")] src) <> brackets (literal $ escContextURL src)
<> (if isAutolink <> (if isAutolink
then empty then empty
else brackets empty <> brackets contents) else brackets empty <> brackets contents)

View file

@ -15,6 +15,7 @@ Haddock: <http://www.haskell.org/haddock/doc/html/>
-} -}
module Text.Pandoc.Writers.Haddock (writeHaddock) where module Text.Pandoc.Writers.Haddock (writeHaddock) where
import Control.Monad.State.Strict import Control.Monad.State.Strict
import Data.Char (isAlphaNum)
import Data.Default import Data.Default
import Data.Text (Text) import Data.Text (Text)
import qualified Data.Text as T import qualified Data.Text as T
@ -71,8 +72,18 @@ notesToHaddock opts notes =
-- | Escape special characters for Haddock. -- | Escape special characters for Haddock.
escapeString :: Text -> Text escapeString :: Text -> Text
escapeString = escapeStringUsing haddockEscapes escapeString t
where haddockEscapes = backslashEscapes "\\/'`\"@<" | T.all isAlphaNum t = t
| otherwise = T.concatMap escChar t
where
escChar '\\' = "\\\\"
escChar '/' = "\\/"
escChar '\'' = "\\'"
escChar '`' = "\\`"
escChar '"' = "\\\""
escChar '@' = "\\@"
escChar '<' = "\\<"
escChar c = T.singleton c
-- | Convert Pandoc block element to haddock. -- | Convert Pandoc block element to haddock.
blockToHaddock :: PandocMonad m blockToHaddock :: PandocMonad m

View file

@ -825,7 +825,19 @@ inlineToLaTeX (Code (_,classes,kvs) str) = do
let chr = case "!\"'()*,-./:;?@" \\ T.unpack str of let chr = case "!\"'()*,-./:;?@" \\ T.unpack str of
(c:_) -> c (c:_) -> c
[] -> '!' [] -> '!'
let str' = escapeStringUsing (backslashEscapes "\\{}%~_&#^") str let isEscapable '\\' = True
isEscapable '{' = True
isEscapable '}' = True
isEscapable '%' = True
isEscapable '~' = True
isEscapable '_' = True
isEscapable '&' = True
isEscapable '#' = True
isEscapable '^' = True
isEscapable _ = False
let escChar c | isEscapable c = T.pack ['\\',c]
| otherwise = T.singleton c
let str' = T.concatMap escChar str
-- we always put lstinline in a dummy 'passthrough' command -- we always put lstinline in a dummy 'passthrough' command
-- (defined in the default template) so that we don't have -- (defined in the default template) so that we don't have
-- to change the way we escape characters depending on whether -- to change the way we escape characters depending on whether

View file

@ -84,12 +84,15 @@ noteToOrg num note = do
-- | Escape special characters for Org. -- | Escape special characters for Org.
escapeString :: Text -> Text escapeString :: Text -> Text
escapeString = escapeStringUsing escapeString t
[ ('\x2014',"---") | T.all (\c -> c < '\x2013' || c > '\x2026') t = t
, ('\x2013',"--") | otherwise = T.concatMap escChar t
, ('\x2019',"'") where
, ('\x2026',"...") escChar '\x2013' = "--"
] escChar '\x2014' = "---"
escChar '\x2019' = "'"
escChar '\x2026' = "..."
escChar c = T.singleton c
isRawFormat :: Format -> Bool isRawFormat :: Format -> Bool
isRawFormat f = isRawFormat f =

View file

@ -16,7 +16,7 @@ module Text.Pandoc.Writers.RTF ( writeRTF
import Control.Monad.Except (catchError, throwError) import Control.Monad.Except (catchError, throwError)
import Control.Monad import Control.Monad
import qualified Data.ByteString as B import qualified Data.ByteString as B
import Data.Char (chr, isDigit, ord) import Data.Char (chr, isDigit, ord, isAlphaNum)
import qualified Data.Map as M import qualified Data.Map as M
import Data.Text (Text) import Data.Text (Text)
import qualified Data.Text as T import qualified Data.Text as T
@ -137,15 +137,21 @@ handleUnicode = T.concatMap $ \c ->
-- | Escape special characters. -- | Escape special characters.
escapeSpecial :: Text -> Text escapeSpecial :: Text -> Text
escapeSpecial = escapeStringUsing $ escapeSpecial t
[ ('\t',"\\tab ") | T.all isAlphaNum t = t
, ('\8216',"\\u8216'") | otherwise = T.concatMap escChar t
, ('\8217',"\\u8217'") where
, ('\8220',"\\u8220\"") escChar '\t' = "\\tab "
, ('\8221',"\\u8221\"") escChar '\8216' = "\\u8216'"
, ('\8211',"\\u8211-") escChar '\8217' = "\\u8217'"
, ('\8212',"\\u8212-") escChar '\8220' = "\\u8220\""
] <> backslashEscapes "{\\}" escChar '\8221' = "\\u8221\""
escChar '\8211' = "\\u8211-"
escChar '\8212' = "\\u8212-"
escChar '{' = "\\{"
escChar '}' = "\\}"
escChar '\\' = "\\\\"
escChar c = T.singleton c
-- | Escape strings as needed for rich text format. -- | Escape strings as needed for rich text format.
stringToRTF :: Text -> Text stringToRTF :: Text -> Text

View file

@ -14,7 +14,7 @@ Conversion of 'Pandoc' format into Texinfo.
module Text.Pandoc.Writers.Texinfo ( writeTexinfo ) where module Text.Pandoc.Writers.Texinfo ( writeTexinfo ) where
import Control.Monad.Except (throwError) import Control.Monad.Except (throwError)
import Control.Monad.State.Strict import Control.Monad.State.Strict
import Data.Char (chr, ord) import Data.Char (chr, ord, isAlphaNum)
import Data.List (maximumBy, transpose, foldl') import Data.List (maximumBy, transpose, foldl')
import Data.List.NonEmpty (nonEmpty) import Data.List.NonEmpty (nonEmpty)
import Data.Ord (comparing) import Data.Ord (comparing)
@ -85,16 +85,18 @@ pandocToTexinfo options (Pandoc meta blocks) = do
-- | Escape things as needed for Texinfo. -- | Escape things as needed for Texinfo.
stringToTexinfo :: Text -> Text stringToTexinfo :: Text -> Text
stringToTexinfo = escapeStringUsing texinfoEscapes stringToTexinfo t
where texinfoEscapes = [ ('{', "@{") | T.all isAlphaNum t = t
, ('}', "@}") | otherwise = T.concatMap escChar t
, ('@', "@@") where escChar '{' = "@{"
, ('\160', "@ ") escChar '}' = "@}"
, ('\x2014', "---") escChar '@' = "@@"
, ('\x2013', "--") escChar '\160' = "@ "
, ('\x2026', "@dots{}") escChar '\x2014' = "---"
, ('\x2019', "'") escChar '\x2013' = "--"
] escChar '\x2026' = "@dots{}"
escChar '\x2019' = "'"
escChar c = T.singleton c
escapeCommas :: PandocMonad m => TI m (Doc Text) -> TI m (Doc Text) escapeCommas :: PandocMonad m => TI m (Doc Text) -> TI m (Doc Text)
escapeCommas parser = do escapeCommas parser = do