T.P.Shared: remove backslashEscapes
, escapeStringUsing
.
[API change] These are inefficient association list lookups. Replace with more efficient functions in the writers that used them (with 10-25% performance improvements in haddock, org, rtf, texinfo writers).
This commit is contained in:
parent
ceadf33246
commit
a1a57bce4e
8 changed files with 77 additions and 47 deletions
|
@ -26,8 +26,6 @@ module Text.Pandoc.Shared (
|
|||
findM,
|
||||
-- * Text processing
|
||||
tshow,
|
||||
backslashEscapes,
|
||||
escapeStringUsing,
|
||||
elemText,
|
||||
notElemText,
|
||||
stripTrailingNewlines,
|
||||
|
@ -184,17 +182,6 @@ findM p = foldr go (pure Nothing)
|
|||
tshow :: Show a => a -> T.Text
|
||||
tshow = T.pack . show
|
||||
|
||||
-- | Returns an association list of backslash escapes for the
|
||||
-- designated characters.
|
||||
backslashEscapes :: [Char] -- ^ list of special characters to escape
|
||||
-> [(Char, T.Text)]
|
||||
backslashEscapes = map (\ch -> (ch, T.pack ['\\',ch]))
|
||||
|
||||
-- | Escape a string of characters, using an association list of
|
||||
-- characters and strings.
|
||||
escapeStringUsing :: [(Char, T.Text)] -> T.Text -> T.Text
|
||||
escapeStringUsing tbl = T.concatMap $ \c -> fromMaybe (T.singleton c) $ lookup c tbl
|
||||
|
||||
-- | @True@ exactly when the @Char@ appears in the @Text@.
|
||||
elemText :: Char -> T.Text -> Bool
|
||||
elemText c = T.any (== c)
|
||||
|
|
|
@ -105,8 +105,11 @@ pandocToAsciiDoc opts (Pandoc meta blocks) = do
|
|||
|
||||
-- | Escape special characters for AsciiDoc.
|
||||
escapeString :: Text -> Text
|
||||
escapeString = escapeStringUsing escs
|
||||
where escs = backslashEscapes "{"
|
||||
escapeString t
|
||||
| T.any (== '{') t = T.concatMap escChar t
|
||||
| otherwise = t
|
||||
where escChar '{' = "\\{"
|
||||
escChar c = T.singleton c
|
||||
|
||||
-- | Ordered list start parser for use in Para below.
|
||||
olMarker :: Parser Text ParserState Char
|
||||
|
@ -496,7 +499,9 @@ inlineToAsciiDoc opts (Quoted qt lst) = do
|
|||
| otherwise -> [Str "``"] ++ lst ++ [Str "''"]
|
||||
inlineToAsciiDoc _ (Code _ str) = do
|
||||
isAsciidoctor <- gets asciidoctorVariant
|
||||
let contents = literal (escapeStringUsing (backslashEscapes "`") str)
|
||||
let escChar '`' = "\\'"
|
||||
escChar c = T.singleton c
|
||||
let contents = literal (T.concatMap escChar str)
|
||||
return $
|
||||
if isAsciidoctor
|
||||
then text "`+" <> contents <> "+`"
|
||||
|
|
|
@ -434,9 +434,13 @@ inlineToConTeXt (Link _ txt (src, _)) = do
|
|||
put $ st {stNextRef = next + 1}
|
||||
let ref = "url" <> tshow next
|
||||
contents <- inlineListToConTeXt txt
|
||||
let escChar '#' = "\\#"
|
||||
escChar '%' = "\\%"
|
||||
escChar c = T.singleton c
|
||||
let escContextURL = T.concatMap escChar
|
||||
return $ "\\useURL"
|
||||
<> brackets (literal ref)
|
||||
<> brackets (literal $ escapeStringUsing [('#',"\\#"),('%',"\\%")] src)
|
||||
<> brackets (literal $ escContextURL src)
|
||||
<> (if isAutolink
|
||||
then empty
|
||||
else brackets empty <> brackets contents)
|
||||
|
|
|
@ -15,6 +15,7 @@ Haddock: <http://www.haskell.org/haddock/doc/html/>
|
|||
-}
|
||||
module Text.Pandoc.Writers.Haddock (writeHaddock) where
|
||||
import Control.Monad.State.Strict
|
||||
import Data.Char (isAlphaNum)
|
||||
import Data.Default
|
||||
import Data.Text (Text)
|
||||
import qualified Data.Text as T
|
||||
|
@ -71,8 +72,18 @@ notesToHaddock opts notes =
|
|||
|
||||
-- | Escape special characters for Haddock.
|
||||
escapeString :: Text -> Text
|
||||
escapeString = escapeStringUsing haddockEscapes
|
||||
where haddockEscapes = backslashEscapes "\\/'`\"@<"
|
||||
escapeString t
|
||||
| T.all isAlphaNum t = t
|
||||
| otherwise = T.concatMap escChar t
|
||||
where
|
||||
escChar '\\' = "\\\\"
|
||||
escChar '/' = "\\/"
|
||||
escChar '\'' = "\\'"
|
||||
escChar '`' = "\\`"
|
||||
escChar '"' = "\\\""
|
||||
escChar '@' = "\\@"
|
||||
escChar '<' = "\\<"
|
||||
escChar c = T.singleton c
|
||||
|
||||
-- | Convert Pandoc block element to haddock.
|
||||
blockToHaddock :: PandocMonad m
|
||||
|
|
|
@ -825,7 +825,19 @@ inlineToLaTeX (Code (_,classes,kvs) str) = do
|
|||
let chr = case "!\"'()*,-./:;?@" \\ T.unpack str of
|
||||
(c:_) -> c
|
||||
[] -> '!'
|
||||
let str' = escapeStringUsing (backslashEscapes "\\{}%~_&#^") str
|
||||
let isEscapable '\\' = True
|
||||
isEscapable '{' = True
|
||||
isEscapable '}' = True
|
||||
isEscapable '%' = True
|
||||
isEscapable '~' = True
|
||||
isEscapable '_' = True
|
||||
isEscapable '&' = True
|
||||
isEscapable '#' = True
|
||||
isEscapable '^' = True
|
||||
isEscapable _ = False
|
||||
let escChar c | isEscapable c = T.pack ['\\',c]
|
||||
| otherwise = T.singleton c
|
||||
let str' = T.concatMap escChar str
|
||||
-- we always put lstinline in a dummy 'passthrough' command
|
||||
-- (defined in the default template) so that we don't have
|
||||
-- to change the way we escape characters depending on whether
|
||||
|
|
|
@ -84,12 +84,15 @@ noteToOrg num note = do
|
|||
|
||||
-- | Escape special characters for Org.
|
||||
escapeString :: Text -> Text
|
||||
escapeString = escapeStringUsing
|
||||
[ ('\x2014',"---")
|
||||
, ('\x2013',"--")
|
||||
, ('\x2019',"'")
|
||||
, ('\x2026',"...")
|
||||
]
|
||||
escapeString t
|
||||
| T.all (\c -> c < '\x2013' || c > '\x2026') t = t
|
||||
| otherwise = T.concatMap escChar t
|
||||
where
|
||||
escChar '\x2013' = "--"
|
||||
escChar '\x2014' = "---"
|
||||
escChar '\x2019' = "'"
|
||||
escChar '\x2026' = "..."
|
||||
escChar c = T.singleton c
|
||||
|
||||
isRawFormat :: Format -> Bool
|
||||
isRawFormat f =
|
||||
|
|
|
@ -16,7 +16,7 @@ module Text.Pandoc.Writers.RTF ( writeRTF
|
|||
import Control.Monad.Except (catchError, throwError)
|
||||
import Control.Monad
|
||||
import qualified Data.ByteString as B
|
||||
import Data.Char (chr, isDigit, ord)
|
||||
import Data.Char (chr, isDigit, ord, isAlphaNum)
|
||||
import qualified Data.Map as M
|
||||
import Data.Text (Text)
|
||||
import qualified Data.Text as T
|
||||
|
@ -137,15 +137,21 @@ handleUnicode = T.concatMap $ \c ->
|
|||
|
||||
-- | Escape special characters.
|
||||
escapeSpecial :: Text -> Text
|
||||
escapeSpecial = escapeStringUsing $
|
||||
[ ('\t',"\\tab ")
|
||||
, ('\8216',"\\u8216'")
|
||||
, ('\8217',"\\u8217'")
|
||||
, ('\8220',"\\u8220\"")
|
||||
, ('\8221',"\\u8221\"")
|
||||
, ('\8211',"\\u8211-")
|
||||
, ('\8212',"\\u8212-")
|
||||
] <> backslashEscapes "{\\}"
|
||||
escapeSpecial t
|
||||
| T.all isAlphaNum t = t
|
||||
| otherwise = T.concatMap escChar t
|
||||
where
|
||||
escChar '\t' = "\\tab "
|
||||
escChar '\8216' = "\\u8216'"
|
||||
escChar '\8217' = "\\u8217'"
|
||||
escChar '\8220' = "\\u8220\""
|
||||
escChar '\8221' = "\\u8221\""
|
||||
escChar '\8211' = "\\u8211-"
|
||||
escChar '\8212' = "\\u8212-"
|
||||
escChar '{' = "\\{"
|
||||
escChar '}' = "\\}"
|
||||
escChar '\\' = "\\\\"
|
||||
escChar c = T.singleton c
|
||||
|
||||
-- | Escape strings as needed for rich text format.
|
||||
stringToRTF :: Text -> Text
|
||||
|
|
|
@ -14,7 +14,7 @@ Conversion of 'Pandoc' format into Texinfo.
|
|||
module Text.Pandoc.Writers.Texinfo ( writeTexinfo ) where
|
||||
import Control.Monad.Except (throwError)
|
||||
import Control.Monad.State.Strict
|
||||
import Data.Char (chr, ord)
|
||||
import Data.Char (chr, ord, isAlphaNum)
|
||||
import Data.List (maximumBy, transpose, foldl')
|
||||
import Data.List.NonEmpty (nonEmpty)
|
||||
import Data.Ord (comparing)
|
||||
|
@ -85,16 +85,18 @@ pandocToTexinfo options (Pandoc meta blocks) = do
|
|||
|
||||
-- | Escape things as needed for Texinfo.
|
||||
stringToTexinfo :: Text -> Text
|
||||
stringToTexinfo = escapeStringUsing texinfoEscapes
|
||||
where texinfoEscapes = [ ('{', "@{")
|
||||
, ('}', "@}")
|
||||
, ('@', "@@")
|
||||
, ('\160', "@ ")
|
||||
, ('\x2014', "---")
|
||||
, ('\x2013', "--")
|
||||
, ('\x2026', "@dots{}")
|
||||
, ('\x2019', "'")
|
||||
]
|
||||
stringToTexinfo t
|
||||
| T.all isAlphaNum t = t
|
||||
| otherwise = T.concatMap escChar t
|
||||
where escChar '{' = "@{"
|
||||
escChar '}' = "@}"
|
||||
escChar '@' = "@@"
|
||||
escChar '\160' = "@ "
|
||||
escChar '\x2014' = "---"
|
||||
escChar '\x2013' = "--"
|
||||
escChar '\x2026' = "@dots{}"
|
||||
escChar '\x2019' = "'"
|
||||
escChar c = T.singleton c
|
||||
|
||||
escapeCommas :: PandocMonad m => TI m (Doc Text) -> TI m (Doc Text)
|
||||
escapeCommas parser = do
|
||||
|
|
Loading…
Reference in a new issue