T.P.Shared: remove backslashEscapes
, escapeStringUsing
.
[API change] These are inefficient association list lookups. Replace with more efficient functions in the writers that used them (with 10-25% performance improvements in haddock, org, rtf, texinfo writers).
This commit is contained in:
parent
ceadf33246
commit
a1a57bce4e
8 changed files with 77 additions and 47 deletions
|
@ -26,8 +26,6 @@ module Text.Pandoc.Shared (
|
||||||
findM,
|
findM,
|
||||||
-- * Text processing
|
-- * Text processing
|
||||||
tshow,
|
tshow,
|
||||||
backslashEscapes,
|
|
||||||
escapeStringUsing,
|
|
||||||
elemText,
|
elemText,
|
||||||
notElemText,
|
notElemText,
|
||||||
stripTrailingNewlines,
|
stripTrailingNewlines,
|
||||||
|
@ -184,17 +182,6 @@ findM p = foldr go (pure Nothing)
|
||||||
tshow :: Show a => a -> T.Text
|
tshow :: Show a => a -> T.Text
|
||||||
tshow = T.pack . show
|
tshow = T.pack . show
|
||||||
|
|
||||||
-- | Returns an association list of backslash escapes for the
|
|
||||||
-- designated characters.
|
|
||||||
backslashEscapes :: [Char] -- ^ list of special characters to escape
|
|
||||||
-> [(Char, T.Text)]
|
|
||||||
backslashEscapes = map (\ch -> (ch, T.pack ['\\',ch]))
|
|
||||||
|
|
||||||
-- | Escape a string of characters, using an association list of
|
|
||||||
-- characters and strings.
|
|
||||||
escapeStringUsing :: [(Char, T.Text)] -> T.Text -> T.Text
|
|
||||||
escapeStringUsing tbl = T.concatMap $ \c -> fromMaybe (T.singleton c) $ lookup c tbl
|
|
||||||
|
|
||||||
-- | @True@ exactly when the @Char@ appears in the @Text@.
|
-- | @True@ exactly when the @Char@ appears in the @Text@.
|
||||||
elemText :: Char -> T.Text -> Bool
|
elemText :: Char -> T.Text -> Bool
|
||||||
elemText c = T.any (== c)
|
elemText c = T.any (== c)
|
||||||
|
|
|
@ -105,8 +105,11 @@ pandocToAsciiDoc opts (Pandoc meta blocks) = do
|
||||||
|
|
||||||
-- | Escape special characters for AsciiDoc.
|
-- | Escape special characters for AsciiDoc.
|
||||||
escapeString :: Text -> Text
|
escapeString :: Text -> Text
|
||||||
escapeString = escapeStringUsing escs
|
escapeString t
|
||||||
where escs = backslashEscapes "{"
|
| T.any (== '{') t = T.concatMap escChar t
|
||||||
|
| otherwise = t
|
||||||
|
where escChar '{' = "\\{"
|
||||||
|
escChar c = T.singleton c
|
||||||
|
|
||||||
-- | Ordered list start parser for use in Para below.
|
-- | Ordered list start parser for use in Para below.
|
||||||
olMarker :: Parser Text ParserState Char
|
olMarker :: Parser Text ParserState Char
|
||||||
|
@ -496,7 +499,9 @@ inlineToAsciiDoc opts (Quoted qt lst) = do
|
||||||
| otherwise -> [Str "``"] ++ lst ++ [Str "''"]
|
| otherwise -> [Str "``"] ++ lst ++ [Str "''"]
|
||||||
inlineToAsciiDoc _ (Code _ str) = do
|
inlineToAsciiDoc _ (Code _ str) = do
|
||||||
isAsciidoctor <- gets asciidoctorVariant
|
isAsciidoctor <- gets asciidoctorVariant
|
||||||
let contents = literal (escapeStringUsing (backslashEscapes "`") str)
|
let escChar '`' = "\\'"
|
||||||
|
escChar c = T.singleton c
|
||||||
|
let contents = literal (T.concatMap escChar str)
|
||||||
return $
|
return $
|
||||||
if isAsciidoctor
|
if isAsciidoctor
|
||||||
then text "`+" <> contents <> "+`"
|
then text "`+" <> contents <> "+`"
|
||||||
|
|
|
@ -434,9 +434,13 @@ inlineToConTeXt (Link _ txt (src, _)) = do
|
||||||
put $ st {stNextRef = next + 1}
|
put $ st {stNextRef = next + 1}
|
||||||
let ref = "url" <> tshow next
|
let ref = "url" <> tshow next
|
||||||
contents <- inlineListToConTeXt txt
|
contents <- inlineListToConTeXt txt
|
||||||
|
let escChar '#' = "\\#"
|
||||||
|
escChar '%' = "\\%"
|
||||||
|
escChar c = T.singleton c
|
||||||
|
let escContextURL = T.concatMap escChar
|
||||||
return $ "\\useURL"
|
return $ "\\useURL"
|
||||||
<> brackets (literal ref)
|
<> brackets (literal ref)
|
||||||
<> brackets (literal $ escapeStringUsing [('#',"\\#"),('%',"\\%")] src)
|
<> brackets (literal $ escContextURL src)
|
||||||
<> (if isAutolink
|
<> (if isAutolink
|
||||||
then empty
|
then empty
|
||||||
else brackets empty <> brackets contents)
|
else brackets empty <> brackets contents)
|
||||||
|
|
|
@ -15,6 +15,7 @@ Haddock: <http://www.haskell.org/haddock/doc/html/>
|
||||||
-}
|
-}
|
||||||
module Text.Pandoc.Writers.Haddock (writeHaddock) where
|
module Text.Pandoc.Writers.Haddock (writeHaddock) where
|
||||||
import Control.Monad.State.Strict
|
import Control.Monad.State.Strict
|
||||||
|
import Data.Char (isAlphaNum)
|
||||||
import Data.Default
|
import Data.Default
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
import qualified Data.Text as T
|
import qualified Data.Text as T
|
||||||
|
@ -71,8 +72,18 @@ notesToHaddock opts notes =
|
||||||
|
|
||||||
-- | Escape special characters for Haddock.
|
-- | Escape special characters for Haddock.
|
||||||
escapeString :: Text -> Text
|
escapeString :: Text -> Text
|
||||||
escapeString = escapeStringUsing haddockEscapes
|
escapeString t
|
||||||
where haddockEscapes = backslashEscapes "\\/'`\"@<"
|
| T.all isAlphaNum t = t
|
||||||
|
| otherwise = T.concatMap escChar t
|
||||||
|
where
|
||||||
|
escChar '\\' = "\\\\"
|
||||||
|
escChar '/' = "\\/"
|
||||||
|
escChar '\'' = "\\'"
|
||||||
|
escChar '`' = "\\`"
|
||||||
|
escChar '"' = "\\\""
|
||||||
|
escChar '@' = "\\@"
|
||||||
|
escChar '<' = "\\<"
|
||||||
|
escChar c = T.singleton c
|
||||||
|
|
||||||
-- | Convert Pandoc block element to haddock.
|
-- | Convert Pandoc block element to haddock.
|
||||||
blockToHaddock :: PandocMonad m
|
blockToHaddock :: PandocMonad m
|
||||||
|
|
|
@ -825,7 +825,19 @@ inlineToLaTeX (Code (_,classes,kvs) str) = do
|
||||||
let chr = case "!\"'()*,-./:;?@" \\ T.unpack str of
|
let chr = case "!\"'()*,-./:;?@" \\ T.unpack str of
|
||||||
(c:_) -> c
|
(c:_) -> c
|
||||||
[] -> '!'
|
[] -> '!'
|
||||||
let str' = escapeStringUsing (backslashEscapes "\\{}%~_&#^") str
|
let isEscapable '\\' = True
|
||||||
|
isEscapable '{' = True
|
||||||
|
isEscapable '}' = True
|
||||||
|
isEscapable '%' = True
|
||||||
|
isEscapable '~' = True
|
||||||
|
isEscapable '_' = True
|
||||||
|
isEscapable '&' = True
|
||||||
|
isEscapable '#' = True
|
||||||
|
isEscapable '^' = True
|
||||||
|
isEscapable _ = False
|
||||||
|
let escChar c | isEscapable c = T.pack ['\\',c]
|
||||||
|
| otherwise = T.singleton c
|
||||||
|
let str' = T.concatMap escChar str
|
||||||
-- we always put lstinline in a dummy 'passthrough' command
|
-- we always put lstinline in a dummy 'passthrough' command
|
||||||
-- (defined in the default template) so that we don't have
|
-- (defined in the default template) so that we don't have
|
||||||
-- to change the way we escape characters depending on whether
|
-- to change the way we escape characters depending on whether
|
||||||
|
|
|
@ -84,12 +84,15 @@ noteToOrg num note = do
|
||||||
|
|
||||||
-- | Escape special characters for Org.
|
-- | Escape special characters for Org.
|
||||||
escapeString :: Text -> Text
|
escapeString :: Text -> Text
|
||||||
escapeString = escapeStringUsing
|
escapeString t
|
||||||
[ ('\x2014',"---")
|
| T.all (\c -> c < '\x2013' || c > '\x2026') t = t
|
||||||
, ('\x2013',"--")
|
| otherwise = T.concatMap escChar t
|
||||||
, ('\x2019',"'")
|
where
|
||||||
, ('\x2026',"...")
|
escChar '\x2013' = "--"
|
||||||
]
|
escChar '\x2014' = "---"
|
||||||
|
escChar '\x2019' = "'"
|
||||||
|
escChar '\x2026' = "..."
|
||||||
|
escChar c = T.singleton c
|
||||||
|
|
||||||
isRawFormat :: Format -> Bool
|
isRawFormat :: Format -> Bool
|
||||||
isRawFormat f =
|
isRawFormat f =
|
||||||
|
|
|
@ -16,7 +16,7 @@ module Text.Pandoc.Writers.RTF ( writeRTF
|
||||||
import Control.Monad.Except (catchError, throwError)
|
import Control.Monad.Except (catchError, throwError)
|
||||||
import Control.Monad
|
import Control.Monad
|
||||||
import qualified Data.ByteString as B
|
import qualified Data.ByteString as B
|
||||||
import Data.Char (chr, isDigit, ord)
|
import Data.Char (chr, isDigit, ord, isAlphaNum)
|
||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
import qualified Data.Text as T
|
import qualified Data.Text as T
|
||||||
|
@ -137,15 +137,21 @@ handleUnicode = T.concatMap $ \c ->
|
||||||
|
|
||||||
-- | Escape special characters.
|
-- | Escape special characters.
|
||||||
escapeSpecial :: Text -> Text
|
escapeSpecial :: Text -> Text
|
||||||
escapeSpecial = escapeStringUsing $
|
escapeSpecial t
|
||||||
[ ('\t',"\\tab ")
|
| T.all isAlphaNum t = t
|
||||||
, ('\8216',"\\u8216'")
|
| otherwise = T.concatMap escChar t
|
||||||
, ('\8217',"\\u8217'")
|
where
|
||||||
, ('\8220',"\\u8220\"")
|
escChar '\t' = "\\tab "
|
||||||
, ('\8221',"\\u8221\"")
|
escChar '\8216' = "\\u8216'"
|
||||||
, ('\8211',"\\u8211-")
|
escChar '\8217' = "\\u8217'"
|
||||||
, ('\8212',"\\u8212-")
|
escChar '\8220' = "\\u8220\""
|
||||||
] <> backslashEscapes "{\\}"
|
escChar '\8221' = "\\u8221\""
|
||||||
|
escChar '\8211' = "\\u8211-"
|
||||||
|
escChar '\8212' = "\\u8212-"
|
||||||
|
escChar '{' = "\\{"
|
||||||
|
escChar '}' = "\\}"
|
||||||
|
escChar '\\' = "\\\\"
|
||||||
|
escChar c = T.singleton c
|
||||||
|
|
||||||
-- | Escape strings as needed for rich text format.
|
-- | Escape strings as needed for rich text format.
|
||||||
stringToRTF :: Text -> Text
|
stringToRTF :: Text -> Text
|
||||||
|
|
|
@ -14,7 +14,7 @@ Conversion of 'Pandoc' format into Texinfo.
|
||||||
module Text.Pandoc.Writers.Texinfo ( writeTexinfo ) where
|
module Text.Pandoc.Writers.Texinfo ( writeTexinfo ) where
|
||||||
import Control.Monad.Except (throwError)
|
import Control.Monad.Except (throwError)
|
||||||
import Control.Monad.State.Strict
|
import Control.Monad.State.Strict
|
||||||
import Data.Char (chr, ord)
|
import Data.Char (chr, ord, isAlphaNum)
|
||||||
import Data.List (maximumBy, transpose, foldl')
|
import Data.List (maximumBy, transpose, foldl')
|
||||||
import Data.List.NonEmpty (nonEmpty)
|
import Data.List.NonEmpty (nonEmpty)
|
||||||
import Data.Ord (comparing)
|
import Data.Ord (comparing)
|
||||||
|
@ -85,16 +85,18 @@ pandocToTexinfo options (Pandoc meta blocks) = do
|
||||||
|
|
||||||
-- | Escape things as needed for Texinfo.
|
-- | Escape things as needed for Texinfo.
|
||||||
stringToTexinfo :: Text -> Text
|
stringToTexinfo :: Text -> Text
|
||||||
stringToTexinfo = escapeStringUsing texinfoEscapes
|
stringToTexinfo t
|
||||||
where texinfoEscapes = [ ('{', "@{")
|
| T.all isAlphaNum t = t
|
||||||
, ('}', "@}")
|
| otherwise = T.concatMap escChar t
|
||||||
, ('@', "@@")
|
where escChar '{' = "@{"
|
||||||
, ('\160', "@ ")
|
escChar '}' = "@}"
|
||||||
, ('\x2014', "---")
|
escChar '@' = "@@"
|
||||||
, ('\x2013', "--")
|
escChar '\160' = "@ "
|
||||||
, ('\x2026', "@dots{}")
|
escChar '\x2014' = "---"
|
||||||
, ('\x2019', "'")
|
escChar '\x2013' = "--"
|
||||||
]
|
escChar '\x2026' = "@dots{}"
|
||||||
|
escChar '\x2019' = "'"
|
||||||
|
escChar c = T.singleton c
|
||||||
|
|
||||||
escapeCommas :: PandocMonad m => TI m (Doc Text) -> TI m (Doc Text)
|
escapeCommas :: PandocMonad m => TI m (Doc Text) -> TI m (Doc Text)
|
||||||
escapeCommas parser = do
|
escapeCommas parser = do
|
||||||
|
|
Loading…
Reference in a new issue