LaTeX writer: avoid {} after control sequences when escaping.

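`\ldots{}.` doesn't behave as well as `\ldots.` with the LaTeX
`ellipsis` package.  This patch causes pandoc to avoid emitting
the `{}` when it is not necessary.  Now `\ldots` and other
control sequences used in escaping will be followed by either
a `{}`, a space, or nothing, depending on context: in the
`TextString` context, a space is emitted before a letter and
nothing before any other non-space character; `{}` is still
emitted before whitespace, at the end of the string, and in
every other context.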

Thanks to Elliott Slaughter for the suggestion.
This commit is contained in:
John MacFarlane 2019-02-01 21:17:46 -08:00
parent b436087bc8
commit 633a9ecfec
4 changed files with 41 additions and 32 deletions

View file

@ -39,8 +39,8 @@ import Prelude
import Control.Applicative ((<|>))
import Control.Monad.State.Strict
import Data.Aeson (FromJSON, object, (.=))
import Data.Char (isAlphaNum, isAscii, isDigit, isLetter, isPunctuation, ord,
toLower)
import Data.Char (isAlphaNum, isAscii, isDigit, isLetter, isSpace,
isPunctuation, ord, toLower)
import Data.List (foldl', intercalate, intersperse, isInfixOf, nubBy,
stripPrefix, (\\), uncons)
import Data.Maybe (catMaybes, fromMaybe, isJust, mapMaybe, isNothing)
@ -345,10 +345,20 @@ stringToLaTeX context zs = do
Just cmd -> ((cmd ++ "{" ++ [c] ++ "}") ++)
<$> go opts ctx (drop 1 xs) -- drop combining accent
Nothing -> (c:) <$> go opts ctx xs
-- Emit the control sequence @cs@ (passed without a trailing @{}@ or space),
-- then decide what must separate it from the text that follows.
-- The decision looks at the first character of @rest@, the result of
-- running @go@ on the remaining input @xs@ (presumably the already-escaped
-- remainder of the string -- confirm against the definition of @go@).
let emitcseq cs = do
rest <- go opts ctx xs
case rest of
-- A letter directly after the control sequence would be read by TeX as
-- part of its name, so in text strings a single space terminates it.
c:_ | isLetter c
, ctx == TextString
-> return (cs <> " " <> rest)
-- TeX skips whitespace after a control word; the empty group keeps the
-- following space visible in the output.
| isSpace c -> return (cs <> "{}" <> rest)
-- Any other following character in a text string ends the control
-- sequence name by itself, so nothing is inserted.
| ctx == TextString
-> return (cs <> rest)
-- Fallback: @rest@ is empty, or @ctx@ is not TextString; keep the explicit
-- @{}@ terminator.
_ -> return (cs <> "{}" <> rest)
case x of
'{' -> emits "\\{"
'}' -> emits "\\}"
'`' | ctx == CodeString -> emits "\\textasciigrave{}"
'`' | ctx == CodeString -> emitcseq "\\textasciigrave"
'$' | not isUrl -> emits "\\$"
'%' -> emits "\\%"
'&' -> emits "\\&"
@ -358,19 +368,19 @@ stringToLaTeX context zs = do
-- prevent adjacent hyphens from forming ligatures
('-':_) -> emits "-\\/"
_ -> emitc '-'
'~' | not isUrl -> emits "\\textasciitilde{}"
'~' | not isUrl -> emitcseq "\\textasciitilde"
'^' -> emits "\\^{}"
'\\'| isUrl -> emitc '/' -- NB. / works as path sep even on Windows
| otherwise -> emits "\\textbackslash{}"
'|' | not isUrl -> emits "\\textbar{}"
'<' -> emits "\\textless{}"
'>' -> emits "\\textgreater{}"
| otherwise -> emitcseq "\\textbackslash"
'|' | not isUrl -> emitcseq "\\textbar"
'<' -> emitcseq "\\textless"
'>' -> emitcseq "\\textgreater"
'[' -> emits "{[}" -- to avoid interpretation as
']' -> emits "{]}" -- optional arguments
'\'' | ctx == CodeString -> emits "\\textquotesingle{}"
'\'' | ctx == CodeString -> emitcseq "\\textquotesingle"
'\160' -> emits "~"
'\x202F' -> emits "\\,"
'\x2026' -> emits "\\ldots{}"
'\x2026' -> emitcseq "\\ldots"
'\x2018' | ligatures -> emits "`"
'\x2019' | ligatures -> emits "'"
'\x201C' | ligatures -> emits "``"
@ -379,22 +389,22 @@ stringToLaTeX context zs = do
'\x2013' | ligatures -> emits "--"
_ | writerPreferAscii opts
-> case x of
'ı' -> emits "\\i "
'ȷ' -> emits "\\j "
'å' -> emits "\\aa "
'Å' -> emits "\\AA "
'ß' -> emits "\\ss "
'ø' -> emits "\\o "
'Ø' -> emits "\\O "
'Ł' -> emits "\\L "
'ł' -> emits "\\l "
'æ' -> emits "\\ae "
'Æ' -> emits "\\AE "
'œ' -> emits "\\oe "
'Œ' -> emits "\\OE "
'£' -> emits "\\pounds "
'€' -> emits "\\euro "
'©' -> emits "\\copyright "
'ı' -> emitcseq "\\i"
'ȷ' -> emitcseq "\\j"
'å' -> emitcseq "\\aa"
'Å' -> emitcseq "\\AA"
'ß' -> emitcseq "\\ss"
'ø' -> emitcseq "\\o"
'Ø' -> emitcseq "\\O"
'Ł' -> emitcseq "\\L"
'ł' -> emitcseq "\\l"
'æ' -> emitcseq "\\ae"
'Æ' -> emitcseq "\\AE"
'œ' -> emitcseq "\\oe"
'Œ' -> emitcseq "\\OE"
'£' -> emitcseq "\\pounds"
'€' -> emitcseq "\\euro"
'©' -> emitcseq "\\copyright"
_ -> emitc x
| otherwise -> emitc x

View file

@ -5,9 +5,8 @@ A&=&B,\\
C&=&D
\end{eqnarray}
^D
\textbackslash{}begin\{eqnarray\}
A\&=\&B,\textbackslash{}\textbackslash{} C\&=\&D
\textbackslash{}end\{eqnarray\}
\textbackslash begin\{eqnarray\} A\&=\&B,\textbackslash\textbackslash{}
C\&=\&D \textbackslash end\{eqnarray\}
```
```

View file

@ -9,7 +9,7 @@ pandoc -t html --ascii
pandoc -t latex --ascii
äéıå
^D
\"{a}\'{e}\i \r{a}
\"{a}\'{e}\i\r{a}
```
```

View file

@ -719,7 +719,7 @@ Subscripts: H\textsubscript{2}O, H\textsubscript{23}O,
H\textsubscript{many~of~them}O.
These should not be superscripts or subscripts, because of the unescaped
spaces: a\^{}b c\^{}d, a\textasciitilde{}b c\textasciitilde{}d.
spaces: a\^{}b c\^{}d, a\textasciitilde b c\textasciitilde d.
\begin{center}\rule{0.5\linewidth}{\linethickness}\end{center}
@ -741,7 +741,7 @@ Some dashes: one---two --- three---four --- five.
Dashes between numbers: 5--7, 255--66, 1987--1999.
Ellipses\ldots{}and\ldots{}and\ldots{}.
Ellipses\ldots and\ldots and\ldots.
\begin{center}\rule{0.5\linewidth}{\linethickness}\end{center}