Groff writers escaping changes.

- Improve escaping of accented characters with `--ascii`.
  Combining accents are now handled properly.
- Don't escape spaces and tabs in code blocks. This doesn't
  seem to be necessary.
This commit is contained in:
John MacFarlane 2018-10-22 23:47:14 -07:00
parent 8b204dda77
commit 261cbad0ef
3 changed files with 56 additions and 54 deletions

View file

@ -43,12 +43,13 @@ import Data.Char (ord, isAscii)
import Control.Monad.State.Strict
import Data.List (intercalate)
import qualified Data.Map as Map
import Data.Maybe (fromMaybe)
import Data.Maybe (fromMaybe, isJust, catMaybes)
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Pretty
import Text.Printf (printf)
import Text.Pandoc.GroffChar (essentialEscapes, characterCodes)
import Text.Pandoc.GroffChar (essentialEscapes, characterCodes,
combiningAccents)
data WriterState = WriterState { stHasInlineMath :: Bool
, stFirstPara :: Bool
@ -79,32 +80,33 @@ type Note = [Block]
-- | 'StateT' specialised to carry a 'WriterState'; the underlying monad and
-- the result type are left for each use site to supply.
type MS = StateT WriterState
-- | Escape a single character for groff.
--
-- A character with an entry in 'essentialEscapes' is always replaced by that
-- entry. Otherwise, when @useAscii@ is 'True' and the character is not ASCII,
-- it is written as @\\[name]@ if 'characterCodeMap' has a name for it, and as
-- @\\[uXXXX]@ (code point in uppercase hex, zero-padded to at least four
-- digits) if it does not. Any other character is returned unchanged as a
-- one-character string.
escapeChar :: Bool -> Char -> String
escapeChar useAscii c =
case Map.lookup c essentialEscapes of
Just s -> s
Nothing
-- Only non-ASCII characters are rewritten, and only in ASCII mode.
| useAscii
, not (isAscii c) ->
case Map.lookup c characterCodeMap of
Just t -> "\\[" <> t <> "]"
Nothing -> printf "\\[u%04X]" (ord c)
| otherwise -> [c]
-- | Lookup table built from the 'combiningAccents' association list.
--
-- 'escapeString' consults it for the characters that directly follow a
-- non-ASCII character and places the strings it finds inside the same
-- @\\[...]@ escape as the base character.
-- NOTE(review): presumably the keys are Unicode combining marks and the
-- values are groff glyph names -- confirm against Text.Pandoc.GroffChar.
combiningAccentsMap :: Map.Map Char String
combiningAccentsMap = Map.fromList combiningAccents
-- | Escape special characters for groff.
--
-- The 'Bool' selects ASCII-only output. A character with an entry in
-- 'essentialEscapes' is always replaced by that entry. In ASCII-only mode a
-- non-ASCII character is written as one @\\[...]@ escape that also absorbs
-- any immediately following characters found in 'combiningAccentsMap'.
-- Everything else is copied through unchanged.
escapeString :: Bool -> String -> String
-- NOTE(review): the one-argument equation on the next line maps 'escapeChar'
-- over the string, while the equations after it recurse over the string
-- directly. Equations of one function must take the same number of
-- arguments, so these look like the before and after forms of the
-- definition rather than parts of a single one -- confirm which is current.
escapeString useAscii = concatMap (escapeChar useAscii)
escapeString _ [] = []
escapeString useAscii (x:xs) =
case Map.lookup x essentialEscapes of
Just s -> s ++ escapeString useAscii xs
Nothing
-- ASCII characters, and every character when ASCII-only output is off,
-- pass through as they are.
| isAscii x || not useAscii -> x : escapeString useAscii xs
| otherwise ->
-- Gather the mapped strings for the run of characters right after x that
-- have entries in combiningAccentsMap; the run ends at the first character
-- without an entry.
let accents = catMaybes $ takeWhile isJust
(map (\c -> Map.lookup c combiningAccentsMap) xs)
-- Exactly one input character was consumed per gathered accent.
rest = drop (length accents) xs
-- Build a single bracket escape: the base character's name from
-- characterCodeMap (or its uXXXX code point when it has none), followed by
-- the accents, all separated by single spaces.
s = case Map.lookup x characterCodeMap of
Just t -> "\\[" <> unwords (t:accents) <> "]"
Nothing -> "\\[" <> unwords
(printf "u%04X" (ord x) : accents) <> "]"
in s ++ escapeString useAscii rest
-- | Escape a literal (code) section for groff.
--
-- The input is split with 'lines', each line is escaped on its own, and the
-- results are joined with single newlines, so a trailing newline in the
-- input is not reproduced. A line that starts with @.@ receives a protective
-- prefix (presumably so groff does not read it as a request line).
escapeCode :: Bool -> String -> String
-- NOTE(review): two definitions of 'escapeCode' follow; they look like the
-- before and after forms of the function -- confirm which is current.
escapeCode useAScii = intercalate "\n" . map escapeLine . lines
-- This form backslash-escapes every space and tab and sends all other
-- characters through 'escapeChar'.
where escapeCodeChar ' ' = "\\ "
escapeCodeChar '\t' = "\\\t"
escapeCodeChar c = escapeChar useAScii c
escapeLine codeline =
case concatMap escapeCodeChar codeline of
-- The dot test is applied to the already-escaped text; prefix is "\\&".
a@('.':_) -> "\\&" ++ a
b -> b
-- This form leaves spaces and tabs alone and delegates to 'escapeString'.
escapeCode useAscii = intercalate "\n" . map escapeLine . lines
where escapeLine xs = case xs of
-- The dot test is applied to the raw line, before escaping.
-- NOTE(review): the prefix here is "\\%", whereas the form above uses
-- "\\&" for the same purpose -- confirm the switch is intended.
('.':_) -> "\\%" ++ escapeString useAscii xs
_ -> escapeString useAscii xs
-- | Lookup table built from the 'characterCodes' association list.
--
-- The escaping functions look up non-ASCII characters here and wrap the
-- string they find as @\\[string]@.
characterCodeMap :: Map.Map Char String
characterCodeMap = Map.fromList characterCodes

View file

@ -54,8 +54,8 @@ Code in a block quote:
.IP
.nf
\f[C]
sub\ status\ {
\ \ \ \ print\ \[dq]working\[dq];
sub status {
print \[dq]working\[dq];
}
\f[R]
.fi
@ -88,13 +88,13 @@ Code:
.IP
.nf
\f[C]
\-\-\-\-\ (should\ be\ four\ hyphens)
\-\-\-\- (should be four hyphens)
sub\ status\ {
\ \ \ \ print\ \[dq]working\[dq];
sub status {
print \[dq]working\[dq];
}
this\ code\ block\ is\ indented\ by\ one\ tab
this code block is indented by one tab
\f[R]
.fi
.PP
@ -102,9 +102,9 @@ And:
.IP
.nf
\f[C]
\ \ \ \ this\ code\ block\ is\ indented\ by\ two\ tabs
this code block is indented by two tabs
These\ should\ not\ be\ escaped:\ \ \[rs]$\ \[rs]\[rs]\ \[rs]>\ \[rs][\ \[rs]{
These should not be escaped: \[rs]$ \[rs]\[rs] \[rs]> \[rs][ \[rs]{
\f[R]
.fi
.PP
@ -364,7 +364,7 @@ orange fruit
.IP
.nf
\f[C]
{\ orange\ code\ block\ }
{ orange code block }
\f[R]
.fi
.RS
@ -430,7 +430,7 @@ This should be a code block, though:
.nf
\f[C]
<div>
\ \ \ \ foo
foo
</div>
\f[R]
.fi
@ -454,7 +454,7 @@ Code block:
.IP
.nf
\f[C]
<!\-\-\ Comment\ \-\->
<!\-\- Comment \-\->
\f[R]
.fi
.PP
@ -464,7 +464,7 @@ Code:
.IP
.nf
\f[C]
<hr\ />
<hr />
\f[R]
.fi
.PP
@ -545,7 +545,7 @@ Here\[cq]s one that has a line break in it:
.PP
These shouldn\[cq]t be math:
.IP \[bu] 2
To get the famous equation, write \f[C]$e\ =\ mc\[ha]2$\f[R].
To get the famous equation, write \f[C]$e = mc\[ha]2$\f[R].
.IP \[bu] 2
$22,000 is a \f[I]lot\f[R] of money.
So is $34,000.
@ -653,7 +653,7 @@ This should [not][] be a link.
.IP
.nf
\f[C]
[not]:\ /url
[not]: /url
\f[R]
.fi
.PP
@ -690,7 +690,7 @@ Auto\-links should not occur here: \f[C]<http://example.com/>\f[R]
.IP
.nf
\f[C]
or\ here:\ <http://example.com/>
or here: <http://example.com/>
\f[R]
.fi
.PP
@ -733,7 +733,7 @@ with list items).
.IP
.nf
\f[C]
\ \ {\ <code>\ }
{ <code> }
\f[R]
.fi
.PP

View file

@ -158,8 +158,8 @@ Code in a block quote:
.IP
.nf
\f[C]
sub\ status\ {
\ \ \ \ print\ \[dq]working\[dq];
sub status {
print \[dq]working\[dq];
}
\f[]
.fi
@ -194,13 +194,13 @@ Code:
.IP
.nf
\f[C]
\-\-\-\-\ (should\ be\ four\ hyphens)
\-\-\-\- (should be four hyphens)
sub\ status\ {
\ \ \ \ print\ \[dq]working\[dq];
sub status {
print \[dq]working\[dq];
}
this\ code\ block\ is\ indented\ by\ one\ tab
this code block is indented by one tab
\f[]
.fi
.LP
@ -208,9 +208,9 @@ And:
.IP
.nf
\f[C]
\ \ \ \ this\ code\ block\ is\ indented\ by\ two\ tabs
this code block is indented by two tabs
These\ should\ not\ be\ escaped:\ \ \[rs]$\ \[rs]\[rs]\ \[rs]>\ \[rs][\ \[rs]{
These should not be escaped: \[rs]$ \[rs]\[rs] \[rs]> \[rs][ \[rs]{
\f[]
.fi
.HLINE
@ -496,7 +496,7 @@ orange fruit
.IP
.nf
\f[C]
{\ orange\ code\ block\ }
{ orange code block }
\f[]
.fi
.RS
@ -579,7 +579,7 @@ This should be a code block, though:
.nf
\f[C]
<div>
\ \ \ \ foo
foo
</div>
\f[]
.fi
@ -603,7 +603,7 @@ Code block:
.IP
.nf
\f[C]
<!\-\-\ Comment\ \-\->
<!\-\- Comment \-\->
\f[]
.fi
.LP
@ -613,7 +613,7 @@ Code:
.IP
.nf
\f[C]
<hr\ />
<hr />
\f[]
.fi
.LP
@ -704,7 +704,7 @@ Here’s one that has a line break in it: @alpha + omega times x sup 2@.
.LP
These shouldn’t be math:
.IP \[bu] 3
To get the famous equation, write \f[C]$e\ =\ mc\[ha]2$\f[R].
To get the famous equation, write \f[C]$e = mc\[ha]2$\f[R].
.IP \[bu] 3
$22,000 is a \f[I]lot\f[R] of money.
So is $34,000.
@ -867,7 +867,7 @@ This should [not][] be a link.
.IP
.nf
\f[C]
[not]:\ /url
[not]: /url
\f[]
.fi
.LP
@ -939,7 +939,7 @@ Auto\-links should not occur here: \f[C]<http://example.com/>\f[R]
.IP
.nf
\f[C]
or\ here:\ <http://example.com/>
or here: <http://example.com/>
\f[]
.fi
.HLINE
@ -975,7 +975,7 @@ with list items).
.IP
.nf
\f[C]
\ \ {\ <code>\ }
{ <code> }
\f[]
.fi
.LP