XML: toHtml5Entities: prefer shorter entities...

when there are several choices for a particular character.
This commit is contained in:
John MacFarlane 2018-11-04 22:15:53 -08:00
parent 0c84630549
commit 511d647290
2 changed files with 12 additions and 5 deletions

View file

@ -114,9 +114,16 @@ toHtml5Entities = T.concatMap go
Nothing -> T.pack ("&#" ++ show (ord c) ++ ";")
-- | Lookup table from a character to an entity name; the stored name has
-- everything from the first ';' onward removed.
html5EntityMap :: M.Map Char Text
-- NOTE(review): this list-comprehension form appears to be the pre-commit
-- definition (the diff markers are not shown in this view) -- confirm.
-- It keeps only entries whose name is non-empty and ends in ';' and whose
-- expansion is exactly one character.  When several names map to the same
-- character, 'M.fromList' retains whichever pair comes last in the list.
html5EntityMap = M.fromList [(c, T.takeWhile (/=';') (T.pack ent))
-- The @(_:_)@ pattern rejects empty names, which keeps 'last' below total.
| (ent@(_:_), [c]) <- htmlEntities
, last ent == ';']
-- Build the table by folding over 'htmlEntities' from the right, recording
-- one name per character and preferring the shorter name on a clash.
html5EntityMap = foldr addEntity mempty htmlEntities
  where
    -- Only entries that expand to exactly one character are recorded;
    -- everything else leaves the accumulated map untouched.
    addEntity (name, [ch]) acc = M.insertWith shorter ch (stripSemi name) acc
    addEntity _            acc = acc

    -- The entity name up to, but not including, the first ';'.
    stripSemi = T.takeWhile (/= ';') . T.pack

    -- Combining function for 'M.insertWith': the already-stored name wins
    -- only when the incoming one is strictly longer, so ties go to the
    -- incoming name.  Because 'foldr' starts at the end of the list, the
    -- incoming name is the one that occurs earlier in 'htmlEntities'.
    shorter incoming existing
      | T.length incoming > T.length existing = existing
      | otherwise                             = incoming
-- Unescapes XML entities
fromEntities :: String -> String

View file

@ -2,7 +2,7 @@
pandoc -t html --ascii
äéıå
^D
<p>&auml;&eacute;&inodot;&aring;</p>
<p>&auml;&eacute;&imath;&aring;</p>
```
```
@ -48,6 +48,6 @@ pandoc -t jats --ascii
pandoc -t markdown-smart --ascii
"äéıå"
^D
&ldquo;&auml;&eacute;&inodot;&aring;&rdquo;
&ldquo;&auml;&eacute;&imath;&aring;&rdquo;
```