XML: toHtml5Entities: prefer shorter entities
when there are several choices for a particular character.
This commit is contained in:
parent
0c84630549
commit
511d647290
2 changed files with 12 additions and 5 deletions
|
@ -114,9 +114,16 @@ toHtml5Entities = T.concatMap go
|
|||
Nothing -> T.pack ("&#" ++ show (ord c) ++ ";")
|
||||
|
||||
-- Lookup table from a character to the name of an entity that denotes it.
-- Names are stored with everything from the first ';' onward removed.
html5EntityMap :: M.Map Char Text
|
||||
-- List-comprehension body (appears to be the version this commit replaces).
-- Only pairs whose expansion is exactly one character and whose name is
-- non-empty and ends in ';' are used. M.fromList keeps the last value given
-- for a duplicate key, so when several names denote the same character the
-- one occurring last in htmlEntities is the one retained.
html5EntityMap = M.fromList [(c, T.takeWhile (/=';') (T.pack ent))
|
||||
| (ent@(_:_), [c]) <- htmlEntities
|
||||
, last ent == ';']
|
||||
-- Fold over every (entity name, expansion) pair of htmlEntities. For each
-- expansion that is exactly one character, keep the shortest name seen so
-- far; when two names have equal length, the one being inserted replaces
-- the one already stored. Pairs with any other expansion are ignored.
html5EntityMap = foldr register mempty htmlEntities
  where
    register (name, [ch]) table =
      M.insertWith pickShorter ch (stripSemi name) table
    register _ table = table

    -- 'fresh' is the name being inserted, 'kept' the one already in the map.
    pickShorter fresh kept
      | T.length fresh > T.length kept = kept
      | otherwise = fresh

    -- Drop everything from the first ';' onward.
    stripSemi = T.takeWhile (/= ';') . T.pack
|
||||
|
||||
|
||||
-- Unescapes XML entities
|
||||
fromEntities :: String -> String
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
pandoc -t html --ascii
|
||||
äéıå
|
||||
^D
|
||||
<p>&auml;&eacute;&inodot;&aring;</p>
|
||||
<p>&auml;&eacute;&imath;&aring;</p>
|
||||
```
|
||||
|
||||
```
|
||||
|
@ -48,6 +48,6 @@ pandoc -t jats --ascii
|
|||
pandoc -t markdown-smart --ascii
|
||||
"äéıå"
|
||||
^D
|
||||
&ldquo;&auml;&eacute;&inodot;&aring;&rdquor;
|
||||
&ldquo;&auml;&eacute;&imath;&aring;&rdquo;
|
||||
```
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue