Entity handling fixes:
- Text.Pandoc.XML.fromEntities: handle entities without a semicolon. Always look up named character references with the trailing ';', even if it wasn't present in the input, and never add it when looking up numerical entities. (This is what tagsoup seems to require.)
- Text.Pandoc.Parsing.characterReference: Always look up named character references with the trailing ';', and leave off the ';' when looking up numerical entities. This fixes a regression for e.g. `&lang;`.
This commit is contained in:
parent
52d95ddde1
commit
12a5bd3c8d
2 changed files with 10 additions and 3 deletions
|
@ -573,7 +573,10 @@ characterReference :: Stream s m Char => ParserT s st m Char
|
|||
characterReference = try $ do
|
||||
char '&'
|
||||
ent <- many1Till nonspaceChar (char ';')
|
||||
case lookupEntity ent of
|
||||
let ent' = case ent of
|
||||
'#':_ -> ent
|
||||
_ -> ent ++ ";"
|
||||
case lookupEntity ent' of
|
||||
Just c -> return c
|
||||
Nothing -> fail "entity not found"
|
||||
|
||||
|
|
|
@ -100,11 +100,15 @@ toEntities (c:cs)
|
|||
-- Unescapes XML entities
|
||||
fromEntities :: String -> String
|
||||
fromEntities ('&':xs) =
|
||||
case lookupEntity ent of
|
||||
case lookupEntity ent' of
|
||||
Just c -> c : fromEntities rest
|
||||
Nothing -> '&' : fromEntities xs
|
||||
where (ent, rest) = case break (\c -> isSpace c || c == ';') xs of
|
||||
(zs,';':ys) -> (zs,ys)
|
||||
_ -> ("",xs)
|
||||
(zs, ys) -> (zs,ys)
|
||||
ent' = case ent of
|
||||
'#':_ -> ent
|
||||
_ -> ent ++ ";"
|
||||
|
||||
fromEntities (x:xs) = x : fromEntities xs
|
||||
fromEntities [] = []
|
||||
|
|
Loading…
Add table
Reference in a new issue