From 12a5bd3c8d34eddbabee0dc54fd7ce6d9539c9d4 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 8 Jan 2016 17:08:01 -0800
Subject: [PATCH] Entity handling fixes:

- Text.Pandoc.XML.fromEntities:  handle entities without a
  semicolon. Always look up named character references with
  the trailing ';', even if it wasn't present.  And never add
  it when looking up numerical entities.  (This is what
  tagsoup seems to require.)
- Text.Pandoc.Parsing.characterReference:  Always look up
  named character references with the trailing ';', and leave
  off the ';' when looking up numerical entities.

This fixes a regression for e.g. `&lang;`.
---
 src/Text/Pandoc/Parsing.hs | 5 ++++-
 src/Text/Pandoc/XML.hs     | 8 ++++++--
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 85786eb3e..db891d5d4 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -573,7 +573,10 @@ characterReference :: Stream s m Char => ParserT s st m Char
 characterReference = try $ do
   char '&'
   ent <- many1Till nonspaceChar (char ';')
-  case lookupEntity ent of
+  let ent' = case ent of
+                  '#':_  -> ent
+                  _      -> ent ++ ";"
+  case lookupEntity ent' of
        Just c  -> return c
        Nothing -> fail "entity not found"
 
diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs
index caa13f177..1e01b62f2 100644
--- a/src/Text/Pandoc/XML.hs
+++ b/src/Text/Pandoc/XML.hs
@@ -100,11 +100,15 @@ toEntities (c:cs)
 -- Unescapes XML entities
 fromEntities :: String -> String
 fromEntities ('&':xs) =
-  case lookupEntity ent of
+  case lookupEntity ent' of
         Just c  -> c : fromEntities rest
         Nothing -> '&' : fromEntities xs
     where (ent, rest) = case break (\c -> isSpace c || c == ';') xs of
                              (zs,';':ys) -> (zs,ys)
-                             _           -> ("",xs)
+                             (zs,    ys) -> (zs,ys)
+          ent' = case ent of
+                      '#':_ -> ent
+                      _     -> ent ++ ";"
+
 fromEntities (x:xs) = x : fromEntities xs
 fromEntities [] = []