From e2c157f86f985e4ab5c702fff87b647f4ae842c8 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sun, 5 Feb 2012 22:52:00 -0800
Subject: [PATCH] Removed module Text.Pandoc.CharacterReferences.

Moved characterReference parser to Text.Pandoc.Parsing.
decodeCharacterReferences is now replaced by fromEntities
in Text.Pandoc.XML.
---
 changelog                              |  5 ++
 pandoc.cabal                           |  1 -
 src/Text/Pandoc/CharacterReferences.hs | 72 --------------------------
 src/Text/Pandoc/Parsing.hs             | 12 ++++-
 src/Text/Pandoc/Readers/Markdown.hs    |  6 +--
 src/Text/Pandoc/Writers/HTML.hs        |  5 +-
 6 files changed, 21 insertions(+), 80 deletions(-)
 delete mode 100644 src/Text/Pandoc/CharacterReferences.hs

diff --git a/changelog b/changelog
index 68d977f4d..e1324731c 100644
--- a/changelog
+++ b/changelog
@@ -418,6 +418,11 @@ pandoc (1.9)
 
   * Removed `Text.Pandoc.S5`, which is no longer needed.
 
+  * Removed `Text.Pandoc.CharacterReferences`.  Moved
+    `characterReference` to `Text.Pandoc.Parsing`.
+    `decodeCharacterReferences` is replaced by `fromEntities`
+    in `Text.Pandoc.XML`.
+
   * Added `Text.Pandoc.ImageSize`.  This is intened for use
     in `docx` and `odt` writers, so the size and dpi of images
     can be calculated.
diff --git a/pandoc.cabal b/pandoc.cabal
index 46d09a75d..258c248e7 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -233,7 +233,6 @@ Library
 
   Exposed-Modules: Text.Pandoc,
                    Text.Pandoc.Pretty,
-                   Text.Pandoc.CharacterReferences,
                    Text.Pandoc.Shared,
                    Text.Pandoc.Parsing,
                    Text.Pandoc.Highlighting,
diff --git a/src/Text/Pandoc/CharacterReferences.hs b/src/Text/Pandoc/CharacterReferences.hs
deleted file mode 100644
index 8157d94d3..000000000
--- a/src/Text/Pandoc/CharacterReferences.hs
+++ /dev/null
@@ -1,72 +0,0 @@
-{-
-Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
--}
-
-{- |
-   Module      : Text.Pandoc.CharacterReferences
-   Copyright   : Copyright (C) 2006-2010 John MacFarlane
-   License     : GNU GPL, version 2 or above 
-
-   Maintainer  : John MacFarlane <jgm@berkeley.edu>
-   Stability   : alpha
-   Portability : portable
-
-Functions for parsing character references.
--}
-module Text.Pandoc.CharacterReferences (
-                     characterReference,
-                     decodeCharacterReferences,
-                    ) where
-import Text.ParserCombinators.Parsec
-import Text.HTML.TagSoup.Entity ( lookupNamedEntity, lookupNumericEntity )
-import Data.Maybe ( fromMaybe )
-
--- | Parse character entity.
-characterReference :: GenParser Char st Char
-characterReference = try $ do
-  char '&'
-  character <- numRef <|> entity
-  char ';'
-  return character  
-
-numRef :: GenParser Char st Char
-numRef = do
-  char '#'
-  num <- hexNum <|> decNum
-  return $ fromMaybe '?' $ lookupNumericEntity num
-
-hexNum :: GenParser Char st [Char]
-hexNum = do
-  x <- oneOf "Xx"
-  num <- many1 hexDigit
-  return (x:num)
-
-decNum :: GenParser Char st [Char]
-decNum = many1 digit
-
-entity :: GenParser Char st Char
-entity = do
-  body <- many1 alphaNum
-  return $ fromMaybe '?' $ lookupNamedEntity body
-
--- | Convert entities in a string to characters.
-decodeCharacterReferences :: String -> String
-decodeCharacterReferences str = 
-  case parse (many (characterReference <|> anyChar)) str str of
-	Left err        -> error $ "\nError: " ++ show err
-	Right result    -> result
-
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index bb0ac18cf..08769a4f4 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -51,6 +51,7 @@ module Text.Pandoc.Parsing ( (>>~),
                              failIfStrict,
                              failUnlessLHS,
                              escaped,
+                             characterReference,
                              anyOrderedListMarker,
                              orderedListMarker,
                              charRef,
@@ -78,7 +79,6 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Generic
 import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
 import Text.ParserCombinators.Parsec
-import Text.Pandoc.CharacterReferences ( characterReference )
 import Data.Char ( toLower, toUpper, ord, isAscii, isAlphaNum, isDigit, isPunctuation )
 import Data.List ( intercalate, transpose )
 import Network.URI ( parseURI, URI (..), isAllowedInURI )
@@ -86,6 +86,7 @@ import Control.Monad ( join, liftM, guard )
 import Text.Pandoc.Shared
 import qualified Data.Map as M
 import Text.TeXMath.Macros (applyMacros, Macro, parseMacroDefinitions)
+import Text.HTML.TagSoup.Entity ( lookupEntity )
 
 -- | Like >>, but returns the operation on the left.
 -- (Suggested by Tillmann Rendel on Haskell-cafe list.)
@@ -337,6 +338,15 @@ escaped :: GenParser Char st Char  -- ^ Parser for character to escape
         -> GenParser Char st Char
 escaped parser = try $ char '\\' >> parser
 
+-- | Parse character entity.
+characterReference :: GenParser Char st Char
+characterReference = try $ do
+  char '&'
+  ent <- manyTill nonspaceChar (char ';')
+  case lookupEntity ent of
+       Just c  -> return c
+       Nothing -> return '?'
+
 -- | Parses an uppercase roman numeral and returns (UpperRoman, number).
 upperRoman :: GenParser Char st (ListNumberStyle, Int)
 upperRoman = do
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 8da0f7c16..607d0971a 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -42,7 +42,7 @@ import Text.Pandoc.Parsing
 import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
 import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag,
                                   isTextTag, isCommentTag )
-import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
+import Text.Pandoc.XML ( fromEntities )
 import Text.ParserCombinators.Parsec
 import Control.Monad (when, liftM, guard, mzero)
 import Text.HTML.TagSoup
@@ -244,7 +244,7 @@ referenceTitle = try $ do
         <|> do delim <- char '\'' <|> char '"'
                manyTill litChar (try (char delim >> skipSpaces >>
                                       notFollowedBy (noneOf ")\n")))
-  return $ decodeCharacterReferences tit
+  return $ fromEntities tit
 
 noteMarker :: GenParser Char ParserState [Char]
 noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']')
@@ -1176,7 +1176,7 @@ linkTitle = try $ do
   skipSpaces
   delim <- oneOf "'\""
   tit <-   manyTill litChar (try (char delim >> skipSpaces >> eof))
-  return $ decodeCharacterReferences tit
+  return $ fromEntities tit
 
 link :: GenParser Char ParserState Inline
 link = try $ do
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index f46d08570..f35b29370 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -31,7 +31,6 @@ Conversion of 'Pandoc' documents to HTML.
 -}
 module Text.Pandoc.Writers.HTML ( writeHtml , writeHtmlString ) where
 import Text.Pandoc.Definition
-import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
 import Text.Pandoc.Shared
 import Text.Pandoc.Templates
 import Text.Pandoc.Generic
@@ -39,7 +38,7 @@ import Text.Pandoc.Readers.TeXMath
 import Text.Pandoc.Slides
 import Text.Pandoc.Highlighting ( highlight, styleToCss,
                                   formatHtmlInline, formatHtmlBlock )
-import Text.Pandoc.XML (stripTags, escapeStringForXML)
+import Text.Pandoc.XML (stripTags, escapeStringForXML, fromEntities)
 import Network.HTTP ( urlEncode )
 import Numeric ( showHex )
 import Data.Char ( ord, toLower )
@@ -344,7 +343,7 @@ obfuscateChar char =
 
 -- | Obfuscate string using entities.
 obfuscateString :: String -> String
-obfuscateString = concatMap obfuscateChar . decodeCharacterReferences
+obfuscateString = concatMap obfuscateChar . fromEntities
 
 attrsToHtml :: WriterOptions -> Attr -> [Attribute]
 attrsToHtml opts (id',classes',keyvals) =