From 87c66d4623200a48eab6e550f73372aaf3608289 Mon Sep 17 00:00:00 2001
From: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>
Date: Fri, 1 Jan 2010 04:11:48 +0000
Subject: [PATCH] Finished converting HTML writer to use unicode instead of
 entities.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1767 788f1e2b-df1e-0410-8736-df70ead52e1b
---
 src/Text/Pandoc/Writers/HTML.hs | 27 +++++++-------
 tests/writer.html               | 66 ++++++++++++++++-----------------
 2 files changed, 46 insertions(+), 47 deletions(-)

diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 9a093ad96..c80a3b32f 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -60,9 +60,8 @@ renderFragment opts = if writerWrapText opts
                          then renderHtmlFragment
                          else showHtmlFragment
 
--- | Slightly modified version of Text.XHtml's stringToHtml.
--- Only uses numerical entities for 0xff and greater.
--- Adds &nbsp;.
+-- | Modified version of Text.XHtml's stringToHtml.
+-- Uses Unicode characters wherever possible.
 stringToHtml :: String -> Html
 stringToHtml = primHtml . concatMap fixChar
     where
@@ -248,7 +247,7 @@ obfuscateLink opts txt s =
                      linkText  ++ "+'<\\/'+'a'+'>');\n// -->\n")) +++  
                      noscript (primHtml $ obfuscateString altText)
                 _ -> error $ "Unknown obfuscation method: " ++ show meth
-        _ -> anchor ! [href s] $ primHtml txt  -- malformed email
+        _ -> anchor ! [href s] $ stringToHtml txt  -- malformed email
 
 -- | Obfuscate character as entity.
 obfuscateChar :: Char -> String
@@ -410,11 +409,11 @@ inlineToHtml opts inline =
   case inline of  
     (Str str)        -> return $ stringToHtml str
     (Space)          -> return $ stringToHtml " "
-    (LineBreak)      -> return $ br
-    (EmDash)         -> return $ primHtmlChar "mdash"
-    (EnDash)         -> return $ primHtmlChar "ndash"
-    (Ellipses)       -> return $ primHtmlChar "hellip"
-    (Apostrophe)     -> return $ primHtmlChar "rsquo"
+    (LineBreak)      -> return br
+    (EmDash)         -> return $ stringToHtml "—"
+    (EnDash)         -> return $ stringToHtml "–"
+    (Ellipses)       -> return $ stringToHtml "…"
+    (Apostrophe)     -> return $ stringToHtml "’"
     (Emph lst)       -> inlineListToHtml opts lst >>= return . emphasize
     (Strong lst)     -> inlineListToHtml opts lst >>= return . strong
     (Code str)       -> return $ thecode << str
@@ -426,10 +425,10 @@ inlineToHtml opts inline =
     (Subscript lst)   -> inlineListToHtml opts lst >>= return . sub
     (Quoted quoteType lst) ->
                         let (leftQuote, rightQuote) = case quoteType of
-                              SingleQuote -> (primHtmlChar "lsquo", 
-                                              primHtmlChar "rsquo")
-                              DoubleQuote -> (primHtmlChar "ldquo", 
-                                              primHtmlChar "rdquo")
+                              SingleQuote -> (stringToHtml "‘",
+                                              stringToHtml "’")
+                              DoubleQuote -> (stringToHtml "“",
+                                              stringToHtml "”")
                         in  do contents <- inlineListToHtml opts lst
                                return $ leftQuote +++ contents +++ rightQuote
     (Math t str) -> 
@@ -502,7 +501,7 @@ blockListToNote opts ref blocks =
   -- that block. Otherwise, insert a new Plain block with the backlink.
   let backlink = [HtmlInline $ " <a href=\"#" ++ writerIdentifierPrefix opts ++ "fnref" ++ ref ++ 
                  "\" class=\"footnoteBackLink\"" ++
-                 " title=\"Jump back to footnote " ++ ref ++ "\">&#8617;</a>"]
+                 " title=\"Jump back to footnote " ++ ref ++ "\">↩</a>"]
       blocks'  = if null blocks
                     then []
                     else let lastBlock   = last blocks
diff --git a/tests/writer.html b/tests/writer.html
index c67cfbd42..cd5a687e0 100644
--- a/tests/writer.html
+++ b/tests/writer.html
@@ -11,7 +11,7 @@
 <body>
 <h1 class="title">Pandoc Test Suite</h1>
 <p
->This is a set of tests for pandoc. Most of them are adapted from John Gruber&rsquo;s markdown test suite.</p
+>This is a set of tests for pandoc. Most of them are adapted from John Gruber’s markdown test suite.</p
 ><hr
  /><div id="headers"
 ><h1
@@ -64,11 +64,11 @@
 ><h1
   >Paragraphs</h1
   ><p
-  >Here&rsquo;s a regular paragraph.</p
+  >Here’s a regular paragraph.</p
   ><p
   >In Markdown 1.0.0 and earlier. Version 8. This line turns into a list item. Because a hard-wrapped line in the middle of a paragraph looked like a list item.</p
   ><p
-  >Here&rsquo;s one with a bullet. * criminey.</p
+  >Here’s one with a bullet. * criminey.</p
   ><p
   >There should be a hard line break<br
      />here.</p
@@ -292,7 +292,7 @@ These should not be escaped:  \$ \\ \&gt; \[ \{
       ><p
 	>Item 1, graf one.</p
 	><p
-	>Item 1. graf two. The quick brown fox jumped over the lazy dog&rsquo;s back.</p
+	>Item 1. graf two. The quick brown fox jumped over the lazy dog’s back.</p
 	></li
       ><li
       ><p
@@ -320,7 +320,7 @@ These should not be escaped:  \$ \\ \&gt; \[ \{
 	></li
       ></ul
     ><p
-    >Here&rsquo;s another:</p
+    >Here’s another:</p
     ><ol style="list-style-type: decimal;"
     ><li
       >First</li
@@ -634,7 +634,7 @@ These should not be escaped:  \$ \\ \&gt; \[ \{
 
 <script type="text/javascript">document.write('This *should not* be interpreted as markdown');</script>
 <p
-  >Here&rsquo;s a simple block:</p
+  >Here’s a simple block:</p
   ><div>
     foo</div>
 <p
@@ -692,7 +692,7 @@ Blah
 </code
     ></pre
   ><p
-  >Hr&rsquo;s:</p
+  >Hr’s:</p
   ><hr>
 
 <hr />
@@ -801,25 +801,25 @@ Blah
 ><h1
   >Smart quotes, ellipses, dashes</h1
   ><p
-  >&ldquo;Hello,&rdquo; said the spider. &ldquo;&lsquo;Shelob&rsquo; is my name.&rdquo;</p
+  >“Hello,” said the spider. “‘Shelob’ is my name.”</p
   ><p
-  >&lsquo;A&rsquo;, &lsquo;B&rsquo;, and &lsquo;C&rsquo; are letters.</p
+  >‘A’, ‘B’, and ‘C’ are letters.</p
   ><p
-  >&lsquo;Oak,&rsquo; &lsquo;elm,&rsquo; and &lsquo;beech&rsquo; are names of trees. So is &lsquo;pine.&rsquo;</p
+  >‘Oak,’ ‘elm,’ and ‘beech’ are names of trees. So is ‘pine.’</p
   ><p
-  >&lsquo;He said, &ldquo;I want to go.&rdquo;&rsquo; Were you alive in the 70&rsquo;s?</p
+  >‘He said, “I want to go.”’ Were you alive in the 70’s?</p
   ><p
-  >Here is some quoted &lsquo;<code
+  >Here is some quoted ‘<code
     >code</code
-    >&rsquo; and a &ldquo;<a href="http://example.com/?foo=1&amp;bar=2"
+    >’ and a “<a href="http://example.com/?foo=1&amp;bar=2"
     >quoted link</a
-    >&rdquo;.</p
+    >”.</p
   ><p
-  >Some dashes: one&mdash;two &mdash; three&mdash;four &mdash; five.</p
+  >Some dashes: one—two — three—four — five.</p
   ><p
-  >Dashes between numbers: 5&ndash;7, 255&ndash;66, 1987&ndash;1999.</p
+  >Dashes between numbers: 5–7, 255–66, 1987–1999.</p
   ><p
-  >Ellipses&hellip;and&hellip;and&hellip;.</p
+  >Ellipses…and…and….</p
   ><hr
    /></div
 ><div id="latex"
@@ -855,7 +855,7 @@ Blah
 	></span
       >-Tree</li
     ><li
-    >Here&rsquo;s some display math: <span class="math"
+    >Here’s some display math: <span class="math"
       >\frac{<em
 	>d</em
 	>}{<em
@@ -883,7 +883,7 @@ Blah
 	>}</span
       ></li
     ><li
-    >Here&rsquo;s one that has a line break in it: <span class="math"
+    >Here’s one that has a line break in it: <span class="math"
       >α+ω × <em
 	>x</em
 	><sup
@@ -892,7 +892,7 @@ Blah
       >.</li
     ></ul
   ><p
-  >These shouldn&rsquo;t be math:</p
+  >These shouldn’t be math:</p
   ><ul
   ><li
     >To get the famous equation, write <code
@@ -901,7 +901,7 @@ Blah
     ><li
     >$22,000 is a <em
       >lot</em
-      > of money. So is $34,000. (It worked if &ldquo;lot&rdquo; is emphasized.)</li
+      > of money. So is $34,000. (It worked if “lot” is emphasized.)</li
     ><li
     >Shoes ($20) and socks ($5).</li
     ><li
@@ -912,7 +912,7 @@ Blah
       > 23$.</li
     ></ul
   ><p
-  >Here&rsquo;s a LaTeX table:</p
+  >Here’s a LaTeX table:</p
   ><p
   ></p
   ><hr
@@ -1083,19 +1083,19 @@ document.write('<a h'+'ref'+'="ma'+'ilto'+':'+e+'">'+'Email link'+'<\/'+'a'+'>')
   ><h2
     >With ampersands</h2
     ><p
-    >Here&rsquo;s a <a href="http://example.com/?foo=1&amp;bar=2"
+    >Here’s a <a href="http://example.com/?foo=1&amp;bar=2"
       >link with an ampersand in the URL</a
       >.</p
     ><p
-    >Here&rsquo;s a link with an amersand in the link text: <a href="http://att.com/" title="AT&amp;T"
+    >Here’s a link with an amersand in the link text: <a href="http://att.com/" title="AT&amp;T"
       >AT&amp;T</a
       >.</p
     ><p
-    >Here&rsquo;s an <a href="/script?foo=1&amp;bar=2"
+    >Here’s an <a href="/script?foo=1&amp;bar=2"
       >inline link</a
       >.</p
     ><p
-    >Here&rsquo;s an <a href="/script?foo=1&amp;bar=2"
+    >Here’s an <a href="/script?foo=1&amp;bar=2"
       >inline link in pointy braces</a
       >.</p
     ></div
@@ -1155,7 +1155,7 @@ document.write('<a h'+'ref'+'="ma'+'ilto'+':'+e+'">'+'<code>'+e+'</code>'+'<\/'+
 ><h1
   >Images</h1
   ><p
-  >From &ldquo;Voyage dans la Lune&rdquo; by Georges Melies (1902):</p
+  >From “Voyage dans la Lune” by Georges Melies (1902):</p
   ><p
   ><img src="lalune.jpg" title="Voyage dans la Lune" alt="lalune"
      /></p
@@ -1207,11 +1207,11 @@ document.write('<a h'+'ref'+'="ma'+'ilto'+':'+e+'">'+'<code>'+e+'</code>'+'<\/'+
    /><ol
   ><li id="fn1"
     ><p
-      >Here is the footnote. It can go anywhere after the footnote reference. It need not be placed at the end of the document. <a href="#fnref1" class="footnoteBackLink" title="Jump back to footnote 1">&#8617;</a></p
+      >Here is the footnote. It can go anywhere after the footnote reference. It need not be placed at the end of the document. <a href="#fnref1" class="footnoteBackLink" title="Jump back to footnote 1">↩</a></p
       ></li
     ><li id="fn2"
     ><p
-      >Here&rsquo;s the long note. This one contains multiple blocks.</p
+      >Here’s the long note. This one contains multiple blocks.</p
       ><p
       >Subsequent blocks are indented to show that they belong to the footnote (as with list items).</p
       ><pre
@@ -1220,7 +1220,7 @@ document.write('<a h'+'ref'+'="ma'+'ilto'+':'+e+'">'+'<code>'+e+'</code>'+'<\/'+
 </code
 	></pre
       ><p
-      >If you want, you can indent every line, but you can also be lazy and just indent the first line of each block. <a href="#fnref2" class="footnoteBackLink" title="Jump back to footnote 2">&#8617;</a></p
+      >If you want, you can indent every line, but you can also be lazy and just indent the first line of each block. <a href="#fnref2" class="footnoteBackLink" title="Jump back to footnote 2">↩</a></p
       ></li
     ><li id="fn3"
     ><p
@@ -1230,15 +1230,15 @@ document.write('<a h'+'ref'+'="ma'+'ilto'+':'+e+'">'+'<code>'+e+'</code>'+'<\/'+
 	>links</a
 	> and <code
 	>]</code
-	> verbatim characters, as well as [bracketed text]. <a href="#fnref3" class="footnoteBackLink" title="Jump back to footnote 3">&#8617;</a></p
+	> verbatim characters, as well as [bracketed text]. <a href="#fnref3" class="footnoteBackLink" title="Jump back to footnote 3">↩</a></p
       ></li
     ><li id="fn4"
     ><p
-      >In quote. <a href="#fnref4" class="footnoteBackLink" title="Jump back to footnote 4">&#8617;</a></p
+      >In quote. <a href="#fnref4" class="footnoteBackLink" title="Jump back to footnote 4">↩</a></p
       ></li
     ><li id="fn5"
     ><p
-      >In list. <a href="#fnref5" class="footnoteBackLink" title="Jump back to footnote 5">&#8617;</a></p
+      >In list. <a href="#fnref5" class="footnoteBackLink" title="Jump back to footnote 5">↩</a></p
       ></li
     ></ol
   ></div