From 87c66d4623200a48eab6e550f73372aaf3608289 Mon Sep 17 00:00:00 2001 From: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> Date: Fri, 1 Jan 2010 04:11:48 +0000 Subject: [PATCH] Finished converting HTML writer to use unicode instead of entities. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1767 788f1e2b-df1e-0410-8736-df70ead52e1b --- src/Text/Pandoc/Writers/HTML.hs | 27 +++++++------- tests/writer.html | 66 ++++++++++++++++----------------- 2 files changed, 46 insertions(+), 47 deletions(-) diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 9a093ad96..c80a3b32f 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -60,9 +60,8 @@ renderFragment opts = if writerWrapText opts then renderHtmlFragment else showHtmlFragment --- | Slightly modified version of Text.XHtml's stringToHtml. --- Only uses numerical entities for 0xff and greater. --- Adds . +-- | Modified version of Text.XHtml's stringToHtml. +-- Use unicode characters wherever possible. stringToHtml :: String -> Html stringToHtml = primHtml . concatMap fixChar where @@ -248,7 +247,7 @@ obfuscateLink opts txt s = linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) +++ noscript (primHtml $ obfuscateString altText) _ -> error $ "Unknown obfuscation method: " ++ show meth - _ -> anchor ! [href s] $ primHtml txt -- malformed email + _ -> anchor ! [href s] $ stringToHtml txt -- malformed email -- | Obfuscate character as entity. obfuscateChar :: Char -> String @@ -410,11 +409,11 @@ inlineToHtml opts inline = case inline of (Str str) -> return $ stringToHtml str (Space) -> return $ stringToHtml " " - (LineBreak) -> return $ br - (EmDash) -> return $ primHtmlChar "mdash" - (EnDash) -> return $ primHtmlChar "ndash" - (Ellipses) -> return $ primHtmlChar "hellip" - (Apostrophe) -> return $ primHtmlChar "rsquo" + (LineBreak) -> return br + (EmDash) -> return $ stringToHtml "—" + (EnDash) -> return $ stringToHtml "–" + (Ellipses) -> return $ stringToHtml "…" + (Apostrophe) -> return $ stringToHtml "’" (Emph lst) -> inlineListToHtml opts lst >>= return . emphasize (Strong lst) -> inlineListToHtml opts lst >>= return . strong (Code str) -> return $ thecode << str @@ -426,10 +425,10 @@ inlineToHtml opts inline = (Subscript lst) -> inlineListToHtml opts lst >>= return . sub (Quoted quoteType lst) -> let (leftQuote, rightQuote) = case quoteType of - SingleQuote -> (primHtmlChar "lsquo", - primHtmlChar "rsquo") - DoubleQuote -> (primHtmlChar "ldquo", - primHtmlChar "rdquo") + SingleQuote -> (stringToHtml "‘", + stringToHtml "’") + DoubleQuote -> (stringToHtml "“", + stringToHtml "”") in do contents <- inlineListToHtml opts lst return $ leftQuote +++ contents +++ rightQuote (Math t str) -> @@ -502,7 +501,7 @@ blockListToNote opts ref blocks = -- that block. Otherwise, insert a new Plain block with the backlink. let backlink = [HtmlInline $ " <a href=\"#" ++ writerIdentifierPrefix opts ++ "fnref" ++ ref ++ "\" class=\"footnoteBackLink\"" ++ - " title=\"Jump back to footnote " ++ ref ++ "\">↩</a>"] + " title=\"Jump back to footnote " ++ ref ++ "\">↩</a>"] blocks' = if null blocks then [] else let lastBlock = last blocks diff --git a/tests/writer.html b/tests/writer.html index c67cfbd42..cd5a687e0 100644 --- a/tests/writer.html +++ b/tests/writer.html @@ -11,7 +11,7 @@ <body> <h1 class="title">Pandoc Test Suite</h1> <p ->This is a set of tests for pandoc. Most of them are adapted from John Gruber’s markdown test suite.</p +>This is a set of tests for pandoc. Most of them are adapted from John Gruber’s markdown test suite.</p ><hr /><div id="headers" ><h1 @@ -64,11 +64,11 @@ ><h1 >Paragraphs</h1 ><p - >Here’s a regular paragraph.</p + >Here’s a regular paragraph.</p ><p >In Markdown 1.0.0 and earlier. Version 8. This line turns into a list item. Because a hard-wrapped line in the middle of a paragraph looked like a list item.</p ><p - >Here’s one with a bullet. * criminey.</p + >Here’s one with a bullet. * criminey.</p ><p >There should be a hard line break<br />here.</p @@ -292,7 +292,7 @@ These should not be escaped: \$ \\ \> \[ \{ ><p >Item 1, graf one.</p ><p - >Item 1. graf two. The quick brown fox jumped over the lazy dog’s back.</p + >Item 1. graf two. The quick brown fox jumped over the lazy dog’s back.</p ></li ><li ><p @@ -320,7 +320,7 @@ These should not be escaped: \$ \\ \> \[ \{ ></li ></ul ><p - >Here’s another:</p + >Here’s another:</p ><ol style="list-style-type: decimal;" ><li >First</li @@ -634,7 +634,7 @@ These should not be escaped: \$ \\ \> \[ \{ <script type="text/javascript">document.write('This *should not* be interpreted as markdown');</script> <p - >Here’s a simple block:</p + >Here’s a simple block:</p ><div> foo</div> <p @@ -692,7 +692,7 @@ Blah </code ></pre ><p - >Hr’s:</p + >Hr’s:</p ><hr> <hr /> @@ -801,25 +801,25 @@ Blah ><h1 >Smart quotes, ellipses, dashes</h1 ><p - >“Hello,” said the spider. “‘Shelob’ is my name.”</p + >“Hello,” said the spider. “‘Shelob’ is my name.”</p ><p - >‘A’, ‘B’, and ‘C’ are letters.</p + >‘A’, ‘B’, and ‘C’ are letters.</p ><p - >‘Oak,’ ‘elm,’ and ‘beech’ are names of trees. So is ‘pine.’</p + >‘Oak,’ ‘elm,’ and ‘beech’ are names of trees. So is ‘pine.’</p ><p - >‘He said, “I want to go.”’ Were you alive in the 70’s?</p + >‘He said, “I want to go.”’ Were you alive in the 70’s?</p ><p - >Here is some quoted ‘<code + >Here is some quoted ‘<code >code</code - >’ and a “<a href="http://example.com/?foo=1&bar=2" + >’ and a “<a href="http://example.com/?foo=1&bar=2" >quoted link</a - >”.</p + >”.</p ><p - >Some dashes: one—two — three—four — five.</p + >Some dashes: one—two — three—four — five.</p ><p - >Dashes between numbers: 5–7, 255–66, 1987–1999.</p + >Dashes between numbers: 5–7, 255–66, 1987–1999.</p ><p - >Ellipses…and…and….</p + >Ellipses…and…and….</p ><hr /></div ><div id="latex" @@ -855,7 +855,7 @@ Blah ></span >-Tree</li ><li - >Here’s some display math: <span class="math" + >Here’s some display math: <span class="math" >\frac{<em >d</em >}{<em @@ -883,7 +883,7 @@ Blah >}</span ></li ><li - >Here’s one that has a line break in it: <span class="math" + >Here’s one that has a line break in it: <span class="math" >α+ω × <em >x</em ><sup @@ -892,7 +892,7 @@ Blah >.</li ></ul ><p - >These shouldn’t be math:</p + >These shouldn’t be math:</p ><ul ><li >To get the famous equation, write <code @@ -901,7 +901,7 @@ Blah ><li >$22,000 is a <em >lot</em - > of money. So is $34,000. (It worked if “lot” is emphasized.)</li + > of money. So is $34,000. (It worked if “lot” is emphasized.)</li ><li >Shoes ($20) and socks ($5).</li ><li @@ -912,7 +912,7 @@ Blah > 23$.</li ></ul ><p - >Here’s a LaTeX table:</p + >Here’s a LaTeX table:</p ><p ></p ><hr @@ -1083,19 +1083,19 @@ document.write('<a h'+'ref'+'="ma'+'ilto'+':'+e+'">'+'Email link'+'<\/'+'a'+'>') ><h2 >With ampersands</h2 ><p - >Here’s a <a href="http://example.com/?foo=1&bar=2" + >Here’s a <a href="http://example.com/?foo=1&bar=2" >link with an ampersand in the URL</a >.</p ><p - >Here’s a link with an amersand in the link text: <a href="http://att.com/" title="AT&T" + >Here’s a link with an amersand in the link text: <a href="http://att.com/" title="AT&T" >AT&T</a >.</p ><p - >Here’s an <a href="/script?foo=1&bar=2" + >Here’s an <a href="/script?foo=1&bar=2" >inline link</a >.</p ><p - >Here’s an <a href="/script?foo=1&bar=2" + >Here’s an <a href="/script?foo=1&bar=2" >inline link in pointy braces</a >.</p ></div @@ -1155,7 +1155,7 @@ document.write('<a h'+'ref'+'="ma'+'ilto'+':'+e+'">'+'<code>'+e+'</code>'+'<\/'+ ><h1 >Images</h1 ><p - >From “Voyage dans la Lune” by Georges Melies (1902):</p + >From “Voyage dans la Lune” by Georges Melies (1902):</p ><p ><img src="lalune.jpg" title="Voyage dans la Lune" alt="lalune" /></p @@ -1207,11 +1207,11 @@ document.write('<a h'+'ref'+'="ma'+'ilto'+':'+e+'">'+'<code>'+e+'</code>'+'<\/'+ /><ol ><li id="fn1" ><p - >Here is the footnote. It can go anywhere after the footnote reference. It need not be placed at the end of the document. <a href="#fnref1" class="footnoteBackLink" title="Jump back to footnote 1">↩</a></p + >Here is the footnote. It can go anywhere after the footnote reference. It need not be placed at the end of the document. <a href="#fnref1" class="footnoteBackLink" title="Jump back to footnote 1">↩</a></p ></li ><li id="fn2" ><p - >Here’s the long note. This one contains multiple blocks.</p + >Here’s the long note. This one contains multiple blocks.</p ><p >Subsequent blocks are indented to show that they belong to the footnote (as with list items).</p ><pre @@ -1220,7 +1220,7 @@ document.write('<a h'+'ref'+'="ma'+'ilto'+':'+e+'">'+'<code>'+e+'</code>'+'<\/'+ </code ></pre ><p - >If you want, you can indent every line, but you can also be lazy and just indent the first line of each block. <a href="#fnref2" class="footnoteBackLink" title="Jump back to footnote 2">↩</a></p + >If you want, you can indent every line, but you can also be lazy and just indent the first line of each block. <a href="#fnref2" class="footnoteBackLink" title="Jump back to footnote 2">↩</a></p ></li ><li id="fn3" ><p @@ -1230,15 +1230,15 @@ document.write('<a h'+'ref'+'="ma'+'ilto'+':'+e+'">'+'<code>'+e+'</code>'+'<\/'+ >links</a > and <code >]</code - > verbatim characters, as well as [bracketed text]. <a href="#fnref3" class="footnoteBackLink" title="Jump back to footnote 3">↩</a></p + > verbatim characters, as well as [bracketed text]. <a href="#fnref3" class="footnoteBackLink" title="Jump back to footnote 3">↩</a></p ></li ><li id="fn4" ><p - >In quote. <a href="#fnref4" class="footnoteBackLink" title="Jump back to footnote 4">↩</a></p + >In quote. <a href="#fnref4" class="footnoteBackLink" title="Jump back to footnote 4">↩</a></p ></li ><li id="fn5" ><p - >In list. <a href="#fnref5" class="footnoteBackLink" title="Jump back to footnote 5">↩</a></p + >In list. <a href="#fnref5" class="footnoteBackLink" title="Jump back to footnote 5">↩</a></p ></li ></ol ></div