pandoc/src/Text/Pandoc/Writers/HTML.hs
fiddlosopher 5cf769b1cd Modified the HTML writer to add invisible anchors to each section
heading.  The anchors are derived form the text of the section
heading as described in README.  This makes it easy to insert
links that jump from one part of a document to another:
for example, '[back to the Introduction](#Introduction)'.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@246 788f1e2b-df1e-0410-8736-df70ead52e1b
2006-12-20 00:25:54 +00:00

212 lines
11 KiB
Haskell

-- | Converts Pandoc to HTML.
module Text.Pandoc.Writers.HTML (
writeHtml
) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Html ( stringToHtmlString )
import Text.Regex ( mkRegex )
import Numeric ( showHex )
import Char ( ord )
import Data.List ( isPrefixOf, partition )
-- | Convert Pandoc document to string in HTML format.
writeHtml :: WriterOptions -> Pandoc -> String
writeHtml options (Pandoc (Meta title authors date) blocks) =
let titlePrefix = writerTitlePrefix options in
let topTitle = if not (null titlePrefix) then
[Str titlePrefix] ++ (if not (null title) then [Str " - "] ++ title else [])
else
title in
let head = if (writerStandalone options) then
htmlHeader options (Meta topTitle authors date)
else
""
titleBlocks = if (writerStandalone options) && (not (null title)) &&
(not (writerS5 options)) then
[RawHtml "<h1 class=\"title\">", Plain title, RawHtml "</h1>\n"]
else
[]
foot = if (writerStandalone options) then "</body>\n</html>\n" else ""
blocks' = replaceReferenceLinks (titleBlocks ++ blocks)
(noteBlocks, blocks'') = partition isNoteBlock blocks'
body = (writerIncludeBefore options) ++
concatMap (blockToHtml options) blocks'' ++
footnoteSection options noteBlocks ++
(writerIncludeAfter options) in
head ++ body ++ foot
-- | Convert list of Note blocks to a footnote <div>. Assumes notes are sorted.
footnoteSection :: WriterOptions -> [Block] -> String
footnoteSection options notes =
if null notes
then ""
else "<div class=\"footnotes\">\n<hr />\n<ol>\n" ++
concatMap (blockToHtml options) notes ++
"</ol>\n</div>\n"
-- | Obfuscate a "mailto:" link using Javascript.
obfuscateLink :: WriterOptions -> [Inline] -> String -> String
obfuscateLink options text src =
let text' = inlineListToHtml options text in
let linkText = if src == ("mailto:" ++ text') then "e" else "'" ++ text' ++ "'"
altText = if src == ("mailto:" ++ text') then "\\1 [at] \\2" else text' ++ " (\\1 [at] \\2)" in
gsub "mailto:([^@]*)@(.*)" ("<script type=\"text/javascript\">h='\\2';n='\\1';e=n+'@'+h;document.write('<a href=\"mailto:'+e+'\">'+" ++ linkText ++ "+'<\\/a>');</script><noscript>" ++ altText ++ "</noscript>") src
-- | Obfuscate character as entity.
obfuscateChar :: Char -> String
obfuscateChar char = let num = ord char in
let numstr = if even num then (show num) else ("x" ++ (showHex num "")) in
"&#" ++ numstr ++ ";"
-- | Escape string, preserving character entities and quote.
stringToHtml :: String -> String
stringToHtml str = escapePreservingRegex stringToHtmlString (mkRegex "\"|(&[[:alnum:]]*;)") str
-- | Escape string as in 'stringToHtml' but add smart typography filter.
stringToSmartHtml :: String -> String
stringToSmartHtml =
let escapeDoubleQuotes =
gsub "(\"|&quot;)" "&rdquo;" . -- rest are right quotes
gsub "(\"|&quot;)(&r[sd]quo;)" "&rdquo;\\2" . -- never left quo before right quo
gsub "(&l[sd]quo;)(\"|&quot;)" "\\2&ldquo;" . -- never right quo after left quo
gsub "([ \t])(\"|&quot;)" "\\1&ldquo;" . -- never right quo after space
gsub "(\"|&quot;)([^,.;:!?^) \t-])" "&ldquo;\\2" . -- "word left
gsub "(\"|&quot;)('|`|&lsquo;)" "&rdquo;&rsquo;" . -- right if it got through last filter
gsub "(\"|&quot;)('|`|&lsquo;)([^,.;:!?^) \t-])" "&ldquo;&lsquo;\\3" . -- "'word left
gsub "``" "&ldquo;" .
gsub "''" "&rdquo;"
escapeSingleQuotes =
gsub "'" "&rsquo;" . -- otherwise right
gsub "'(&r[sd]quo;)" "&rsquo;\\1" . -- never left quo before right quo
gsub "(&l[sd]quo;)'" "\\1&lsquo;" . -- never right quo after left quo
gsub "([ \t])'" "\\1&lsquo;" . -- never right quo after space
gsub "`" "&lsquo;" . -- ` is left
gsub "([^,.;:!?^) \t-])'" "\\1&rsquo;" . -- word' right
gsub "^('|`)([^,.;:!?^) \t-])" "&lsquo;\\2" . -- 'word left
gsub "('|`)(\"|&quot;|&ldquo;|``)" "&lsquo;&ldquo;" . -- '"word left
gsub "([^,.;:!?^) \t-])'(s|S)" "\\1&rsquo;\\2" . -- possessive
gsub "([[:space:]])'([^,.;:!?^) \t-])" "\\1&lsquo;\\2" . -- 'word left
gsub "'([0-9][0-9](s|S))" "&rsquo;\\1" -- '80s - decade abbrevs.
escapeDashes = gsub " ?-- ?" "&mdash;" .
gsub " ?--- ?" "&mdash;" .
gsub "([0-9])--?([0-9])" "\\1&ndash;\\2"
escapeEllipses = gsub "\\.\\.\\.|\\. \\. \\." "&hellip;" in
escapeSingleQuotes . escapeDoubleQuotes . escapeDashes . escapeEllipses . stringToHtml
-- | Escape code string as needed for HTML.
codeStringToHtml :: String -> String
codeStringToHtml [] = []
codeStringToHtml (x:xs) = case x of
'&' -> "&amp;" ++ codeStringToHtml xs
'<' -> "&lt;" ++ codeStringToHtml xs
_ -> x:(codeStringToHtml xs)
-- | Escape string to HTML appropriate for attributes
attributeStringToHtml :: String -> String
attributeStringToHtml = gsub "\"" "&quot;"
-- | Returns an HTML header with appropriate bibliographic information.
htmlHeader :: WriterOptions -> Meta -> String
htmlHeader options (Meta title authors date) =
let titletext = "<title>" ++ (inlineListToHtml options title) ++ "</title>\n"
authortext = if (null authors) then
""
else
"<meta name=\"author\" content=\"" ++
(joinWithSep ", " (map stringToHtml authors)) ++ "\" />\n"
datetext = if (date == "") then
""
else
"<meta name=\"date\" content=\"" ++ (stringToHtml date) ++ "\" />\n" in
(writerHeader options) ++ authortext ++ datetext ++ titletext ++ "</head>\n<body>\n"
-- | Convert Pandoc block element to HTML.
blockToHtml :: WriterOptions -> Block -> String
blockToHtml options Blank = "\n"
blockToHtml options Null = ""
blockToHtml options (Plain lst) = inlineListToHtml options lst
blockToHtml options (Para lst) = "<p>" ++ (inlineListToHtml options lst) ++ "</p>\n"
blockToHtml options (BlockQuote blocks) =
if (writerS5 options) then -- in S5, treat list in blockquote specially
-- if default is incremental, make it nonincremental; otherwise incremental
let inc = not (writerIncremental options) in
case blocks of
[BulletList lst] -> blockToHtml (options {writerIncremental = inc}) (BulletList lst)
[OrderedList lst] -> blockToHtml (options {writerIncremental = inc}) (OrderedList lst)
otherwise -> "<blockquote>\n" ++ (concatMap (blockToHtml options) blocks) ++
"</blockquote>\n"
else
"<blockquote>\n" ++ (concatMap (blockToHtml options) blocks) ++ "</blockquote>\n"
blockToHtml options (Note ref lst) =
let contents = (concatMap (blockToHtml options) lst) in
"<li id=\"fn" ++ ref ++ "\">" ++ contents ++ " <a href=\"#fnref" ++ ref ++
"\" class=\"footnoteBacklink\" title=\"Jump back to footnote " ++ ref ++
"\">&#8617;</a></li>\n"
blockToHtml options (Key _ _) = ""
blockToHtml options (CodeBlock str) = "<pre><code>" ++ (codeStringToHtml str) ++
"\n</code></pre>\n"
blockToHtml options (RawHtml str) = str
blockToHtml options (BulletList lst) =
let attribs = if (writerIncremental options) then " class=\"incremental\"" else "" in
"<ul" ++ attribs ++ ">\n" ++ (concatMap (listItemToHtml options) lst) ++ "</ul>\n"
blockToHtml options (OrderedList lst) =
let attribs = if (writerIncremental options) then " class=\"incremental\"" else "" in
"<ol" ++ attribs ++ ">\n" ++ (concatMap (listItemToHtml options) lst) ++ "</ol>\n"
blockToHtml options HorizontalRule = "<hr />\n"
blockToHtml options (Header level lst) =
let contents = inlineListToHtml options lst in
let simplify = gsub "<[^>]*>" "" . gsub " " "_" in
if ((level > 0) && (level <= 6))
then "<a id=\"" ++ simplify contents ++ "\"></a>\n" ++
"<h" ++ (show level) ++ ">" ++ contents ++
"</h" ++ (show level) ++ ">\n"
else "<p>" ++ contents ++ "</p>\n"
listItemToHtml options list = "<li>" ++ (concatMap (blockToHtml options) list) ++ "</li>\n"
-- | Convert list of Pandoc inline elements to HTML.
inlineListToHtml :: WriterOptions -> [Inline] -> String
inlineListToHtml options lst =
-- consolidate adjacent Str and Space elements for more intelligent
-- smart typography filtering
let lst' = consolidateList lst in
concatMap (inlineToHtml options) lst'
-- | Convert Pandoc inline element to HTML.
inlineToHtml :: WriterOptions -> Inline -> String
inlineToHtml options (Emph lst) = "<em>" ++ (inlineListToHtml options lst) ++ "</em>"
inlineToHtml options (Strong lst) = "<strong>" ++ (inlineListToHtml options lst) ++ "</strong>"
inlineToHtml options (Code str) = "<code>" ++ (codeStringToHtml str) ++ "</code>"
inlineToHtml options (Str str) = if (writerSmart options) then
stringToSmartHtml str
else
stringToHtml str
inlineToHtml options (TeX str) = (codeStringToHtml str)
inlineToHtml options (HtmlInline str) = str
inlineToHtml options (LineBreak) = "<br />\n"
inlineToHtml options Space = " "
inlineToHtml options (Link text (Src src tit)) =
let title = attributeStringToHtml tit in
if (isPrefixOf "mailto:" src) then
obfuscateLink options text src
else
"<a href=\"" ++ (codeStringToHtml src) ++ "\"" ++
(if tit /= "" then " title=\"" ++ title ++ "\">" else ">") ++
(inlineListToHtml options text) ++ "</a>"
inlineToHtml options (Link text (Ref [])) = "[" ++ (inlineListToHtml options text) ++ "]"
inlineToHtml options (Link text (Ref ref)) = "[" ++ (inlineListToHtml options text) ++ "][" ++
(inlineListToHtml options ref) ++ "]" -- this is what markdown does, for better or worse
inlineToHtml options (Image alt (Src source tit)) =
let title = attributeStringToHtml tit
alternate = inlineListToHtml options alt in
"<img src=\"" ++ source ++ "\"" ++
(if tit /= "" then " title=\"" ++ title ++ "\"" else "") ++
(if alternate /= "" then " alt=\"" ++ alternate ++ "\"" else "") ++ ">"
inlineToHtml options (Image alternate (Ref [])) =
"![" ++ (inlineListToHtml options alternate) ++ "]"
inlineToHtml options (Image alternate (Ref ref)) =
"![" ++ (inlineListToHtml options alternate) ++ "][" ++ (inlineListToHtml options ref) ++ "]"
inlineToHtml options (NoteRef ref) =
"<sup class=\"footnoteRef\" id=\"fnref" ++ ref ++ "\"><a href=\"#fn" ++ ref ++
"\">" ++ ref ++ "</a></sup>"