2006-12-20 20:54:23 +00:00
|
|
|
{-
|
|
|
|
Copyright (C) 2006 John MacFarlane <jgm at berkeley dot edu>
|
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
-}
|
|
|
|
|
2006-12-20 06:50:14 +00:00
|
|
|
{- |
|
|
|
|
Module : Text.Pandoc.Writers.HTML
|
|
|
|
Copyright : Copyright (C) 2006 John MacFarlane
|
|
|
|
License : GNU GPL, version 2 or above
|
|
|
|
|
|
|
|
Maintainer : John MacFarlane <jgm at berkeley dot edu>
|
2006-12-20 20:20:10 +00:00
|
|
|
Stability : alpha
|
2006-12-20 06:50:14 +00:00
|
|
|
Portability : portable
|
|
|
|
|
|
|
|
Conversion of 'Pandoc' documents to HTML.
|
|
|
|
-}
|
2006-10-17 14:22:29 +00:00
|
|
|
module Text.Pandoc.Writers.HTML (
|
|
|
|
writeHtml
|
|
|
|
) where
|
|
|
|
import Text.Pandoc.Definition
|
|
|
|
import Text.Pandoc.Shared
|
|
|
|
import Text.Html ( stringToHtmlString )
|
2006-12-20 18:16:07 +00:00
|
|
|
import Text.Regex ( mkRegex, matchRegex )
|
2006-10-17 14:22:29 +00:00
|
|
|
import Numeric ( showHex )
|
2006-12-20 18:16:07 +00:00
|
|
|
import Data.Char ( ord, toLower )
|
2006-12-19 23:13:03 +00:00
|
|
|
import Data.List ( isPrefixOf, partition )
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
-- | Convert Pandoc document to string in HTML format.
|
|
|
|
writeHtml :: WriterOptions -> Pandoc -> String
|
|
|
|
writeHtml options (Pandoc (Meta title authors date) blocks) =
|
2006-12-20 06:50:14 +00:00
|
|
|
let titlePrefix = writerTitlePrefix options in
|
|
|
|
let topTitle = if not (null titlePrefix)
|
|
|
|
then [Str titlePrefix] ++ (if not (null title)
|
|
|
|
then [Str " - "] ++ title
|
|
|
|
else [])
|
|
|
|
else title in
|
|
|
|
let head = if (writerStandalone options)
|
|
|
|
then htmlHeader options (Meta topTitle authors date)
|
|
|
|
else ""
|
|
|
|
titleBlocks = if (writerStandalone options) && (not (null title)) &&
|
|
|
|
(not (writerS5 options))
|
|
|
|
then [RawHtml "<h1 class=\"title\">", Plain title,
|
|
|
|
RawHtml "</h1>\n"]
|
|
|
|
else []
|
|
|
|
foot = if (writerStandalone options) then "</body>\n</html>\n" else ""
|
|
|
|
blocks' = replaceReferenceLinks (titleBlocks ++ blocks)
|
|
|
|
(noteBlocks, blocks'') = partition isNoteBlock blocks'
|
|
|
|
body = (writerIncludeBefore options) ++
|
|
|
|
concatMap (blockToHtml options) blocks'' ++
|
|
|
|
footnoteSection options noteBlocks ++
|
|
|
|
(writerIncludeAfter options) in
|
|
|
|
head ++ body ++ foot
|
2006-10-17 14:22:29 +00:00
|
|
|
|
2006-12-20 06:50:14 +00:00
|
|
|
-- | Convert list of Note blocks to a footnote <div>.
|
|
|
|
-- Assumes notes are sorted.
|
2006-12-19 23:13:03 +00:00
|
|
|
footnoteSection :: WriterOptions -> [Block] -> String
|
|
|
|
footnoteSection options notes =
|
2006-12-20 06:50:14 +00:00
|
|
|
if null notes
|
|
|
|
then ""
|
|
|
|
else "<div class=\"footnotes\">\n<hr />\n<ol>\n" ++
|
|
|
|
concatMap (blockToHtml options) notes ++
|
|
|
|
"</ol>\n</div>\n"
|
2006-12-19 23:13:03 +00:00
|
|
|
|
2006-10-17 14:22:29 +00:00
|
|
|
-- | Obfuscate a "mailto:" link using Javascript.
|
|
|
|
obfuscateLink :: WriterOptions -> [Inline] -> String -> String
|
|
|
|
obfuscateLink options text src =
|
2006-12-20 18:16:07 +00:00
|
|
|
let emailRegex = mkRegex "mailto:*([^@]*)@(.*)"
|
|
|
|
text' = inlineListToHtml options text
|
|
|
|
src' = map toLower src in
|
|
|
|
case (matchRegex emailRegex src') of
|
|
|
|
(Just [name, domain]) ->
|
|
|
|
let domain' = gsub "\\." " dot " domain
|
|
|
|
at' = obfuscateChar '@' in
|
|
|
|
let linkText = if src' == ("mailto:" ++ text')
|
|
|
|
then "e"
|
|
|
|
else "'" ++ text' ++ "'"
|
|
|
|
altText = if src' == ("mailto:" ++ text')
|
|
|
|
then name ++ " at " ++ domain'
|
|
|
|
else text' ++ " (" ++ name ++ " at " ++
|
|
|
|
domain' ++ ")" in
|
|
|
|
"<script type=\"text/javascript\">\n<!--\nh='" ++
|
|
|
|
obfuscateString domain ++ "';a='" ++ at' ++ "';n='" ++
|
|
|
|
obfuscateString name ++ "';e=n+a+h;\n" ++
|
|
|
|
"document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\">'+" ++
|
|
|
|
linkText ++ "+'<\\/'+'a'+'>');\n// -->\n</script><noscript>" ++
|
|
|
|
obfuscateString altText ++ "</noscript>"
|
|
|
|
_ -> "<a href=\"" ++ src ++ "\">" ++ text' ++ "</a>" -- malformed email
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
-- | Obfuscate character as entity.
|
|
|
|
obfuscateChar :: Char -> String
|
2006-12-20 06:50:14 +00:00
|
|
|
obfuscateChar char =
|
|
|
|
let num = ord char in
|
|
|
|
let numstr = if even num then (show num) else ("x" ++ (showHex num "")) in
|
|
|
|
"&#" ++ numstr ++ ";"
|
2006-10-17 14:22:29 +00:00
|
|
|
|
2006-12-20 18:16:07 +00:00
|
|
|
-- | Obfuscate string using entities.
|
|
|
|
obfuscateString :: String -> String
|
|
|
|
obfuscateString = concatMap obfuscateChar
|
|
|
|
|
2006-10-17 14:22:29 +00:00
|
|
|
-- | Escape string, preserving character entities and quote.
|
|
|
|
stringToHtml :: String -> String
|
2006-12-20 06:50:14 +00:00
|
|
|
stringToHtml str = escapePreservingRegex stringToHtmlString
|
|
|
|
(mkRegex "\"|(&[[:alnum:]]*;)") str
|
2006-10-17 14:22:29 +00:00
|
|
|
|
2006-12-18 22:02:39 +00:00
|
|
|
-- | Escape string as in 'stringToHtml' but add smart typography filter.
|
2006-10-17 14:22:29 +00:00
|
|
|
stringToSmartHtml :: String -> String
|
|
|
|
stringToSmartHtml =
|
2006-12-20 06:50:14 +00:00
|
|
|
let escapeDoubleQuotes =
|
|
|
|
gsub "(\"|")" "”" . -- rest are right quotes
|
|
|
|
gsub "(\"|")(&r[sd]quo;)" "”\\2" .
|
|
|
|
-- never left quo before right quo
|
|
|
|
gsub "(&l[sd]quo;)(\"|")" "\\2“" .
|
|
|
|
-- never right quo after left quo
|
|
|
|
gsub "([ \t])(\"|")" "\\1“" .
|
|
|
|
-- never right quo after space
|
|
|
|
gsub "(\"|")([^,.;:!?^) \t-])" "“\\2" . -- "word left
|
|
|
|
gsub "(\"|")('|`|‘)" "”’" .
|
|
|
|
-- right if it got through last filter
|
|
|
|
gsub "(\"|")('|`|‘)([^,.;:!?^) \t-])" "“‘\\3" .
|
|
|
|
-- "'word left
|
|
|
|
gsub "``" "“" .
|
|
|
|
gsub "''" "”"
|
|
|
|
escapeSingleQuotes =
|
|
|
|
gsub "'" "’" . -- otherwise right
|
|
|
|
gsub "'(&r[sd]quo;)" "’\\1" . -- never left quo before right quo
|
|
|
|
gsub "(&l[sd]quo;)'" "\\1‘" . -- never right quo after left quo
|
|
|
|
gsub "([ \t])'" "\\1‘" . -- never right quo after space
|
|
|
|
gsub "`" "‘" . -- ` is left
|
|
|
|
gsub "([^,.;:!?^) \t-])'" "\\1’" . -- word' right
|
|
|
|
gsub "^('|`)([^,.;:!?^) \t-])" "‘\\2" . -- 'word left
|
|
|
|
gsub "('|`)(\"|"|“|``)" "‘“" . -- '"word left
|
|
|
|
gsub "([^,.;:!?^) \t-])'(s|S)" "\\1’\\2" . -- possessive
|
|
|
|
gsub "([[:space:]])'([^,.;:!?^) \t-])" "\\1‘\\2" . -- 'word left
|
|
|
|
gsub "'([0-9][0-9](s|S))" "’\\1" -- '80s - decade abbrevs.
|
|
|
|
escapeDashes =
|
|
|
|
gsub " ?-- ?" "—" .
|
|
|
|
gsub " ?--- ?" "—" .
|
|
|
|
gsub "([0-9])--?([0-9])" "\\1–\\2"
|
|
|
|
escapeEllipses = gsub "\\.\\.\\.|\\. \\. \\." "…" in
|
|
|
|
escapeSingleQuotes . escapeDoubleQuotes . escapeDashes .
|
|
|
|
escapeEllipses . stringToHtml
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
-- | Escape code string as needed for HTML.
|
|
|
|
codeStringToHtml :: String -> String
|
|
|
|
codeStringToHtml [] = []
|
|
|
|
codeStringToHtml (x:xs) = case x of
|
2006-12-20 06:50:14 +00:00
|
|
|
'&' -> "&" ++ codeStringToHtml xs
|
|
|
|
'<' -> "<" ++ codeStringToHtml xs
|
|
|
|
_ -> x:(codeStringToHtml xs)
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
-- | Escape string to HTML appropriate for attributes
|
|
|
|
attributeStringToHtml :: String -> String
|
|
|
|
attributeStringToHtml = gsub "\"" """
|
|
|
|
|
|
|
|
-- | Returns an HTML header with appropriate bibliographic information.
|
|
|
|
htmlHeader :: WriterOptions -> Meta -> String
|
|
|
|
htmlHeader options (Meta title authors date) =
|
2006-12-20 06:50:14 +00:00
|
|
|
let titletext = "<title>" ++ (inlineListToHtml options title) ++
|
|
|
|
"</title>\n"
|
|
|
|
authortext = if (null authors)
|
|
|
|
then ""
|
|
|
|
else "<meta name=\"author\" content=\"" ++
|
|
|
|
(joinWithSep ", " (map stringToHtml authors)) ++
|
|
|
|
"\" />\n"
|
|
|
|
datetext = if (date == "")
|
|
|
|
then ""
|
|
|
|
else "<meta name=\"date\" content=\"" ++
|
|
|
|
(stringToHtml date) ++ "\" />\n" in
|
|
|
|
(writerHeader options) ++ authortext ++ datetext ++ titletext ++
|
|
|
|
"</head>\n<body>\n"
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
-- | Convert Pandoc block element to HTML.
|
|
|
|
blockToHtml :: WriterOptions -> Block -> String
|
|
|
|
blockToHtml options Blank = "\n"
|
|
|
|
blockToHtml options Null = ""
|
|
|
|
blockToHtml options (Plain lst) = inlineListToHtml options lst
|
|
|
|
blockToHtml options (Para lst) = "<p>" ++ (inlineListToHtml options lst) ++ "</p>\n"
|
|
|
|
blockToHtml options (BlockQuote blocks) =
|
2006-12-20 06:50:14 +00:00
|
|
|
if (writerS5 options)
|
|
|
|
then -- in S5, treat list in blockquote specially
|
|
|
|
-- if default is incremental, make it nonincremental;
|
|
|
|
-- otherwise incremental
|
|
|
|
let inc = not (writerIncremental options) in
|
|
|
|
case blocks of
|
|
|
|
[BulletList lst] -> blockToHtml (options {writerIncremental =
|
|
|
|
inc}) (BulletList lst)
|
|
|
|
[OrderedList lst] -> blockToHtml (options {writerIncremental =
|
|
|
|
inc}) (OrderedList lst)
|
|
|
|
otherwise -> "<blockquote>\n" ++
|
|
|
|
(concatMap (blockToHtml options) blocks) ++
|
|
|
|
"</blockquote>\n"
|
|
|
|
else "<blockquote>\n" ++ (concatMap (blockToHtml options) blocks) ++
|
|
|
|
"</blockquote>\n"
|
2006-10-17 14:22:29 +00:00
|
|
|
blockToHtml options (Note ref lst) =
|
2006-12-20 06:50:14 +00:00
|
|
|
let contents = (concatMap (blockToHtml options) lst) in
|
|
|
|
"<li id=\"fn" ++ ref ++ "\">" ++ contents ++ " <a href=\"#fnref" ++ ref ++
|
|
|
|
"\" class=\"footnoteBacklink\" title=\"Jump back to footnote " ++ ref ++
|
|
|
|
"\">↩</a></li>\n"
|
2006-10-17 14:22:29 +00:00
|
|
|
blockToHtml options (Key _ _) = ""
|
2006-12-20 06:50:14 +00:00
|
|
|
blockToHtml options (CodeBlock str) =
|
|
|
|
"<pre><code>" ++ (codeStringToHtml str) ++ "\n</code></pre>\n"
|
2006-10-17 14:22:29 +00:00
|
|
|
blockToHtml options (RawHtml str) = str
|
|
|
|
blockToHtml options (BulletList lst) =
|
2006-12-20 06:50:14 +00:00
|
|
|
let attribs = if (writerIncremental options)
|
|
|
|
then " class=\"incremental\""
|
|
|
|
else "" in
|
|
|
|
"<ul" ++ attribs ++ ">\n" ++ (concatMap (listItemToHtml options) lst) ++
|
|
|
|
"</ul>\n"
|
2006-10-17 14:22:29 +00:00
|
|
|
blockToHtml options (OrderedList lst) =
|
2006-12-20 06:50:14 +00:00
|
|
|
let attribs = if (writerIncremental options)
|
|
|
|
then " class=\"incremental\""
|
|
|
|
else "" in
|
|
|
|
"<ol" ++ attribs ++ ">\n" ++ (concatMap (listItemToHtml options) lst) ++
|
|
|
|
"</ol>\n"
|
2006-10-17 14:22:29 +00:00
|
|
|
blockToHtml options HorizontalRule = "<hr />\n"
|
2006-12-20 00:25:54 +00:00
|
|
|
blockToHtml options (Header level lst) =
|
2006-12-20 06:50:14 +00:00
|
|
|
let contents = inlineListToHtml options lst in
|
|
|
|
if ((level > 0) && (level <= 6))
|
2006-12-29 08:04:39 +00:00
|
|
|
then "<h" ++ (show level) ++ ">" ++ contents ++
|
2006-12-20 06:50:14 +00:00
|
|
|
"</h" ++ (show level) ++ ">\n"
|
|
|
|
else "<p>" ++ contents ++ "</p>\n"
|
|
|
|
listItemToHtml options list =
|
|
|
|
"<li>" ++ (concatMap (blockToHtml options) list) ++ "</li>\n"
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
-- | Convert list of Pandoc inline elements to HTML.
|
|
|
|
inlineListToHtml :: WriterOptions -> [Inline] -> String
|
|
|
|
inlineListToHtml options lst =
|
2006-12-20 06:50:14 +00:00
|
|
|
-- consolidate adjacent Str and Space elements for more intelligent
|
|
|
|
-- smart typography filtering
|
|
|
|
let lst' = consolidateList lst in
|
|
|
|
concatMap (inlineToHtml options) lst'
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
-- | Convert Pandoc inline element to HTML.
|
|
|
|
inlineToHtml :: WriterOptions -> Inline -> String
|
2006-12-20 06:50:14 +00:00
|
|
|
inlineToHtml options (Emph lst) =
|
|
|
|
"<em>" ++ (inlineListToHtml options lst) ++ "</em>"
|
|
|
|
inlineToHtml options (Strong lst) =
|
|
|
|
"<strong>" ++ (inlineListToHtml options lst) ++ "</strong>"
|
|
|
|
inlineToHtml options (Code str) =
|
|
|
|
"<code>" ++ (codeStringToHtml str) ++ "</code>"
|
|
|
|
inlineToHtml options (Str str) =
|
|
|
|
if (writerSmart options) then stringToSmartHtml str else stringToHtml str
|
2006-10-17 14:22:29 +00:00
|
|
|
inlineToHtml options (TeX str) = (codeStringToHtml str)
|
|
|
|
inlineToHtml options (HtmlInline str) = str
|
|
|
|
inlineToHtml options (LineBreak) = "<br />\n"
|
|
|
|
inlineToHtml options Space = " "
|
|
|
|
inlineToHtml options (Link text (Src src tit)) =
|
2006-12-20 06:50:14 +00:00
|
|
|
let title = attributeStringToHtml tit in
|
|
|
|
if (isPrefixOf "mailto:" src)
|
|
|
|
then obfuscateLink options text src
|
|
|
|
else "<a href=\"" ++ (codeStringToHtml src) ++ "\"" ++
|
|
|
|
(if tit /= "" then " title=\"" ++ title ++ "\">" else ">") ++
|
|
|
|
(inlineListToHtml options text) ++ "</a>"
|
|
|
|
inlineToHtml options (Link text (Ref [])) =
|
|
|
|
"[" ++ (inlineListToHtml options text) ++ "]"
|
|
|
|
inlineToHtml options (Link text (Ref ref)) =
|
|
|
|
"[" ++ (inlineListToHtml options text) ++ "][" ++
|
|
|
|
(inlineListToHtml options ref) ++ "]"
|
|
|
|
-- this is what markdown does, for better or worse
|
2006-10-17 14:22:29 +00:00
|
|
|
inlineToHtml options (Image alt (Src source tit)) =
|
2006-12-20 06:50:14 +00:00
|
|
|
let title = attributeStringToHtml tit
|
|
|
|
alternate = inlineListToHtml options alt in
|
|
|
|
"<img src=\"" ++ source ++ "\"" ++
|
|
|
|
(if tit /= "" then " title=\"" ++ title ++ "\"" else "") ++
|
|
|
|
(if alternate /= "" then " alt=\"" ++ alternate ++ "\"" else "") ++ ">"
|
2006-10-17 14:22:29 +00:00
|
|
|
inlineToHtml options (Image alternate (Ref [])) =
|
2006-12-20 06:50:14 +00:00
|
|
|
"![" ++ (inlineListToHtml options alternate) ++ "]"
|
2006-10-17 14:22:29 +00:00
|
|
|
inlineToHtml options (Image alternate (Ref ref)) =
|
2006-12-20 06:50:14 +00:00
|
|
|
"![" ++ (inlineListToHtml options alternate) ++ "][" ++
|
|
|
|
(inlineListToHtml options ref) ++ "]"
|
2006-10-17 14:22:29 +00:00
|
|
|
inlineToHtml options (NoteRef ref) =
|
2006-12-20 06:50:14 +00:00
|
|
|
"<sup class=\"footnoteRef\" id=\"fnref" ++ ref ++ "\"><a href=\"#fn" ++
|
|
|
|
ref ++ "\">" ++ ref ++ "</a></sup>"
|