Added --email-obfuscation option.

+ Added writer option for email obfuscation.
+ Implemented email obfuscation options in HTML writer.
+ Added option to option parser.
+ Documented in README and pandoc man page.
+ Resolves Issue #97.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1523 788f1e2b-df1e-0410-8736-df70ead52e1b
This commit is contained in:
fiddlosopher 2009-01-24 19:58:48 +00:00
parent 46a3b228fa
commit 2806aee9b2
5 changed files with 110 additions and 60 deletions

65
Main.hs
View file

@ -32,7 +32,7 @@ writers.
module Main where
import Text.Pandoc
import Text.Pandoc.ODT
import Text.Pandoc.Shared ( HTMLMathMethod (..), splitBy )
import Text.Pandoc.Shared ( HTMLMathMethod (..), splitBy, ObfuscationMethod (..) )
import Text.Pandoc.Highlighting ( languages )
import System.Environment ( getArgs, getProgName, getEnvironment )
import System.Exit ( exitWith, ExitCode (..) )
@ -156,6 +156,7 @@ data Opt = Opt
, optWrapText :: Bool -- ^ Wrap text
, optSanitizeHTML :: Bool -- ^ Sanitize HTML
, optPlugins :: [Pandoc -> IO Pandoc] -- ^ Plugins to apply
, optEmailObfuscation :: ObfuscationMethod
#ifdef _CITEPROC
, optBiblioFile :: String
, optBiblioFormat :: String
@ -191,6 +192,7 @@ defaultOpts = Opt
, optWrapText = True
, optSanitizeHTML = False
, optPlugins = []
, optEmailObfuscation = JavascriptObfuscation
#ifdef _CITEPROC
, optBiblioFile = []
, optBiblioFormat = []
@ -301,6 +303,19 @@ options =
(\opt -> return opt { optSanitizeHTML = True }))
"" -- "Sanitize HTML"
, Option "" ["email-obfuscation"]
(ReqArg
(\arg opt -> do
method <- case arg of
"references" -> return ReferenceObfuscation
"javascript" -> return JavascriptObfuscation
"none" -> return NoObfuscation
_ -> hPutStrLn stderr ("Error: Unknown obfuscation method: " ++ arg) >>
exitWith (ExitFailure 6)
return opt { optEmailObfuscation = method })
"none|javascript|references")
"" -- "Method for obfuscating email in HTML"
, Option "" ["toc", "table-of-contents"]
(NoArg
(\opt -> return opt { optTableOfContents = True }))
@ -533,10 +548,11 @@ main = do
, optWrapText = wrap
, optSanitizeHTML = sanitize
, optPlugins = plugins
, optEmailObfuscation = obfuscationMethod
#ifdef _CITEPROC
, optBiblioFile = biblioFile
, optBiblioFormat = biblioFormat
, optCslFile = cslFile
, optBiblioFile = biblioFile
, optBiblioFormat = biblioFormat
, optCslFile = cslFile
#endif
} = opts
@ -614,25 +630,28 @@ main = do
let header = (if (customHeader == "DEFAULT")
then defaultHeader
else customHeader) ++ csslink ++ includeHeader
let writerOptions = WriterOptions { writerStandalone = standalone',
writerHeader = header,
writerTitlePrefix = titlePrefix,
writerTabStop = tabStop,
writerTableOfContents = toc &&
(not strict) &&
writerName' /= "s5",
writerHTMLMathMethod = mathMethod,
writerS5 = (writerName' == "s5"),
writerIgnoreNotes = False,
writerIncremental = incremental,
writerNumberSections = numberSections,
writerIncludeBefore = includeBefore,
writerIncludeAfter = includeAfter,
writerStrictMarkdown = strict,
writerReferenceLinks = referenceLinks,
writerWrapText = wrap,
writerLiterateHaskell = "+lhs" `isSuffixOf` writerName' ||
lhsExtension [outputFile] }
let writerOptions = WriterOptions { writerStandalone = standalone',
writerHeader = header,
writerTitlePrefix = titlePrefix,
writerTabStop = tabStop,
writerTableOfContents = toc &&
(not strict) &&
writerName' /= "s5",
writerHTMLMathMethod = mathMethod,
writerS5 = (writerName' == "s5"),
writerIgnoreNotes = False,
writerIncremental = incremental,
writerNumberSections = numberSections,
writerIncludeBefore = includeBefore,
writerIncludeAfter = includeAfter,
writerStrictMarkdown = strict,
writerReferenceLinks = referenceLinks,
writerWrapText = wrap,
writerLiterateHaskell = "+lhs" `isSuffixOf` writerName' ||
lhsExtension [outputFile],
writerEmailObfuscation = if strict
then ReferenceObfuscation
else obfuscationMethod }
if isNonTextOutput writerName' && outputFile == "-"
then do hPutStrLn stderr ("Error: Cannot write " ++ writerName ++ " output to stdout.\n" ++

8
README
View file

@ -383,6 +383,14 @@ For further documentation, see the `pandoc(1)` man page.
are omitted. URIs in links and images are also checked against a
whitelist of URI schemes.
`--email-obfuscation`*=none|javascript|references*
: specifies a method for obfuscating `mailto:` links in HTML documents.
*none* leaves `mailto:` links as they are. *javascript* obfuscates
them using javascript. *references* obfuscates them by printing their
letters as decimal or hexadecimal character references. If `--strict`
is specified, *references* is used regardless of the presence
of this option.
`--dump-args`
: is intended to make it easier to create wrapper scripts that use
Pandoc. It causes Pandoc to dump information about the arguments

View file

@ -99,6 +99,7 @@ module Text.Pandoc.Shared (
isHeaderBlock,
-- * Writer options
HTMLMathMethod (..),
ObfuscationMethod (..),
WriterOptions (..),
defaultWriterOptions,
-- * File handling
@ -889,45 +890,53 @@ data HTMLMathMethod = PlainMath
| MimeTeX String -- url of mimetex.cgi
deriving (Show, Read, Eq)
-- | Methods for obfuscating email addresses in HTML.
data ObfuscationMethod = NoObfuscation
| ReferenceObfuscation
| JavascriptObfuscation
deriving (Show, Read, Eq)
-- | Options for writers
data WriterOptions = WriterOptions
{ writerStandalone :: Bool -- ^ Include header and footer
, writerHeader :: String -- ^ Header for the document
, writerTitlePrefix :: String -- ^ Prefix for HTML titles
, writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs
, writerTableOfContents :: Bool -- ^ Include table of contents
, writerS5 :: Bool -- ^ We're writing S5
, writerHTMLMathMethod :: HTMLMathMethod -- ^ How to print math in HTML
, writerIgnoreNotes :: Bool -- ^ Ignore footnotes (used in making toc)
, writerIncremental :: Bool -- ^ Incremental S5 lists
, writerNumberSections :: Bool -- ^ Number sections in LaTeX
, writerIncludeBefore :: String -- ^ String to include before the body
, writerIncludeAfter :: String -- ^ String to include after the body
, writerStrictMarkdown :: Bool -- ^ Use strict markdown syntax
, writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst
, writerWrapText :: Bool -- ^ Wrap text to line length
, writerLiterateHaskell :: Bool -- ^ Write as literate haskell
{ writerStandalone :: Bool -- ^ Include header and footer
, writerHeader :: String -- ^ Header for the document
, writerTitlePrefix :: String -- ^ Prefix for HTML titles
, writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs
, writerTableOfContents :: Bool -- ^ Include table of contents
, writerS5 :: Bool -- ^ We're writing S5
, writerHTMLMathMethod :: HTMLMathMethod -- ^ How to print math in HTML
, writerIgnoreNotes :: Bool -- ^ Ignore footnotes (used in making toc)
, writerIncremental :: Bool -- ^ Incremental S5 lists
, writerNumberSections :: Bool -- ^ Number sections in LaTeX
, writerIncludeBefore :: String -- ^ String to include before the body
, writerIncludeAfter :: String -- ^ String to include after the body
, writerStrictMarkdown :: Bool -- ^ Use strict markdown syntax
, writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst
, writerWrapText :: Bool -- ^ Wrap text to line length
, writerLiterateHaskell :: Bool -- ^ Write as literate haskell
, writerEmailObfuscation :: ObfuscationMethod -- ^ How to obfuscate emails
} deriving Show
-- | Default writer options.
defaultWriterOptions :: WriterOptions
defaultWriterOptions =
WriterOptions { writerStandalone = False
, writerHeader = ""
, writerTitlePrefix = ""
, writerTabStop = 4
, writerTableOfContents = False
, writerS5 = False
, writerHTMLMathMethod = PlainMath
, writerIgnoreNotes = False
, writerIncremental = False
, writerNumberSections = False
, writerIncludeBefore = ""
, writerIncludeAfter = ""
, writerStrictMarkdown = False
, writerReferenceLinks = False
, writerWrapText = True
, writerLiterateHaskell = False
WriterOptions { writerStandalone = False
, writerHeader = ""
, writerTitlePrefix = ""
, writerTabStop = 4
, writerTableOfContents = False
, writerS5 = False
, writerHTMLMathMethod = PlainMath
, writerIgnoreNotes = False
, writerIncremental = False
, writerNumberSections = False
, writerIncludeBefore = ""
, writerIncludeAfter = ""
, writerStrictMarkdown = False
, writerReferenceLinks = False
, writerWrapText = True
, writerLiterateHaskell = False
, writerEmailObfuscation = JavascriptObfuscation
}
--

View file

@ -193,10 +193,13 @@ parseMailto ('m':'a':'i':'l':'t':'o':':':addr) =
in Just (name', domain)
parseMailto _ = Nothing
-- | Obfuscate a "mailto:" link using Javascript.
-- | Obfuscate a "mailto:" link.
obfuscateLink :: WriterOptions -> String -> String -> Html
obfuscateLink opts txt s | writerEmailObfuscation opts == NoObfuscation =
anchor ! [href s] << txt
obfuscateLink opts txt s =
let s' = map toLower s
let meth = writerEmailObfuscation opts
s' = map toLower s
in case parseMailto s' of
(Just (name', domain)) ->
let domain' = substitute "." " dot " domain
@ -206,17 +209,20 @@ obfuscateLink opts txt s =
then ("'<code>'+e+'</code>'", name' ++ " at " ++ domain')
else ("'" ++ txt ++ "'", txt ++ " (" ++ name' ++ " at " ++
domain' ++ ")")
in if writerStrictMarkdown opts
then -- need to use primHtml or &'s are escaped to &amp; in URL
in case meth of
ReferenceObfuscation ->
-- need to use primHtml or &'s are escaped to &amp; in URL
primHtml $ "<a href=\"" ++ (obfuscateString s')
++ "\">" ++ (obfuscateString txt) ++ "</a>"
else (script ! [thetype "text/javascript"] $
JavascriptObfuscation ->
(script ! [thetype "text/javascript"] $
primHtml ("\n<!--\nh='" ++
obfuscateString domain ++ "';a='" ++ at' ++ "';n='" ++
obfuscateString name' ++ "';e=n+a+h;\n" ++
"document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\">'+" ++
linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) +++
noscript (primHtml $ obfuscateString altText)
_ -> error $ "Unknown obfuscation method: " ++ show meth
_ -> anchor ! [href s] $ primHtml txt -- malformed email
-- | Obfuscate character as entity.

View file

@ -146,6 +146,14 @@ to Pandoc. Or use `html2markdown`(1), a wrapper around `pandoc`.
are omitted. URIs in links and images are also checked against a
whitelist of URI schemes.
\--email-obfuscation=*none|javascript|references*
: Specify a method for obfuscating `mailto:` links in HTML documents.
*none* leaves `mailto:` links as they are. *javascript* obfuscates
them using javascript. *references* obfuscates them by printing their
letters as decimal or hexadecimal character references.
If `--strict` is specified, *references* is used regardless of the
presence of this option.
\--toc, \--table-of-contents
: Include an automatically generated table of contents (HTML, markdown,
RTF) or an instruction to create one (LaTeX, reStructuredText).