Added --email-obfuscation option.
+ Added writer option for email obfuscation. + Implemented email obfuscation options in HTML writer. + Added option to option parser. + Documented in README and pandoc man page. + Resolves Issue #97. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1523 788f1e2b-df1e-0410-8736-df70ead52e1b
This commit is contained in:
parent
46a3b228fa
commit
2806aee9b2
5 changed files with 110 additions and 60 deletions
65
Main.hs
65
Main.hs
|
@ -32,7 +32,7 @@ writers.
|
|||
module Main where
|
||||
import Text.Pandoc
|
||||
import Text.Pandoc.ODT
|
||||
import Text.Pandoc.Shared ( HTMLMathMethod (..), splitBy )
|
||||
import Text.Pandoc.Shared ( HTMLMathMethod (..), splitBy, ObfuscationMethod (..) )
|
||||
import Text.Pandoc.Highlighting ( languages )
|
||||
import System.Environment ( getArgs, getProgName, getEnvironment )
|
||||
import System.Exit ( exitWith, ExitCode (..) )
|
||||
|
@ -156,6 +156,7 @@ data Opt = Opt
|
|||
, optWrapText :: Bool -- ^ Wrap text
|
||||
, optSanitizeHTML :: Bool -- ^ Sanitize HTML
|
||||
, optPlugins :: [Pandoc -> IO Pandoc] -- ^ Plugins to apply
|
||||
, optEmailObfuscation :: ObfuscationMethod
|
||||
#ifdef _CITEPROC
|
||||
, optBiblioFile :: String
|
||||
, optBiblioFormat :: String
|
||||
|
@ -191,6 +192,7 @@ defaultOpts = Opt
|
|||
, optWrapText = True
|
||||
, optSanitizeHTML = False
|
||||
, optPlugins = []
|
||||
, optEmailObfuscation = JavascriptObfuscation
|
||||
#ifdef _CITEPROC
|
||||
, optBiblioFile = []
|
||||
, optBiblioFormat = []
|
||||
|
@ -301,6 +303,19 @@ options =
|
|||
(\opt -> return opt { optSanitizeHTML = True }))
|
||||
"" -- "Sanitize HTML"
|
||||
|
||||
, Option "" ["email-obfuscation"]
|
||||
(ReqArg
|
||||
(\arg opt -> do
|
||||
method <- case arg of
|
||||
"references" -> return ReferenceObfuscation
|
||||
"javascript" -> return JavascriptObfuscation
|
||||
"none" -> return NoObfuscation
|
||||
_ -> hPutStrLn stderr ("Error: Unknown obfuscation method: " ++ arg) >>
|
||||
exitWith (ExitFailure 6)
|
||||
return opt { optEmailObfuscation = method })
|
||||
"none|javascript|references")
|
||||
"" -- "Method for obfuscating email in HTML"
|
||||
|
||||
, Option "" ["toc", "table-of-contents"]
|
||||
(NoArg
|
||||
(\opt -> return opt { optTableOfContents = True }))
|
||||
|
@ -533,10 +548,11 @@ main = do
|
|||
, optWrapText = wrap
|
||||
, optSanitizeHTML = sanitize
|
||||
, optPlugins = plugins
|
||||
, optEmailObfuscation = obfuscationMethod
|
||||
#ifdef _CITEPROC
|
||||
, optBiblioFile = biblioFile
|
||||
, optBiblioFormat = biblioFormat
|
||||
, optCslFile = cslFile
|
||||
, optBiblioFile = biblioFile
|
||||
, optBiblioFormat = biblioFormat
|
||||
, optCslFile = cslFile
|
||||
#endif
|
||||
} = opts
|
||||
|
||||
|
@ -614,25 +630,28 @@ main = do
|
|||
let header = (if (customHeader == "DEFAULT")
|
||||
then defaultHeader
|
||||
else customHeader) ++ csslink ++ includeHeader
|
||||
let writerOptions = WriterOptions { writerStandalone = standalone',
|
||||
writerHeader = header,
|
||||
writerTitlePrefix = titlePrefix,
|
||||
writerTabStop = tabStop,
|
||||
writerTableOfContents = toc &&
|
||||
(not strict) &&
|
||||
writerName' /= "s5",
|
||||
writerHTMLMathMethod = mathMethod,
|
||||
writerS5 = (writerName' == "s5"),
|
||||
writerIgnoreNotes = False,
|
||||
writerIncremental = incremental,
|
||||
writerNumberSections = numberSections,
|
||||
writerIncludeBefore = includeBefore,
|
||||
writerIncludeAfter = includeAfter,
|
||||
writerStrictMarkdown = strict,
|
||||
writerReferenceLinks = referenceLinks,
|
||||
writerWrapText = wrap,
|
||||
writerLiterateHaskell = "+lhs" `isSuffixOf` writerName' ||
|
||||
lhsExtension [outputFile] }
|
||||
let writerOptions = WriterOptions { writerStandalone = standalone',
|
||||
writerHeader = header,
|
||||
writerTitlePrefix = titlePrefix,
|
||||
writerTabStop = tabStop,
|
||||
writerTableOfContents = toc &&
|
||||
(not strict) &&
|
||||
writerName' /= "s5",
|
||||
writerHTMLMathMethod = mathMethod,
|
||||
writerS5 = (writerName' == "s5"),
|
||||
writerIgnoreNotes = False,
|
||||
writerIncremental = incremental,
|
||||
writerNumberSections = numberSections,
|
||||
writerIncludeBefore = includeBefore,
|
||||
writerIncludeAfter = includeAfter,
|
||||
writerStrictMarkdown = strict,
|
||||
writerReferenceLinks = referenceLinks,
|
||||
writerWrapText = wrap,
|
||||
writerLiterateHaskell = "+lhs" `isSuffixOf` writerName' ||
|
||||
lhsExtension [outputFile],
|
||||
writerEmailObfuscation = if strict
|
||||
then ReferenceObfuscation
|
||||
else obfuscationMethod }
|
||||
|
||||
if isNonTextOutput writerName' && outputFile == "-"
|
||||
then do hPutStrLn stderr ("Error: Cannot write " ++ writerName ++ " output to stdout.\n" ++
|
||||
|
|
8
README
8
README
|
@ -383,6 +383,14 @@ For further documentation, see the `pandoc(1)` man page.
|
|||
are omitted. URIs in links and images are also checked against a
|
||||
whitelist of URI schemes.
|
||||
|
||||
`--email-obfuscation`*=none|javascript|references*
|
||||
: specifies a method for obfuscating `mailto:` links in HTML documents.
|
||||
*none* leaves `mailto:` links as they are. *javascript* obfuscates
|
||||
them using javascript. *references* obfuscates them by printing their
|
||||
letters as decimal or hexadecimal character references. If `--strict`
|
||||
is specified, *references* is used regardless of the presence
|
||||
of this option.
|
||||
|
||||
`--dump-args`
|
||||
: is intended to make it easier to create wrapper scripts that use
|
||||
Pandoc. It causes Pandoc to dump information about the arguments
|
||||
|
|
|
@ -99,6 +99,7 @@ module Text.Pandoc.Shared (
|
|||
isHeaderBlock,
|
||||
-- * Writer options
|
||||
HTMLMathMethod (..),
|
||||
ObfuscationMethod (..),
|
||||
WriterOptions (..),
|
||||
defaultWriterOptions,
|
||||
-- * File handling
|
||||
|
@ -889,45 +890,53 @@ data HTMLMathMethod = PlainMath
|
|||
| MimeTeX String -- url of mimetex.cgi
|
||||
deriving (Show, Read, Eq)
|
||||
|
||||
-- | Methods for obfuscating email addresses in HTML.
|
||||
data ObfuscationMethod = NoObfuscation
|
||||
| ReferenceObfuscation
|
||||
| JavascriptObfuscation
|
||||
deriving (Show, Read, Eq)
|
||||
|
||||
-- | Options for writers
|
||||
data WriterOptions = WriterOptions
|
||||
{ writerStandalone :: Bool -- ^ Include header and footer
|
||||
, writerHeader :: String -- ^ Header for the document
|
||||
, writerTitlePrefix :: String -- ^ Prefix for HTML titles
|
||||
, writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs
|
||||
, writerTableOfContents :: Bool -- ^ Include table of contents
|
||||
, writerS5 :: Bool -- ^ We're writing S5
|
||||
, writerHTMLMathMethod :: HTMLMathMethod -- ^ How to print math in HTML
|
||||
, writerIgnoreNotes :: Bool -- ^ Ignore footnotes (used in making toc)
|
||||
, writerIncremental :: Bool -- ^ Incremental S5 lists
|
||||
, writerNumberSections :: Bool -- ^ Number sections in LaTeX
|
||||
, writerIncludeBefore :: String -- ^ String to include before the body
|
||||
, writerIncludeAfter :: String -- ^ String to include after the body
|
||||
, writerStrictMarkdown :: Bool -- ^ Use strict markdown syntax
|
||||
, writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst
|
||||
, writerWrapText :: Bool -- ^ Wrap text to line length
|
||||
, writerLiterateHaskell :: Bool -- ^ Write as literate haskell
|
||||
{ writerStandalone :: Bool -- ^ Include header and footer
|
||||
, writerHeader :: String -- ^ Header for the document
|
||||
, writerTitlePrefix :: String -- ^ Prefix for HTML titles
|
||||
, writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs
|
||||
, writerTableOfContents :: Bool -- ^ Include table of contents
|
||||
, writerS5 :: Bool -- ^ We're writing S5
|
||||
, writerHTMLMathMethod :: HTMLMathMethod -- ^ How to print math in HTML
|
||||
, writerIgnoreNotes :: Bool -- ^ Ignore footnotes (used in making toc)
|
||||
, writerIncremental :: Bool -- ^ Incremental S5 lists
|
||||
, writerNumberSections :: Bool -- ^ Number sections in LaTeX
|
||||
, writerIncludeBefore :: String -- ^ String to include before the body
|
||||
, writerIncludeAfter :: String -- ^ String to include after the body
|
||||
, writerStrictMarkdown :: Bool -- ^ Use strict markdown syntax
|
||||
, writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst
|
||||
, writerWrapText :: Bool -- ^ Wrap text to line length
|
||||
, writerLiterateHaskell :: Bool -- ^ Write as literate haskell
|
||||
, writerEmailObfuscation :: ObfuscationMethod -- ^ How to obfuscate emails
|
||||
} deriving Show
|
||||
|
||||
-- | Default writer options.
|
||||
defaultWriterOptions :: WriterOptions
|
||||
defaultWriterOptions =
|
||||
WriterOptions { writerStandalone = False
|
||||
, writerHeader = ""
|
||||
, writerTitlePrefix = ""
|
||||
, writerTabStop = 4
|
||||
, writerTableOfContents = False
|
||||
, writerS5 = False
|
||||
, writerHTMLMathMethod = PlainMath
|
||||
, writerIgnoreNotes = False
|
||||
, writerIncremental = False
|
||||
, writerNumberSections = False
|
||||
, writerIncludeBefore = ""
|
||||
, writerIncludeAfter = ""
|
||||
, writerStrictMarkdown = False
|
||||
, writerReferenceLinks = False
|
||||
, writerWrapText = True
|
||||
, writerLiterateHaskell = False
|
||||
WriterOptions { writerStandalone = False
|
||||
, writerHeader = ""
|
||||
, writerTitlePrefix = ""
|
||||
, writerTabStop = 4
|
||||
, writerTableOfContents = False
|
||||
, writerS5 = False
|
||||
, writerHTMLMathMethod = PlainMath
|
||||
, writerIgnoreNotes = False
|
||||
, writerIncremental = False
|
||||
, writerNumberSections = False
|
||||
, writerIncludeBefore = ""
|
||||
, writerIncludeAfter = ""
|
||||
, writerStrictMarkdown = False
|
||||
, writerReferenceLinks = False
|
||||
, writerWrapText = True
|
||||
, writerLiterateHaskell = False
|
||||
, writerEmailObfuscation = JavascriptObfuscation
|
||||
}
|
||||
|
||||
--
|
||||
|
|
|
@ -193,10 +193,13 @@ parseMailto ('m':'a':'i':'l':'t':'o':':':addr) =
|
|||
in Just (name', domain)
|
||||
parseMailto _ = Nothing
|
||||
|
||||
-- | Obfuscate a "mailto:" link using Javascript.
|
||||
-- | Obfuscate a "mailto:" link.
|
||||
obfuscateLink :: WriterOptions -> String -> String -> Html
|
||||
obfuscateLink opts txt s | writerEmailObfuscation opts == NoObfuscation =
|
||||
anchor ! [href s] << txt
|
||||
obfuscateLink opts txt s =
|
||||
let s' = map toLower s
|
||||
let meth = writerEmailObfuscation opts
|
||||
s' = map toLower s
|
||||
in case parseMailto s' of
|
||||
(Just (name', domain)) ->
|
||||
let domain' = substitute "." " dot " domain
|
||||
|
@ -206,17 +209,20 @@ obfuscateLink opts txt s =
|
|||
then ("'<code>'+e+'</code>'", name' ++ " at " ++ domain')
|
||||
else ("'" ++ txt ++ "'", txt ++ " (" ++ name' ++ " at " ++
|
||||
domain' ++ ")")
|
||||
in if writerStrictMarkdown opts
|
||||
then -- need to use primHtml or &'s are escaped to & in URL
|
||||
in case meth of
|
||||
ReferenceObfuscation ->
|
||||
-- need to use primHtml or &'s are escaped to & in URL
|
||||
primHtml $ "<a href=\"" ++ (obfuscateString s')
|
||||
++ "\">" ++ (obfuscateString txt) ++ "</a>"
|
||||
else (script ! [thetype "text/javascript"] $
|
||||
JavascriptObfuscation ->
|
||||
(script ! [thetype "text/javascript"] $
|
||||
primHtml ("\n<!--\nh='" ++
|
||||
obfuscateString domain ++ "';a='" ++ at' ++ "';n='" ++
|
||||
obfuscateString name' ++ "';e=n+a+h;\n" ++
|
||||
"document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\">'+" ++
|
||||
linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) +++
|
||||
noscript (primHtml $ obfuscateString altText)
|
||||
_ -> error $ "Unknown obfuscation method: " ++ show meth
|
||||
_ -> anchor ! [href s] $ primHtml txt -- malformed email
|
||||
|
||||
-- | Obfuscate character as entity.
|
||||
|
|
|
@ -146,6 +146,14 @@ to Pandoc. Or use `html2markdown`(1), a wrapper around `pandoc`.
|
|||
are omitted. URIs in links and images are also checked against a
|
||||
whitelist of URI schemes.
|
||||
|
||||
\--email-obfuscation=*none|javascript|references*
|
||||
: Specify a method for obfuscating `mailto:` links in HTML documents.
|
||||
*none* leaves `mailto:` links as they are. *javascript* obfuscates
|
||||
them using javascript. *references* obfuscates them by printing their
|
||||
letters as decimal or hexadecimal character references.
|
||||
If `--strict` is specified, *references* is used regardless of the
|
||||
presence of this option.
|
||||
|
||||
\--toc, \--table-of-contents
|
||||
: Include an automatically generated table of contents (HTML, markdown,
|
||||
RTF) or an instruction to create one (LaTeX, reStructuredText).
|
||||
|
|
Loading…
Add table
Reference in a new issue