Added security measures to html2x.pl.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1018 788f1e2b-df1e-0410-8736-df70ead52e1b
This commit is contained in:
fiddlosopher 2007-09-15 03:15:27 +00:00
parent b5819b8ed6
commit bf100f8276

View file

@@ -1,34 +1,38 @@
#!/usr/bin/env perl
use strict;
use CGI qw/:standard/;
use CGI::Carp 'fatalsToBrowser';
$CGI::POST_MAX=1024 * 100; # max 100K posts
$CGI::DISABLE_UPLOADS = 1; # no uploads
# Request handler: when both the `url` and `format` parameters are supplied,
# fetch the page with wget, normalise it with tidy, convert it with pandoc,
# and return the result; otherwise print a short usage page.
#
# Both parameters are interpolated into a shell command line, so they are
# checked against fully anchored patterns first, and the URL is additionally
# single-quoted and preceded by `--` in the pipeline.
if (param('url') && param('format')) {
    # Declared with `my`: the file runs under `use strict`, which rejects
    # undeclared globals at compile time.
    my $options = '--standalone --reference-links';
    my $url     = param('url');
    my $format  = param('format');

    # \A...\z rather than ^...$ so that a trailing newline is rejected too.
    # The character classes exclude quotes, whitespace and shell
    # metacharacters other than `?` and `#`, which the quoting below handles.
    unless ($url =~ m{\A(?:https?://)?[\w#?_-]+(?:\.[\w#?_-]+)+[\w/#?_.-]*\z}) {
        die "Illegal URL: $url\n";
    }
    # The alternatives are grouped so both anchors apply to each of them.
    unless ($format =~ /\A(?:markdown\+?|rst|latex|context|rtf|man|docbook)\z/) {
        die "Illegal format: $format\n";
    }

    # "markdown" selects strict mode; "markdown+" selects pandoc's markdown
    # writer without --strict.
    if ($format eq 'markdown') {
        $options .= ' --strict';
    }
    elsif ($format eq 'markdown+') {
        $format = 'markdown';
    }

    # `--` stops wget from parsing a URL that starts with `-` as an option;
    # the single quotes stop the shell from treating `?` as a glob or a
    # leading `#` as a comment. The validation above guarantees the URL
    # contains no single quote.
    my $output = `wget -O- -- '$url' | tidy -asxhtml -utf8 | pandoc -r html -w $format $options`;

    my $type = $format eq 'rtf' ? "application/rtf" : "text/plain";
    print header(-charset=>"utf8", -type=>$type),
          $output;
}
else {
    # A CGI response has to begin with an HTTP header block, so header()
    # is emitted before any HTML.
    print header(),
          start_html(-title=>"html2x"),
          h1("Usage"),
          p("You have tried to call html2x.pl without the proper parameters."),
          p("Please use <a href=\"/pandoc/html2x.html\">this form</a>."),
          end_html();
}
# Request handler: fetch the page named by the `url` parameter with wget,
# normalise it with tidy, convert it with pandoc to the requested `format`,
# and return the result as text/plain.
#
# Both parameters are interpolated into a shell command line, so they are
# checked against fully anchored patterns before anything else is done with
# them, and the URL is additionally single-quoted and preceded by `--`.
param('url') && param('format') or die "Missing url and/or format parameters.\n";
my $options = '-r html --standalone --reference-links';
my $url     = param('url');
my $format  = param('format');    # known to be true here; no default needed

# Validate URL and format.
# NOTE(review): the rejected value is echoed back in the die message, which
# CGI::Carp's fatalsToBrowser sends to the client -- confirm that the
# installed CGI::Carp HTML-escapes it.
#
# \A...\z rather than ^...$ so that a trailing newline is rejected too.
# The character classes exclude quotes, whitespace and shell metacharacters
# other than `?` and `#`, which the quoting in the pipeline handles.
unless ($url =~ m{\A(?:https?://)?[\w#?_-]+(?:\.[\w#?_-]+)+[\w/#?_.-]*\z}) {
    die "Illegal URL: $url\n";
}
# The alternatives are grouped so that both anchors apply to each of them.
# Ungrouped, `^` binds only to the first alternative and `$` only to the
# last, so any value that merely starts with "markdown" or contains "rst",
# "latex", ... would be accepted and reach the shell.
unless ($format =~ /\A(?:markdown\+?|rst|latex|context|rtf|man|docbook)\z/) {
    die "Illegal format: $format\n";
}

# "markdown" selects strict mode; "markdown+" selects pandoc's markdown
# writer without --strict.
if ($format eq 'markdown') {
    $options .= ' --strict';
}
elsif ($format eq 'markdown+') {
    $format = 'markdown';
}

# `--` stops wget from parsing a URL that starts with `-` as an option; the
# single quotes stop the shell from treating `?` as a glob or a leading `#`
# as a comment. The validation above guarantees the URL contains no single
# quote.
my $output = `wget -O- -- '$url' | tidy -asxhtml -utf8 | pandoc -w $format $options`;

# Nothing but whitespace (or no output at all) means the pipeline failed.
if (!defined $output || $output !~ /\S/) {
    # A CGI response has to begin with an HTTP header block, so header()
    # is emitted before any HTML.
    print header(-charset=>"utf8"),
          start_html(),
          h1("No output"),
          p("Either $url could not be retrieved, or its HTML was too malformed to parse."),
          end_html();
    exit 0;
}
print header(-charset=>"utf8",-type=>"text/plain"),
      $output;