Added security measures to html2x.pl.
git-svn-id: https://pandoc.googlecode.com/svn/trunk@1018 788f1e2b-df1e-0410-8736-df70ead52e1b
This commit is contained in:
parent
b5819b8ed6
commit
bf100f8276
1 changed file with 28 additions and 24 deletions
|
@ -1,34 +1,38 @@
|
||||||
#!/usr/bin/env perl
# html2x.pl - CGI script: fetch a web page with wget, clean it up with tidy,
# and convert it to another markup format with pandoc.
#
# Query parameters (both required):
#   url     address of the page to convert; the http(s):// scheme is optional
#   format  one of: markdown, markdown+, rst, latex, context, rtf, man, docbook
#           "markdown"  adds --strict to the pandoc options;
#           "markdown+" is passed to pandoc as "markdown" without --strict.
#
# Both values end up on a shell command line, so each one is checked against
# a fully anchored whitelist before it is used for anything else.

use strict;
use warnings;
use CGI qw/:standard/;
use CGI::Carp 'fatalsToBrowser';

$CGI::POST_MAX        = 1024 * 100;    # max 100K posts
$CGI::DISABLE_UPLOADS = 1;             # no uploads

param('url') && param('format')
    or die "Missing url and/or format parameters.\n";

my $options = '-r html --standalone --reference-links';
my $url     = param('url');
my $format  = param('format');

# Validate URL and format.
# \A and \z are used instead of ^ and $ so a trailing newline is rejected.
unless ($url =~ m{\A(?:https?://)?[\w#?_-]+(?:\.[\w#?_-]+)+[\w/#?_.-]*\z}) {
    die "Illegal URL: $url\n";
}

# The alternatives must be grouped: without the (?: ... ) the anchors would
# bind only to the first and last alternative, and any string merely
# containing e.g. "rst" would be accepted and handed to the shell.
unless ($format =~ /\A(?:markdown\+?|rst|latex|context|rtf|man|docbook)\z/) {
    die "Illegal format: $format\n";
}

# Map the two markdown flavours onto pandoc's writer name and options.
if ($format eq 'markdown') {
    $options .= ' --strict';
}
elsif ($format eq 'markdown+') {
    $format = 'markdown';
}

# The URL is single-quoted so the shell does not treat '#' as a comment or
# '?' as a glob (the whitelist above cannot match a quote character), and
# "--" stops wget from reading a URL that starts with '-' as an option.
my $output = `wget -O- -- '$url' | tidy -asxhtml -utf8 | pandoc -w $format $options`;

if (!defined $output || $output =~ /^\s*$/) {
    # An HTTP header is required before any HTML is sent.
    print header(-charset=>"utf8"),
          start_html,
          h1("No output"),
          p("Either $url could not be retrieved, or its HTML was too malformed to parse."),
          end_html;
    exit 0;
}

print header(-charset=>"utf8",-type=>"text/plain"),
      $output;
|
||||||
|
|
Loading…
Reference in a new issue