Added security measures to html2x.pl.
git-svn-id: https://pandoc.googlecode.com/svn/trunk@1018 788f1e2b-df1e-0410-8736-df70ead52e1b
This commit is contained in:
parent
b5819b8ed6
commit
bf100f8276
1 changed file with 28 additions and 24 deletions
|
@@ -1,34 +1,38 @@
|
|||
#!/usr/bin/env perl
|
||||
use strict;
|
||||
use CGI qw/:standard/;
|
||||
use CGI::Carp 'fatalsToBrowser';
|
||||
|
||||
$CGI::POST_MAX=1024 * 100; # max 100K posts
|
||||
$CGI::DISABLE_UPLOADS = 1; # no uploads
|
||||
|
||||
if (param('url') && param('format')) {
|
||||
$options = '--standalone --reference-links';
|
||||
$url = param('url');
|
||||
$format = param('format') || 'markdown';
|
||||
if ($format =~ '^markdown$') {
|
||||
param('url') && param('format') or die "Missing url and/or format parameters.\n";
|
||||
|
||||
my $options = '-r html --standalone --reference-links';
|
||||
my $url = param('url');
|
||||
my $format = param('format') || 'markdown';
|
||||
if ($format =~ /^markdown$/) {
|
||||
$options .= ' --strict';
|
||||
}
|
||||
if ($format =~ '^markdown\+$') {
|
||||
if ($format =~ /^markdown\+$/) {
|
||||
$format = 'markdown';
|
||||
}
|
||||
$output = `wget -O- $url | tidy -asxhtml -utf8 | pandoc -r html -w $format $options`;
|
||||
if ($format =~ "rtf") {
|
||||
$type = "application/rtf"
|
||||
} else {
|
||||
$type = "text/plain"
|
||||
};
|
||||
print header(-charset=>"utf8",-type=>"$type"),
|
||||
$output;
|
||||
} else {
|
||||
print start_html(-title=>"html2x"),
|
||||
h1("Usage"),
|
||||
p("You have tried to call html2x.pl without the proper parameters."),
|
||||
p("Please use <a href=\"/pandoc/html2x.html\">this form</a>."),
|
||||
end_html();
|
||||
|
||||
# Validate URL and format.
# $url is interpolated into a backtick shell command further down, so only a
# restricted character set is accepted.  \A and \z are used instead of ^ and $
# because $ also matches just before a trailing newline.  The first character
# may not be '-', so the value can never be read by wget as a command-line
# option (e.g. "-ofoo.log").
unless ($url =~ /\A(https?:\/\/)?[\w#?_][\w#?_-]*(\.[\w#?_-]+)+[\w\/#?_.-]*\z/) {
    die "Illegal URL: $url\n";
}
|
||||
# Reject any output format that is not on the whitelist.  The alternation is
# wrapped in a group so the anchors apply to every alternative; ungrouped,
# only "markdown" was anchored at the start and only "docbook" at the end, so
# any string merely containing "rst", "latex", etc. was accepted.  $format is
# interpolated into a backtick shell command, so the match must be exact.
unless ($format =~ /\A(?:markdown\+?|rst|latex|context|rtf|man|docbook)\z/) {
    die "Illegal format: $format\n";
}
|
||||
|
||||
|
||||
my $output = `wget -O- $url | tidy -asxhtml -utf8 | pandoc -w $format $options`;
|
||||
# The conversion pipeline produced nothing but whitespace: report the failure
# as an HTML page and stop.
if ($output =~ /^\s*$/) {
    # A CGI response must begin with an HTTP header.  start_html() only emits
    # the beginning of the HTML document, so header() is printed first.
    print header(),
          start_html(),
          h1("No output"),
          p("Either $url could not be retrieved, or its HTML was too malformed to parse."),
          end_html();
    exit 0;
}
|
||||
print header(-charset=>"utf8",-type=>"text/plain"),
|
||||
$output;
|
||||
|
|
Loading…
Reference in a new issue