From bf100f82769b194336ef4b92f5a9803f262f0d8b Mon Sep 17 00:00:00 2001
From: fiddlosopher
Date: Sat, 15 Sep 2007 03:15:27 +0000
Subject: [PATCH] Added security measures to html2x.pl.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1018 788f1e2b-df1e-0410-8736-df70ead52e1b
---
Review amendments: the URL and format checks are anchored with \A...\z and
the format alternatives are grouped, the URL is quoted in the shell command,
and an HTTP header is sent before the "No output" page.

 web/html2x.pl | 61 +++++++++++++++++++++++++++++++++++++------------------------
 1 file changed, 37 insertions(+), 24 deletions(-)

diff --git a/web/html2x.pl b/web/html2x.pl
index 98c23dccc..a034f0e58 100755
--- a/web/html2x.pl
+++ b/web/html2x.pl
@@ -1,34 +1,47 @@
 #!/usr/bin/env perl
+use strict;
 use CGI qw/:standard/;
 use CGI::Carp 'fatalsToBrowser';
 
 $CGI::POST_MAX=1024 * 100; # max 100K posts
 $CGI::DISABLE_UPLOADS = 1; # no uploads
 
-if (param('url') && param('format')) {
-  $options = '--standalone --reference-links';
-  $url = param('url');
-  $format = param('format') || 'markdown';
-  if ($format =~ '^markdown$') {
-    $options .= ' --strict';
-  }
-  if ($format =~ '^markdown\+$') {
-    $format = 'markdown';
-  }
-  $output = `wget -O- $url | tidy -asxhtml -utf8 | pandoc -r html -w $format $options`;
-  if ($format =~ "rtf") {
-    $type = "application/rtf"
-  } else {
-    $type = "text/plain"
-  };
-  print header(-charset=>"utf8",-type=>"$type"),
-        $output;
-} else {
-  print start_html(-title=>"html2x"),
-        h1("Usage"),
-        p("You have tried to call html2x.pl without the proper parameters."),
-        p("Please use this form."),
-        end_html();
+# Fetch the page at 'url', clean it with tidy, and convert it with pandoc
+# to the requested 'format'; the result is returned as plain text.
+param('url') && param('format') or die "Missing url and/or format parameters.\n";
+
+my $options = '-r html --standalone --reference-links';
+my $url = param('url');
+my $format = param('format') || 'markdown';
+if ($format =~ /^markdown$/) {
+  $options .= ' --strict';
+}
+if ($format =~ /^markdown\+$/) {
+  $format = 'markdown';
 }
 
+# Validate URL and format.  Both are interpolated into the shell command
+# below, so each must match in full: \A and \z anchor the whole string
+# (unlike $, \z rejects a trailing newline), and the format alternatives
+# are grouped so the anchors apply to every one of them.
+unless ($url =~ /\A(https?:\/\/)?[\w#?_-]+(\.[\w#?_-]+)+[\w\/#?_.-]*\z/) {
+  die "Illegal URL: $url\n" ;
+}
+unless ($format =~ /\A(?:markdown|rst|latex|context|rtf|man|docbook)\z/) {
+  die "Illegal format: $format\n";
+}
+# Quote the URL: '?' and '#' are special to the shell, and validation
+# guarantees the URL contains no quote characters.
+my $output = `wget -O- '$url' | tidy -asxhtml -utf8 | pandoc -w $format $options`;
+if ($output =~ /^\s*$/) {
+  # start_html emits only the document; the HTTP header must come first.
+  print header,
+        start_html,
+        h1("No output"),
+        p("Either $url could not be retrieved, or its HTML was too malformed to parse."),
+        end_html;
+  exit 0;
+}
+print header(-charset=>"utf8",-type=>"text/plain"),
+      $output;
 