Modified html2markdown to try pandoc before falling back to tidy.

Previously, html2markdown piped all input through HTML Tidy before passing it to pandoc. This caused problems on certain sites (e.g. daringfireball.com/markdown) whose well-formed XHTML causes tidy to choke. The solution is to run pandoc on the original HTML first, and to run the input through tidy only if that fails. As a consequence, a temp file is now always used, even when input comes from a local file or standard input.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1039 788f1e2b-df1e-0410-8736-df70ead52e1b
This commit is contained in:
parent
5f64258a4e
commit
a1ad3b4e5f
1 changed file with 15 additions and 10 deletions
|
@ -104,11 +104,11 @@ if [ -n "$argument" ] && ! [ -f "$argument" ]; then
|
|||
inurl="$argument"
|
||||
fi
|
||||
|
||||
### tempdir.sh
|
||||
|
||||
if [ -n "$inurl" ]; then
|
||||
err "Attempting to fetch file from '$inurl'..."
|
||||
|
||||
### tempdir.sh
|
||||
|
||||
grabber_out=$THIS_TEMPDIR/grabber.out
|
||||
grabber_log=$THIS_TEMPDIR/grabber.log
|
||||
if ! grab_url_with "$inurl" "$grabber" 1>$grabber_out 2>$grabber_log; then
|
||||
|
@ -144,14 +144,19 @@ else # assume UTF-8
|
|||
alias to_utf8='cat'
|
||||
fi
|
||||
|
||||
htmlinput=$THIS_TEMPDIR/htmlinput
|
||||
|
||||
if [ -z "$argument" ]; then
|
||||
tidy -asxhtml -utf8 2>/dev/null | pandoc --ignore-args -r html -w markdown "$@"
|
||||
to_utf8 > $htmlinput # read from STDIN
|
||||
elif [ -f "$argument" ]; then
|
||||
to_utf8 "$argument" > $htmlinput # read from file
|
||||
else
|
||||
if [ -f "$argument" ]; then
|
||||
to_utf8 "$argument" |
|
||||
tidy -asxhtml -utf8 2>/dev/null | pandoc --ignore-args -r html -w markdown "$@"
|
||||
else
|
||||
err "File '$argument' not found."
|
||||
exit 1
|
||||
fi
|
||||
err "File '$argument' not found."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if ! cat $htmlinput | pandoc --ignore-args -r html -w markdown "$@" ; then
|
||||
err "Failed to parse HTML. Trying again with tidy..."
|
||||
tidy -q -asxhtml -utf8 $htmlinput | \
|
||||
pandoc --ignore-args -r html -w markdown "$@"
|
||||
fi
|
||||
|
|
Loading…
Add table
Reference in a new issue