Merged changes from branches/wrappers since r177.

Summary of main changes:
+ Added -o/--output and -d/--debug options to pandoc.
+ Modified pandoc to behave differently depending on the name
  of the program.  For example, if the program name is 'html2latex',
  the default reader will be html and the default writer latex. 
+ Removed most of the old wrappers, replacing them with symlinks
  to pandoc.
+ Rewrote markdown2pdf and created a new wrapper web2markdown,
  with the functionality of the old html2markdown script.  These
  new scripts exploit pandoc's -d option to avoid having to do
  complex command-line parsing.
+ Revised man pages and documentation appropriately.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@279 788f1e2b-df1e-0410-8736-df70ead52e1b
This commit is contained in:
fiddlosopher 2006-12-22 20:16:03 +00:00
parent cfaf0c178c
commit d829c4820a
27 changed files with 713 additions and 799 deletions

View file

@ -23,15 +23,16 @@ EXECSBASE := $(shell sed -ne 's/^[Ee]xecutable:[[:space:]]*//p' $(CABAL).in)
#-------------------------------------------------------------------------------
# Install targets
#-------------------------------------------------------------------------------
WRAPPERS := web2markdown markdown2pdf
SYMLINKS := markdown2html markdown2latex markdown2s5 markdown2rst \
markdown2rtf html2markdown latex2markdown rst2markdown
PROGS := $(EXECS) $(WRAPPERS)
# Add .exe extensions if we're running Windows/Cygwin.
EXTENSION := $(shell uname | tr '[:upper:]' '[:lower:]' | \
sed -ne 's/^cygwin.*$$/\.exe/p')
EXECS := $(addsuffix $(EXTENSION),$(EXECSBASE))
# First entry in Cabal's executable stanza is the main executable.
MAIN := $(firstword $(EXECS))
WRAPPERS := html2markdown latex2markdown markdown2html \
markdown2latex markdown2pdf
PROGS := $(EXECS) $(WRAPPERS)
DOCS := README.html README BUGS
#-------------------------------------------------------------------------------
@ -92,6 +93,12 @@ all: build-program
templates: $(SRCDIR)/templates
$(MAKE) -C $(SRCDIR)/templates
.PHONY: symlinks
cleanup_files+=$(SYMLINKS)
symlinks: $(SYMLINKS)
$(SYMLINKS): $(MAIN)
ln -sf ./$(MAIN) $@
define generate-shell-script
echo "Generating $@..."; \
awk ' \
@ -131,7 +138,7 @@ build: configure
$(BUILDCMD) build
.PHONY: build-exec
build-exec: $(PROGS)
build-exec: $(PROGS) $(SYMLINKS)
cleanup_files+=$(EXECS)
$(EXECS): build
for f in $@; do \
@ -191,8 +198,9 @@ install-exec: build-exec
fi; \
$(INSTALL_PROGRAM) $$f $(BINPATH)/; \
done
cd $(BINPATH); for f in $(SYMLINKS); do ln -sf $(MAIN) $$f; done
uninstall-exec:
-for f in $(notdir $(PROGS)); do rm -f $(BINPATH)/$$f; done
-for f in $(notdir $(PROGS) $(SYMLINKS)); do rm -f $(BINPATH)/$$f; done ;
# Program + user documents installation.
.PHONY: install-program uninstall-program
@ -277,15 +285,11 @@ osx-dmg: ../$(osx_dmg_name)
-rm -f $(osx_dmg_name)
mv $(osx_udzo_name) ../$(osx_dmg_name)
.PHONY: test test-markdown test-wrapper
.PHONY: test test-markdown
test: $(MAIN)
@cd $(TESTDIR) && perl runtests.pl -s $(PWD)/$(MAIN)
test-markdown: $(MAIN)
@cd $(TESTDIR)/MarkdownTest_1.0.3 && perl MarkdownTest.pl -s $(PWD)/$(MAIN) -tidy
cleanup_files+=testwrapper
test-wrappers: testwrapper
@echo "Running $<..."
@sh testwrapper
# Stolen and slightly improved from a GPLed Makefile. Credits to John Meacham.
src_all:=$(shell find $(SRCDIR) -type f -name '*hs' | egrep -v '^\./(_darcs|lib|test)/')

251
README
View file

@ -20,7 +20,7 @@ or output format requires only adding a reader or writer.
[reStructuredText]: http://docutils.sourceforge.net/docs/ref/rst/introduction.html
[S5]: http://meyerweb.com/eric/tools/s5/
[HTML]: http://www.w3.org/TR/html40/
[LaTeX]: http://www.latex-project.org/
[LaTeX]: http://www.latex-project.org/
[RTF]: http://en.wikipedia.org/wiki/Rich_Text_Format
[Haskell]: http://www.haskell.org/
@ -30,9 +30,53 @@ any kind. (See COPYRIGHT for full copyright and warranty notices.)
Recai Oktaş (roktas at debian dot org) deserves credit for the build
system, the debian package, and the robust wrapper scripts.
[GPL]: http://www.gnu.org/copyleft/gpl.html
[GPL]: http://www.gnu.org/copyleft/gpl.html "GNU General Public License"
# Using Pandoc
Requirements
============
The `pandoc` program itself does not depend on any external libraries
or programs. The convenience programs `markdown2html`, `markdown2latex`,
`markdown2rst`, `markdown2rtf`, `markdown2s5`, `html2markdown`,
`latex2markdown`, and `rst2markdown` are implemented as symbolic links to
`pandoc`.
The wrapper script `web2markdown` requires
- `html2markdown` (included with Pandoc)
- a POSIX-compliant shell (installed by default on all linux and unix
systems, including Mac OS X, and in [Cygwin] for Windows),
- [HTML Tidy]
- [`iconv`] (for character encoding conversion). (If `iconv` is absent,
`web2markdown` will still work, but it will treat everything as UTF-8.)
[Cygwin]: http://www.cygwin.com/
[HTML Tidy]: http://tidy.sourceforge.net/
[`iconv`]: http://www.gnu.org/software/libiconv/
The wrapper script `markdown2pdf` requires
- `markdown2latex` (included with Pandoc)
- a POSIX-compliant shell
- `pdflatex`, which should be part of any [LaTeX] distribution
- the [unicode] and [fancyvrb] LaTeX packages, which are included
in many LaTeX distributions. The [unicode] package allows LaTeX to
process UTF-8 characters. [fancyvrb] allows code blocks and verbatim
text to be used within footnotes. If your installation of LaTeX
does not include these packages, you will get an error (complaining
about missing `ucs.sty` or `fancyvrb.sty`) when you try to compile
a LaTeX file produced by Pandoc, or when you use the `markdown2pdf`
script (described below). If this happens, install the [unicode] and
[fancyvrb] packages from [CTAN]. (Get the zip file from CTAN
and unpack it into `~/texmf/tex/latex/`. You may also need to run
`mktexlsr` or `texhash` before the files can be found by TeX.)
[CTAN]: http://www.ctan.org "Comprehensive TeX Archive Network"
[unicode]: http://www.ctan.org/tex-archive/macros/latex/contrib/unicode/
[fancyvrb]: http://www.ctan.org/tex-archive/macros/latex/contrib/fancyvrb/
Using Pandoc
============
If you run `pandoc` without arguments, it will accept input from
STDIN. If you run it with file names as arguments, it will take input
@ -66,10 +110,14 @@ a subset of reStructuredText syntax. For example, it doesn't handle
tables, definition lists, option lists, or footnotes. It handles only the
constructs expressible in unextended markdown. But for simple documents
it should be adequate. The `latex` and `html` readers are also limited
in what they can do.
in what they can do. Because the `html` reader is picky about the HTML
it parses, it is recommended that you pipe HTML through [HTML Tidy] before
sending it to `pandoc`, or use the `web2markdown` script described below.
`pandoc` writes its output to STDOUT. If you want to write to a file,
use redirection:
By default, `pandoc` writes its output to STDOUT. If you want to
write to a file, use the `-o` option or shell redirection:
pandoc -o hello.html hello.txt
pandoc hello.txt > hello.html
@ -77,13 +125,14 @@ Note that you can specify multiple input files on the command line.
`pandoc` will concatenate them all (with blank lines between them)
before parsing:
pandoc -s chapter1.txt chapter2.txt chapter3.txt references.txt > book.html
pandoc -s chapter1.txt chapter2.txt references.txt > book.html
(The `-s` option here tells `pandoc` to produce a standalone HTML file,
with a proper header, rather than a fragment. For more details on this
and many other command-line options, see below.)
# Character encodings
Character encodings
-------------------
Unfortunately, due to limitations in GHC, `pandoc` does not automatically
detect the system's local character encoding. Hence, all input and
@ -97,92 +146,65 @@ will convert `source.txt` from the local encoding to UTF-8, then
convert it to HTML, then convert back to the local encoding,
putting the output in `output.html`.
[`iconv`]: http://www.gnu.org/software/libiconv/
The shell scripts (described below) automatically convert the input
from the local encoding to UTF-8 before running them through `pandoc`,
then convert the output back to the local encoding.
## LaTeX and UTF-8
Convenience programs and wrapper scripts
========================================
LaTeX sources produced by Pandoc use `ucs.sty`, which is included in many
LaTeX distributions. This allows LaTeX to process UTF-8 characters.
If your installation of LaTeX does not include `ucs.sty`, you will get an
error when you try to compile a LaTeX file produced by Pandoc, or when
you use the `markdown2pdf` script (described below). If this happens,
install the [unicode] package from [CTAN]. (Get the `unicode.zip`
file from CTAN, unpack it, and copy the whole `unicode` directory into
`~/texmf/tex/latex/`. You may also need to run `mktexlsr` or `texhash`
before the files can be found by TeX.)
For convenience, eight variant programs are included with Pandoc:
`markdown2html` (which is equivalent to `pandoc -w html`),
`markdown2latex` (equivalent to `pandoc -w latex`), `markdown2rst`
(equivalent to `pandoc -w rst`), `markdown2rtf` (equivalent to
`pandoc -w rtf`), `markdown2s5` (equivalent to `pandoc -w s5`),
`html2markdown` (equivalent to `pandoc -r html -w markdown`),
`latex2markdown` (equivalent to `pandoc -r latex -w markdown`), and
`rst2markdown` (equivalent to `pandoc -r rst -w markdown`). These
programs take an appropriately restricted subset of `pandoc`'s
options. (Run them with the `-h` flag for a full list of allowed
options.)
[CTAN]: http://www.ctan.org
[unicode]: http://www.ctan.org/tex-archive/macros/latex/contrib/unicode/
Like `pandoc`, all of these programs produce fragments by default.
If you want to produce a standalone file, complete with a header
and footer appropriate to the format, use the `-s` option:
# The shell scripts
markdown2latex -s sample.txt > sample.tex
Five shell scripts have been included that make it easy to run
`pandoc` without worrying about character encodings, and without
remembering all the command-line options:
Two shell scripts have also been included:
- `markdown2html` converts markdown-formatted text to HTML
- `markdown2latex` converts markdown-formatted text to LaTeX
- `markdown2pdf` produces a PDF file from markdown-formatted
text, using `pdflatex`.
- `html2markdown` converts HTML to markdown-formatted text
- `latex2markdown` converts LaTeX to markdown-formatted text
1. `markdown2pdf` produces a PDF file from markdown-formatted
text, using `markdown2latex` and `pdflatex`. The default
behavior of `markdown2pdf` is to create a file with the same
base name as the first argument and the extension `pdf`; thus,
for example,
All of the scripts use `iconv` (if available) to convert to and from
the local character encoding. All of the scripts presuppose that
`pandoc` is in the path, and some have additional requirements. (For
example, `html2markdown` uses `tidy`, and `markdown2pdf` uses
`pdflatex`.)
markdown2pdf sample.txt endnotes.txt
When no arguments are specified, text will be read from standard
input. Arguments specify input files (limited to one in the case of
`latex2markdown` and `html2markdown`; the other scripts accept any number
of arguments). `html2markdown` may take a URL as argument instead of
a filename; in this case, `curl`, `wget`, or an available text-based
browser will be used to fetch the contents of the URL. (The `-n` option
inhibits this behavior; the `-g` option allows the user to specify a
custom command that will be used to fetch from a URL.)
will produce `sample.pdf`. (If `sample.pdf` exists already,
it will be backed up before being overwritten.) An output file
name can be specified explicitly using the `-o` option:
With the exception of `markdown2pdf`, the scripts write to standard output.
Output can be sent to a file using shell output redirection:
markdown2pdf -o "My Book.pdf" chap1.txt chap2.txt chap3.txt
latex2markdown sample.tex > sample.txt
If no input file is specified, input will be taken from STDIN.
The default behavior of `markdown2pdf` is to create a file with the same
base name as the first argument and the extension `pdf`; thus, for example,
2. `web2markdown` grabs a web page from a file or URL and converts
it to markdown-formatted text, using `tidy` and `html2markdown`.
Unless input is from STDIN, an attempt is made to determine the
character encoding of the page from the "Content-type" meta tag.
If this is not present, UTF-8 is assumed. Alternatively, a character
encoding may be specified explicitly using the `-e` option.
markdown2pdf sample.txt endnotes.txt
`web2markdown` searches for an available program (`wget`, `curl`,
or a text-mode browser) to fetch the contents of a URL.
Optionally, the `-g` option may be used to specify the command
to be used:
will produce `sample.pdf`. (If `sample.pdf` exists already, it will be
backed up before being overwritten.) An output file name can be specified
explicitly using the `-o` option:
web2markdown -g 'wget --user=foo --password=bar' mysite.com
markdown2pdf -o "My Book.pdf" chap1.txt chap2.txt chap3.txt
Options specific to the scripts, like `-o`, `-g`, and `-n`, must
be specified *before* any command-line arguments (file names or URLs).
Any options specified *after* the command-line arguments will be
passed directly to `pandoc`. For example,
markdown2html tusks.txt -S -T Elephants
will convert `tusks.txt` to `tusks.html` using smart quotes, ellipses,
and dashes, with "Elephants" as the page title prefix. (For a
complete list of `pandoc` options, see below.) When there are no
command-line arguments (because input is from STDIN), `pandoc`
options must be preceded by ` -- `:
cat tusks.txt | markdown2html -- -S -T Elephants
The ` -- ` separator may optionally be used when there are command-line
arguments:
markdown2html -- tusks.txt -S -T Elephants
# Command-line options
Command-line options
====================
Various command-line options can be used to customize the output.
For a complete list, type
@ -207,9 +229,11 @@ specified.)
complete with appropriate document headers. By default, `pandoc`
produces a fragment.
`--custom-header` can be used to specify a custom document header. To
see the headers used by default, use the `-D` option: for example,
`pandoc -D html` prints the default HTML header.
`-o` or `--output` can be used to specify an output file.
`-C` or `--custom-header` can be used to specify a custom document
header. To see the headers used by default, use the `-D` option:
for example, `pandoc -D html` prints the default HTML header.
`-c` or `--css` allows the user to specify a custom stylesheet that
will be linked to in HTML and S5 output.
@ -253,15 +277,38 @@ is for lists to be displayed all at once.
`-N` or `--number-sections` causes sections to be numbered in LaTeX
output. By default, sections are not numbered.
# Pandoc's markdown vs. standard markdown
`-d` or `--debug` causes a debugging message to be written to STDERR.
The format of the message is as follows:
OUTPUT=foo
INPUT=bar
INPUT=Foo Baz
Here `OUTPUT=` is followed by the name of the output file specified
using `-o`, if any. If no output file was specified, `OUTPUT=`
will appear with nothing following it. Lines beginning `INPUT=`
specify input files. If there are no input files, no `INPUT=` lines
will be printed. The `-d` option forces output to be written to
STDOUT, even if an output file was specified using the `-o` option.
(This option is provided to make it easier to write wrappers for
`pandoc`.)
`-v` or `--version` prints the version number to STDERR.
`-h` or `--help` prints a usage message to STDERR.
Pandoc's markdown vs. standard markdown
=======================================
In parsing markdown, Pandoc departs from and extends [standard markdown]
in a few respects. (To run Pandoc on the official
markdown test suite, type `make test-markdown`.)
[standard markdown]: http://daringfireball.net/projects/markdown/syntax
"Markdown syntax description"
## Section Headings
Section Headings
----------------
Pandoc creates an invisible anchor in front of every HTML section
heading. The ID of this anchor is derived from the section heading
@ -281,7 +328,8 @@ example, just insert:
[Back to Aristotle](#Aristotle's_De_Anima)
## Lists
Lists
-----
Pandoc behaves differently from standard markdown on some "edge
cases" involving lists. Consider this source:
@ -332,7 +380,8 @@ the example above:
B) Fie
C) Third
## Literal quotes in titles
Literal quotes in titles
------------------------
Standard markdown allows unescaped literal quotes in titles, as
in
@ -343,7 +392,8 @@ Pandoc requires all quotes within titles to be escaped:
[foo]: "bar \"embedded\" baz"
## Reference links
Reference links
---------------
Pandoc allows implicit reference links in either of two styles:
@ -357,7 +407,8 @@ will appear as regular bracketed text. Note: even `[link][]` will
appear as `[link]` if there's no reference for `link`. If you want
`[link][]`, use a backslash escape: `\[link]\[]`.
## Footnotes
Footnotes
---------
Pandoc's markdown allows footnotes, using the following syntax:
@ -394,7 +445,8 @@ they cannot contain multiple paragraphs). The syntax is as follows:
Inline and regular footnotes may be mixed freely.
## Embedded HTML
Embedded HTML
-------------
Pandoc treats embedded HTML in markdown a bit differently than
Markdown 1.0. While Markdown 1.0 leaves HTML blocks exactly as they
@ -427,7 +479,8 @@ markdown with HTML block elements. For example, one can surround
a block of markdown text with `<div>` tags without preventing it
from being interpreted as markdown.
## Title blocks
Title blocks
------------
If the file begins with a title block
@ -460,7 +513,8 @@ If a title prefix is specified with `-T` and no title block appears
in the document, the title prefix will be used by itself as the
HTML title.
## Box-style blockquotes
Box-style blockquotes
---------------------
Pandoc supports emacs-style boxquote block quotes, in addition to
standard markdown (email-style) boxquotes:
@ -469,7 +523,8 @@ standard markdown (email-style) boxquotes:
| They look like this.
`----
## Inline LaTeX
Inline LaTeX
------------
Anything between two $ characters will be parsed as LaTeX math. The
opening $ must have a character immediately to its right, while the
@ -501,7 +556,8 @@ You can also use LaTeX environments. For example,
Note, however, that material between the begin and end tags will
be interpreted as raw LaTeX, not as markdown.
## Custom headers
Custom headers
--------------
When run with the "standalone" option (`-s`), `pandoc` creates a
standalone file, complete with an appropriate header. To see the
@ -516,13 +572,14 @@ it and specify it on the command line as follows:
pandoc --custom-header=MyHeaderFile
# Producing S5 with Pandoc
Producing S5 with Pandoc
========================
Producing an [S5] slide show with Pandoc is easy. A title page is
constructed automatically from the document's title block (see above).
Each section (with a level-one header) produces a single slide. (Note
that if the section is too big, the slide will not fit on the page; S5
is not smart enough to produce multiple pages.)
Producing an [S5] web-based slide show with Pandoc is easy. A title
page is constructed automatically from the document's title block (see
above). Each section (with a level-one header) produces a single slide.
(Note that if the section is too big, the slide will not fit on the page;
S5 is not smart enough to produce multiple pages.)
Here's the markdown source for a simple slide show, `eating.txt`:

2
debian/changelog vendored
View file

@ -14,6 +14,8 @@ pandoc (0.22) unstable; urgency=low
* Refactored template processing (fillTemplates.pl).
* Modified wrapper scripts to make them more robust.
* Modified wrapper scripts to make them more robust and portable.
To avoid code duplication and ensure consistency, wrappers are
generated via a templating system from templates in src/wrappers.

View file

@ -1,60 +1 @@
.TH HTML2MARKDOWN 1 "November 21, 2006" Pandoc "User Manuals"
.SH NAME
html2markdown \- converts HTML to markdown-formatted text
.SH SYNOPSIS
\fBhtml2markdown\fR [\fIoptions\fR] [\fIinput\-file\fR or \fIURL\fR]
[\fB\-\-\fR] [\fIpandoc\-opts\fR]
.SH DESCRIPTION
\fBhtml2markdown\fR converts \fIinput\-file\fR or \fIURL\fR (or text
from STDIN) from HTML to markdown\-formatted plain text.
If a URL is specified, \fBhtml2markdown\fR uses an available program
(e.g. wget, w3m, lynx or curl) to fetch its contents. Output is sent
to STDOUT.
.PP
\fBhtml2markdown\fR is a wrapper for \fBpandoc\fR.
.SH OPTIONS
.TP
.B \-h
Show usage message.
.TP
.B \-e \fIencoding\fR
Assume the character encoding \fIencoding\fR in reading the HTML.
(Note: \fIencoding\fR will be passed to \fBiconv\fR; a list of
available encodings may be obtained using `\fBiconv \-l\fR'.)
If the \fB\-e\fR option is not specified, the encoding will be
determined as follows: If input is from STDIN, the local encoding
will be assumed. Otherwise, \fBhtml2markdown\fR will try to
extract the character encoding from the "Content-type" meta tag.
If no character encoding is specified in this way, UTF-8 will be
assumed for a URL argument, and the local encoding will be assumed
for a file argument.
.TP
.B \-g \fIcommand\fR
Use \fIcommand\fR to fetch the contents of a URL. (By default,
\fBhtml2markdown\fR searches for an available program or text-based
browser to fetch the contents of a URL.) For example:
.IP
html2markdown \-g 'wget \-\-user=foo \-\-password=bar' mysite.com
.TP
.B \-n
Disable automatic fetching of contents when URLs are specified as
arguments.
.TP
.I pandoc\-opts
Any options appearing after \fIinput\-file\fR or \fIURL\fR on the
command line will be passed directly to \fBpandoc\fR. If no
\fIinput-file\fR or \fIURL\fR is specified, these options must
be preceded by ` \fB\-\-\fR '. (In other cases, ` \fB\-\-\fR ' is
optional.) See \fBpandoc\fR(1) for a list of options that may be used.
Example:
.IP
html2markdown input.txt \-\- \-R
.SH "SEE ALSO"
\fBpandoc\fR(1),
\fBmarkdown2html\fR(1),
\fBmarkdown2latex\fR(1),
\fBlatex2markdown\fR(1),
\fBmarkdown2pdf\fR(1),
\fBiconv\fR(1)
.SH AUTHOR
John MacFarlane and Recai Oktas
.so man1/pandoc.1

View file

@ -1,33 +1 @@
.TH LATEX2MARKDOWN 1 "November 21, 2006" Pandoc "User Manuals"
.SH NAME
latex2markdown \- converts LaTeX to markdown\-formatted text
.SH SYNOPSIS
\fBlatex2markdown\fR [\fIoptions\fR] [\fIinput\-file\fR]
[\fB\-\-\fR] [\fIpandoc\-opts\fR]
.SH DESCRIPTION
\fBlatex2markdown\fR converts \fIinput\-file\fR
(or text from STDIN) from LaTeX to markdown\-formatted plain text.
Output is sent to STDOUT.
.PP
\fBlatex2markdown\fR is a wrapper for \fBpandoc\fR.
.SH OPTIONS
.TP
.B \-h
Show usage message.
.TP
.I pandoc\-opts
Any options appearing after \fIinput\-file\fR on the command line
will be passed directly to \fBpandoc\fR. If no \fIinput-file\fR
is specified, these options must be preceded by ` \fB\-\-\fR '.
(In other cases, ` \fB\-\-\fR ' is optional.) See \fBpandoc\fR(1)
for a list of options that may be used. Example:
.IP
latex2markdown input.txt \-\- \-R
.SH "SEE ALSO"
\fBpandoc\fR(1),
\fBmarkdown2html\fR(1),
\fBhtml2markdown\fR(1),
\fBmarkdown2latex\fR(1),
\fBmarkdown2pdf\fR(1)
.SH AUTHOR
John MacFarlane and Recai Oktas
.so man1/pandoc.1

View file

@ -1,34 +1 @@
.TH MARKDOWN2HTML 1 "November 21, 2006" Pandoc "User Manuals"
.SH NAME
markdown2html \- converts markdown\-formatted text to HTML
.SH SYNOPSIS
\fBmarkdown2html\fR [\fIoptions\fR] [\fIinput\-file\fR]...
[\fB\-\-\fR] [\fIpandoc\-opts\fR]
.SH DESCRIPTION
\fBmarkdown2html\fR converts \fIinput\-file\fR
(or text from STDIN) from markdown\-formatted plain text to HTML.
If multiple files are specified, they will be combined to make a single
HTML document. Output is sent to STDOUT.
.PP
\fBmarkdown2html\fR is a wrapper for \fBpandoc\fR.
.SH OPTIONS
.TP
.B \-h
Show usage message.
.TP
.I pandoc\-opts
Any options appearing after \fIinput\-file\fR... on the command line
will be passed directly to \fBpandoc\fR. If no \fIinput-file\fR
is specified, these options must be preceded by ` \fB\-\-\fR '.
(In other cases, ` \fB\-\-\fR ' is optional.) See \fBpandoc\fR(1)
for a list of options that may be used. Example:
.IP
markdown2html input.txt \-\- \-\-css=main.css \-S
.SH "SEE ALSO"
\fBpandoc\fR(1),
\fBhtml2markdown\fR(1),
\fBmarkdown2latex\fR(1),
\fBlatex2markdown\fR(1),
\fBmarkdown2pdf\fR(1)
.SH AUTHOR
John MacFarlane and Recai Oktas
.so man1/pandoc.1

View file

@ -1,34 +1 @@
.TH MARKDOWN2LATEX 1 "November 21, 2006" Pandoc "User Manuals"
.SH NAME
markdown2latex \- converts markdown-formatted text to LaTeX
.SH SYNOPSIS
\fBmarkdown2latex\fR [\fIoptions\fR] [\fIinput\-file\fR]...
[\fB\-\-\fR] [\fIpandoc\-opts\fR]
.SH DESCRIPTION
\fBmarkdown2latex\fR converts \fIinput\-file\fR (or text from STDIN)
from markdown\-formatted plain text to LaTeX. If multiple files are
specified, they will be combined to make a single LaTeX document.
Output is sent to STDOUT.
.PP
\fBmarkdown2latex\fR is a wrapper for \fBpandoc\fR.
.SH OPTIONS
.TP
.B \-h
Show usage message.
.TP
.I pandoc\-opts
Any options appearing after \fIinput\-file\fR... on the command line
will be passed directly to \fBpandoc\fR. If no \fIinput-file\fR
is specified, these options must be preceded by ` \fB\-\-\fR '.
(In other cases, ` \fB\-\-\fR ' is optional.) See \fBpandoc\fR(1)
for a list of options that may be used. Example:
.IP
markdown2latex input.txt \-\- \-\-custom\-header=letterhead.tex
.SH "SEE ALSO"
\fBpandoc\fR(1),
\fBmarkdown2html\fR(1),
\fBhtml2markdown\fR(1),
\fBlatex2markdown\fR(1),
\fBmarkdown2pdf\fR(1)
.SH AUTHOR
John MacFarlane and Recai Oktas
.so man1/pandoc.1

View file

@ -1,43 +1,71 @@
.TH MARKDOWN2PDF 1 "November 21, 2006" Pandoc "User Manuals"
.TH MARKDOWN2PDF 1 "December 15, 2006" Pandoc "User Manuals"
.SH NAME
markdown2pdf \- converts markdown-formatted text to PDF, using pdflatex
.SH SYNOPSIS
\fBmarkdown2pdf\fR [\fIoptions\fR] [\fB\-o\fR \fIoutput-file\fR]
[\fIinput-file\fR]... [\fB\-\-\fR] [\fIpandoc\-opts\fR]
\fBmarkdown2pdf\fR [\fIoptions\fR] [\fIinput-file\fR]...
.SH DESCRIPTION
\fBmarkdown2pdf\fR converts \fIinput\-file\fR (or text from STDIN) from
markdown\-formatted plain text to PDF, using \fBpdflatex\fR. If no output
filename is specified, the name of the output file is derived from the
input file; thus, for example, if the input file is \fIhello.txt\fR,
the output file will be \fIhello.pdf\fR. If the input is read from STDIN
and no output filename is specified, the output file will be named
\fIstdin.pdf\fR. If multiple input files are specified, they will be
concatenated before conversion, and the name of the output file will be
derived from the first input file.
\fBmarkdown2pdf\fR converts \fIinput\-file\fR (or text from standard
input) from markdown\-formatted plain text to PDF, using \fBpdflatex\fR.
If no output filename is specified, the name of the output file is
derived from the input file; thus, for example, if the input file
is \fIhello.txt\fR, the output file will be \fIhello.pdf\fR. If
the input is read from STDIN and no output filename is
specified, the output file will be named \fIstdin.pdf\fR. If
multiple input files are specified, they will be concatenated before
conversion, and the name of the output file will be derived from
the first input file.
.PP
\fBmarkdown2pdf\fR is a wrapper for \fBpandoc\fR.
Input is assumed to be in the UTF\-8 character encoding. If your
local character encoding is not UTF\-8, you should pipe input
through \fBiconv\fR:
.IP
.B iconv \-t utf\-8 input.txt | markdown2pdf \-o output.pdf
.PP
\fBmarkdown2pdf\fR assumes that the 'unicode' package
is in latex's search path. If this package is not included in your
latex setup, it can be obtained from <http://ctan.org>.
.PP
\fBmarkdown2pdf\fR is a wrapper around \fBmarkdown2latex\fR.
.SH OPTIONS
.TP
.B \-h
.B \-o FILE, \-\-output=FILE
Write output to \fIFILE\fR.
.TP
.B \-p, \-\-preserve-tabs
Preserve tabs instead of converting them to spaces.
.TP
.B \-\-tab-stop=\fITABSTOP\fB
Specify tab stop (default is 4).
.TP
.B \-R, \-\-parse-raw
Parse untranslatable LaTeX environments as raw LaTeX,
instead of ignoring them.
.TP
.B \-N, \-\-number-sections
Number section headings in LaTeX output. (Default is not to number them.)
.TP
.B \-H \fIFILE\fB, \-\-include-in-header=\fIFILE\fB
Include (LaTeX) contents of \fIFILE\fR at the end of the header. Implies
\fB\-s\fR.
.TP
.B \-B \fIFILE\fB, \-\-include-before-body=\fIFILE\fB
Include (LaTeX) contents of \fIFILE\fR at the beginning of the document body.
.TP
.B \-A \fIFILE\fB, \-\-include-after-body=\fIFILE\fB
Include (LaTeX) contents of \fIFILE\fR at the end of the document body.
.TP
.B \-C \fIFILE\fB, \-\-custom-header=\fIFILE\fB
Use contents of \fIFILE\fR
as the LaTeX document header (overriding the default header, which can be
printed using '\fBpandoc \-D latex\fR'). Implies \fB-s\fR.
.TP
.B \-v, \-\-version
Print version.
.TP
.B \-h, \-\-help
Show usage message.
.TP
.B \-o \fIoutput-file\fR
Specify name of output (PDF) file.
.TP
.I pandoc\-opts
Any options appearing after \fIinput\-file\fR... on the command line
will be passed directly to \fBpandoc\fR. If no \fIinput-file\fR
is specified, these options must be preceded by ` \fB\-\-\fR '.
(In other cases, ` \fB\-\-\fR ' is optional.) See \fBpandoc\fR(1)
for a list of options that may be used. Example:
.IP
markdown2pdf input.txt \-\- \-\-custom\-header=letterhead.tex
.SH "SEE ALSO"
\fBpandoc\fR(1),
\fBmarkdown2html\fR(1),
\fBhtml2markdown\fR(1),
\fBmarkdown2latex\fR(1),
\fBlatex2markdown\fR(1),
\fBpdflatex\fR(1)
.SH AUTHOR
John MacFarlane and Recai Oktas

1
man/man1/markdown2rst.1 Normal file
View file

@ -0,0 +1 @@
.so man1/pandoc.1

1
man/man1/markdown2rtf.1 Normal file
View file

@ -0,0 +1 @@
.so man1/pandoc.1

1
man/man1/markdown2s5.1 Normal file
View file

@ -0,0 +1 @@
.so man1/pandoc.1

View file

@ -1,18 +1,23 @@
.TH PANDOC 1 "November 21, 2006" Pandoc "User Manuals"
.TH PANDOC 1 "December 15, 2006" Pandoc "User Manuals"
.SH NAME
pandoc \- general markup converter
pandoc, markdown2html, markdown2latex, markdown2rst, markdown2rtf,
markdown2s5, html2markdown, latex2markdown, rst2markdown \- general
markup converter
.SH SYNOPSIS
\fBpandoc\fR [\fIoptions\fR] [\fIinput\-file\fR]...
.SH DESCRIPTION
\fIPandoc\fR converts files from one markup format to another. It can
\fBPandoc\fR converts files from one markup format to another. It can
read markdown and (subsets of) reStructuredText, HTML, and LaTeX, and
it can write markdown, reStructuredText, HTML, LaTeX, RTF, and S5 HTML
slide shows.
.PP
If no \fIinput\-file\fR is specified, input is read from STDIN. Otherwise,
the \fIinput\-files\fR are concatenated (with a blank line between each)
and used as input. Output goes to STDOUT. If you want output to a file,
use shell redirection:
If no \fIinput\-file\fR is specified, input is read from STDIN.
Otherwise, the \fIinput\-files\fR are concatenated (with a blank
line between each) and used as input. Output goes to standard
output. If you want output to a file, use the \fB\-o\fR option or
shell redirection:
.IP
.B pandoc \-o output.html input.txt
.IP
.B pandoc input.txt > output.html
.PP
@ -25,6 +30,19 @@ formats can be specified using command\-line options. For example,
converts \fIchap1.tex\fR from LaTeX to markdown\-formatted plain text.
See below for a detailed list of command\-line options.
.PP
For convenience, eight variant programs are available:
\fBmarkdown2html\fR (same as \fBpandoc \-w html\fR),
\fBmarkdown2latex\fR (same as \fBpandoc \-w latex\fR),
\fBmarkdown2rst\fR (same as \fBpandoc \-w rst\fR),
\fBmarkdown2rtf\fR (same as \fBpandoc \-w rtf\fR),
\fBmarkdown2s5\fR (same as \fBpandoc \-w s5\fR),
\fBhtml2markdown\fR (same as \fBpandoc \-r html \-w markdown\fR),
\fBlatex2markdown\fR (same as \fBpandoc \-r latex \-w markdown\fR),
and \fBrst2markdown\fR (same as \fBpandoc \-r rst \-w markdown\fR).
These programs take an appropriately restricted subset of \fBpandoc\fR's
options. (Run them with the \fB-h\fR flag for a full list of allowed
options.)
.PP
\fIPandoc\fR uses the UTF\-8 character encoding for both input and output.
If your local character encoding is not UTF\-8, you should pipe input
and output through \fBiconv\fR:
@ -33,61 +51,58 @@ and output through \fBiconv\fR:
.SH OPTIONS
.TP
.B \-v, \-\-version
Print version.
.TP
.B \-h, \-\-help
Show usage message.
.TP
.B \-f FORMAT, \-r FORMAT, \-\-from=FORMAT, \-\-read=FORMAT
.B \-f \fIFORMAT\fB, \-r \fIFORMAT\fB, \-\-from=\fIFORMAT\fB, \-\-read=\fIFORMAT\fB
Specify input format.
.I FORMAT
can be
.I native
.B native
(native Haskell),
.I markdown
.B markdown
(markdown or plain text),
.I rst
.B rst
(reStructuredText),
.I html
.B html
(HTML),
or
.I latex
.B latex
(LaTeX).
.TP
.B \-t FORMAT, \-w FORMAT, \-\-to=FORMAT, \-\-write=FORMAT
.B \-t \fIFORMAT\fB, \-w \fIFORMAT\fB, \-\-to=\fIFORMAT\fB, \-\-write=\fIFORMAT\fB
Specify output format.
.I FORMAT
can be
.I native
.B native
(native Haskell),
.I markdown
.B markdown
(markdown or plain text),
.I rst
.B rst
(reStructuredText),
.I html
.B html
(HTML),
.I latex
.B latex
(LaTeX),
.I s5
.B s5
(S5 HTML and javascript slide show),
or
.I rtf
.B rtf
(rich text format).
.TP
.B \-s, \-\-standalone
Produce output with an appropriate header and footer (e.g. a
standalone HTML, LaTeX, or RTF file, not a fragment).
.TP
.B \-o \fIFILE\fB, \-\-output=\fIFILE\fB
Write output to \fIFILE\fR instead of STDOUT.
.TP
.B \-p, \-\-preserve-tabs
Preserve tabs instead of converting them to spaces.
.TP
.B \-\-tab-stop=TABSTOP
.B \-\-tab-stop=\fITABSTOP\fB
Specify tab stop (default is 4).
.TP
.B \-R, \-\-parse-raw
Parse untranslatable HTML codes and LaTeX environments as raw HTML or
LaTeX, instead of ignoring them.
Parse untranslatable HTML codes and LaTeX environments as raw HTML
or LaTeX, instead of ignoring them.
.TP
.B \-S, \-\-smartypants
Use smart quotes, dashes, and ellipses in HTML output.
@ -99,41 +114,50 @@ Use ASCIIMathML to display embedded LaTeX math in HTML output.
Make list items in S5 display incrementally (one by one).
.TP
.B \-N, \-\-number-sections
Number section headings in LaTeX output. (Default is not to number them.)
Number section headings in LaTeX output. (Default is not to number
them.)
.TP
.B \-c CSS, \-\-css=CSS
.B \-c \fICSS\fB, \-\-css=\fICSS\fB
Link to a CSS style sheet.
.I CSS
is the pathname of the style sheet.
.TP
.B \-H FILENAME, \-\-include-in-header=FILENAME
Include contents of \fIFILENAME\fR at the end of the header. Implies
.B \-H \fIFILE\fB, \-\-include-in-header=\fIFILE\fB
Include contents of \fIFILE\fR at the end of the header. Implies
\fB\-s\fR.
.TP
.B \-B FILENAME, \-\-include-before-body=FILENAME
Include contents of \fIFILENAME\fR at the beginning of the document body.
.B \-B \fIFILE\fB, \-\-include-before-body=\fIFILE\fB
Include contents of \fIFILE\fR at the beginning of the document
body.
.TP
.B \-A FILENAME, \-\-include-after-body=FILENAME
Include contents of \fIFILENAME\fR at the end of the document body.
.B \-A \fIFILE\fB, \-\-include-after-body=\fIFILE\fB
Include contents of \fIFILE\fR at the end of the document body.
.TP
.B \-\-custom-header=FILENAME
Use contents of \fIFILENAME\fR
as the document header (overriding the default header, which can be
printed by using the \fB\-D\fR option). Implies
\fB-s\fR.
.B \-C \fIFILE\fB, \-\-custom-header=\fIFILE\fB
Use contents of \fIFILE\fR as the document header (overriding the
default header, which can be printed by using the \fB\-D\fR option).
Implies \fB\-s\fR.
.TP
.B \-D FORMAT, \-\-print-default-header=FORMAT
Print the default header for \fIFORMAT\fR
(\fIhtml, s5, latex, markdown, rst, rtf\fR).
.B \-D \fIFORMAT\fB, \-\-print-default-header=\fIFORMAT\fB
Print the default header for \fIFORMAT\fR (\fIhtml, s5, latex,
markdown, rst, rtf\fR).
.TP
.B \-T STRING, \-\-title-prefix=STRING
.B \-T \fISTRING\fB, \-\-title-prefix=\fISTRING\fB
Specify \fISTRING\fR as a prefix to the HTML window title.
.TP
.B \-d, \-\-debug
Print debugging information (names of input and output files) to
STDERR. Write output to STDOUT, even if an output file was specified
using the \fB\-o\fR option.
.TP
.B \-v, \-\-version
Print version.
.TP
.B \-h, \-\-help
Show usage message.
.SH "SEE ALSO"
\fBmarkdown2html\fR(1),
\fBhtml2markdown\fR(1),
\fBmarkdown2latex\fR(1),
\fBlatex2markdown\fR(1),
\fBweb2markdown\fR(1),
\fBmarkdown2pdf\fR(1),
\fBiconv\fR(1)

1
man/man1/rst2markdown.1 Normal file
View file

@ -0,0 +1 @@
.so man1/pandoc.1

82
man/man1/web2markdown.1 Normal file
View file

@ -0,0 +1,82 @@
.TH WEB2MARKDOWN 1 "December 15, 2006" Pandoc "User Manuals"
.SH NAME
web2markdown \- converts HTML to markdown-formatted text
.SH SYNOPSIS
\fBweb2markdown\fR [\fIoptions\fR] [\fIinput\-file\fR or \fIURL\fR]
.SH DESCRIPTION
\fBweb2markdown\fR converts \fIinput\-file\fR or \fIURL\fR (or text
from STDIN) from HTML to markdown\-formatted plain text.
If a URL is specified, \fBweb2markdown\fR uses an available program
(e.g. wget, w3m, lynx or curl) to fetch its contents. Output is sent
to STDOUT unless an output file is specified using the \fB\-o\fR
option.
.PP
\fBweb2markdown\fR uses the character encoding specified in the
"Content-type" meta tag. If this is not present, or if input comes
from STDIN, UTF-8 is assumed. A character encoding may be specified
explicitly using the \fB\-e\fR option.
.PP
\fBweb2markdown\fR is a wrapper for \fBhtml2markdown\fR.
.SH OPTIONS
.TP
.B \-s, \-\-standalone
Include title, author, and date information (if present) at the
top of markdown output.
.TP
.B \-o \fIFILE\fB, \-\-output=\fIFILE\fB
Write output to \fIFILE\fR instead of STDOUT.
.TP
.B \-p, \-\-preserve-tabs
Preserve tabs instead of converting them to spaces.
.TP
.B \-\-tab-stop=\fITABSTOP\fB
Specify tab stop (default is 4).
.TP
.B \-R, \-\-parse-raw
Parse untranslatable HTML codes as raw HTML.
.TP
.B \-H \fIFILE\fB, \-\-include-in-header=\fIFILE\fB
Include contents of \fIFILE\fR at the end of the header. Implies
\fB\-s\fR.
.TP
.B \-B \fIFILE\fB, \-\-include-before-body=\fIFILE\fB
Include contents of \fIFILE\fR at the beginning of the document body.
.TP
.B \-A \fIFILE\fB, \-\-include-after-body=\fIFILE\fB
Include contents of \fIFILE\fR at the end of the document body.
.TP
.B \-C \fIFILE\fB, \-\-custom-header=\fIFILE\fB
Use contents of \fIFILE\fR
as the document header (overriding the default header, which can be
printed using '\fBpandoc \-D markdown\fR'). Implies
\fB\-s\fR.
.TP
.B \-v, \-\-version
Print version.
.TP
.B \-h, \-\-help
Show usage message.
.TP
.B \-e \fIENCODING\fB, \-\-encoding=\fIENCODING\fB
Assume the character encoding \fIENCODING\fR in reading HTML.
(Note: \fIENCODING\fR will be passed to \fBiconv\fR; a list of
available encodings may be obtained using `\fBiconv \-l\fR'.)
If the \fB\-e\fR option is not specified and input is not from
STDIN, \fBweb2markdown\fR will try to extract the character encoding
from the "Content-type" meta tag. If no character encoding is
specified in this way, or if input is from STDIN, UTF-8 will be
assumed.
.TP
.B \-g \fICOMMAND\fB, \-\-grabber=\fICOMMAND\fB
Use \fICOMMAND\fR to fetch the contents of a URL. (By default,
\fBweb2markdown\fR searches for an available program or text-based
browser to fetch the contents of a URL.) For example:
.IP
web2markdown \-g 'wget \-\-user=foo \-\-password=bar' mysite.com
.SH "SEE ALSO"
\fBpandoc\fR(1),
\fBhtml2markdown\fR(1),
\fBiconv\fR(1)
.SH AUTHOR
John MacFarlane and Recai Oktas

View file

@ -45,6 +45,7 @@ import Text.Pandoc.Writers.DefaultHeaders ( defaultHtmlHeader,
defaultRTFHeader, defaultS5Header, defaultLaTeXHeader )
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Regex ( mkRegex, splitRegex )
import System ( exitWith, getArgs, getProgName )
import System.Exit
import System.Console.GetOpt
@ -57,6 +58,9 @@ import Control.Monad ( (>>=) )
version :: String
version = "0.3"
copyrightMessage :: String
copyrightMessage = "\nCopyright (C) 2006 John MacFarlane\nWeb: http://sophos.berkeley.edu/macfarlane/pandoc\nThis is free software; see the source for copying conditions. There is no\nwarranty, not even for merchantability or fitness for a particular purpose."
-- | Association list of formats and readers.
readers :: [(String, ParserState -> String -> Pandoc)]
readers = [("native" , readPandoc)
@ -101,10 +105,13 @@ data Opt = Opt
, optCustomHeader :: String -- ^ Custom header to use, or "DEFAULT"
, optDefaultHeader :: String -- ^ Default header
, optTitlePrefix :: String -- ^ Optional prefix for HTML title
, optOutputFile :: String -- ^ Name of output file
, optNumberSections :: Bool -- ^ If @True@, number sections in LaTeX
, optIncremental :: Bool -- ^ If @True@, incremental lists in S5
, optSmart :: Bool -- ^ If @True@, use smart typography
, optASCIIMathML :: Bool -- ^ If @True@, use ASCIIMathML in HTML
, optShowUsage :: Bool -- ^ If @True@, show usage message
, optDebug :: Bool -- ^ If @True@, output debug messages
}
-- | Defaults for command-line options.
@ -123,32 +130,20 @@ startOpt = Opt
, optCustomHeader = "DEFAULT"
, optDefaultHeader = defaultHtmlHeader
, optTitlePrefix = ""
, optOutputFile = "" -- null for stdout
, optNumberSections = False
, optIncremental = False
, optSmart = False
, optASCIIMathML = False
, optShowUsage = False
, optDebug = False
}
-- | A list of functions, each transforming the options data structure in response
-- to a command-line option.
options :: [OptDescr (Opt -> IO Opt)]
options =
[ Option "v" ["version"]
(NoArg
(\_ -> do
hPutStrLn stderr ("Version " ++ version)
exitWith ExitSuccess))
"Print version"
, Option "h" ["help"]
(NoArg
(\_ -> do
prg <- getProgName
hPutStrLn stderr (usageInfo (prg ++ " [OPTIONS] [FILES] - convert FILES from one markup format to another\nIf no OPTIONS specified, converts from markdown to html.\nIf no FILES specified, input is read from STDIN.\nOptions:") options)
exitWith ExitSuccess))
"Show help"
, Option "fr" ["from","read"]
allOptions :: [OptDescr (Opt -> IO Opt)]
allOptions =
[ Option "fr" ["from","read"]
(ReqArg
(\arg opt -> case (lookup (map toLower arg) readers) of
Just reader -> return opt { optReader = reader }
@ -172,6 +167,13 @@ options =
(\opt -> return opt { optStandalone = True }))
"Include needed header and footer on output"
, Option "o" ["output"]
(ReqArg
(\arg opt -> do
return opt { optOutputFile = arg })
"FILENAME")
"Name of output file"
, Option "p" ["preserve-tabs"]
(NoArg
(\opt -> return opt { optPreserveTabs = True }))
@ -241,7 +243,7 @@ options =
"FILENAME")
"File to include after document body"
, Option "" ["custom-header"]
, Option "C" ["custom-header"]
(ReqArg
(\arg opt -> do
text <- readFile arg
@ -263,18 +265,87 @@ options =
let header = case (lookup arg writers) of
Just (writer, head) -> head
Nothing -> error ("Unknown reader: " ++ arg)
hPutStrLn stdout header
hPutStr stdout header
exitWith ExitSuccess)
"FORMAT")
"Print default header for FORMAT"
, Option "d" ["debug"]
(NoArg
(\opt -> return opt { optDebug = True }))
"Print debug messages to stderr, output to stdout"
, Option "v" ["version"]
(NoArg
(\_ -> do
prg <- getProgName
hPutStrLn stderr (prg ++ " " ++ version ++
copyrightMessage)
exitWith $ ExitFailure 2))
"Print version"
, Option "h" ["help"]
(NoArg
(\opt -> return opt { optShowUsage = True }))
"Show help"
]
-- | Work out the default reader and writer names from the name the program
-- was invoked under.  The name is lower-cased and split at every @2@: a name
-- of the form @from2to@ gives @(from, to)@, and any other shape falls back
-- to @("markdown", "html")@.
parseProgName :: String -> (String, String)
parseProgName name = defaultsFrom (splitRegex (mkRegex "2") (map toLower name))
  where
    defaultsFrom [from, to] = (from, to)
    defaultsFrom _          = ("markdown", "html")
-- | Install the reader named @from@ and the writer (with its default header)
-- named @to@ into the starting options @start@.  If either name is missing
-- from 'readers' or 'writers', @start@ is returned unchanged.
setDefaultOpts from to start = maybe start id $ do
  reader           <- lookup from readers
  (writer, header) <- lookup to writers
  return start { optReader        = reader
               , optWriter        = writer
               , optDefaultHeader = header }
-- | 'True' when at least one of the option's single-letter names occurs
-- in @list@.
inOptList :: [Char] -> OptDescr (Opt -> IO Opt) -> Bool
inOptList list (Option letters _ _ _) = any (`elem` list) letters
-- Reformat usage message so it doesn't wrap illegibly.
-- The three substitutions are chained with (.), so they are applied to the
-- usage string from the last line upwards: the "\n\t\\1" rewrite runs first,
-- then the one inserting ", " between a short option and "--", then the one
-- rewriting the spaces in front of "--".
-- NOTE(review): gsub is not defined in this view; presumably it is a global
-- regex substitution taking pattern, replacement and subject -- confirm.
-- NOTE(review): runs of spaces inside these literals may have been collapsed
-- by whatever rendered this text; compare with the repository copy before
-- relying on the exact patterns.
reformatUsageInfo = gsub " *--" " --" .
gsub "(-[A-Za-z0-9]) *--" "\\1, --" .
gsub " *([^- ])" "\n\t\\1"
main = do
name <- getProgName
let (from, to) = parseProgName name
let irrelevantOptions = if not ('2' `elem` name)
then ""
else "frtwD" ++
(if (to /= "html" && to /= "s5") then "SmcT" else "") ++
(if (to /= "latex") then "N" else "") ++
(if (to /= "s5") then "i" else "") ++
(if (from /= "html" && from /= "latex") then "R" else "")
let options = filter (not . inOptList irrelevantOptions) allOptions
let defaultOpts = setDefaultOpts from to startOpt
args <- getArgs
let (actions, sources, errors) = getOpt RequireOrder options args
let (actions, sources, errors) = getOpt Permute options args
if (not (null errors))
then do
mapM (\e -> hPutStrLn stderr e) errors
hPutStrLn stderr (reformatUsageInfo $
usageInfo (name ++ " [OPTIONS] [FILES]") options)
exitWith $ ExitFailure 2
else
return ()
-- thread option data structure through all supplied option actions
opts <- foldl (>>=) (return startOpt) actions
opts <- foldl (>>=) (return defaultOpts) actions
let Opt { optPreserveTabs = preserveTabs
, optTabStop = tabStop
@ -289,12 +360,31 @@ main = do
, optCustomHeader = customHeader
, optDefaultHeader = defaultHeader
, optTitlePrefix = titlePrefix
, optOutputFile = outputFile
, optNumberSections = numberSections
, optIncremental = incremental
, optSmart = smart
, optASCIIMathML = asciiMathML
, optShowUsage = showUsage
, optDebug = debug
} = opts
if showUsage
then do
hPutStr stderr (reformatUsageInfo $ usageInfo (name ++ " [OPTIONS] [FILES]") options)
exitWith $ ExitFailure 2
else return ()
output <- if ((null outputFile) || debug)
then return stdout
else openFile outputFile WriteMode
if debug
then do
hPutStrLn stderr ("OUTPUT=" ++ outputFile)
hPutStr stderr $ concatMap (\s -> "INPUT=" ++ s ++ "\n") sources
else return ()
let writingS5 = (defaultHeader == defaultS5Header)
let tabFilter = if preserveTabs then id else (tabsToSpaces tabStop)
let addBlank str = str ++ "\n\n"
@ -323,13 +413,13 @@ main = do
writerIncludeBefore = includeBefore,
writerIncludeAfter = includeAfter }
(readSources sources) >>= (putStr . encodeUTF8 . (writer writerOptions) .
(readSources sources) >>= (hPutStr output . encodeUTF8 .
(writer writerOptions) .
(reader startParserState) . filter .
decodeUTF8 . (joinWithSep "\n"))
decodeUTF8 . (joinWithSep "\n")) >> hClose output
where
readSources [] = mapM readSource ["-"]
readSources sources = mapM readSource sources
readSource "-" = getContents
readSource source = readFile source

View file

@ -1,7 +0,0 @@
# Verify the input files: every non-empty argument must name an existing
# regular file; otherwise report the first offender and exit with status 1.
for f in "$@"; do
    [ -z "$f" ] && continue
    [ -f "$f" ] && continue
    err "File '$f' not found."
    exit 1
done

View file

@ -8,22 +8,6 @@ WRAPPEE_ARGS=
err () { echo "$*" | fold -s -w ${COLUMNS:-110} >&2; }
errn () { printf "$*" | fold -s -w ${COLUMNS:-110} >&2; }
usage () {
synopsis="$@"
err "Usage: $THIS $synopsis"
err "See $THIS(1) man file for details."
}
runpandoc () {
if [ -n "$WRAPPEE_ARGS" ]; then
# Unpack arguments that will be passed to pandoc.
oldifs="$IFS"; IFS="$NEWLINE"; set -- $WRAPPEE_ARGS "$@"; IFS="$oldifs"
case "$1" in --) shift;; esac # tolerate the existence of a leading '--'
fi
pandoc "$@"
}
# Portable which(1).
pathfind () {
oldifs="$IFS"; IFS=':'
@ -37,17 +21,6 @@ pathfind () {
return 1
}
HAVE_ICONV=
if pathfind iconv; then
HAVE_ICONV=1
alias to_utf8='iconv -t utf-8'
alias from_utf8='iconv -f utf-8'
else
err "Warning: iconv not present. Assuming UTF-8 character encoding."
alias to_utf8='cat'
alias from_utf8='cat'
fi
for p in pandoc $REQUIRED; do
pathfind $p || {
err "You need '$p' to use this program!"

View file

@ -1,12 +0,0 @@
# Minimal option parsing for wrappers whose only own option is -h.
# Unless the including script has already set SYNOPSIS, use a default one;
# "..." is appended to it when THIS_NARG is empty.
if [ -z "$SYNOPSIS" ]; then
SYNOPSIS="[-h] [input_file]"
[ -n "$THIS_NARG" ] || SYNOPSIS="${SYNOPSIS}..."
fi
# Both -h and any option getopts rejects print the usage line and exit 2.
# Note: the unquoted ? below is a pattern matching any single character,
# not only a literal question mark.
while getopts h opt; do
case $opt in
h|?) usage "$SYNOPSIS"; exit 2 ;;
esac
done
# Drop the options that getopts consumed; "$@" now starts at the first operand.
shift $(($OPTIND - 1))

View file

@ -1,134 +0,0 @@
#!/bin/sh -e
# converts html to markdown
# uses an available program to fetch URL and tidy to normalize it first
REQUIRED=tidy
### common.sh
grab_url_with () {
url="${1:?internal error: grab_url_with: url required}"
shift
cmdline="$@"
prog=
prog_opts=
if [ -n "$cmdline" ]; then
eval "set -- $cmdline"
prog=$1
shift
prog_opts="$@"
fi
if [ -z "$prog" ]; then
# Locate a sensible web grabber (note the order).
for p in wget lynx w3m curl links w3c; do
if pathfind $p; then
prog=$p
break
fi
done
[ -n "$prog" ] || {
errn "$THIS: Couldn't find a program to fetch the file from URL "
err "(e.g. wget, w3m, lynx, w3c, or curl)."
return 1
}
else
pathfind "$prog" || {
err "$THIS: No such web grabber '$prog' found; aborting."
return 1
}
fi
# Setup proper base options for known grabbers.
base_opts=
case "$prog" in
wget) base_opts="-O-" ;;
lynx) base_opts="-source" ;;
w3m) base_opts="-dump_source" ;;
curl) base_opts="" ;;
links) base_opts="-source" ;;
w3c) base_opts="-n -get" ;;
*) err "$THIS: unhandled web grabber '$prog'; hope it succeeds."
esac
err "$THIS: invoking '$prog $base_opts $prog_opts $url'..."
eval "set -- $base_opts $prog_opts"
$prog "$@" "$url"
}
encoding=
grabber=
nograb=
while getopts e:g:nh opt; do
case $opt in
e) encoding="$OPTARG" ;;
g) grabber="$OPTARG" ;;
n) nograb=1 ;;
h|?)
usage "[-e encoding] [-g grabber_command] [-n] [-h] [input_file|url]"
exit 2 ;;
esac
done
shift $(($OPTIND - 1))
### postopts.sh
### singlearg.sh
inurl=
if [ -n "$1" ] && ! [ -f "$1" ]; then
if [ -n "$nograb" ]; then
err "'$1' not found; refusing to treat input as URL."
exit 1
fi
# Treat given argument as an URL.
inurl="$1"
fi
if [ -n "$inurl" ]; then
err "Attempting to fetch file from '$inurl'..."
### tempdir.sh
grabber_out=$THIS_TEMPDIR/grabber.out
grabber_log=$THIS_TEMPDIR/grabber.log
if ! grab_url_with "$inurl" "$grabber" 1>$grabber_out \
2>$grabber_log; then
errn "grab_url_with failed"
if [ -f $grabber_log ]; then
err " with the following error log."
err
cat >&2 $grabber_log
else
err .
fi
exit 1
fi
set -- $grabber_out
fi
if [ -z "$encoding" ] && [ "x$@" != "x" ]; then
# Try to determine character encoding unless not specified
# and input is STDIN.
encoding=$(
head "$@" |
LC_ALL=C tr 'A-Z' 'a-z' |
sed -ne '/<meta .*content-type.*charset=/ {
s/.*charset=["'\'']*\([-a-zA-Z0-9]*\).*["'\'']*/\1/p
}'
)
fi
if [ -n "$encoding" ] && [ -n "$HAVE_ICONV" ]; then
alias to_utf8='iconv -f "$encoding" -t utf-8'
elif [ -n "$inurl" ]; then # assume web pages are UTF-8
alias to_utf8='cat'
fi # else just use local encoding
to_utf8 "$@" | tidy -utf8 2>/dev/null |
runpandoc -r html -w markdown -s | from_utf8

View file

@ -1,14 +0,0 @@
#!/bin/sh -e
# runs pandoc to convert latex to markdown
### common.sh
### getopts.sh
### postopts.sh
### singlearg.sh
### checkin.sh
to_utf8 "$@" | runpandoc -r latex -w markdown -s | from_utf8

View file

@ -1,12 +0,0 @@
#!/bin/sh -e
# converts markdown to HTML
### common.sh
### getopts.sh
### postopts.sh
### checkin.sh
to_utf8 "$@" | runpandoc | from_utf8

View file

@ -1,12 +0,0 @@
#!/bin/sh -e
# converts markdown to latex
### common.sh
### getopts.sh
### postopts.sh
### checkin.sh
to_utf8 "$@" | runpandoc -w latex -s | from_utf8

View file

@ -1,64 +1,54 @@
#!/bin/sh -e
# converts markdown to latex, then uses latex to make a PDF
REQUIRED=pdflatex
REQUIRED="markdown2latex pdflatex"
### common.sh
outfile=
while getopts o:h opt; do
case $opt in
o) outfile="$OPTARG" ;;
h|?) usage "[-o output_file] [-h] [input_file]..."; exit 2 ;;
esac
done
shift $(($OPTIND - 1))
### postopts.sh
### checkin.sh
if [ -z "$outfile" ]; then
if [ -n "$1" ]; then
outfile="${1%.*}"
else
outfile="stdin" # input is STDIN, since no argument given
fi
fi
case "$outfile" in
*.*) ;; # skip appending extension if one is already present
*) outfile="${outfile%.*}.pdf";;
esac
### tempdir.sh
# We should use a filename without white spaces for pdflatex.
TEXNAME=$THIS
texname=output
logfile=$THIS_TEMPDIR/log
if ! markdown2latex -s -d "$@" >$THIS_TEMPDIR/$texname.tex 2>$logfile; then
[ -f $logfile ] && sed -e 's/markdown2latex/markdown2pdf/g' \
-e '/^INPUT=/d' -e '/^OUTPUT=/d' $logfile >&2
exit 1
fi
outfile="$(sed -ne 's/^OUTPUT=//p' $logfile)"
IFS="$NEWLINE"
set -- $(sed -ne 's/^INPUT=//p' $logfile)
firstinfilebase="${1%.*}"
defaultdest="${firstinfilebase:-stdin}.pdf"
destname="${outfile:-$defaultdest}"
to_utf8 "$@" | runpandoc -w latex -s >$THIS_TEMPDIR/$TEXNAME.tex
(
cd $THIS_TEMPDIR
if ! pdflatex -interaction=batchmode $TEXNAME.tex >/dev/null 2>&1; then
if ! pdflatex -interaction=batchmode $texname.tex >/dev/null 2>&1; then
err "LaTeX errors:"
from_utf8 $TEXNAME.log | sed -ne '/^!/,/^ *$/p' >&2
if grep -q "File \`ucs.sty' not found" $TEXNAME.log; then
err "Please install the 'unicode' package from ctan.org."
sed -ne '/^!/,/^ *$/p' $texname.log >&2
if grep -q "File \`ucs.sty' not found" $texname.log; then
err "Please install the 'unicode' package from CTAN:"
err "http://www.ctan.org/tex-archive/macros/latex/contrib/unicode/"
fi
if grep -q "File \`fancyvrb.sty' not found" $texname.log; then
err "Please install the 'fancyvrb' package from CTAN:"
err "http://www.ctan.org/tex-archive/macros/latex/contrib/fancyvrb/"
fi
exit 1
fi
)
) || exit $?
is_target_exists=
if [ -f "$outfile" ]; then
if [ -f "$destname" ]; then
is_target_exists=1
mv -f "$outfile" "$outfile~"
mv "$destname" "$destname~"
fi
mv -f $THIS_TEMPDIR/$TEXNAME.pdf "$outfile"
mv -f $THIS_TEMPDIR/$texname.pdf "$destname"
errn "Created '$outfile'"
errn "Created $destname"
[ -z "$is_target_exists" ] || {
errn " (previous file has been backed up as '$outfile~')"
errn " (previous file has been backed up as $destname~)"
}
err .

View file

@ -1,17 +0,0 @@
# Parse wrapper and wrappee (pandoc) arguments by taking
# into account that they may have space or tab characters.
# Arguments are collected into WRAPPER_ARGS until the first one that starts
# with '-'; that argument and everything after it go to WRAPPEE_ARGS.
pick="WRAPPER_ARGS"
while [ $# -gt 0 ]; do
# The switch is one-way: once pick is WRAPPEE_ARGS it is never reset.
if [ "$pick" = "WRAPPER_ARGS" ]; then
case "$1" in
-*) pick="WRAPPEE_ARGS" ;;
esac
fi
# Pack args with NEWLINE to preserve spaces,
# and put them into the picked variable.
# NOTE(review): the argument text is pasted into the eval'd string inside
# double quotes, so an argument containing a double quote, '$' or a
# backquote is re-interpreted by the shell here.
eval "$pick=\"\$${pick}${NEWLINE}${1}\""
shift
done
# Unpack filename arguments. Now "$@" will hold the filenames.
# IFS is switched to NEWLINE only for the duration of the set, so each packed
# argument becomes exactly one positional parameter.
oldifs="$IFS"; IFS="$NEWLINE"; set -- $WRAPPER_ARGS; IFS="$oldifs"

View file

@ -1,7 +0,0 @@
# Ensure to work with a single argument: keep the first one, warn about the
# rest and drop them.
if [ $# -gt 1 ]; then
    first_arg="$1"
    shift
    # "$*" joins the surplus arguments into one word for the message.
    err "Warning: extra arguments '$*' will be ignored."
    # Quoted, so a first argument containing whitespace stays one argument
    # instead of being split back into several.
    set -- "$first_arg"
fi

View file

@ -1,141 +0,0 @@
#!/bin/sh
THIS=$1
ASH="ash -s"
BASH="bash --posix -s"
DASH="dash -s"
KSH="ksh -s"
POSH="posh -s"
ZSH="zsh -s"
ERROR=""
wrapper () {
$SH -- "$@" <<-'EOF'
### common.sh
outfile=
while getopts o: opt; do
case $opt in
o) outfile="$OPTARG" ;;
esac
done
shift $(($OPTIND - 1))
### postopts.sh
echo "Options passed to wrapper:"
[ -z "$outfile" ] || echo "|$outfile|"
echo "Arguments passed to wrapper:"
for arg; do
echo "|$arg|"
done
pandoc () {
echo "Arguments passed to wrappee:"
for arg; do
echo "|$arg|"
done
}
runpandoc
EOF
}
# Portable which(1).
pathfind () {
oldifs="$IFS"; IFS=':'
for _p in $PATH; do
if [ -x "$_p/$*" ] && [ -f "$_p/$*" ]; then
IFS="$oldifs"
return 0
fi
done
IFS="$oldifs"
return 1
}
check_results () {
if [ "$1" = "$2" ]; then
echo >&2 ok
return 0
else
echo >&2 failed
sed "s/^/\t/" >&2 <<EOF
Command line: '$3'
===> Expected:
$2
<=== Got:
$1
EOF
return 1
fi
}
for SH in "$BASH" "$DASH" "$KSH" "$ZSH"; do
CMD=${SH%% *}
echo >&2 " Testing with $CMD..."
if pathfind "$CMD"; then
if [ "$CMD" = "zsh" ]; then
# Zsh needs to be called as 'sh' to enable POSIX mode.
ln -s $(which zsh) ./sh
SH="./sh ${SH#* }"
trap 'err=$?; rm -f ./sh; exit $err' 0 1 2 3 13 15
fi
set -e
# Test 1
printf >&2 " test case 1... "
actual=$(wrapper -o "output file" "foo bar" -A "quux baz" -B)
expected=$(cat <<'EOF'
Options passed to wrapper:
|output file|
Arguments passed to wrapper:
|foo bar|
Arguments passed to wrappee:
|-A|
|quux baz|
|-B|
EOF
)
check_results "$actual" "$expected" \
'wrapper -o "output file" "foo bar" -A "quux baz" -B'
# Test 2
printf >&2 " test case 2... "
actual=$(wrapper -- -A "foo bar")
expected=$(cat <<'EOF'
Options passed to wrapper:
Arguments passed to wrapper:
Arguments passed to wrappee:
|-A|
|foo bar|
EOF
)
check_results "$actual" "$expected" 'wrapper -- -A "foo bar"'
# Test 3 (Test 1 with a redundant '--')
printf >&2 " test case 3... "
actual=$(wrapper -o "output file" "foo bar" -- -A "quux baz" -B)
expected=$(cat <<'EOF'
Options passed to wrapper:
|output file|
Arguments passed to wrapper:
|foo bar|
Arguments passed to wrappee:
|-A|
|quux baz|
|-B|
EOF
)
check_results "$actual" "$expected" \
'wrapper -o "output file" "foo bar" -- -A "quux baz" -B'
else
echo >&2 "Warning: cannot verify correctness with $CMD; shell not available"
fi
done
exit 0

View file

@ -0,0 +1,173 @@
#!/bin/sh -e
# converts HTML from a URL, file, or stdin to markdown
# uses an available program to fetch URL and tidy to normalize it first
REQUIRED="tidy html2markdown"
### common.sh
# grab_url_with URL [COMMAND...]
# Fetch URL and write its contents to standard output.
#   $1     the URL (required; the function aborts with an "internal error"
#          message if it is missing or empty)
#   $2...  optional grabber command line; its first word is the program and
#          the remaining words are extra options for it
# When no command is given, the first program found among wget, lynx, w3m,
# curl, links and w3c is used.  Returns 1 if no usable program is found;
# otherwise the result is the grabber's own exit status.
grab_url_with () {
url="${1:?internal error: grab_url_with: url required}"
shift
cmdline="$@"
prog=
prog_opts=
if [ -n "$cmdline" ]; then
# Re-parse the command string with shell quoting rules, so a grabber given
# as a single quoted string is split into a program name and its options.
eval "set -- $cmdline"
prog=$1
shift
prog_opts="$@"
fi
if [ -z "$prog" ]; then
# Locate a sensible web grabber (note the order).
for p in wget lynx w3m curl links w3c; do
if pathfind $p; then
prog=$p
break
fi
done
[ -n "$prog" ] || {
errn "$THIS: Couldn't find a program to fetch the file from URL "
err "(e.g. wget, w3m, lynx, w3c, or curl)."
return 1
}
else
# A grabber was requested explicitly: it must be found by pathfind.
pathfind "$prog" || {
err "$THIS: No such web grabber '$prog' found; aborting."
return 1
}
fi
# Setup proper base options for known grabbers.
# An unknown grabber gets no base options, only a warning.
base_opts=
case "$prog" in
wget) base_opts="-O-" ;;
lynx) base_opts="-source" ;;
w3m) base_opts="-dump_source" ;;
curl) base_opts="" ;;
links) base_opts="-source" ;;
w3c) base_opts="-n -get" ;;
*) err "$THIS: unhandled web grabber '$prog'; hope it succeeds."
esac
err "$THIS: invoking '$prog $base_opts $prog_opts $url'..."
# Rebuild the positional parameters from the base options followed by the
# caller's options, then run the grabber with the URL as its last argument.
eval "set -- $base_opts $prog_opts"
$prog "$@" "$url"
}
# Append one word to $options.  Words are separated by $NEWLINE rather than
# by spaces, so a word that itself contains spaces is still a single word
# when $options is unpacked later with IFS set to $NEWLINE.
add_option () {
    options="$options$NEWLINE$1"
}

# require_value COUNT OPTION
# Exit with a usage error unless OPTION is followed by a value.  COUNT is the
# number of words not yet parsed, including OPTION itself.
require_value () {
    [ "$1" -ge 2 ] || {
        err "$THIS: option '$2' requires an argument."
        exit 2
    }
}

options=    # words to hand on to html2markdown
argument=   # the single input file or URL
encoding=   # value of -e/--encoding, if given
grabber=    # value of -g/--grabber, if given

# Parse command-line arguments.  -e and -g (and their long forms) are
# consumed here; every other option is collected for html2markdown; the
# first non-option word becomes the input and any further ones are ignored
# with a warning.
while [ $# -gt 0 ]; do
    case "$1" in
    -h|--help)
        html2markdown -h 2>&1 | sed -e 's/html2markdown/web2markdown/' 1>&2
        err " -e ENCODING, --encoding=ENCODING"
        err " Specify character encoding of input"
        err " -g COMMAND, --grabber=COMMAND"
        err " Specify command to be used to grab contents of URL"
        exit 0 ;;
    -v|--version)
        html2markdown -v
        exit 0 ;;
    -e)
        require_value $# "$1"
        shift
        encoding=$1 ;;
    --encoding=*)
        wholeopt=$1
        # extract encoding from after =
        encoding=${wholeopt#*=} ;;
    -g)
        require_value $# "$1"
        shift
        grabber=$1 ;;
    --grabber=*)
        wholeopt=$1
        # extract grabber command from after =
        grabber=${wholeopt#*=} ;;
    # Options whose value is a separate word: forward both words.
    # NOTE(review): -b has no counterpart in the documented option list;
    # it is kept so that existing invocations behave as before -- confirm.
    -o|--output|-b|--tab-stop|-H|--include-in-header| \
    -A|--include-after-body|-B|--include-before-body| \
    -C|--custom-header|-T|--title-prefix)
        require_value $# "$1"
        # Quoted, so an option value containing spaces is forwarded whole.
        add_option "$1"
        shift
        add_option "$1" ;;
    -*) add_option "$1" ;;
    *)
        if [ -z "$argument" ]; then
            argument=$1
        else
            err "Warning: extra argument '$1' will be ignored."
        fi ;;
    esac
    shift
done
# Unpack options. Now "$@" will hold the html2markdown options.
# IFS is NEWLINE only while the set runs, so each stored option word becomes
# exactly one positional parameter.
oldifs="$IFS"; IFS="$NEWLINE"; set -- $options; IFS="$oldifs"
inurl=
if [ -n "$argument" ] && ! [ -f "$argument" ]; then
# The argument does not name an existing regular file: treat it as a URL.
inurl="$argument"
fi
if [ -n "$inurl" ]; then
err "Attempting to fetch file from '$inurl'..."
# NOTE(review): the next line looks like an include marker expanded when the
# wrapper is built, presumably the place where THIS_TEMPDIR gets defined --
# confirm against the build rules.  It must stay exactly as it is.
### tempdir.sh
grabber_out=$THIS_TEMPDIR/grabber.out
grabber_log=$THIS_TEMPDIR/grabber.log
# The fetched page goes to grabber_out; everything the grabber (and
# grab_url_with itself) writes to stderr goes to grabber_log.
if ! grab_url_with "$inurl" "$grabber" 1>$grabber_out 2>$grabber_log; then
errn "grab_url_with failed"
if [ -f $grabber_log ]; then
err " with the following error log."
err
cat >&2 $grabber_log
else
err .
fi
exit 1
fi
# From here on the downloaded copy is the input file.
argument="$grabber_out"
fi
# Only the part of the file that head prints by default is searched for a
# <meta ... content-type ... charset=...> tag, after lower-casing it.
if [ -z "$encoding" ] && [ "x$argument" != "x" ]; then
# Try to determine character encoding if not specified
# and input is not STDIN.
encoding=$(
head "$argument" |
LC_ALL=C tr 'A-Z' 'a-z' |
sed -ne '/<meta .*content-type.*charset=/ {
s/.*charset=["'\'']*\([-a-zA-Z0-9]*\).*["'\'']*/\1/p
}'
)
fi
# to_utf8 [FILE...]
# Filter that converts its input (the named files, or standard input when
# none are given) to UTF-8.  It is a shell function rather than an alias, so
# it does not depend on alias expansion being active in a script.
if [ -n "$encoding" ] && pathfind iconv; then
    to_utf8 () { iconv -f "$encoding" -t utf-8 "$@"; }
else # assume UTF-8
    to_utf8 () { cat "$@"; }
fi

# Normalize the HTML with tidy and hand it to html2markdown together with
# the collected options.  tidy's diagnostics are discarded.
if [ -z "$argument" ]; then
    # Input from STDIN.  It goes through to_utf8 as well, so an encoding
    # given with -e is honoured here too; without -e this is a plain cat.
    to_utf8 | tidy -utf8 2>/dev/null | html2markdown "$@"
elif [ -f "$argument" ]; then
    to_utf8 "$argument" | tidy -utf8 2>/dev/null | html2markdown "$@"
else
    err "File '$argument' not found."
    exit 1
fi