Merged changes from branches/wrappers since r177.
Summary of main changes: + Added -o/--output and -d/--debug options to pandoc. + Modified pandoc to behave differently depending on the name of the program. For example, if the program name is 'html2latex', the default reader will be html and the default writer latex. + Removed most of the old wrappers, replacing them with symlinks to pandoc. + Rewrote markdown2pdf and created a new wrapper web2markdown, with the functionality of the old html2markdown script. These new scripts exploit pandoc's -d option to avoid having to do complex command-line parsing. + Revised man pages and documentation appropriately. git-svn-id: https://pandoc.googlecode.com/svn/trunk@279 788f1e2b-df1e-0410-8736-df70ead52e1b
This commit is contained in:
parent
cfaf0c178c
commit
d829c4820a
27 changed files with 713 additions and 799 deletions
24
Makefile
24
Makefile
|
@ -23,15 +23,16 @@ EXECSBASE := $(shell sed -ne 's/^[Ee]xecutable:[[:space:]]*//p' $(CABAL).in)
|
|||
#-------------------------------------------------------------------------------
|
||||
# Install targets
|
||||
#-------------------------------------------------------------------------------
|
||||
WRAPPERS := web2markdown markdown2pdf
|
||||
SYMLINKS := markdown2html markdown2latex markdown2s5 markdown2rst \
|
||||
markdown2rtf html2markdown latex2markdown rst2markdown
|
||||
PROGS := $(EXECS) $(WRAPPERS)
|
||||
# Add .exe extensions if we're running Windows/Cygwin.
|
||||
EXTENSION := $(shell uname | tr '[:upper:]' '[:lower:]' | \
|
||||
sed -ne 's/^cygwin.*$$/\.exe/p')
|
||||
EXECS := $(addsuffix $(EXTENSION),$(EXECSBASE))
|
||||
# First entry in Cabal's executable stanza is the main executable.
|
||||
MAIN := $(firstword $(EXECS))
|
||||
WRAPPERS := html2markdown latex2markdown markdown2html \
|
||||
markdown2latex markdown2pdf
|
||||
PROGS := $(EXECS) $(WRAPPERS)
|
||||
DOCS := README.html README BUGS
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
|
@ -92,6 +93,12 @@ all: build-program
|
|||
templates: $(SRCDIR)/templates
|
||||
$(MAKE) -C $(SRCDIR)/templates
|
||||
|
||||
.PHONY: symlinks
|
||||
cleanup_files+=$(SYMLINKS)
|
||||
symlinks: $(SYMLINKS)
|
||||
$(SYMLINKS): $(MAIN)
|
||||
ln -sf ./$(MAIN) $@
|
||||
|
||||
define generate-shell-script
|
||||
echo "Generating $@..."; \
|
||||
awk ' \
|
||||
|
@ -131,7 +138,7 @@ build: configure
|
|||
$(BUILDCMD) build
|
||||
|
||||
.PHONY: build-exec
|
||||
build-exec: $(PROGS)
|
||||
build-exec: $(PROGS) $(SYMLINKS)
|
||||
cleanup_files+=$(EXECS)
|
||||
$(EXECS): build
|
||||
for f in $@; do \
|
||||
|
@ -191,8 +198,9 @@ install-exec: build-exec
|
|||
fi; \
|
||||
$(INSTALL_PROGRAM) $$f $(BINPATH)/; \
|
||||
done
|
||||
cd $(BINPATH); for f in $(SYMLINKS); do ln -sf $(MAIN) $$f; done
|
||||
uninstall-exec:
|
||||
-for f in $(notdir $(PROGS)); do rm -f $(BINPATH)/$$f; done
|
||||
-for f in $(notdir $(PROGS) $(SYMLINKS)); do rm -f $(BINPATH)/$$f; done ;
|
||||
|
||||
# Program + user documents installation.
|
||||
.PHONY: install-program uninstall-program
|
||||
|
@ -277,15 +285,11 @@ osx-dmg: ../$(osx_dmg_name)
|
|||
-rm -f $(osx_dmg_name)
|
||||
mv $(osx_udzo_name) ../$(osx_dmg_name)
|
||||
|
||||
.PHONY: test test-markdown test-wrapper
|
||||
.PHONY: test test-markdown
|
||||
test: $(MAIN)
|
||||
@cd $(TESTDIR) && perl runtests.pl -s $(PWD)/$(MAIN)
|
||||
test-markdown: $(MAIN)
|
||||
@cd $(TESTDIR)/MarkdownTest_1.0.3 && perl MarkdownTest.pl -s $(PWD)/$(MAIN) -tidy
|
||||
cleanup_files+=testwrapper
|
||||
test-wrappers: testwrapper
|
||||
@echo "Running $<..."
|
||||
@sh testwrapper
|
||||
|
||||
# Stolen and slightly improved from a GPLed Makefile. Credits to John Meacham.
|
||||
src_all:=$(shell find $(SRCDIR) -type f -name '*hs' | egrep -v '^\./(_darcs|lib|test)/')
|
||||
|
|
251
README
251
README
|
@ -20,7 +20,7 @@ or output format requires only adding a reader or writer.
|
|||
[reStructuredText]: http://docutils.sourceforge.net/docs/ref/rst/introduction.html
|
||||
[S5]: http://meyerweb.com/eric/tools/s5/
|
||||
[HTML]: http://www.w3.org/TR/html40/
|
||||
[LaTeX]: http://www.latex-project.org/
|
||||
[LaTeX]: http://www.latex-project.org/
|
||||
[RTF]: http://en.wikipedia.org/wiki/Rich_Text_Format
|
||||
[Haskell]: http://www.haskell.org/
|
||||
|
||||
|
@ -30,9 +30,53 @@ any kind. (See COPYRIGHT for full copyright and warranty notices.)
|
|||
Recai Oktaş (roktas at debian dot org) deserves credit for the build
|
||||
system, the debian package, and the robust wrapper scripts.
|
||||
|
||||
[GPL]: http://www.gnu.org/copyleft/gpl.html
|
||||
[GPL]: http://www.gnu.org/copyleft/gpl.html "GNU General Public License"
|
||||
|
||||
# Using Pandoc
|
||||
Requirements
|
||||
============
|
||||
|
||||
The `pandoc` program itself does not depend on any external libraries
|
||||
or programs. The convenience programs `markdown2html`, `markdown2latex`,
|
||||
`markdown2rst`, `markdown2rtf`, `markdown2s5`, `html2markdown`,
|
||||
`latex2markdown`, and `rst2markdown` are implemented as symbolic links to
|
||||
`pandoc`.
|
||||
|
||||
The wrapper script `web2markdown` requires
|
||||
|
||||
- `html2markdown` (included with Pandoc)
|
||||
- a POSIX-compliant shell (installed by default on all Linux and Unix
|
||||
systems, including Mac OS X, and in [Cygwin] for Windows),
|
||||
- [HTML Tidy]
|
||||
- [`iconv`] (for character encoding conversion). (If `iconv` is absent,
|
||||
`web2markdown` will still work, but it will treat everything as UTF-8.)
|
||||
|
||||
[Cygwin]: http://www.cygwin.com/
|
||||
[HTML Tidy]: http://tidy.sourceforge.net/
|
||||
[`iconv`]: http://www.gnu.org/software/libiconv/
|
||||
|
||||
The wrapper script `markdown2pdf` requires
|
||||
|
||||
- `markdown2latex` (included with Pandoc)
|
||||
- a POSIX-compliant shell
|
||||
- `pdflatex`, which should be part of any [LaTeX] distribution
|
||||
- the [unicode] and [fancyvrb] LaTeX packages, which are included
|
||||
in many LaTeX distributions. The [unicode] package allows LaTeX to
|
||||
process UTF-8 characters. [fancyvrb] allows code blocks and verbatim
|
||||
text to be used within footnotes. If your installation of LaTeX
|
||||
does not include these packages, you will get an error (complaining
|
||||
about missing `ucs.sty` or `fancyvrb.sty`) when you try to compile
|
||||
a LaTeX file produced by Pandoc, or when you use the `markdown2pdf`
|
||||
script (described below). If this happens, install the [unicode] and
|
||||
[fancyvrb] packages from [CTAN]. (Get the zip file from CTAN
|
||||
and unpack it into `~/texmf/tex/latex/`. You may also need to run
|
||||
`mktexlsr` or `texhash` before the files can be found by TeX.)
|
||||
|
||||
[CTAN]: http://www.ctan.org "Comprehensive TeX Archive Network"
|
||||
[unicode]: http://www.ctan.org/tex-archive/macros/latex/contrib/unicode/
|
||||
[fancyvrb]: http://www.ctan.org/tex-archive/macros/latex/contrib/fancyvrb/
|
||||
|
||||
Using Pandoc
|
||||
============
|
||||
|
||||
If you run `pandoc` without arguments, it will accept input from
|
||||
STDIN. If you run it with file names as arguments, it will take input
|
||||
|
@ -66,10 +110,14 @@ a subset of reStructuredText syntax. For example, it doesn't handle
|
|||
tables, definition lists, option lists, or footnotes. It handles only the
|
||||
constructs expressible in unextended markdown. But for simple documents
|
||||
it should be adequate. The `latex` and `html` readers are also limited
|
||||
in what they can do.
|
||||
in what they can do. Because the `html` reader is picky about the HTML
|
||||
it parses, it is recommended that you pipe HTML through [HTML Tidy] before
|
||||
sending it to `pandoc`, or use the `web2markdown` script described below.
|
||||
|
||||
`pandoc` writes its output to STDOUT. If you want to write to a file,
|
||||
use redirection:
|
||||
By default, `pandoc` writes its output to STDOUT. If you want to
|
||||
write to a file, use the `-o` option or shell redirection:
|
||||
|
||||
pandoc -o hello.html hello.txt
|
||||
|
||||
pandoc hello.txt > hello.html
|
||||
|
||||
|
@ -77,13 +125,14 @@ Note that you can specify multiple input files on the command line.
|
|||
`pandoc` will concatenate them all (with blank lines between them)
|
||||
before parsing:
|
||||
|
||||
pandoc -s chapter1.txt chapter2.txt chapter3.txt references.txt > book.html
|
||||
pandoc -s chapter1.txt chapter2.txt references.txt > book.html
|
||||
|
||||
(The `-s` option here tells `pandoc` to produce a standalone HTML file,
|
||||
with a proper header, rather than a fragment. For more details on this
|
||||
and many other command-line options, see below.)
|
||||
|
||||
# Character encodings
|
||||
Character encodings
|
||||
-------------------
|
||||
|
||||
Unfortunately, due to limitations in GHC, `pandoc` does not automatically
|
||||
detect the system's local character encoding. Hence, all input and
|
||||
|
@ -97,92 +146,65 @@ will convert `source.txt` from the local encoding to UTF-8, then
|
|||
convert it to HTML, then convert back to the local encoding,
|
||||
putting the output in `output.html`.
|
||||
|
||||
[`iconv`]: http://www.gnu.org/software/libiconv/
|
||||
|
||||
The shell scripts (described below) automatically convert the input
|
||||
from the local encoding to UTF-8 before running them through `pandoc`,
|
||||
then convert the output back to the local encoding.
|
||||
|
||||
## LaTeX and UTF-8
|
||||
Convenience programs and wrapper scripts
|
||||
========================================
|
||||
|
||||
LaTeX sources produced by Pandoc use `ucs.sty`, which is included in many
|
||||
LaTeX distributions. This allows LaTeX to process UTF-8 characters.
|
||||
If your installation of LaTeX does not include `ucs.sty`, you will get an
|
||||
error when you try to compile a LaTeX file produced by Pandoc, or when
|
||||
you use the `markdown2pdf` script (described below). If this happens,
|
||||
install the [unicode] package from [CTAN]. (Get the `unicode.zip`
|
||||
file from CTAN, unpack it, and copy the whole `unicode` directory into
|
||||
`~/texmf/tex/latex/`. You may also need to run `mktexlsr` or `texhash`
|
||||
before the files can be found by TeX.)
|
||||
For convenience, eight variant programs are included with Pandoc:
|
||||
`markdown2html` (which is equivalent to `pandoc -w html`),
|
||||
`markdown2latex` (equivalent to `pandoc -w latex`), `markdown2rst`
|
||||
(equivalent to `pandoc -w rst`), `markdown2rtf` (equivalent to
|
||||
`pandoc -w rtf`), `markdown2s5` (equivalent to `pandoc -w s5`),
|
||||
`html2markdown` (equivalent to `pandoc -r html -w markdown`),
|
||||
`latex2markdown` (equivalent to `pandoc -r latex -w markdown`), and
|
||||
`rst2markdown` (equivalent to `pandoc -r rst -w markdown`). These
|
||||
programs take an appropriately restricted subset of `pandoc`'s
|
||||
options. (Run them with the `-h` flag for a full list of allowed
|
||||
options.)
|
||||
|
||||
[CTAN]: http://www.ctan.org
|
||||
[unicode]: http://www.ctan.org/tex-archive/macros/latex/contrib/unicode/
|
||||
Like `pandoc`, all of these programs produce fragments by default.
|
||||
If you want to produce a standalone file, complete with a header
|
||||
and footer appropriate to the format, use the `-s` option:
|
||||
|
||||
# The shell scripts
|
||||
markdown2latex -s sample.txt > sample.tex
|
||||
|
||||
Five shell scripts have been included that make it easy to run
|
||||
`pandoc` without worrying about character encodings, and without
|
||||
remembering all the command-line options:
|
||||
Two shell scripts have also been included:
|
||||
|
||||
- `markdown2html` converts markdown-formatted text to HTML
|
||||
- `markdown2latex` converts markdown-formatted text to LaTeX
|
||||
- `markdown2pdf` produces a PDF file from markdown-formatted
|
||||
text, using `pdflatex`.
|
||||
- `html2markdown` converts HTML to markdown-formatted text
|
||||
- `latex2markdown` converts LaTeX to markdown-formatted text
|
||||
1. `markdown2pdf` produces a PDF file from markdown-formatted
|
||||
text, using `markdown2latex` and `pdflatex`. The default
|
||||
behavior of `markdown2pdf` is to create a file with the same
|
||||
base name as the first argument and the extension `pdf`; thus,
|
||||
for example,
|
||||
|
||||
All of the scripts use `iconv` (if available) to convert to and from
|
||||
the local character encoding. All of the scripts presuppose that
|
||||
`pandoc` is in the path, and some have additional requirements. (For
|
||||
example, `html2markdown` uses `tidy`, and `markdown2pdf` uses
|
||||
`pdflatex`.)
|
||||
markdown2pdf sample.txt endnotes.txt
|
||||
|
||||
When no arguments are specified, text will be read from standard
|
||||
input. Arguments specify input files (limited to one in the case of
|
||||
`latex2markdown` and `html2markdown`; the other scripts accept any number
|
||||
of arguments). `html2markdown` may take a URL as argument instead of
|
||||
a filename; in this case, `curl`, `wget`, or an available text-based
|
||||
browser will be used to fetch the contents of the URL. (The `-n` option
|
||||
inhibits this behavior; the `-g` option allows the user to specify a
|
||||
custom command that will be used to fetch from a URL.)
|
||||
will produce `sample.pdf`. (If `sample.pdf` exists already,
|
||||
it will be backed up before being overwritten.) An output file
|
||||
name can be specified explicitly using the `-o` option:
|
||||
|
||||
With the exception of `markdown2pdf`, the scripts write to standard output.
|
||||
Output can be sent to a file using shell output redirection:
|
||||
markdown2pdf -o "My Book.pdf" chap1.txt chap2.txt chap3.txt
|
||||
|
||||
latex2markdown sample.tex > sample.txt
|
||||
If no input file is specified, input will be taken from STDIN.
|
||||
|
||||
The default behavior of `markdown2pdf` is to create a file with the same
|
||||
base name as the first argument and the extension `pdf`; thus, for example,
|
||||
2. `web2markdown` grabs a web page from a file or URL and converts
|
||||
it to markdown-formatted text, using `tidy` and `html2markdown`.
|
||||
Unless input is from STDIN, an attempt is made to determine the
|
||||
character encoding of the page from the "Content-type" meta tag.
|
||||
If this is not present, UTF-8 is assumed. Alternatively, a character
|
||||
encoding may be specified explicitly using the `-e` option.
|
||||
|
||||
markdown2pdf sample.txt endnotes.txt
|
||||
`web2markdown` searches for an available program (`wget`, `curl`,
|
||||
or a text-mode browser) to fetch the contents of a URL.
|
||||
Optionally, the `-g` option may be used to specify the command
|
||||
to be used:
|
||||
|
||||
will produce `sample.pdf`. (If `sample.pdf` exists already, it will be
|
||||
backed up before being overwritten.) An output file name can be specified
|
||||
explicitly using the `-o` option:
|
||||
web2markdown -g 'wget --user=foo --password=bar' mysite.com
|
||||
|
||||
markdown2pdf -o "My Book.pdf" chap1.txt chap2.txt chap3.txt
|
||||
|
||||
Options specific to the scripts, like `-o`, `-g`, and `-n`, must
|
||||
be specified *before* any command-line arguments (file names or URLs).
|
||||
Any options specified *after* the command-line arguments will be
|
||||
passed directly to `pandoc`. For example,
|
||||
|
||||
markdown2html tusks.txt -S -T Elephants
|
||||
|
||||
will convert `tusks.txt` to `tusks.html` using smart quotes, ellipses,
|
||||
and dashes, with "Elephants" as the page title prefix. (For a
|
||||
complete list of `pandoc` options, see below.) When there are no
|
||||
command-line arguments (because input is from STDIN), `pandoc`
|
||||
options must be preceded by ` -- `:
|
||||
|
||||
cat tusks.txt | markdown2html -- -S -T Elephants
|
||||
|
||||
The ` -- ` separator may optionally be used when there are command-line
|
||||
arguments:
|
||||
|
||||
markdown2html -- tusks.txt -S -T Elephants
|
||||
|
||||
# Command-line options
|
||||
Command-line options
|
||||
====================
|
||||
|
||||
Various command-line options can be used to customize the output.
|
||||
For a complete list, type
|
||||
|
@ -207,9 +229,11 @@ specified.)
|
|||
complete with appropriate document headers. By default, `pandoc`
|
||||
produces a fragment.
|
||||
|
||||
`--custom-header` can be used to specify a custom document header. To
|
||||
see the headers used by default, use the `-D` option: for example,
|
||||
`pandoc -D html` prints the default HTML header.
|
||||
`-o` or `--output` can be used to specify an output file.
|
||||
|
||||
`-C` or `--custom-header` can be used to specify a custom document
|
||||
header. To see the headers used by default, use the `-D` option:
|
||||
for example, `pandoc -D html` prints the default HTML header.
|
||||
|
||||
`-c` or `--css` allows the user to specify a custom stylesheet that
|
||||
will be linked to in HTML and S5 output.
|
||||
|
@ -253,15 +277,38 @@ is for lists to be displayed all at once.
|
|||
`-N` or `--number-sections` causes sections to be numbered in LaTeX
|
||||
output. By default, sections are not numbered.
|
||||
|
||||
# Pandoc's markdown vs. standard markdown
|
||||
`-d` or `--debug` causes a debugging message to be written to STDERR.
|
||||
The format of the message is as follows:
|
||||
|
||||
OUTPUT=foo
|
||||
INPUT=bar
|
||||
INPUT=Foo Baz
|
||||
|
||||
Here `OUTPUT=` is followed by the name of the output file specified
|
||||
using `-o`, if any. If no output file was specified, `OUTPUT=`
|
||||
will appear with nothing following it. Lines beginning `INPUT=`
|
||||
specify input files. If there are no input files, no `INPUT=` lines
|
||||
will be printed. The `-d` option forces output to be written to
|
||||
STDOUT, even if an output file was specified using the `-o` option.
|
||||
(This option is provided to make it easier to write wrappers for
|
||||
`pandoc`.)
|
||||
|
||||
`-v` or `--version` prints the version number to STDERR.
|
||||
|
||||
`-h` or `--help` prints a usage message to STDERR.
|
||||
|
||||
Pandoc's markdown vs. standard markdown
|
||||
=======================================
|
||||
|
||||
In parsing markdown, Pandoc departs from and extends [standard markdown]
|
||||
in a few respects. (To run Pandoc on the official
|
||||
markdown test suite, type `make test-markdown`.)
|
||||
|
||||
[standard markdown]: http://daringfireball.net/projects/markdown/syntax
|
||||
"Markdown syntax description"
|
||||
|
||||
## Section Headings
|
||||
Section Headings
|
||||
----------------
|
||||
|
||||
Pandoc creates an invisible anchor in front of every HTML section
|
||||
heading. The ID of this anchor is derived from the section heading
|
||||
|
@ -281,7 +328,8 @@ example, just insert:
|
|||
|
||||
[Back to Aristotle](#Aristotle's_De_Anima)
|
||||
|
||||
## Lists
|
||||
Lists
|
||||
-----
|
||||
|
||||
Pandoc behaves differently from standard markdown on some "edge
|
||||
cases" involving lists. Consider this source:
|
||||
|
@ -332,7 +380,8 @@ the example above:
|
|||
B) Fie
|
||||
C) Third
|
||||
|
||||
## Literal quotes in titles
|
||||
Literal quotes in titles
|
||||
------------------------
|
||||
|
||||
Standard markdown allows unescaped literal quotes in titles, as
|
||||
in
|
||||
|
@ -343,7 +392,8 @@ Pandoc requires all quotes within titles to be escaped:
|
|||
|
||||
[foo]: "bar \"embedded\" baz"
|
||||
|
||||
## Reference links
|
||||
Reference links
|
||||
---------------
|
||||
|
||||
Pandoc allows implicit reference links in either of two styles:
|
||||
|
||||
|
@ -357,7 +407,8 @@ will appear as regular bracketed text. Note: even `[link][]` will
|
|||
appear as `[link]` if there's no reference for `link`. If you want
|
||||
`[link][]`, use a backslash escape: `\[link]\[]`.
|
||||
|
||||
## Footnotes
|
||||
Footnotes
|
||||
---------
|
||||
|
||||
Pandoc's markdown allows footnotes, using the following syntax:
|
||||
|
||||
|
@ -394,7 +445,8 @@ they cannot contain multiple paragraphs). The syntax is as follows:
|
|||
|
||||
Inline and regular footnotes may be mixed freely.
|
||||
|
||||
## Embedded HTML
|
||||
Embedded HTML
|
||||
-------------
|
||||
|
||||
Pandoc treats embedded HTML in markdown a bit differently than
|
||||
Markdown 1.0. While Markdown 1.0 leaves HTML blocks exactly as they
|
||||
|
@ -427,7 +479,8 @@ markdown with HTML block elements. For example, one can surround
|
|||
a block of markdown text with `<div>` tags without preventing it
|
||||
from being interpreted as markdown.
|
||||
|
||||
## Title blocks
|
||||
Title blocks
|
||||
------------
|
||||
|
||||
If the file begins with a title block
|
||||
|
||||
|
@ -460,7 +513,8 @@ If a title prefix is specified with `-T` and no title block appears
|
|||
in the document, the title prefix will be used by itself as the
|
||||
HTML title.
|
||||
|
||||
## Box-style blockquotes
|
||||
Box-style blockquotes
|
||||
---------------------
|
||||
|
||||
Pandoc supports emacs-style boxquote block quotes, in addition to
|
||||
standard markdown (email-style) blockquotes:
|
||||
|
@ -469,7 +523,8 @@ standard markdown (email-style) boxquotes:
|
|||
| They look like this.
|
||||
`----
|
||||
|
||||
## Inline LaTeX
|
||||
Inline LaTeX
|
||||
------------
|
||||
|
||||
Anything between two $ characters will be parsed as LaTeX math. The
|
||||
opening $ must have a character immediately to its right, while the
|
||||
|
@ -501,7 +556,8 @@ You can also use LaTeX environments. For example,
|
|||
Note, however, that material between the begin and end tags will
|
||||
be interpreted as raw LaTeX, not as markdown.
|
||||
|
||||
## Custom headers
|
||||
Custom headers
|
||||
--------------
|
||||
|
||||
When run with the "standalone" option (`-s`), `pandoc` creates a
|
||||
standalone file, complete with an appropriate header. To see the
|
||||
|
@ -516,13 +572,14 @@ it and specify it on the command line as follows:
|
|||
|
||||
pandoc --custom-header=MyHeaderFile
|
||||
|
||||
# Producing S5 with Pandoc
|
||||
Producing S5 with Pandoc
|
||||
========================
|
||||
|
||||
Producing an [S5] slide show with Pandoc is easy. A title page is
|
||||
constructed automatically from the document's title block (see above).
|
||||
Each section (with a level-one header) produces a single slide. (Note
|
||||
that if the section is too big, the slide will not fit on the page; S5
|
||||
is not smart enough to produce multiple pages.)
|
||||
Producing an [S5] web-based slide show with Pandoc is easy. A title
|
||||
page is constructed automatically from the document's title block (see
|
||||
above). Each section (with a level-one header) produces a single slide.
|
||||
(Note that if the section is too big, the slide will not fit on the page;
|
||||
S5 is not smart enough to produce multiple pages.)
|
||||
|
||||
Here's the markdown source for a simple slide show, `eating.txt`:
|
||||
|
||||
|
|
2
debian/changelog
vendored
2
debian/changelog
vendored
|
@ -14,6 +14,8 @@ pandoc (0.22) unstable; urgency=low
|
|||
|
||||
* Refactored template processing (fillTemplates.pl).
|
||||
|
||||
* Modified wrapper scripts to make them more robust.
|
||||
|
||||
* Modified wrapper scripts to make them more robust and portable.
|
||||
To avoid code duplication and ensure consistency, wrappers are
|
||||
generated via a templating system from templates in src/wrappers.
|
||||
|
|
|
@ -1,60 +1 @@
|
|||
.TH HTML2MARKDOWN 1 "November 21, 2006" Pandoc "User Manuals"
|
||||
.SH NAME
|
||||
html2markdown \- converts HTML to markdown-formatted text
|
||||
.SH SYNOPSIS
|
||||
\fBhtml2markdown\fR [\fIoptions\fR] [\fIinput\-file\fR or \fIURL\fR]
|
||||
[\fB\-\-\fR] [\fIpandoc\-opts\fR]
|
||||
.SH DESCRIPTION
|
||||
\fBhtml2markdown\fR converts \fIinput\-file\fR or \fIURL\fR (or text
|
||||
from STDIN) from HTML to markdown\-formatted plain text.
|
||||
If a URL is specified, \fBhtml2markdown\fR uses an available program
|
||||
(e.g. wget, w3m, lynx or curl) to fetch its contents. Output is sent
|
||||
to STDOUT.
|
||||
.PP
|
||||
\fBhtml2markdown\fR is a wrapper for \fBpandoc\fR.
|
||||
.SH OPTIONS
|
||||
.TP
|
||||
.B \-h
|
||||
Show usage message.
|
||||
.TP
|
||||
.B \-e \fIencoding\fR
|
||||
Assume the character encoding \fIencoding\fR in reading the HTML.
|
||||
(Note: \fIencoding\fR will be passed to \fBiconv\fR; a list of
|
||||
available encodings may be obtained using `\fBiconv \-l\fR'.)
|
||||
If the \fB\-e\fR option is not specified, the encoding will be
|
||||
determined as follows: If input is from STDIN, the local encoding
|
||||
will be assumed. Otherwise, \fBhtml2markdown\fR will try to
|
||||
extract the character encoding from the "Content-type" meta tag.
|
||||
If no character encoding is specified in this way, UTF-8 will be
|
||||
assumed for a URL argument, and the local encoding will be assumed
|
||||
for a file argument.
|
||||
.TP
|
||||
.B \-g \fIcommand\fR
|
||||
Use \fIcommand\fR to fetch the contents of a URL. (By default,
|
||||
\fBhtml2markdown\fR searches for an available program or text-based
|
||||
browser to fetch the contents of a URL.) For example:
|
||||
.IP
|
||||
html2markdown \-g 'wget \-\-user=foo \-\-password=bar' mysite.com
|
||||
.TP
|
||||
.B \-n
|
||||
Disable automatic fetching of contents when URLs are specified as
|
||||
arguments.
|
||||
.TP
|
||||
.I pandoc\-opts
|
||||
Any options appearing after \fIinput\-file\fR or \fIURL\fR on the
|
||||
command line will be passed directly to \fBpandoc\fR. If no
|
||||
\fIinput-file\fR or \fIURL\fR is specified, these options must
|
||||
be preceded by ` \fB\-\-\fR '. (In other cases, ` \fB\-\-\fR ' is
|
||||
optional.) See \fBpandoc\fR(1) for a list of options that may be used.
|
||||
Example:
|
||||
.IP
|
||||
html2markdown input.txt \-\- \-R
|
||||
.SH "SEE ALSO"
|
||||
\fBpandoc\fR(1),
|
||||
\fBmarkdown2html\fR(1),
|
||||
\fBmarkdown2latex\fR(1),
|
||||
\fBlatex2markdown\fR(1),
|
||||
\fBmarkdown2pdf\fR(1),
|
||||
\fBiconv\fR(1)
|
||||
.SH AUTHOR
|
||||
John MacFarlane and Recai Oktas
|
||||
.so man1/pandoc.1
|
||||
|
|
|
@ -1,33 +1 @@
|
|||
.TH LATEX2MARKDOWN 1 "November 21, 2006" Pandoc "User Manuals"
|
||||
.SH NAME
|
||||
latex2markdown \- converts LaTeX to markdown\-formatted text
|
||||
.SH SYNOPSIS
|
||||
\fBlatex2markdown\fR [\fIoptions\fR] [\fIinput\-file\fR]
|
||||
[\fB\-\-\fR] [\fIpandoc\-opts\fR]
|
||||
.SH DESCRIPTION
|
||||
\fBlatex2markdown\fR converts \fIinput\-file\fR
|
||||
(or text from STDIN) from LaTeX to markdown\-formatted plain text.
|
||||
Output is sent to STDOUT.
|
||||
.PP
|
||||
\fBlatex2markdown\fR is a wrapper for \fBpandoc\fR.
|
||||
.SH OPTIONS
|
||||
.TP
|
||||
.B \-h
|
||||
Show usage message.
|
||||
.TP
|
||||
.I pandoc\-opts
|
||||
Any options appearing after \fIinput\-file\fR on the command line
|
||||
will be passed directly to \fBpandoc\fR. If no \fIinput-file\fR
|
||||
is specified, these options must be preceded by ` \fB\-\-\fR '.
|
||||
(In other cases, ` \fB\-\-\fR ' is optional.) See \fBpandoc\fR(1)
|
||||
for a list of options that may be used. Example:
|
||||
.IP
|
||||
latex2markdown input.txt \-\- \-R
|
||||
.SH "SEE ALSO"
|
||||
\fBpandoc\fR(1),
|
||||
\fBmarkdown2html\fR(1),
|
||||
\fBhtml2markdown\fR(1),
|
||||
\fBmarkdown2latex\fR(1),
|
||||
\fBmarkdown2pdf\fR(1)
|
||||
.SH AUTHOR
|
||||
John MacFarlane and Recai Oktas
|
||||
.so man1/pandoc.1
|
||||
|
|
|
@ -1,34 +1 @@
|
|||
.TH MARKDOWN2HTML 1 "November 21, 2006" Pandoc "User Manuals"
|
||||
.SH NAME
|
||||
markdown2html \- converts markdown\-formatted text to HTML
|
||||
.SH SYNOPSIS
|
||||
\fBmarkdown2html\fR [\fIoptions\fR] [\fIinput\-file\fR]...
|
||||
[\fB\-\-\fR] [\fIpandoc\-opts\fR]
|
||||
.SH DESCRIPTION
|
||||
\fBmarkdown2html\fR converts \fIinput\-file\fR
|
||||
(or text from STDIN) from markdown\-formatted plain text to HTML.
|
||||
If multiple files are specified, they will be combined to make a single
|
||||
HTML document. Output is sent to STDOUT.
|
||||
.PP
|
||||
\fBmarkdown2html\fR is a wrapper for \fBpandoc\fR.
|
||||
.SH OPTIONS
|
||||
.TP
|
||||
.B \-h
|
||||
Show usage message.
|
||||
.TP
|
||||
.I pandoc\-opts
|
||||
Any options appearing after \fIinput\-file\fR... on the command line
|
||||
will be passed directly to \fBpandoc\fR. If no \fIinput-file\fR
|
||||
is specified, these options must be preceded by ` \fB\-\-\fR '.
|
||||
(In other cases, ` \fB\-\-\fR ' is optional.) See \fBpandoc\fR(1)
|
||||
for a list of options that may be used. Example:
|
||||
.IP
|
||||
markdown2html input.txt \-\- \-\-css=main.css \-S
|
||||
.SH "SEE ALSO"
|
||||
\fBpandoc\fR(1),
|
||||
\fBhtml2markdown\fR(1),
|
||||
\fBmarkdown2latex\fR(1),
|
||||
\fBlatex2markdown\fR(1),
|
||||
\fBmarkdown2pdf\fR(1)
|
||||
.SH AUTHOR
|
||||
John MacFarlane and Recai Oktas
|
||||
.so man1/pandoc.1
|
||||
|
|
|
@ -1,34 +1 @@
|
|||
.TH MARKDOWN2LATEX 1 "November 21, 2006" Pandoc "User Manuals"
|
||||
.SH NAME
|
||||
markdown2latex \- converts markdown-formatted text to LaTeX
|
||||
.SH SYNOPSIS
|
||||
\fBmarkdown2latex\fR [\fIoptions\fR] [\fIinput\-file\fR]...
|
||||
[\fB\-\-\fR] [\fIpandoc\-opts\fR]
|
||||
.SH DESCRIPTION
|
||||
\fBmarkdown2latex\fR converts \fIinput\-file\fR (or text from STDIN)
|
||||
from markdown\-formatted plain text to LaTeX. If multiple files are
|
||||
specified, they will be combined to make a single LaTeX document.
|
||||
Output is sent to STDOUT.
|
||||
.PP
|
||||
\fBmarkdown2latex\fR is a wrapper for \fBpandoc\fR.
|
||||
.SH OPTIONS
|
||||
.TP
|
||||
.B \-h
|
||||
Show usage message.
|
||||
.TP
|
||||
.I pandoc\-opts
|
||||
Any options appearing after \fIinput\-file\fR... on the command line
|
||||
will be passed directly to \fBpandoc\fR. If no \fIinput-file\fR
|
||||
is specified, these options must be preceded by ` \fB\-\-\fR '.
|
||||
(In other cases, ` \fB\-\-\fR ' is optional.) See \fBpandoc\fR(1)
|
||||
for a list of options that may be used. Example:
|
||||
.IP
|
||||
markdown2latex input.txt \-\- \-\-custom\-header=letterhead.tex
|
||||
.SH "SEE ALSO"
|
||||
\fBpandoc\fR(1),
|
||||
\fBmarkdown2html\fR(1),
|
||||
\fBhtml2markdown\fR(1),
|
||||
\fBlatex2markdown\fR(1),
|
||||
\fBmarkdown2pdf\fR(1)
|
||||
.SH AUTHOR
|
||||
John MacFarlane and Recai Oktas
|
||||
.so man1/pandoc.1
|
||||
|
|
|
@ -1,43 +1,71 @@
|
|||
.TH MARKDOWN2PDF 1 "November 21, 2006" Pandoc "User Manuals"
|
||||
.TH MARKDOWN2PDF 1 "December 15, 2006" Pandoc "User Manuals"
|
||||
.SH NAME
|
||||
markdown2pdf \- converts markdown-formatted text to PDF, using pdflatex
|
||||
.SH SYNOPSIS
|
||||
\fBmarkdown2pdf\fR [\fIoptions\fR] [\fB\-o\fR \fIoutput-file\fR]
|
||||
[\fIinput-file\fR]... [\fB\-\-\fR] [\fIpandoc\-opts\fR]
|
||||
\fBmarkdown2pdf\fR [\fIoptions\fR] [\fIinput-file\fR]...
|
||||
.SH DESCRIPTION
|
||||
\fBmarkdown2pdf\fR converts \fIinput\-file\fR (or text from STDIN) from
|
||||
markdown\-formatted plain text to PDF, using \fBpdflatex\fR. If no output
|
||||
filename is specified, the name of the output file is derived from the
|
||||
input file; thus, for example, if the input file is \fIhello.txt\fR,
|
||||
the output file will be \fIhello.pdf\fR. If the input is read from STDIN
|
||||
and no output filename is specified, the output file will be named
|
||||
\fIstdin.pdf\fR. If multiple input files are specified, they will be
|
||||
concatenated before conversion, and the name of the output file will be
|
||||
derived from the first input file.
|
||||
\fBmarkdown2pdf\fR converts \fIinput\-file\fR (or text from standard
|
||||
input) from markdown\-formatted plain text to PDF, using \fBpdflatex\fR.
|
||||
If no output filename is specified, the name of the output file is
|
||||
derived from the input file; thus, for example, if the input file
|
||||
is \fIhello.txt\fR, the output file will be \fIhello.pdf\fR. If
|
||||
the input is read from STDIN and no output filename is
|
||||
specified, the output file will be named \fIstdin.pdf\fR. If
|
||||
multiple input files are specified, they will be concatenated before
|
||||
conversion, and the name of the output file will be derived from
|
||||
the first input file.
|
||||
.PP
|
||||
\fBmarkdown2pdf\fR is a wrapper for \fBpandoc\fR.
|
||||
Input is assumed to be in the UTF\-8 character encoding. If your
|
||||
local character encoding is not UTF\-8, you should pipe input
|
||||
through \fBiconv\fR:
|
||||
.IP
|
||||
.B iconv \-t utf\-8 input.txt | markdown2pdf
|
||||
.PP
|
||||
\fBmarkdown2pdf\fR assumes that the 'unicode' package
|
||||
is in latex's search path. If this package is not included in your
|
||||
latex setup, it can be obtained from <http://ctan.org>.
|
||||
.PP
|
||||
\fBmarkdown2pdf\fR is a wrapper around \fBmarkdown2latex\fR.
|
||||
.SH OPTIONS
|
||||
.TP
|
||||
.B \-h
|
||||
.B \-o FILE, \-\-output=FILE
|
||||
Write output to \fIFILE\fR.
|
||||
.TP
|
||||
.B \-p, \-\-preserve-tabs
|
||||
Preserve tabs instead of converting them to spaces.
|
||||
.TP
|
||||
.B \-\-tab-stop=\fITABSTOP\fB
|
||||
Specify tab stop (default is 4).
|
||||
.TP
|
||||
.B \-R, \-\-parse-raw
|
||||
Parse untranslatable LaTeX environments as raw LaTeX,
|
||||
instead of ignoring them.
|
||||
.TP
|
||||
.B \-N, \-\-number-sections
|
||||
Number section headings in LaTeX output. (Default is not to number them.)
|
||||
.TP
|
||||
.B \-H \fIFILE\fB, \-\-include-in-header=\fIFILE\fB
|
||||
Include (LaTeX) contents of \fIFILE\fR at the end of the header. Implies
|
||||
\fB\-s\fR.
|
||||
.TP
|
||||
.B \-B \fIFILE\fB, \-\-include-before-body=\fIFILE\fB
|
||||
Include (LaTeX) contents of \fIFILE\fR at the beginning of the document body.
|
||||
.TP
|
||||
.B \-A \fIFILE\fB, \-\-include-after-body=\fIFILE\fB
|
||||
Include (LaTeX) contents of \fIFILE\fR at the end of the document body.
|
||||
.TP
|
||||
.B \-C \fIFILE\fB, \-\-custom-header=\fIFILE\fB
|
||||
Use contents of \fIFILE\fR
|
||||
as the LaTeX document header (overriding the default header, which can be
|
||||
printed using '\fBpandoc \-D latex\fR'). Implies \fB\-s\fR.
|
||||
.TP
|
||||
.B \-v, \-\-version
|
||||
Print version.
|
||||
.TP
|
||||
.B \-h, \-\-help
|
||||
Show usage message.
|
||||
.TP
|
||||
.B \-o \fIoutput-file\fR
|
||||
Specify name of output (PDF) file.
|
||||
.TP
|
||||
.I pandoc\-opts
|
||||
Any options appearing after \fIinput\-file\fR... on the command line
|
||||
will be passed directly to \fBpandoc\fR. If no \fIinput-file\fR
|
||||
is specified, these options must be preceded by ` \fB\-\-\fR '.
|
||||
(In other cases, ` \fB\-\-\fR ' is optional.) See \fBpandoc\fR(1)
|
||||
for a list of options that may be used. Example:
|
||||
.IP
|
||||
markdown2pdf input.txt \-\- \-\-custom\-header=letterhead.tex
|
||||
.SH "SEE ALSO"
|
||||
\fBpandoc\fR(1),
|
||||
\fBmarkdown2html\fR(1),
|
||||
\fBhtml2markdown\fR(1),
|
||||
\fBmarkdown2latex\fR(1),
|
||||
\fBlatex2markdown\fR(1),
|
||||
\fBpdflatex\fR(1)
|
||||
.SH AUTHOR
|
||||
John MacFarlane and Recai Oktas
|
||||
|
|
1
man/man1/markdown2rst.1
Normal file
1
man/man1/markdown2rst.1
Normal file
|
@ -0,0 +1 @@
|
|||
.so man1/pandoc.1
|
1
man/man1/markdown2rtf.1
Normal file
1
man/man1/markdown2rtf.1
Normal file
|
@ -0,0 +1 @@
|
|||
.so man1/pandoc.1
|
1
man/man1/markdown2s5.1
Normal file
1
man/man1/markdown2s5.1
Normal file
|
@ -0,0 +1 @@
|
|||
.so man1/pandoc.1
|
|
@ -1,18 +1,23 @@
|
|||
.TH PANDOC 1 "November 21, 2006" Pandoc "User Manuals"
|
||||
.TH PANDOC 1 "December 15, 2006" Pandoc "User Manuals"
|
||||
.SH NAME
|
||||
pandoc \- general markup converter
|
||||
pandoc, markdown2html, markdown2latex, markdown2rst, markdown2rtf,
|
||||
markdown2s5, html2markdown, latex2markdown, rst2markdown \- general
|
||||
markup converter
|
||||
.SH SYNOPSIS
|
||||
\fBpandoc\fR [\fIoptions\fR] [\fIinput\-file\fR]...
|
||||
.SH DESCRIPTION
|
||||
\fIPandoc\fR converts files from one markup format to another. It can
|
||||
\fBPandoc\fR converts files from one markup format to another. It can
|
||||
read markdown and (subsets of) reStructuredText, HTML, and LaTeX, and
|
||||
it can write markdown, reStructuredText, HTML, LaTeX, RTF, and S5 HTML
|
||||
slide shows.
|
||||
.PP
|
||||
If no \fIinput\-file\fR is specified, input is read from STDIN. Otherwise,
|
||||
the \fIinput\-files\fR are concatenated (with a blank line between each)
|
||||
and used as input. Output goes to STDOUT. If you want output to a file,
|
||||
use shell redirection:
|
||||
If no \fIinput\-file\fR is specified, input is read from STDIN.
|
||||
Otherwise, the \fIinput\-files\fR are concatenated (with a blank
|
||||
line between each) and used as input. Output goes to standard
|
||||
output. If you want output to a file, use the \fB\-o\fR option or
|
||||
shell redirection:
|
||||
.IP
|
||||
.B pandoc \-o output.html input.txt
|
||||
.IP
|
||||
.B pandoc input.txt > output.html
|
||||
.PP
|
||||
|
@ -25,6 +30,19 @@ formats can be specified using command\-line options. For example,
|
|||
converts \fIchap1.tex\fR from LaTeX to markdown\-formatted plain text.
|
||||
See below for a detailed list of command\-line options.
|
||||
.PP
|
||||
For convenience, eight variant programs are available:
|
||||
\fBmarkdown2html\fR (same as \fBpandoc \-w html\fR),
|
||||
\fBmarkdown2latex\fR (same as \fBpandoc \-w latex\fR),
|
||||
\fBmarkdown2rst\fR (same as \fBpandoc \-w rst\fR),
|
||||
\fBmarkdown2rtf\fR (same as \fBpandoc \-w rtf\fR),
|
||||
\fBmarkdown2s5\fR (same as \fBpandoc \-w s5\fR),
|
||||
\fBhtml2markdown\fR (same as \fBpandoc \-r html \-w markdown\fR),
|
||||
\fBlatex2markdown\fR (same as \fBpandoc \-r latex \-w markdown\fR),
|
||||
and \fBrst2markdown\fR (same as \fBpandoc \-r rst \-w markdown\fR).
|
||||
These programs take an appropriately restricted subset of \fBpandoc\fR's
|
||||
options. (Run them with the \fB\-h\fR flag for a full list of allowed
|
||||
options.)
|
||||
.PP
|
||||
\fIPandoc\fR uses the UTF\-8 character encoding for both input and output.
|
||||
If your local character encoding is not UTF\-8, you should pipe input
|
||||
and output through \fBiconv\fR:
|
||||
|
@ -33,61 +51,58 @@ and output through \fBiconv\fR:
|
|||
|
||||
.SH OPTIONS
|
||||
.TP
|
||||
.B \-v, \-\-version
|
||||
Print version.
|
||||
.TP
|
||||
.B \-h, \-\-help
|
||||
Show usage message.
|
||||
.TP
|
||||
.B \-f FORMAT, \-r FORMAT, \-\-from=FORMAT, \-\-read=FORMAT
|
||||
.B \-f \fIFORMAT\fB, \-r \fIFORMAT\fB, \-\-from=\fIFORMAT\fB, \-\-read=\fIFORMAT\fB
|
||||
Specify input format.
|
||||
.I FORMAT
|
||||
can be
|
||||
.I native
|
||||
.B native
|
||||
(native Haskell),
|
||||
.I markdown
|
||||
.B markdown
|
||||
(markdown or plain text),
|
||||
.I rst
|
||||
.B rst
|
||||
(reStructuredText),
|
||||
.I html
|
||||
.B html
|
||||
(HTML),
|
||||
or
|
||||
.I latex
|
||||
.B latex
|
||||
(LaTeX).
|
||||
.TP
|
||||
.B \-t FORMAT, \-w FORMAT, \-\-to=FORMAT, \-\-write=FORMAT
|
||||
.B \-t \fIFORMAT\fB, \-w \fIFORMAT\fB, \-\-to=\fIFORMAT\fB, \-\-write=\fIFORMAT\fB
|
||||
Specify output format.
|
||||
.I FORMAT
|
||||
can be
|
||||
.I native
|
||||
.B native
|
||||
(native Haskell),
|
||||
.I markdown
|
||||
.B markdown
|
||||
(markdown or plain text),
|
||||
.I rst
|
||||
.B rst
|
||||
(reStructuredText),
|
||||
.I html
|
||||
.B html
|
||||
(HTML),
|
||||
.I latex
|
||||
.B latex
|
||||
(LaTeX),
|
||||
.I s5
|
||||
.B s5
|
||||
(S5 HTML and javascript slide show),
|
||||
or
|
||||
.I rtf
|
||||
.B rtf
|
||||
(rich text format).
|
||||
.TP
|
||||
.B \-s, \-\-standalone
|
||||
Produce output with an appropriate header and footer (e.g. a
|
||||
standalone HTML, LaTeX, or RTF file, not a fragment).
|
||||
.TP
|
||||
.B \-o FILE, \-\-output=FILE
|
||||
Write output to \fIFILE\fR instead of STDOUT.
|
||||
.TP
|
||||
.B \-p, \-\-preserve-tabs
|
||||
Preserve tabs instead of converting them to spaces.
|
||||
.TP
|
||||
.B \-\-tab-stop=TABSTOP
|
||||
.B \-\-tab-stop=\fITABSTOP\fB
|
||||
Specify tab stop (default is 4).
|
||||
.TP
|
||||
.B \-R, \-\-parse-raw
|
||||
Parse untranslatable HTML codes and LaTeX environments as raw HTML or
|
||||
LaTeX, instead of ignoring them.
|
||||
Parse untranslatable HTML codes and LaTeX environments as raw HTML
|
||||
or LaTeX, instead of ignoring them.
|
||||
.TP
|
||||
.B \-S, \-\-smartypants
|
||||
Use smart quotes, dashes, and ellipses in HTML output.
|
||||
|
@ -99,41 +114,50 @@ Use ASCIIMathML to display embedded LaTeX math in HTML output.
|
|||
Make list items in S5 display incrementally (one by one).
|
||||
.TP
|
||||
.B \-N, \-\-number-sections
|
||||
Number section headings in LaTeX output. (Default is not to number them.)
|
||||
Number section headings in LaTeX output. (Default is not to number
|
||||
them.)
|
||||
.TP
|
||||
.B \-c CSS, \-\-css=CSS
|
||||
.B \-c \fICSS\fB, \-\-css=\fICSS\fB
|
||||
Link to a CSS style sheet.
|
||||
.I CSS
|
||||
is the pathname of the style sheet.
|
||||
.TP
|
||||
.B \-H FILENAME, \-\-include-in-header=FILENAME
|
||||
Include contents of \fIFILENAME\fR at the end of the header. Implies
|
||||
.B \-H \fIFILE\fB, \-\-include-in-header=\fIFILE\fB
|
||||
Include contents of \fIFILE\fR at the end of the header. Implies
|
||||
\fB\-s\fR.
|
||||
.TP
|
||||
.B \-B FILENAME, \-\-include-before-body=FILENAME
|
||||
Include contents of \fIFILENAME\fR at the beginning of the document body.
|
||||
.B \-B \fIFILE\fB, \-\-include-before-body=\fIFILE\fB
|
||||
Include contents of \fIFILE\fR at the beginning of the document
|
||||
body.
|
||||
.TP
|
||||
.B \-A FILENAME, \-\-include-after-body=FILENAME
|
||||
Include contents of \fIFILENAME\fR at the end of the document body.
|
||||
.B \-A \fIFILE\fB, \-\-include-after-body=\fIFILE\fB
|
||||
Include contents of \fIFILE\fR at the end of the document body.
|
||||
.TP
|
||||
.B \-\-custom-header=FILENAME
|
||||
Use contents of \fIFILENAME\fR
|
||||
as the document header (overriding the default header, which can be
|
||||
printed by using the \fB\-D\fR option). Implies
|
||||
\fB-s\fR.
|
||||
.B \-C \fIFILE\fB, \-\-custom-header=\fIFILE\fB
|
||||
Use contents of \fIFILE\fR as the document header (overriding the
|
||||
default header, which can be printed by using the \fB\-D\fR option).
|
||||
Implies \fB\-s\fR.
|
||||
.TP
|
||||
.B \-D FORMAT, \-\-print-default-header=FORMAT
|
||||
Print the default header for \fIFORMAT\fR
|
||||
(\fIhtml, s5, latex, markdown, rst, rtf\fR).
|
||||
.B \-D \fIFORMAT\fB, \-\-print-default-header=\fIFORMAT\fB
|
||||
Print the default header for \fIFORMAT\fR (\fIhtml, s5, latex,
|
||||
markdown, rst, rtf\fR).
|
||||
.TP
|
||||
.B \-T STRING, \-\-title-prefix=STRING
|
||||
.B \-T \fISTRING\fB, \-\-title-prefix=\fISTRING\fB
|
||||
Specify \fISTRING\fR as a prefix to the HTML window title.
|
||||
.TP
|
||||
.B \-d, \-\-debug
|
||||
Print debugging information (names of input and output files) to
|
||||
STDERR. Write output to STDOUT, even if an output file was specified
|
||||
using the \fB\-o\fR option.
|
||||
.TP
|
||||
.B \-v, \-\-version
|
||||
Print version.
|
||||
.TP
|
||||
.B \-h, \-\-help
|
||||
Show usage message.
|
||||
|
||||
.SH "SEE ALSO"
|
||||
\fBmarkdown2html\fR(1),
|
||||
\fBhtml2markdown\fR(1),
|
||||
\fBmarkdown2latex\fR(1),
|
||||
\fBlatex2markdown\fR(1),
|
||||
\fBweb2markdown\fR(1),
|
||||
\fBmarkdown2pdf\fR(1),
|
||||
\fBiconv\fR(1)
|
||||
|
||||
|
|
1
man/man1/rst2markdown.1
Normal file
1
man/man1/rst2markdown.1
Normal file
|
@ -0,0 +1 @@
|
|||
.so man1/pandoc.1
|
82
man/man1/web2markdown.1
Normal file
82
man/man1/web2markdown.1
Normal file
|
@ -0,0 +1,82 @@
|
|||
.TH WEB2MARKDOWN 1 "December 15, 2006" Pandoc "User Manuals"
|
||||
.SH NAME
|
||||
web2markdown \- converts HTML to markdown-formatted text
|
||||
.SH SYNOPSIS
|
||||
\fBweb2markdown\fR [\fIoptions\fR] [\fIinput\-file\fR or \fIURL\fR]
|
||||
.SH DESCRIPTION
|
||||
\fBweb2markdown\fR converts \fIinput\-file\fR or \fIURL\fR (or text
|
||||
from STDIN) from HTML to markdown\-formatted plain text.
|
||||
If a URL is specified, \fBweb2markdown\fR uses an available program
|
||||
(e.g. wget, w3m, lynx or curl) to fetch its contents. Output is sent
|
||||
to STDOUT unless an output file is specified using the \fB\-o\fR
|
||||
option.
|
||||
.PP
|
||||
\fBweb2markdown\fR uses the character encoding specified in the
|
||||
"Content-type" meta tag. If this is not present, or if input comes
|
||||
from STDIN, UTF-8 is assumed. A character encoding may be specified
|
||||
explicitly using the \fB\-e\fR option.
|
||||
.PP
|
||||
\fBweb2markdown\fR is a wrapper for \fBhtml2markdown\fR.
|
||||
.SH OPTIONS
|
||||
.TP
|
||||
.B \-s, \-\-standalone
|
||||
Include title, author, and date information (if present) at the
|
||||
top of markdown output.
|
||||
.TP
|
||||
.B \-o FILE, \-\-output=FILE
|
||||
Write output to \fIFILE\fR instead of STDOUT.
|
||||
.TP
|
||||
.B \-p, \-\-preserve-tabs
|
||||
Preserve tabs instead of converting them to spaces.
|
||||
.TP
|
||||
.B \-\-tab-stop=\fITABSTOP\fB
|
||||
Specify tab stop (default is 4).
|
||||
.TP
|
||||
.B \-R, \-\-parse-raw
|
||||
Parse untranslatable HTML codes as raw HTML.
|
||||
.TP
|
||||
.B \-H \fIFILE\fB, \-\-include-in-header=\fIFILE\fB
|
||||
Include contents of \fIFILE\fR at the end of the header. Implies
|
||||
\fB\-s\fR.
|
||||
.TP
|
||||
.B \-B \fIFILE\fB, \-\-include-before-body=\fIFILE\fB
|
||||
Include contents of \fIFILE\fR at the beginning of the document body.
|
||||
.TP
|
||||
.B \-A \fIFILE\fB, \-\-include-after-body=\fIFILE\fB
|
||||
Include contents of \fIFILE\fR at the end of the document body.
|
||||
.TP
|
||||
.B \-C \fIFILE\fB, \-\-custom-header=\fIFILE\fB
|
||||
Use contents of \fIFILE\fR
|
||||
as the document header (overriding the default header, which can be
|
||||
printed using '\fBpandoc \-D markdown\fR'). Implies
|
||||
\fB\-s\fR.
|
||||
.TP
|
||||
.B \-v, \-\-version
|
||||
Print version.
|
||||
.TP
|
||||
.B \-h, \-\-help
|
||||
Show usage message.
|
||||
.TP
|
||||
.B \-e \fIencoding\fR
|
||||
Assume the character encoding \fIencoding\fR in reading HTML.
|
||||
(Note: \fIencoding\fR will be passed to \fBiconv\fR; a list of
|
||||
available encodings may be obtained using `\fBiconv \-l\fR'.)
|
||||
If the \fB\-e\fR option is not specified and input is not from
|
||||
STDIN, \fBweb2markdown\fR will try to extract the character encoding
|
||||
from the "Content-type" meta tag. If no character encoding is
|
||||
specified in this way, or if input is from STDIN, UTF-8 will be
|
||||
assumed.
|
||||
.TP
|
||||
.B \-g \fIcommand\fR
|
||||
Use \fIcommand\fR to fetch the contents of a URL. (By default,
|
||||
\fBweb2markdown\fR searches for an available program or text-based
|
||||
browser to fetch the contents of a URL.) For example:
|
||||
.IP
|
||||
web2markdown \-g 'wget \-\-user=foo \-\-password=bar' mysite.com
|
||||
|
||||
.SH "SEE ALSO"
|
||||
\fBpandoc\fR(1),
|
||||
\fBhtml2markdown\fR(1),
|
||||
\fBiconv\fR(1)
|
||||
.SH AUTHOR
|
||||
John MacFarlane and Recai Oktas
|
140
src/Main.hs
140
src/Main.hs
|
@ -45,6 +45,7 @@ import Text.Pandoc.Writers.DefaultHeaders ( defaultHtmlHeader,
|
|||
defaultRTFHeader, defaultS5Header, defaultLaTeXHeader )
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Shared
|
||||
import Text.Regex ( mkRegex, splitRegex )
|
||||
import System ( exitWith, getArgs, getProgName )
|
||||
import System.Exit
|
||||
import System.Console.GetOpt
|
||||
|
@ -57,6 +58,9 @@ import Control.Monad ( (>>=) )
|
|||
version :: String
|
||||
version = "0.3"
|
||||
|
||||
copyrightMessage :: String
|
||||
copyrightMessage = "\nCopyright (C) 2006 John MacFarlane\nWeb: http://sophos.berkeley.edu/macfarlane/pandoc\nThis is free software; see the source for copying conditions. There is no\nwarranty, not even for merchantability or fitness for a particular purpose."
|
||||
|
||||
-- | Association list of formats and readers.
|
||||
readers :: [(String, ParserState -> String -> Pandoc)]
|
||||
readers = [("native" , readPandoc)
|
||||
|
@ -101,10 +105,13 @@ data Opt = Opt
|
|||
, optCustomHeader :: String -- ^ Custom header to use, or "DEFAULT"
|
||||
, optDefaultHeader :: String -- ^ Default header
|
||||
, optTitlePrefix :: String -- ^ Optional prefix for HTML title
|
||||
, optOutputFile :: String -- ^ Name of output file
|
||||
, optNumberSections :: Bool -- ^ If @True@, number sections in LaTeX
|
||||
, optIncremental :: Bool -- ^ If @True@, incremental lists in S5
|
||||
, optSmart :: Bool -- ^ If @True@, use smart typography
|
||||
, optASCIIMathML :: Bool -- ^ If @True@, use ASCIIMathML in HTML
|
||||
, optShowUsage :: Bool -- ^ If @True@, show usage message
|
||||
, optDebug :: Bool -- ^ If @True@, output debug messages
|
||||
}
|
||||
|
||||
-- | Defaults for command-line options.
|
||||
|
@ -123,32 +130,20 @@ startOpt = Opt
|
|||
, optCustomHeader = "DEFAULT"
|
||||
, optDefaultHeader = defaultHtmlHeader
|
||||
, optTitlePrefix = ""
|
||||
, optOutputFile = "" -- null for stdout
|
||||
, optNumberSections = False
|
||||
, optIncremental = False
|
||||
, optSmart = False
|
||||
, optASCIIMathML = False
|
||||
, optShowUsage = False
|
||||
, optDebug = False
|
||||
}
|
||||
|
||||
-- | A list of functions, each transforming the options data structure in response
|
||||
-- to a command-line option.
|
||||
options :: [OptDescr (Opt -> IO Opt)]
|
||||
options =
|
||||
[ Option "v" ["version"]
|
||||
(NoArg
|
||||
(\_ -> do
|
||||
hPutStrLn stderr ("Version " ++ version)
|
||||
exitWith ExitSuccess))
|
||||
"Print version"
|
||||
|
||||
, Option "h" ["help"]
|
||||
(NoArg
|
||||
(\_ -> do
|
||||
prg <- getProgName
|
||||
hPutStrLn stderr (usageInfo (prg ++ " [OPTIONS] [FILES] - convert FILES from one markup format to another\nIf no OPTIONS specified, converts from markdown to html.\nIf no FILES specified, input is read from STDIN.\nOptions:") options)
|
||||
exitWith ExitSuccess))
|
||||
"Show help"
|
||||
|
||||
, Option "fr" ["from","read"]
|
||||
allOptions :: [OptDescr (Opt -> IO Opt)]
|
||||
allOptions =
|
||||
[ Option "fr" ["from","read"]
|
||||
(ReqArg
|
||||
(\arg opt -> case (lookup (map toLower arg) readers) of
|
||||
Just reader -> return opt { optReader = reader }
|
||||
|
@ -172,6 +167,13 @@ options =
|
|||
(\opt -> return opt { optStandalone = True }))
|
||||
"Include needed header and footer on output"
|
||||
|
||||
, Option "o" ["output"]
|
||||
(ReqArg
|
||||
(\arg opt -> do
|
||||
return opt { optOutputFile = arg })
|
||||
"FILENAME")
|
||||
"Name of output file"
|
||||
|
||||
, Option "p" ["preserve-tabs"]
|
||||
(NoArg
|
||||
(\opt -> return opt { optPreserveTabs = True }))
|
||||
|
@ -241,7 +243,7 @@ options =
|
|||
"FILENAME")
|
||||
"File to include after document body"
|
||||
|
||||
, Option "" ["custom-header"]
|
||||
, Option "C" ["custom-header"]
|
||||
(ReqArg
|
||||
(\arg opt -> do
|
||||
text <- readFile arg
|
||||
|
@ -263,18 +265,87 @@ options =
|
|||
let header = case (lookup arg writers) of
|
||||
Just (writer, head) -> head
|
||||
Nothing -> error ("Unknown reader: " ++ arg)
|
||||
hPutStrLn stdout header
|
||||
hPutStr stdout header
|
||||
exitWith ExitSuccess)
|
||||
"FORMAT")
|
||||
"Print default header for FORMAT"
|
||||
|
||||
, Option "d" ["debug"]
|
||||
(NoArg
|
||||
(\opt -> return opt { optDebug = True }))
|
||||
"Print debug messages to stderr, output to stdout"
|
||||
|
||||
, Option "v" ["version"]
|
||||
(NoArg
|
||||
(\_ -> do
|
||||
prg <- getProgName
|
||||
hPutStrLn stderr (prg ++ " " ++ version ++
|
||||
copyrightMessage)
|
||||
exitWith $ ExitFailure 2))
|
||||
"Print version"
|
||||
|
||||
, Option "h" ["help"]
|
||||
(NoArg
|
||||
(\opt -> return opt { optShowUsage = True }))
|
||||
"Show help"
|
||||
]
|
||||
|
||||
-- | Parse the name of the calling program and return the names of the
-- default reader and writer as a pair.  The lower-cased name is split on
-- the character \'2\' (so @html2latex@ gives @("html", "latex")@); if the
-- split does not yield exactly two parts, @("markdown", "html")@ is
-- returned.  The names are not validated here.
|
||||
parseProgName name =
|
||||
case (splitRegex (mkRegex "2") (map toLower name)) of
|
||||
[from, to] -> (from, to)
|
||||
_ -> ("markdown", "html")
|
||||
|
||||
-- | Set default options based on reader and writer names.  @from@ is
-- looked up in 'readers' and @to@ in 'writers'; if both lookups succeed,
-- @start@ is returned with 'optReader', 'optWriter' and
-- 'optDefaultHeader' replaced by the values found.  If either lookup
-- fails, @start@ is returned unchanged.
|
||||
setDefaultOpts from to start =
|
||||
case ((lookup from readers), (lookup to writers)) of
|
||||
(Just reader, Just (writer, header)) -> start {optReader = reader,
|
||||
optWriter = writer,
|
||||
optDefaultHeader = header}
|
||||
_ -> start
|
||||
|
||||
-- | True if at least one of the single-letter (short) flags of the option
-- description @desc@ occurs in @list@.  Long option names are not
-- examined, so an option with no short flag never matches.
|
||||
inOptList :: [Char] -> OptDescr (Opt -> IO Opt) -> Bool
|
||||
inOptList list desc =
|
||||
let (Option letters _ _ _) = desc in
|
||||
any (\x -> x `elem` list) letters
|
||||
|
||||
-- | Reformat usage message so it doesn't wrap illegibly.  Three 'gsub'
-- substitutions are composed with (.), so the last one listed is applied
-- to the usage text first and the first one listed is applied last.
-- NOTE(review): 'gsub' is not defined in this file; presumably it is the
-- regex global-substitution helper from Text.Pandoc.Shared -- confirm.
|
||||
reformatUsageInfo = gsub " *--" " --" .
|
||||
gsub "(-[A-Za-z0-9]) *--" "\\1, --" .
|
||||
gsub " *([^- ])" "\n\t\\1"
|
||||
|
||||
main = do
|
||||
|
||||
name <- getProgName
|
||||
let (from, to) = parseProgName name
|
||||
|
||||
let irrelevantOptions = if not ('2' `elem` name)
|
||||
then ""
|
||||
else "frtwD" ++
|
||||
(if (to /= "html" && to /= "s5") then "SmcT" else "") ++
|
||||
(if (to /= "latex") then "N" else "") ++
|
||||
(if (to /= "s5") then "i" else "") ++
|
||||
(if (from /= "html" && from /= "latex") then "R" else "")
|
||||
|
||||
let options = filter (not . inOptList irrelevantOptions) allOptions
|
||||
|
||||
let defaultOpts = setDefaultOpts from to startOpt
|
||||
|
||||
args <- getArgs
|
||||
let (actions, sources, errors) = getOpt RequireOrder options args
|
||||
let (actions, sources, errors) = getOpt Permute options args
|
||||
|
||||
if (not (null errors))
|
||||
then do
|
||||
mapM (\e -> hPutStrLn stderr e) errors
|
||||
hPutStrLn stderr (reformatUsageInfo $
|
||||
usageInfo (name ++ " [OPTIONS] [FILES]") options)
|
||||
exitWith $ ExitFailure 2
|
||||
else
|
||||
return ()
|
||||
|
||||
-- thread option data structure through all supplied option actions
|
||||
opts <- foldl (>>=) (return startOpt) actions
|
||||
opts <- foldl (>>=) (return defaultOpts) actions
|
||||
|
||||
let Opt { optPreserveTabs = preserveTabs
|
||||
, optTabStop = tabStop
|
||||
|
@ -289,12 +360,31 @@ main = do
|
|||
, optCustomHeader = customHeader
|
||||
, optDefaultHeader = defaultHeader
|
||||
, optTitlePrefix = titlePrefix
|
||||
, optOutputFile = outputFile
|
||||
, optNumberSections = numberSections
|
||||
, optIncremental = incremental
|
||||
, optSmart = smart
|
||||
, optASCIIMathML = asciiMathML
|
||||
, optShowUsage = showUsage
|
||||
, optDebug = debug
|
||||
} = opts
|
||||
|
||||
if showUsage
|
||||
then do
|
||||
hPutStr stderr (reformatUsageInfo $ usageInfo (name ++ " [OPTIONS] [FILES]") options)
|
||||
exitWith $ ExitFailure 2
|
||||
else return ()
|
||||
|
||||
output <- if ((null outputFile) || debug)
|
||||
then return stdout
|
||||
else openFile outputFile WriteMode
|
||||
|
||||
if debug
|
||||
then do
|
||||
hPutStrLn stderr ("OUTPUT=" ++ outputFile)
|
||||
hPutStr stderr $ concatMap (\s -> "INPUT=" ++ s ++ "\n") sources
|
||||
else return ()
|
||||
|
||||
let writingS5 = (defaultHeader == defaultS5Header)
|
||||
let tabFilter = if preserveTabs then id else (tabsToSpaces tabStop)
|
||||
let addBlank str = str ++ "\n\n"
|
||||
|
@ -323,13 +413,13 @@ main = do
|
|||
writerIncludeBefore = includeBefore,
|
||||
writerIncludeAfter = includeAfter }
|
||||
|
||||
(readSources sources) >>= (putStr . encodeUTF8 . (writer writerOptions) .
|
||||
(readSources sources) >>= (hPutStr output . encodeUTF8 .
|
||||
(writer writerOptions) .
|
||||
(reader startParserState) . filter .
|
||||
decodeUTF8 . (joinWithSep "\n"))
|
||||
decodeUTF8 . (joinWithSep "\n")) >> hClose output
|
||||
|
||||
where
|
||||
readSources [] = mapM readSource ["-"]
|
||||
readSources sources = mapM readSource sources
|
||||
readSource "-" = getContents
|
||||
readSource source = readFile source
|
||||
|
||||
|
|
|
@ -1,7 +0,0 @@
|
|||
# Check if input files exist.
|
||||
for f; do
|
||||
if [ -n "$f" ] && ! [ -f "$f" ]; then
|
||||
err "File '$f' not found."
|
||||
exit 1
|
||||
fi
|
||||
done
|
|
@ -8,22 +8,6 @@ WRAPPEE_ARGS=
|
|||
err () { echo "$*" | fold -s -w ${COLUMNS:-110} >&2; }
|
||||
errn () { printf "$*" | fold -s -w ${COLUMNS:-110} >&2; }
|
||||
|
||||
usage () {
|
||||
synopsis="$@"
|
||||
err "Usage: $THIS $synopsis"
|
||||
err "See $THIS(1) man file for details."
|
||||
}
|
||||
|
||||
runpandoc () {
|
||||
if [ -n "$WRAPPEE_ARGS" ]; then
|
||||
# Unpack arguments that will be passed to pandoc.
|
||||
oldifs="$IFS"; IFS="$NEWLINE"; set -- $WRAPPEE_ARGS "$@"; IFS="$oldifs"
|
||||
case "$1" in --) shift;; esac # tolerate the existence of a leading '--'
|
||||
fi
|
||||
|
||||
pandoc "$@"
|
||||
}
|
||||
|
||||
# Portable which(1).
|
||||
pathfind () {
|
||||
oldifs="$IFS"; IFS=':'
|
||||
|
@ -37,17 +21,6 @@ pathfind () {
|
|||
return 1
|
||||
}
|
||||
|
||||
HAVE_ICONV=
|
||||
if pathfind iconv; then
|
||||
HAVE_ICONV=1
|
||||
alias to_utf8='iconv -t utf-8'
|
||||
alias from_utf8='iconv -f utf-8'
|
||||
else
|
||||
err "Warning: iconv not present. Assuming UTF-8 character encoding."
|
||||
alias to_utf8='cat'
|
||||
alias from_utf8='cat'
|
||||
fi
|
||||
|
||||
for p in pandoc $REQUIRED; do
|
||||
pathfind $p || {
|
||||
err "You need '$p' to use this program!"
|
||||
|
|
|
@ -1,12 +0,0 @@
|
|||
if [ -z "$SYNOPSIS" ]; then
|
||||
SYNOPSIS="[-h] [input_file]"
|
||||
[ -n "$THIS_NARG" ] || SYNOPSIS="${SYNOPSIS}..."
|
||||
fi
|
||||
|
||||
while getopts h opt; do
|
||||
case $opt in
|
||||
h|?) usage "$SYNOPSIS"; exit 2 ;;
|
||||
esac
|
||||
done
|
||||
|
||||
shift $(($OPTIND - 1))
|
|
@ -1,134 +0,0 @@
|
|||
#!/bin/sh -e
|
||||
# converts html to markdown
|
||||
# uses an available program to fetch URL and tidy to normalize it first
|
||||
|
||||
REQUIRED=tidy
|
||||
|
||||
### common.sh
|
||||
|
||||
grab_url_with () {
|
||||
url="${1:?internal error: grab_url_with: url required}"
|
||||
|
||||
shift
|
||||
cmdline="$@"
|
||||
|
||||
prog=
|
||||
prog_opts=
|
||||
if [ -n "$cmdline" ]; then
|
||||
eval "set -- $cmdline"
|
||||
prog=$1
|
||||
shift
|
||||
prog_opts="$@"
|
||||
fi
|
||||
|
||||
if [ -z "$prog" ]; then
|
||||
# Locate a sensible web grabber (note the order).
|
||||
for p in wget lynx w3m curl links w3c; do
|
||||
if pathfind $p; then
|
||||
prog=$p
|
||||
break
|
||||
fi
|
||||
done
|
||||
|
||||
[ -n "$prog" ] || {
|
||||
errn "$THIS: Couldn't find a program to fetch the file from URL "
|
||||
err "(e.g. wget, w3m, lynx, w3c, or curl)."
|
||||
return 1
|
||||
}
|
||||
else
|
||||
pathfind "$prog" || {
|
||||
err "$THIS: No such web grabber '$prog' found; aborting."
|
||||
return 1
|
||||
}
|
||||
fi
|
||||
|
||||
# Setup proper base options for known grabbers.
|
||||
base_opts=
|
||||
case "$prog" in
|
||||
wget) base_opts="-O-" ;;
|
||||
lynx) base_opts="-source" ;;
|
||||
w3m) base_opts="-dump_source" ;;
|
||||
curl) base_opts="" ;;
|
||||
links) base_opts="-source" ;;
|
||||
w3c) base_opts="-n -get" ;;
|
||||
*) err "$THIS: unhandled web grabber '$prog'; hope it succeeds."
|
||||
esac
|
||||
|
||||
err "$THIS: invoking '$prog $base_opts $prog_opts $url'..."
|
||||
eval "set -- $base_opts $prog_opts"
|
||||
$prog "$@" "$url"
|
||||
}
|
||||
|
||||
encoding=
|
||||
grabber=
|
||||
nograb=
|
||||
while getopts e:g:nh opt; do
|
||||
case $opt in
|
||||
e) encoding="$OPTARG" ;;
|
||||
g) grabber="$OPTARG" ;;
|
||||
n) nograb=1 ;;
|
||||
h|?)
|
||||
usage "[-e encoding] [-g grabber_command] [-n] [-h] [input_file|url]"
|
||||
exit 2 ;;
|
||||
esac
|
||||
done
|
||||
|
||||
shift $(($OPTIND - 1))
|
||||
|
||||
### postopts.sh
|
||||
|
||||
### singlearg.sh
|
||||
|
||||
inurl=
|
||||
if [ -n "$1" ] && ! [ -f "$1" ]; then
|
||||
if [ -n "$nograb" ]; then
|
||||
err "'$1' not found; refusing to treat input as URL."
|
||||
exit 1
|
||||
fi
|
||||
# Treat given argument as an URL.
|
||||
inurl="$1"
|
||||
fi
|
||||
|
||||
if [ -n "$inurl" ]; then
|
||||
err "Attempting to fetch file from '$inurl'..."
|
||||
|
||||
### tempdir.sh
|
||||
|
||||
grabber_out=$THIS_TEMPDIR/grabber.out
|
||||
grabber_log=$THIS_TEMPDIR/grabber.log
|
||||
if ! grab_url_with "$inurl" "$grabber" 1>$grabber_out \
|
||||
2>$grabber_log; then
|
||||
errn "grab_url_with failed"
|
||||
if [ -f $grabber_log ]; then
|
||||
err " with the following error log."
|
||||
err
|
||||
cat >&2 $grabber_log
|
||||
else
|
||||
err .
|
||||
fi
|
||||
exit 1
|
||||
fi
|
||||
|
||||
set -- $grabber_out
|
||||
fi
|
||||
|
||||
if [ -z "$encoding" ] && [ "x$@" != "x" ]; then
|
||||
# Try to determine character encoding unless not specified
|
||||
# and input is STDIN.
|
||||
encoding=$(
|
||||
head "$@" |
|
||||
LC_ALL=C tr 'A-Z' 'a-z' |
|
||||
sed -ne '/<meta .*content-type.*charset=/ {
|
||||
s/.*charset=["'\'']*\([-a-zA-Z0-9]*\).*["'\'']*/\1/p
|
||||
}'
|
||||
)
|
||||
fi
|
||||
|
||||
if [ -n "$encoding" ] && [ -n "$HAVE_ICONV" ]; then
|
||||
alias to_utf8='iconv -f "$encoding" -t utf-8'
|
||||
elif [ -n "$inurl" ]; then # assume web pages are UTF-8
|
||||
alias to_utf8='cat'
|
||||
fi # else just use local encoding
|
||||
|
||||
to_utf8 "$@" | tidy -utf8 2>/dev/null |
|
||||
runpandoc -r html -w markdown -s | from_utf8
|
|
@ -1,14 +0,0 @@
|
|||
#!/bin/sh -e
|
||||
# runs pandoc to convert latex to markdown
|
||||
|
||||
### common.sh
|
||||
|
||||
### getopts.sh
|
||||
|
||||
### postopts.sh
|
||||
|
||||
### singlearg.sh
|
||||
|
||||
### checkin.sh
|
||||
|
||||
to_utf8 "$@" | runpandoc -r latex -w markdown -s | from_utf8
|
|
@ -1,12 +0,0 @@
|
|||
#!/bin/sh -e
|
||||
# converts markdown to HTML
|
||||
|
||||
### common.sh
|
||||
|
||||
### getopts.sh
|
||||
|
||||
### postopts.sh
|
||||
|
||||
### checkin.sh
|
||||
|
||||
to_utf8 "$@" | runpandoc | from_utf8
|
|
@ -1,12 +0,0 @@
|
|||
#!/bin/sh -e
|
||||
# converts markdown to latex
|
||||
|
||||
### common.sh
|
||||
|
||||
### getopts.sh
|
||||
|
||||
### postopts.sh
|
||||
|
||||
### checkin.sh
|
||||
|
||||
to_utf8 "$@" | runpandoc -w latex -s | from_utf8
|
|
@ -1,64 +1,54 @@
|
|||
#!/bin/sh -e
|
||||
# converts markdown to latex, then uses latex to make a PDF
|
||||
|
||||
REQUIRED=pdflatex
|
||||
REQUIRED="markdown2latex pdflatex"
|
||||
|
||||
### common.sh
|
||||
|
||||
outfile=
|
||||
while getopts o:h opt; do
|
||||
case $opt in
|
||||
o) outfile="$OPTARG" ;;
|
||||
h|?) usage "[-o output_file] [-h] [input_file]..."; exit 2 ;;
|
||||
esac
|
||||
done
|
||||
|
||||
shift $(($OPTIND - 1))
|
||||
|
||||
### postopts.sh
|
||||
|
||||
### checkin.sh
|
||||
|
||||
if [ -z "$outfile" ]; then
|
||||
if [ -n "$1" ]; then
|
||||
outfile="${1%.*}"
|
||||
else
|
||||
outfile="stdin" # input is STDIN, since no argument given
|
||||
fi
|
||||
fi
|
||||
case "$outfile" in
|
||||
*.*) ;; # skip appending extension if one is already present
|
||||
*) outfile="${outfile%.*}.pdf";;
|
||||
esac
|
||||
|
||||
### tempdir.sh
|
||||
|
||||
# We should use a filename without white spaces for pdflatex.
|
||||
TEXNAME=$THIS
|
||||
texname=output
|
||||
logfile=$THIS_TEMPDIR/log
|
||||
|
||||
if ! markdown2latex -s -d "$@" >$THIS_TEMPDIR/$texname.tex 2>$logfile; then
|
||||
[ -f $logfile ] && sed -e 's/markdown2latex/markdown2pdf/g' \
|
||||
-e '/^INPUT=/d' -e '/^OUTPUT=/d' $logfile >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
outfile="$(sed -ne 's/^OUTPUT=//p' $logfile)"
|
||||
IFS="$NEWLINE"
|
||||
set -- $(sed -ne 's/^INPUT=//p' $logfile)
|
||||
firstinfilebase="${1%.*}"
|
||||
defaultdest="${firstinfilebase:-stdin}.pdf"
|
||||
destname="${outfile:-$defaultdest}"
|
||||
|
||||
to_utf8 "$@" | runpandoc -w latex -s >$THIS_TEMPDIR/$TEXNAME.tex
|
||||
(
|
||||
cd $THIS_TEMPDIR
|
||||
if ! pdflatex -interaction=batchmode $TEXNAME.tex >/dev/null 2>&1; then
|
||||
if ! pdflatex -interaction=batchmode $texname.tex >/dev/null 2>&1; then
|
||||
err "LaTeX errors:"
|
||||
from_utf8 $TEXNAME.log | sed -ne '/^!/,/^ *$/p' >&2
|
||||
if grep -q "File \`ucs.sty' not found" $TEXNAME.log; then
|
||||
err "Please install the 'unicode' package from ctan.org."
|
||||
sed -ne '/^!/,/^ *$/p' $texname.log >&2
|
||||
if grep -q "File \`ucs.sty' not found" $texname.log; then
|
||||
err "Please install the 'unicode' package from CTAN:"
|
||||
err "http://www.ctan.org/tex-archive/macros/latex/contrib/unicode/"
|
||||
fi
|
||||
if grep -q "File \`fancyvrb.sty' not found" $texname.log; then
|
||||
err "Please install the 'fancyvrb' package from CTAN:"
|
||||
err "http://www.ctan.org/tex-archive/macros/latex/contrib/fancyvrb/"
|
||||
fi
|
||||
exit 1
|
||||
fi
|
||||
)
|
||||
) || exit $?
|
||||
|
||||
is_target_exists=
|
||||
if [ -f "$outfile" ]; then
|
||||
if [ -f "$destname" ]; then
|
||||
is_target_exists=1
|
||||
mv -f "$outfile" "$outfile~"
|
||||
mv "$destname" "$destname~"
|
||||
fi
|
||||
|
||||
mv -f $THIS_TEMPDIR/$TEXNAME.pdf "$outfile"
|
||||
mv -f $THIS_TEMPDIR/$texname.pdf "$destname"
|
||||
|
||||
errn "Created '$outfile'"
|
||||
errn "Created $destname"
|
||||
[ -z "$is_target_exists" ] || {
|
||||
errn " (previous file has been backed up as '$outfile~')"
|
||||
errn " (previous file has been backed up as $destname~)"
|
||||
}
|
||||
err .
|
||||
|
|
|
@ -1,17 +0,0 @@
|
|||
# Split the command line into wrapper arguments (everything before the
# first word starting with '-') and wrappee (pandoc) arguments (that word
# and everything after it).  Arguments may contain spaces or tabs, so each
# list is packed into a single variable with NEWLINE as the separator.
# NOTE(review): NEWLINE is not defined in this fragment; presumably it is
# provided by common.sh -- confirm.
pick="WRAPPER_ARGS"
while [ $# -gt 0 ]; do
    if [ "$pick" = "WRAPPER_ARGS" ]; then
        case "$1" in
        -*) pick="WRAPPEE_ARGS" ;;
        esac
    fi
    # Append the current argument to the picked variable.  Only the variable
    # NAME is expanded before eval; the old value, NEWLINE and $1 are expanded
    # by eval itself, inside double quotes.  This keeps arguments containing
    # double quotes, '$' or backticks intact instead of re-parsing them as
    # shell code.
    eval "$pick=\"\${$pick}\${NEWLINE}\$1\""
    shift
done

# Unpack filename arguments.  Now "$@" will hold the filenames.
oldifs="$IFS"; IFS="$NEWLINE"; set -- $WRAPPER_ARGS; IFS="$oldifs"
|
|
@ -1,7 +0,0 @@
|
|||
# Ensure to work with a single argument: keep the first positional
# parameter, warn about the rest, and drop them.
if [ $# -gt 1 ]; then
    first_arg="$1"
    shift
    # "$*" joins the remaining arguments into one word, so err receives a
    # single message string.
    err "Warning: extra arguments '$*' will be ignored."
    # Quoted so that a filename containing spaces or glob characters is
    # restored as exactly one argument.
    set -- "$first_arg"
fi
|
|
@ -1,141 +0,0 @@
|
|||
#!/bin/sh
|
||||
|
||||
THIS=$1
|
||||
|
||||
ASH="ash -s"
|
||||
BASH="bash --posix -s"
|
||||
DASH="dash -s"
|
||||
KSH="ksh -s"
|
||||
POSH="posh -s"
|
||||
ZSH="zsh -s"
|
||||
|
||||
ERROR=""
|
||||
|
||||
wrapper () {
|
||||
$SH -- "$@" <<-'EOF'
|
||||
### common.sh
|
||||
|
||||
outfile=
|
||||
while getopts o: opt; do
|
||||
case $opt in
|
||||
o) outfile="$OPTARG" ;;
|
||||
esac
|
||||
done
|
||||
|
||||
shift $(($OPTIND - 1))
|
||||
|
||||
### postopts.sh
|
||||
|
||||
echo "Options passed to wrapper:"
|
||||
[ -z "$outfile" ] || echo "|$outfile|"
|
||||
|
||||
echo "Arguments passed to wrapper:"
|
||||
for arg; do
|
||||
echo "|$arg|"
|
||||
done
|
||||
|
||||
pandoc () {
|
||||
echo "Arguments passed to wrappee:"
|
||||
for arg; do
|
||||
echo "|$arg|"
|
||||
done
|
||||
}
|
||||
runpandoc
|
||||
EOF
|
||||
}
|
||||
|
||||
# Portable which(1): succeed (status 0) when the named command exists as an
# executable regular file in one of the $PATH directories, fail (status 1)
# otherwise.  Nothing is printed.  IFS is switched to ':' while walking
# $PATH and set back to its saved value before returning.
pathfind () {
    oldifs="$IFS"
    IFS=':'
    _found=1
    for _p in $PATH; do
        # Must be executable AND a regular file (directories are executable too).
        [ -x "$_p/$*" ] || continue
        [ -f "$_p/$*" ] || continue
        _found=0
        break
    done
    IFS="$oldifs"
    return "$_found"
}
|
||||
|
||||
# Compare a test's actual output with the expected output.
#   $1  actual output
#   $2  expected output
#   $3  command line that produced the output (used in the failure report)
# Prints "ok" to stderr and returns 0 when $1 equals $2; otherwise prints
# "failed" followed by a tab-indented report to stderr and returns 1.
check_results () {
    if [ "$1" = "$2" ]; then
        echo >&2 ok
        return 0
    else
        echo >&2 failed
        # Insert a real tab character: a "\t" escape in the sed replacement
        # text is not portable across sed implementations.
        sed "s/^/$(printf '\t')/" >&2 <<EOF
Command line: '$3'
===> Expected:
$2
<=== Got:
$1
EOF
        return 1
    fi
}
|
||||
|
||||
for SH in "$BASH" "$DASH" "$KSH" "$ZSH"; do
|
||||
CMD=${SH%% *}
|
||||
echo >&2 " Testing with $CMD..."
|
||||
if pathfind "$CMD"; then
|
||||
if [ "$CMD" = "zsh" ]; then
|
||||
# Zsh needs to be called as 'sh' to enable POSIX mode.
|
||||
ln -s $(which zsh) ./sh
|
||||
SH="./sh ${SH#* }"
|
||||
trap 'err=$?; rm -f ./sh; exit $err' 0 1 2 3 13 15
|
||||
fi
|
||||
|
||||
set -e
|
||||
|
||||
# Test 1
|
||||
printf >&2 " test case 1... "
|
||||
actual=$(wrapper -o "output file" "foo bar" -A "quux baz" -B)
|
||||
expected=$(cat <<'EOF'
|
||||
Options passed to wrapper:
|
||||
|output file|
|
||||
Arguments passed to wrapper:
|
||||
|foo bar|
|
||||
Arguments passed to wrappee:
|
||||
|-A|
|
||||
|quux baz|
|
||||
|-B|
|
||||
EOF
|
||||
)
|
||||
check_results "$actual" "$expected" \
|
||||
'wrapper -o "output file" "foo bar" -A "quux baz" -B'
|
||||
|
||||
# Test 2
|
||||
printf >&2 " test case 2... "
|
||||
actual=$(wrapper -- -A "foo bar")
|
||||
expected=$(cat <<'EOF'
|
||||
Options passed to wrapper:
|
||||
Arguments passed to wrapper:
|
||||
Arguments passed to wrappee:
|
||||
|-A|
|
||||
|foo bar|
|
||||
EOF
|
||||
)
|
||||
check_results "$actual" "$expected" 'wrapper -- -A "foo bar"'
|
||||
|
||||
# Test 3 (Test 1 with a redundant '--')
|
||||
printf >&2 " test case 3... "
|
||||
actual=$(wrapper -o "output file" "foo bar" -- -A "quux baz" -B)
|
||||
expected=$(cat <<'EOF'
|
||||
Options passed to wrapper:
|
||||
|output file|
|
||||
Arguments passed to wrapper:
|
||||
|foo bar|
|
||||
Arguments passed to wrappee:
|
||||
|-A|
|
||||
|quux baz|
|
||||
|-B|
|
||||
EOF
|
||||
)
|
||||
check_results "$actual" "$expected" \
|
||||
'wrapper -o "output file" "foo bar" -- -A "quux baz" -B'
|
||||
else
|
||||
echo >&2 "Warning: cannot verify correctness with $CMD; shell not available"
|
||||
fi
|
||||
done
|
||||
|
||||
exit 0
|
173
src/wrappers/web2markdown.in
Normal file
173
src/wrappers/web2markdown.in
Normal file
|
@ -0,0 +1,173 @@
|
|||
#!/bin/sh -e
|
||||
# converts HTML from a URL, file, or stdin to markdown
|
||||
# uses an available program to fetch URL and tidy to normalize it first
|
||||
|
||||
REQUIRED="tidy html2markdown"
|
||||
|
||||
### common.sh
|
||||
|
||||
# Fetch a URL and write its contents to standard output.
#   $1    URL to fetch (required; the shell aborts with an error if missing)
#   $2..  optional grabber command line: a program name plus its options,
#         written as one shell-quoted string
# When no grabber is given, the first of wget, lynx, w3m, curl, links, w3c
# found by pathfind is used.  Returns 1 when no usable grabber is found;
# otherwise returns the grabber's own exit status.
# NOTE(review): err, errn, pathfind and $THIS are not defined in this file's
# visible text; presumably they come from the common.sh include -- confirm.
grab_url_with () {
    url="${1:?internal error: grab_url_with: url required}"

    shift
    cmdline="$*"

    prog=
    if [ -n "$cmdline" ]; then
        # Let the shell parse the grabber command line so that quoting inside
        # it is honoured.  This evaluates the string as shell code.
        eval "set -- $cmdline"
        prog=${1-}
        [ $# -eq 0 ] || shift
    else
        set --
    fi
    # From here on "$@" holds the grabber options, split exactly once.
    # prog_opts is kept only for the progress message below.
    prog_opts="$*"

    if [ -z "$prog" ]; then
        # Locate a sensible web grabber (note the order).
        for p in wget lynx w3m curl links w3c; do
            if pathfind "$p"; then
                prog=$p
                break
            fi
        done

        [ -n "$prog" ] || {
            errn "$THIS: Couldn't find a program to fetch the file from URL "
            err "(e.g. wget, w3m, lynx, w3c, or curl)."
            return 1
        }
    else
        pathfind "$prog" || {
            err "$THIS: No such web grabber '$prog' found; aborting."
            return 1
        }
    fi

    # Setup proper base options for known grabbers.
    base_opts=
    case "$prog" in
        wget)  base_opts="-O-" ;;
        lynx)  base_opts="-source" ;;
        w3m)   base_opts="-dump_source" ;;
        curl)  base_opts="" ;;
        links) base_opts="-source" ;;
        w3c)   base_opts="-n -get" ;;
        *)     err "$THIS: unhandled web grabber '$prog'; hope it succeeds." ;;
    esac

    err "$THIS: invoking '$prog $base_opts $prog_opts $url'..."
    # base_opts holds only the fixed words set above, so plain word splitting
    # is enough for it.  The user's options are passed through "$@" untouched
    # rather than being flattened and evaluated a second time, which would
    # split any option value containing spaces.
    "$prog" $base_opts "$@" "$url"
}
|
||||
|
||||
# Append one word ($1) to the newline-separated list kept in $options.
# Every entry, including the first, is preceded by $NEWLINE.
add_option () {
    options="${options}${NEWLINE}${1}"
}
|
||||
|
||||
options=
|
||||
argument=
|
||||
encoding=
|
||||
grabber=
|
||||
|
||||
# Abort with a usage error unless the option in $1 is followed by a value.
# Call as: require_value "$@"
require_value () {
    [ $# -ge 2 ] || {
        err "$THIS: option '$1' requires an argument."
        exit 2
    }
}

# Parse command-line arguments.
#   -e/--encoding and -g/--grabber are consumed here and stored in
#   $encoding and $grabber.
#   Every other option is collected in $options for html2markdown.
#   The first non-option word becomes $argument (file or URL); any further
#   non-option words are reported and ignored.
while [ $# -gt 0 ]; do
    case "$1" in
    -h|--help)
        html2markdown -h 2>&1 | sed -e 's/html2markdown/web2markdown/' 1>&2
        err " -e ENCODING, --encoding=ENCODING"
        err " Specify character encoding of input"
        err " -g COMMAND, --grabber=COMMAND"
        err " Specify command to be used to grab contents of URL"
        exit 0 ;;
    -v|--version)
        html2markdown -v
        exit 0 ;;
    -e)
        require_value "$@"
        shift
        encoding=$1 ;;
    --encoding=*)
        # extract encoding from after =
        encoding=${1#*=} ;;
    -g)
        require_value "$@"
        shift
        grabber=$1 ;;
    --grabber=*)
        # extract grabber command from after =
        grabber=${1#*=} ;;
    -o|--output|-b|--tab-stop|-H|--include-in-header| \
    -A|--include-after-body|-B|--include-before-body| \
    -C|--custom-header|-T|--title-prefix)
        # Options that take a separate value: forward both words.  Quoting
        # keeps a value containing spaces as one entry.
        require_value "$@"
        add_option "$1"
        shift
        add_option "$1" ;;
    -*) add_option "$1" ;;
    *)
        if [ -z "$argument" ]; then
            argument=$1
        else
            err "Warning: extra argument '$1' will be ignored."
        fi ;;
    esac
    shift
done
|
||||
|
||||
# Unpack options. Now "$@" will hold the html2markdown options.
|
||||
oldifs="$IFS"; IFS="$NEWLINE"; set -- $options; IFS="$oldifs"
|
||||
|
||||
inurl=
|
||||
if [ -n "$argument" ] && ! [ -f "$argument" ]; then
|
||||
# Treat given argument as an URL.
|
||||
inurl="$argument"
|
||||
fi
|
||||
|
||||
if [ -n "$inurl" ]; then
|
||||
err "Attempting to fetch file from '$inurl'..."
|
||||
|
||||
### tempdir.sh
|
||||
|
||||
grabber_out=$THIS_TEMPDIR/grabber.out
|
||||
grabber_log=$THIS_TEMPDIR/grabber.log
|
||||
if ! grab_url_with "$inurl" "$grabber" 1>$grabber_out 2>$grabber_log; then
|
||||
errn "grab_url_with failed"
|
||||
if [ -f $grabber_log ]; then
|
||||
err " with the following error log."
|
||||
err
|
||||
cat >&2 $grabber_log
|
||||
else
|
||||
err .
|
||||
fi
|
||||
exit 1
|
||||
fi
|
||||
|
||||
argument="$grabber_out"
|
||||
fi
|
||||
|
||||
if [ -z "$encoding" ] && [ "x$argument" != "x" ]; then
|
||||
# Try to determine character encoding if not specified
|
||||
# and input is not STDIN.
|
||||
encoding=$(
|
||||
head "$argument" |
|
||||
LC_ALL=C tr 'A-Z' 'a-z' |
|
||||
sed -ne '/<meta .*content-type.*charset=/ {
|
||||
s/.*charset=["'\'']*\([-a-zA-Z0-9]*\).*["'\'']*/\1/p
|
||||
}'
|
||||
)
|
||||
fi
|
||||
|
||||
if [ -n "$encoding" ] && pathfind iconv; then
|
||||
alias to_utf8='iconv -f "$encoding" -t utf-8'
|
||||
else # assume UTF-8
|
||||
alias to_utf8='cat'
|
||||
fi
|
||||
|
||||
# Normalize the HTML with tidy and convert it with html2markdown.
# "$@" holds the options collected for html2markdown.  The input is always
# passed through to_utf8 first, so an encoding given with -e/--encoding is
# also applied when the HTML is read from standard input.
if [ -z "$argument" ]; then
    # No file or URL given: read from standard input.
    to_utf8 | tidy -utf8 2>/dev/null | html2markdown "$@"
elif [ -f "$argument" ]; then
    to_utf8 "$argument" | tidy -utf8 2>/dev/null | html2markdown "$@"
else
    err "File '$argument' not found."
    exit 1
fi
|
Loading…
Reference in a new issue