+ Removed the convenience symlinks (which don't work on Windows under
Cygwin, due to Windows' lack of true symbolic links). + Modified the wrappers to use 'pandoc' instead of the symlinks. + Modified the Makefile to remove all references to the symlinks. + Removed code from Main.hs that made pandoc's behavior depend on the name of the calling program. + Added code to Main.hs that sets default reader and writer based on extensions of input and output filenames (if provided). (Thanks to roktas for the idea.) + Modified README and man pages accordingly. + Removed WINDOWS-README target from Makefile. It is no longer needed now that we don't have the symlinks. git-svn-id: https://pandoc.googlecode.com/svn/trunk@295 788f1e2b-df1e-0410-8736-df70ead52e1b
This commit is contained in:
parent
a1539d9ab8
commit
a1a30d69bd
16 changed files with 198 additions and 231 deletions
19
Makefile
19
Makefile
|
@ -26,8 +26,6 @@ EXECSBASE := $(shell sed -ne 's/^[Ee]xecutable:[[:space:]]*//p' $(CABAL).in)
|
|||
# Install targets
|
||||
#-------------------------------------------------------------------------------
|
||||
WRAPPERS := web2markdown markdown2pdf
|
||||
SYMLINKS := markdown2html markdown2latex markdown2s5 markdown2rst \
|
||||
markdown2rtf html2markdown latex2markdown rst2markdown
|
||||
# Add .exe extensions if we're running Windows/Cygwin.
|
||||
EXTENSION := $(shell uname | tr '[:upper:]' '[:lower:]' | \
|
||||
sed -ne 's/^cygwin.*$$/\.exe/p')
|
||||
|
@ -96,12 +94,6 @@ all: build-program
|
|||
templates: $(SRCDIR)/templates
|
||||
$(MAKE) -C $(SRCDIR)/templates
|
||||
|
||||
.PHONY: symlinks
|
||||
cleanup_files+=$(SYMLINKS)
|
||||
symlinks: $(SYMLINKS)
|
||||
$(SYMLINKS): $(MAIN)
|
||||
ln -sf ./$(MAIN) $@
|
||||
|
||||
define generate-shell-script
|
||||
echo "Generating $@..."; \
|
||||
awk ' \
|
||||
|
@ -141,7 +133,7 @@ build: configure
|
|||
$(BUILDCMD) build
|
||||
|
||||
.PHONY: build-exec
|
||||
build-exec: $(PROGS) $(SYMLINKS)
|
||||
build-exec: $(PROGS)
|
||||
cleanup_files+=$(EXECS)
|
||||
$(EXECS): build
|
||||
for f in $@; do \
|
||||
|
@ -201,9 +193,8 @@ install-exec: build-exec
|
|||
fi; \
|
||||
$(INSTALL_PROGRAM) $$f $(BINPATH)/; \
|
||||
done
|
||||
cd $(BINPATH); for f in $(SYMLINKS); do ln -sf $(MAIN) $$f; done
|
||||
uninstall-exec:
|
||||
-for f in $(notdir $(PROGS) $(SYMLINKS)); do rm -f $(BINPATH)/$$f; done ;
|
||||
-for f in $(notdir $(PROGS)); do rm -f $(BINPATH)/$$f; done ;
|
||||
|
||||
# Program + user documents installation.
|
||||
.PHONY: install-program uninstall-program
|
||||
|
@ -295,15 +286,11 @@ $(osx_dmg_name): $(osx_pkg_name)
|
|||
|
||||
.PHONY: win-pkg
|
||||
win_pkg_name:=$(RELNAME).zip
|
||||
win_docs:=COPYING.txt COPYRIGHT.txt BUGS.txt README-WINDOWS.txt README-WINDOWS.html
|
||||
win_docs:=COPYING.txt COPYRIGHT.txt BUGS.txt README.txt README.html
|
||||
cleanup_files+=$(win_pkg_name) $(win_docs)
|
||||
win-pkg: $(win_pkg_name)
|
||||
$(win_pkg_name): $(THIS).exe $(win_docs)
|
||||
zip -r $(win_pkg_name) $(THIS).exe $(win_docs)
|
||||
cleanup_files+=README-WINDOWS
|
||||
README-WINDOWS: README
|
||||
sed -e '/^Requirements/,/^\[fancyvrb\]:/ d' \
|
||||
-e '/^Character encodings/,/mysite.com$$/ d' $< > $@
|
||||
|
||||
.PHONY: test test-markdown
|
||||
test: $(MAIN)
|
||||
|
|
139
README
139
README
|
@ -36,14 +36,11 @@ Requirements
|
|||
============
|
||||
|
||||
The `pandoc` program itself does not depend on any external libraries
|
||||
or programs. The convenience programs `markdown2html`, `markdown2latex`,
|
||||
`markdown2rst`, `markdown2rtf`, `markdown2s5`, `html2markdown`,
|
||||
`latex2markdown`, and `rst2markdown` are implemented as symbolic links to
|
||||
`pandoc`.
|
||||
or programs.
|
||||
|
||||
The wrapper script `web2markdown` requires
|
||||
|
||||
- `html2markdown` (included with Pandoc)
|
||||
- `pandoc` (which must be in the PATH)
|
||||
- a POSIX-compliant shell (installed by default on all Linux and Unix
|
||||
systems, including Mac OS X, and in [Cygwin] for Windows),
|
||||
- `HTML Tidy`
|
||||
|
@ -56,7 +53,7 @@ The wrapper script `web2markdown` requires
|
|||
|
||||
The wrapper script `markdown2pdf` requires
|
||||
|
||||
- `markdown2latex` (included with Pandoc)
|
||||
- `pandoc` (which must be in the PATH)
|
||||
- a POSIX-compliant shell
|
||||
- `pdflatex`, which should be part of any [LaTeX] distribution
|
||||
- the [unicode] and [fancyvrb] LaTeX packages, which are included
|
||||
|
@ -80,47 +77,11 @@ Using Pandoc
|
|||
|
||||
If you run `pandoc` without arguments, it will accept input from
|
||||
STDIN. If you run it with file names as arguments, it will take input
|
||||
from those files. It accepts several command-line options. For a
|
||||
list, type
|
||||
|
||||
pandoc -h
|
||||
|
||||
The most important options specify the format of the source file and
|
||||
the output. The default reader is markdown; the default writer is
|
||||
HTML. So if you don't specify a reader or writer, `pandoc` will
|
||||
convert markdown to HTML. For example,
|
||||
|
||||
pandoc hello.txt
|
||||
|
||||
will convert `hello.txt` from markdown to HTML. For other conversions,
|
||||
you must specify a reader and/or a writer using the `-r` and `-w`
|
||||
flags. To convert markdown to LaTeX, you would write:
|
||||
|
||||
pandoc -w latex hello.txt
|
||||
|
||||
To convert html to markdown:
|
||||
|
||||
pandoc -r html -w markdown hello.txt
|
||||
|
||||
Supported writers include `markdown`, `latex`, `html`, `rtf` (rich text
|
||||
format), `rst` (reStructuredText), and `s5` (which produces an HTML
|
||||
file that acts like powerpoint). Supported readers include `markdown`,
|
||||
`html`, `latex`, and `rst`. Note that the `rst` reader only parses
|
||||
a subset of reStructuredText syntax. For example, it doesn't handle
|
||||
tables, definition lists, option lists, or footnotes. It handles only the
|
||||
constructs expressible in unextended markdown. But for simple documents
|
||||
it should be adequate. The `latex` and `html` readers are also limited
|
||||
in what they can do. Because the `html` reader is picky about the HTML
|
||||
it parses, it is recommended that you pipe HTML through [HTML Tidy] before
|
||||
sending it to `pandoc`, or use the `web2markdown` script described below.
|
||||
|
||||
By default, `pandoc` writes its output to STDOUT. If you want to
|
||||
write to a file, use the `-o` option or shell redirection:
|
||||
from those files. By default, `pandoc` writes its output to STDOUT.
|
||||
If you want to write to a file, use the `-o` option:
|
||||
|
||||
pandoc -o hello.html hello.txt
|
||||
|
||||
pandoc hello.txt > hello.html
|
||||
|
||||
Note that you can specify multiple input files on the command line.
|
||||
`pandoc` will concatenate them all (with blank lines between them)
|
||||
before parsing:
|
||||
|
@ -131,6 +92,44 @@ before parsing:
|
|||
with a proper header, rather than a fragment. For more details on this
|
||||
and many other command-line options, see below.)
|
||||
|
||||
The format of the input and output can be specified explicitly using
|
||||
command-line options. The input format can be specified using the
|
||||
`-r/--read` or `-f/--from` options, the output format using the
|
||||
`-w/--write` or `-t/--to` options. Thus, to convert `hello.txt` from
|
||||
markdown to LaTeX, you could type:
|
||||
|
||||
pandoc -f markdown -t latex hello.txt
|
||||
|
||||
To convert `hello.html` from html to markdown:
|
||||
|
||||
pandoc -f html -t markdown hello.html
|
||||
|
||||
Supported output formats include `markdown`, `latex`, `html`, `rtf`
|
||||
(rich text format), `rst` (reStructuredText), and `s5` (which produces
|
||||
an HTML file that acts like PowerPoint). Supported input formats
|
||||
include `markdown`, `html`, `latex`, and `rst`. Note that the `rst`
|
||||
reader only parses a subset of reStructuredText syntax. For example,
|
||||
it doesn't handle tables, definition lists, option lists, or footnotes.
|
||||
It handles only the constructs expressible in unextended markdown.
|
||||
But for simple documents it should be adequate. The `latex` and `html`
|
||||
readers are also limited in what they can do. Because the `html`
|
||||
reader is picky about the HTML it parses, it is recommended that you
|
||||
pipe HTML through [HTML Tidy] before sending it to `pandoc`, or use the
|
||||
`web2markdown` script described below.
|
||||
|
||||
If you don't specify a reader or writer explicitly, `pandoc` will
|
||||
try to determine the input and output format from the extensions of
|
||||
the input and output filenames. Thus, for example,
|
||||
|
||||
pandoc -o hello.tex hello.txt
|
||||
|
||||
will convert `hello.txt` from markdown to LaTeX. If no output file
|
||||
is specified (so that output goes to STDOUT), or if the output file's
|
||||
extension is unknown, the output format will default to HTML.
|
||||
If no input file is specified (so that input comes from STDIN), or
|
||||
if the input files' extensions are unknown, the input format will
|
||||
be assumed to be markdown unless explicitly specified.
|
||||
|
||||
Character encodings
|
||||
-------------------
|
||||
|
||||
|
@ -150,31 +149,16 @@ The shell scripts (described below) automatically convert the input
|
|||
from the local encoding to UTF-8 before running them through `pandoc`,
|
||||
then convert the output back to the local encoding.
|
||||
|
||||
Convenience programs and wrapper scripts
|
||||
========================================
|
||||
`markdown2pdf` and `web2markdown`
|
||||
=================================
|
||||
|
||||
For convenience, eight variant programs are included with Pandoc:
|
||||
`markdown2html` (which is equivalent to `pandoc -w html`),
|
||||
`markdown2latex` (equivalent to `pandoc -w latex`), `markdown2rst`
|
||||
(equivalent to `pandoc -w rst`), `markdown2rtf` (equivalent to
|
||||
`pandoc -w rtf`), `markdown2s5` (equivalent to `pandoc -w s5`),
|
||||
`html2markdown` (equivalent to `pandoc -r html -w markdown`),
|
||||
`latex2markdown` (equivalent to `pandoc -r latex -w markdown`), and
|
||||
`rst2markdown` (equivalent to `pandoc -r rst -w markdown`). These
|
||||
programs take an appropriately restricted subset of `pandoc`'s
|
||||
options. (Run them with the `-h` flag for a full list of allowed
|
||||
options.)
|
||||
|
||||
Like `pandoc`, all of these programs produce fragments by default.
|
||||
If you want to produce a standalone file, complete with a header
|
||||
and footer appropriate to the format, use the `-s` option:
|
||||
|
||||
markdown2latex -s sample.txt > sample.tex
|
||||
|
||||
Two shell scripts have also been included:
|
||||
Two shell scripts, `markdown2pdf` and `web2markdown`, are included in
|
||||
the standard Pandoc installation. (They are not included in the Windows
|
||||
binary package, as they require a POSIX shell, but they may be used
|
||||
in Windows under Cygwin.)
|
||||
|
||||
1. `markdown2pdf` produces a PDF file from markdown-formatted
|
||||
text, using `markdown2latex` and `pdflatex`. The default
|
||||
text, using `pandoc` and `pdflatex`. The default
|
||||
behavior of `markdown2pdf` is to create a file with the same
|
||||
base name as the first argument and the extension `pdf`; thus,
|
||||
for example,
|
||||
|
@ -190,7 +174,7 @@ Two shell scripts have also been included:
|
|||
If no input file is specified, input will be taken from STDIN.
|
||||
|
||||
2. `web2markdown` grabs a web page from a file or URL and converts
|
||||
it to markdown-formatted text, using `tidy` and `html2markdown`.
|
||||
it to markdown-formatted text, using `tidy` and `pandoc`.
|
||||
Unless input is from STDIN, an attempt is made to determine the
|
||||
character encoding of the page from the "Content-type" meta tag.
|
||||
If this is not present, UTF-8 is assumed. Alternatively, a character
|
||||
|
@ -207,9 +191,20 @@ Command-line options
|
|||
====================
|
||||
|
||||
Various command-line options can be used to customize the output.
|
||||
For a complete list, type
|
||||
|
||||
pandoc --help
|
||||
`-f`, `--from`, `-r`, or `--read` can be used to specify the input
|
||||
format -- the format Pandoc will be converting *from*. Available
|
||||
formats are `native`, `markdown`, `rst`, `html`, and `latex`.
|
||||
|
||||
`-t`, `--to`, `-w`, or `--write` can be used to specify the output
|
||||
format -- the format Pandoc will be converting *to*. Available formats
|
||||
are `native`, `html`, `s5`, `latex`, `markdown`, `rst`, and `rtf`.
|
||||
|
||||
`-s` or `--standalone` indicates that a standalone document is to be
|
||||
produced (with appropriate headers and footers), rather than a fragment.
|
||||
|
||||
`-o` or `--output` specifies the name of the output file. If no output
|
||||
filename is given, output will be sent to STDOUT.
|
||||
|
||||
`-p` or `--preserve-tabs` causes tabs in the source text to be
|
||||
preserved, rather than converted to spaces (the default).
|
||||
|
@ -225,12 +220,6 @@ untranslatable HTML codes and LaTeX environments. (The LaTeX reader
|
|||
does pass through untranslatable LaTeX commands, even if `-R` is not
|
||||
specified.)
|
||||
|
||||
`-s` or `--standalone` causes `pandoc` to produce a standalone file,
|
||||
complete with appropriate document headers. By default, `pandoc`
|
||||
produces a fragment.
|
||||
|
||||
`-o` or `--output` can be used to specify an output file.
|
||||
|
||||
`-C` or `--custom-header` can be used to specify a custom document
|
||||
header. To see the headers used by default, use the `-D` option:
|
||||
for example, `pandoc -D html` prints the default HTML header.
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
.so man1/pandoc.1
|
|
@ -1 +0,0 @@
|
|||
.so man1/pandoc.1
|
|
@ -1 +0,0 @@
|
|||
.so man1/pandoc.1
|
|
@ -1 +0,0 @@
|
|||
.so man1/pandoc.1
|
|
@ -6,14 +6,13 @@ markdown2pdf \- converts markdown-formatted text to PDF, using pdflatex
|
|||
.SH DESCRIPTION
|
||||
\fBmarkdown2pdf\fR converts \fIinput\-file\fR (or text from standard
|
||||
input) from markdown\-formatted plain text to PDF, using \fBpdflatex\fR.
|
||||
If no output filename is specified, the name of the output file is
|
||||
derived from the input file; thus, for example, if the input file
|
||||
is \fIhello.txt\fR, the output file will be \fIhello.pdf\fR. If
|
||||
the input is read from STDIN and no output filename is
|
||||
specified, the output file will be named \fIstdin.pdf\fR. If
|
||||
multiple input files are specified, they will be concatenated before
|
||||
conversion, and the name of the output file will be derived from
|
||||
the first input file.
|
||||
If no output filename is specified (using the \fB\-o\fR option),
|
||||
the name of the output file is derived from the input file; thus, for
|
||||
example, if the input file is \fIhello.txt\fR, the output file will be
|
||||
\fIhello.pdf\fR. If the input is read from STDIN and no output filename
|
||||
is specified, the output file will be named \fIstdin.pdf\fR. If multiple
|
||||
input files are specified, they will be concatenated before conversion,
|
||||
and the name of the output file will be derived from the first input file.
|
||||
.PP
|
||||
Input is assumed to be in the UTF\-8 character encoding. If your
|
||||
local character encoding is not UTF\-8, you should pipe input and
|
||||
|
@ -21,11 +20,11 @@ output through \fBiconv\fR:
|
|||
.IP
|
||||
.B iconv \-t utf\-8 input.txt | pandoc | iconv \-f utf\-8
|
||||
.PP
|
||||
\fBmarkdown2pdf\fR assumes that the 'unicode' package
|
||||
is in latex's search path. If this package is not included in your
|
||||
latex setup, it can be obtained from <http://ctan.org>.
|
||||
\fBmarkdown2pdf\fR assumes that the 'unicode' and 'fancyvrb' packages
|
||||
are in latex's search path. If these packages are not included in your
|
||||
latex setup, they can be obtained from <http://ctan.org>.
|
||||
.PP
|
||||
\fBmarkdown2pdf\fR is a wrapper around \fBmarkdown2latex\fR.
|
||||
\fBmarkdown2pdf\fR is a wrapper around \fBpandoc\fR.
|
||||
.SH OPTIONS
|
||||
.TP
|
||||
.B \-o FILE, \-\-output=FILE
|
||||
|
@ -37,10 +36,6 @@ Preserve tabs instead of converting them to spaces.
|
|||
.B \-\-tab-stop=\fITABSTOP\fB
|
||||
Specify tab stop (default is 4).
|
||||
.TP
|
||||
.B \-R, \-\-parse-raw
|
||||
Parse untranslatable LaTeX environments as raw LaTeX,
|
||||
instead of ignoring them.
|
||||
.TP
|
||||
.B \-N, \-\-number-sections
|
||||
Number section headings in LaTeX output. (Default is not to number them.)
|
||||
.TP
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
.so man1/pandoc.1
|
|
@ -1 +0,0 @@
|
|||
.so man1/pandoc.1
|
|
@ -1 +0,0 @@
|
|||
.so man1/pandoc.1
|
|
@ -1,8 +1,6 @@
|
|||
.TH PANDOC 1 "December 15, 2006" Pandoc "User Manuals"
|
||||
.SH NAME
|
||||
pandoc, markdown2html, markdown2latex, markdown2rst, markdown2rtf,
|
||||
markdown2s5, html2markdown, latex2markdown, rst2markdown \- general
|
||||
markup converter
|
||||
pandoc \- general markup converter
|
||||
.SH SYNOPSIS
|
||||
\fBpandoc\fR [\fIoptions\fR] [\fIinput\-file\fR]...
|
||||
.SH DESCRIPTION
|
||||
|
@ -13,41 +11,37 @@ slide shows.
|
|||
.PP
|
||||
If no \fIinput\-file\fR is specified, input is read from STDIN.
|
||||
Otherwise, the \fIinput\-files\fR are concatenated (with a blank
|
||||
line between each) and used as input. Output goes to standard
|
||||
output. If you want output to a file, use the \fB\-o\fR option or
|
||||
shell redirection:
|
||||
line between each) and used as input. Output goes to STDOUT by
|
||||
default. For output to a file, use the \fB\-o\fR option:
|
||||
.IP
|
||||
.B pandoc \-o output.html input.txt
|
||||
.PP
|
||||
The input and output formats may be specified using command-line options
|
||||
(see \fBOPTIONS\fR, below, for details). If these formats are not
|
||||
specified explicitly, \fIPandoc\fR will attempt to determine them
|
||||
from the extensions of the input and output filenames. If input comes
|
||||
from STDIN or from a file with an unknown extension, the input is assumed
|
||||
to be markdown. If no output filename is specified using the \fB\-o\fR
|
||||
option, or if a filename is specified but its extension is unknown,
|
||||
the output will default to HTML. Thus, for example,
|
||||
.IP
|
||||
.B pandoc input.txt > output.html
|
||||
.B pandoc \-o chap1.tex chap1.txt
|
||||
.PP
|
||||
The default behavior of \fIPandoc\fR is to convert the input from
|
||||
markdown\-formatted plain text to HTML. Different input and output
|
||||
formats can be specified using command\-line options. For example,
|
||||
converts \fIchap1.txt\fR from markdown to LaTeX. And
|
||||
.IP
|
||||
.B pandoc \-f latex \-t markdown chap1.tex > chap1.txt
|
||||
.B pandoc README
|
||||
.PP
|
||||
converts \fIchap1.tex\fR from LaTeX to markdown\-formatted plain text.
|
||||
See below for a detailed list of command\-line options.
|
||||
.PP
|
||||
For convenience, eight variant programs are available:
|
||||
\fBmarkdown2html\fR (same as \fBpandoc \-w html\fR),
|
||||
\fBmarkdown2latex\fR (same as \fBpandoc \-w latex\fR),
|
||||
\fBmarkdown2rst\fR (same as \fBpandoc \-w rst\fR),
|
||||
\fBmarkdown2rtf\fR (same as \fBpandoc \-w rtf\fR),
|
||||
\fBmarkdown2s5\fR (same as \fBpandoc \-w s5\fR),
|
||||
\fBhtml2markdown\fR (same as \fBpandoc \-r html \-w markdown\fR),
|
||||
\fBlatex2markdown\fR (same as \fBpandoc \-r latex \-w markdown\fR),
|
||||
and \fBrst2markdown\fR (same as \fBpandoc \-r rst \-w markdown\fR).
|
||||
These programs take an appropriately restricted subset of \fBpandoc\fR's
|
||||
options. (Run them with the \fB-h\fR flag for a full list of allowed
|
||||
options.)
|
||||
converts \fIREADME\fR from markdown to HTML.
|
||||
.PP
|
||||
\fIPandoc\fR uses the UTF\-8 character encoding for both input and output.
|
||||
If your local character encoding is not UTF\-8, you should pipe input
|
||||
and output through \fBiconv\fR:
|
||||
.IP
|
||||
.B iconv \-t utf\-8 input.txt | pandoc | iconv \-f utf\-8
|
||||
.PP
|
||||
\fIPandoc\fR's HTML parser is not very forgiving. If your input is
|
||||
HTML, consider running it through \fBtidy\fR(1) before passing it
|
||||
to Pandoc. Or use \fBweb2markdown\fR(1), a wrapper around \fBpandoc\fR.
|
||||
|
||||
.SH OPTIONS
|
||||
.TP
|
||||
|
@ -158,9 +152,7 @@ Show usage message.
|
|||
|
||||
.SH "SEE ALSO"
|
||||
\fBweb2markdown\fR(1),
|
||||
\fBmarkdown2pdf\fR(1),
|
||||
\fBiconv\fR(1)
|
||||
|
||||
\fBmarkdown2pdf\fR(1).
|
||||
The
|
||||
.I README
|
||||
file distributed with Pandoc contains full documentation.
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
.so man1/pandoc.1
|
|
@ -16,7 +16,7 @@ option.
|
|||
from STDIN, UTF-8 is assumed. A character encoding may be specified
|
||||
explicitly using the \fB\-e\fR option.
|
||||
.PP
|
||||
\fBweb2markdown\fR is a wrapper for \fBhtml2markdown\fR.
|
||||
\fBweb2markdown\fR is a wrapper for \fBpandoc\fR.
|
||||
.SH OPTIONS
|
||||
.TP
|
||||
.B \-s, \-\-standalone
|
||||
|
@ -76,7 +76,6 @@ web2markdown \-g 'wget \-\-user=foo \-\-password=bar' mysite.com
|
|||
|
||||
.SH "SEE ALSO"
|
||||
\fBpandoc\fR(1),
|
||||
\fBhtml2markdown\fR(1),
|
||||
\fBiconv\fR(1)
|
||||
.SH AUTHOR
|
||||
John MacFarlane and Recai Oktas
|
||||
|
|
157
src/Main.hs
157
src/Main.hs
|
@ -45,7 +45,7 @@ import Text.Pandoc.Writers.DefaultHeaders ( defaultHtmlHeader,
|
|||
defaultRTFHeader, defaultS5Header, defaultLaTeXHeader )
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Shared
|
||||
import Text.Regex ( mkRegex, splitRegex )
|
||||
import Text.Regex ( mkRegex, matchRegex )
|
||||
import System ( exitWith, getArgs, getProgName )
|
||||
import System.Exit
|
||||
import System.Console.GetOpt
|
||||
|
@ -94,8 +94,8 @@ data Opt = Opt
|
|||
{ optPreserveTabs :: Bool -- ^ If @False@, convert tabs to spaces
|
||||
, optTabStop :: Int -- ^ Number of spaces per tab
|
||||
, optStandalone :: Bool -- ^ If @True@, include header, footer
|
||||
, optReader :: ParserState -> String -> Pandoc -- ^ Read format
|
||||
, optWriter :: WriterOptions -> Pandoc -> String -- ^ Write fmt
|
||||
, optReader :: String -- ^ Reader format
|
||||
, optWriter :: String -- ^ Writer format
|
||||
, optParseRaw :: Bool -- ^ If @True@, parse unconvertable
|
||||
-- HTML and TeX
|
||||
, optCSS :: String -- ^ CSS file to link to
|
||||
|
@ -103,64 +103,55 @@ data Opt = Opt
|
|||
, optIncludeBeforeBody :: String -- ^ File to include at top of body
|
||||
, optIncludeAfterBody :: String -- ^ File to include at end of body
|
||||
, optCustomHeader :: String -- ^ Custom header to use, or "DEFAULT"
|
||||
, optDefaultHeader :: String -- ^ Default header
|
||||
, optTitlePrefix :: String -- ^ Optional prefix for HTML title
|
||||
, optOutputFile :: String -- ^ Name of output file
|
||||
, optNumberSections :: Bool -- ^ If @True@, number sections in LaTeX
|
||||
, optIncremental :: Bool -- ^ If @True@, incremental lists in S5
|
||||
, optSmart :: Bool -- ^ If @True@, use smart typography
|
||||
, optASCIIMathML :: Bool -- ^ If @True@, use ASCIIMathML in HTML
|
||||
, optShowUsage :: Bool -- ^ If @True@, show usage message
|
||||
, optDebug :: Bool -- ^ If @True@, output debug messages
|
||||
}
|
||||
|
||||
-- | Defaults for command-line options.
|
||||
startOpt :: Opt
|
||||
startOpt = Opt
|
||||
defaultOpts :: Opt
|
||||
defaultOpts = Opt
|
||||
{ optPreserveTabs = False
|
||||
, optTabStop = 4
|
||||
, optStandalone = False
|
||||
, optReader = readMarkdown
|
||||
, optWriter = writeHtml
|
||||
, optReader = "" -- null for default reader
|
||||
, optWriter = "" -- null for default writer
|
||||
, optParseRaw = False
|
||||
, optCSS = ""
|
||||
, optIncludeInHeader = ""
|
||||
, optIncludeBeforeBody = ""
|
||||
, optIncludeAfterBody = ""
|
||||
, optCustomHeader = "DEFAULT"
|
||||
, optDefaultHeader = defaultHtmlHeader
|
||||
, optTitlePrefix = ""
|
||||
, optOutputFile = "" -- null for stdout
|
||||
, optNumberSections = False
|
||||
, optIncremental = False
|
||||
, optSmart = False
|
||||
, optASCIIMathML = False
|
||||
, optShowUsage = False
|
||||
, optDebug = False
|
||||
}
|
||||
|
||||
-- | A list of functions, each transforming the options data structure in response
|
||||
-- to a command-line option.
|
||||
allOptions :: [OptDescr (Opt -> IO Opt)]
|
||||
allOptions =
|
||||
-- | A list of functions, each transforming the options data structure
|
||||
-- in response to a command-line option.
|
||||
options :: [OptDescr (Opt -> IO Opt)]
|
||||
options =
|
||||
[ Option "fr" ["from","read"]
|
||||
(ReqArg
|
||||
(\arg opt -> case (lookup (map toLower arg) readers) of
|
||||
Just reader -> return opt { optReader = reader }
|
||||
Nothing -> error ("Unknown reader: " ++ arg) )
|
||||
(\arg opt -> return opt { optReader = map toLower arg })
|
||||
"FORMAT")
|
||||
("Source format (" ++
|
||||
(concatMap (\(name, fn) -> " " ++ name) readers) ++ " )")
|
||||
("Input format (" ++ (joinWithSep ", " (map fst readers)) ++
|
||||
")")
|
||||
|
||||
, Option "tw" ["to","write"]
|
||||
(ReqArg
|
||||
(\arg opt -> case (lookup (map toLower arg) writers) of
|
||||
Just (writer, defaultHeader) ->
|
||||
return opt { optWriter = writer,
|
||||
optDefaultHeader = defaultHeader }
|
||||
Nothing -> error ("Unknown writer: " ++ arg) )
|
||||
(\arg opt -> return opt { optWriter = map toLower arg })
|
||||
"FORMAT")
|
||||
("Output format (" ++ (concatMap (\(name, fn) -> " " ++ name) writers) ++ " )")
|
||||
("Output format (" ++ (joinWithSep ", " (map fst writers)) ++
|
||||
")")
|
||||
|
||||
, Option "s" ["standalone"]
|
||||
(NoArg
|
||||
|
@ -169,8 +160,7 @@ allOptions =
|
|||
|
||||
, Option "o" ["output"]
|
||||
(ReqArg
|
||||
(\arg opt -> do
|
||||
return opt { optOutputFile = arg })
|
||||
(\arg opt -> return opt { optOutputFile = arg })
|
||||
"FILENAME")
|
||||
"Name of output file"
|
||||
|
||||
|
@ -286,57 +276,66 @@ allOptions =
|
|||
|
||||
, Option "h" ["help"]
|
||||
(NoArg
|
||||
(\opt -> return opt { optShowUsage = True }))
|
||||
(\_ -> do
|
||||
prg <- getProgName
|
||||
hPutStr stderr (reformatUsageInfo $
|
||||
usageInfo (prg ++ " [OPTIONS] [FILES]") options)
|
||||
exitWith $ ExitFailure 2))
|
||||
"Show help"
|
||||
]
|
||||
|
||||
-- parse name of calling program and return default reader and writer descriptions
|
||||
parseProgName name =
|
||||
case (splitRegex (mkRegex "2") (map toLower name)) of
|
||||
[from, to] -> (from, to)
|
||||
_ -> ("markdown", "html")
|
||||
|
||||
-- set default options based on reader and writer descriptions; start is starting options
|
||||
setDefaultOpts from to start =
|
||||
case ((lookup from readers), (lookup to writers)) of
|
||||
(Just reader, Just (writer, header)) -> start {optReader = reader,
|
||||
optWriter = writer,
|
||||
optDefaultHeader = header}
|
||||
_ -> start
|
||||
|
||||
-- True if single-letter option is in option list
|
||||
inOptList :: [Char] -> OptDescr (Opt -> IO Opt) -> Bool
|
||||
inOptList list desc =
|
||||
let (Option letters _ _ _) = desc in
|
||||
any (\x -> x `elem` list) letters
|
||||
|
||||
-- Reformat usage message so it doesn't wrap illegibly
|
||||
reformatUsageInfo :: String -> String
|
||||
reformatUsageInfo = gsub " *--" " --" .
|
||||
gsub "(-[A-Za-z0-9]) *--" "\\1, --" .
|
||||
gsub " *([^- ])" "\n\t\\1"
|
||||
|
||||
-- Determine default reader based on source file extensions
|
||||
defaultReaderName :: [String] -> String
|
||||
defaultReaderName [] = "markdown"
|
||||
defaultReaderName (x:xs) =
|
||||
let x' = map toLower x in
|
||||
case (matchRegex (mkRegex ".*\\.(.*)") x') of
|
||||
Nothing -> defaultReaderName xs -- no extension
|
||||
Just ["xhtml"] -> "html"
|
||||
Just ["html"] -> "html"
|
||||
Just ["htm"] -> "html"
|
||||
Just ["tex"] -> "latex"
|
||||
Just ["latex"] -> "latex"
|
||||
Just ["ltx"] -> "latex"
|
||||
Just ["rst"] -> "rst"
|
||||
Just ["native"] -> "native"
|
||||
Just _ -> "markdown"
|
||||
|
||||
-- Determine default writer based on output file extension
|
||||
defaultWriterName :: String -> String
|
||||
defaultWriterName "" = "html" -- no output file
|
||||
defaultWriterName x =
|
||||
let x' = map toLower x in
|
||||
case (matchRegex (mkRegex ".*\\.(.*)") x') of
|
||||
Nothing -> "markdown" -- no extension
|
||||
Just [""] -> "markdown" -- empty extension
|
||||
Just ["tex"] -> "latex"
|
||||
Just ["latex"] -> "latex"
|
||||
Just ["ltx"] -> "latex"
|
||||
Just ["rtf"] -> "rtf"
|
||||
Just ["rst"] -> "rst"
|
||||
Just ["s5"] -> "s5"
|
||||
Just ["native"] -> "native"
|
||||
Just ["txt"] -> "markdown"
|
||||
Just ["text"] -> "markdown"
|
||||
Just ["md"] -> "markdown"
|
||||
Just ["markdown"] -> "markdown"
|
||||
Just _ -> "html"
|
||||
|
||||
main = do
|
||||
|
||||
name <- getProgName
|
||||
let (from, to) = parseProgName name
|
||||
|
||||
let irrelevantOptions = if not ('2' `elem` name)
|
||||
then ""
|
||||
else "frtwD" ++
|
||||
(if (to /= "html" && to /= "s5") then "SmcT" else "") ++
|
||||
(if (to /= "latex") then "N" else "") ++
|
||||
(if (to /= "s5") then "i" else "") ++
|
||||
(if (from /= "html" && from /= "latex") then "R" else "")
|
||||
|
||||
let options = filter (not . inOptList irrelevantOptions) allOptions
|
||||
|
||||
let defaultOpts = setDefaultOpts from to startOpt
|
||||
|
||||
args <- getArgs
|
||||
let (actions, sources, errors) = getOpt Permute options args
|
||||
|
||||
if (not (null errors))
|
||||
then do
|
||||
name <- getProgName
|
||||
mapM (\e -> hPutStrLn stderr e) errors
|
||||
hPutStrLn stderr (reformatUsageInfo $
|
||||
usageInfo (name ++ " [OPTIONS] [FILES]") options)
|
||||
|
@ -350,30 +349,39 @@ main = do
|
|||
let Opt { optPreserveTabs = preserveTabs
|
||||
, optTabStop = tabStop
|
||||
, optStandalone = standalone
|
||||
, optReader = reader
|
||||
, optWriter = writer
|
||||
, optReader = readerName
|
||||
, optWriter = writerName
|
||||
, optParseRaw = parseRaw
|
||||
, optCSS = css
|
||||
, optIncludeInHeader = includeHeader
|
||||
, optIncludeBeforeBody = includeBefore
|
||||
, optIncludeAfterBody = includeAfter
|
||||
, optCustomHeader = customHeader
|
||||
, optDefaultHeader = defaultHeader
|
||||
, optTitlePrefix = titlePrefix
|
||||
, optOutputFile = outputFile
|
||||
, optNumberSections = numberSections
|
||||
, optIncremental = incremental
|
||||
, optSmart = smart
|
||||
, optASCIIMathML = asciiMathML
|
||||
, optShowUsage = showUsage
|
||||
, optDebug = debug
|
||||
} = opts
|
||||
|
||||
if showUsage
|
||||
then do
|
||||
hPutStr stderr (reformatUsageInfo $ usageInfo (name ++ " [OPTIONS] [FILES]") options)
|
||||
exitWith $ ExitFailure 2
|
||||
else return ()
|
||||
-- assign reader and writer based on options and filenames
|
||||
let readerName' = if null readerName
|
||||
then defaultReaderName sources
|
||||
else readerName
|
||||
|
||||
let writerName' = if null writerName
|
||||
then defaultWriterName outputFile
|
||||
else writerName
|
||||
|
||||
reader <- case (lookup readerName' readers) of
|
||||
Just r -> return r
|
||||
Nothing -> error ("Unknown reader: " ++ readerName')
|
||||
|
||||
(writer, defaultHeader) <- case (lookup writerName' writers) of
|
||||
Just (w,h) -> return (w, h)
|
||||
Nothing -> error ("Unknown writer: " ++ writerName')
|
||||
|
||||
output <- if ((null outputFile) || debug)
|
||||
then return stdout
|
||||
|
@ -385,7 +393,6 @@ main = do
|
|||
hPutStr stderr $ concatMap (\s -> "INPUT=" ++ s ++ "\n") sources
|
||||
else return ()
|
||||
|
||||
let writingS5 = (defaultHeader == defaultS5Header)
|
||||
let tabFilter = if preserveTabs then id else (tabsToSpaces tabStop)
|
||||
let addBlank str = str ++ "\n\n"
|
||||
let removeCRs str = filter (/= '\r') str -- remove DOS-style line endings
|
||||
|
@ -407,7 +414,7 @@ main = do
|
|||
writerTitlePrefix = titlePrefix,
|
||||
writerSmart = smart,
|
||||
writerTabStop = tabStop,
|
||||
writerS5 = writingS5,
|
||||
writerS5 = (writerName=="s5"),
|
||||
writerIncremental = incremental,
|
||||
writerNumberSections = numberSections,
|
||||
writerIncludeBefore = includeBefore,
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#!/bin/sh -e
|
||||
|
||||
REQUIRED="markdown2latex pdflatex"
|
||||
REQUIRED="pdflatex"
|
||||
|
||||
### common.sh
|
||||
|
||||
|
@ -9,9 +9,12 @@ REQUIRED="markdown2latex pdflatex"
|
|||
texname=output
|
||||
logfile=$THIS_TEMPDIR/log
|
||||
|
||||
if ! markdown2latex -s -d "$@" >$THIS_TEMPDIR/$texname.tex 2>$logfile; then
|
||||
[ -f $logfile ] && sed -e 's/markdown2latex/markdown2pdf/g' \
|
||||
-e '/^INPUT=/d' -e '/^OUTPUT=/d' $logfile >&2
|
||||
if ! pandoc -s -d -r markdown -w latex "$@" >$THIS_TEMPDIR/$texname.tex \
|
||||
2>$logfile; then
|
||||
[ -f $logfile ] && sed -e 's/^pandoc/markdown2pdf/g' \
|
||||
-e '/^INPUT=/d' -e '/^OUTPUT=/d' \
|
||||
-e '/^[[:space:]]*\(-f\|-t\|-s\|-R\|-S\|-m\|-i\|-c\|-T\|-D\|-d\)/,/./d'\
|
||||
-e 's/(implies -s)//g' $logfile >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
# converts HTML from a URL, file, or stdin to markdown
|
||||
# uses an available program to fetch URL and tidy to normalize it first
|
||||
|
||||
REQUIRED="tidy html2markdown"
|
||||
REQUIRED="tidy"
|
||||
|
||||
### common.sh
|
||||
|
||||
|
@ -72,14 +72,16 @@ grabber=
|
|||
while [ $# -gt 0 ]; do
|
||||
case "$1" in
|
||||
-h|--help)
|
||||
html2markdown -h 2>&1 | sed -e 's/html2markdown/web2markdown/' 1>&2
|
||||
pandoc -h 2>&1 | sed -e 's/pandoc/web2markdown/' \
|
||||
-e '/^[[:space:]]*\(-f\|-t\|-S\|-N\|-m\|-i\|-c\|-T\|-D\|-d\)/,/./d'\
|
||||
1>&2
|
||||
err " -e ENCODING, --encoding=ENCODING"
|
||||
err " Specify character encoding of input"
|
||||
err " -g COMMAND, --grabber=COMMAND"
|
||||
err " Specify command to be used to grab contents of URL"
|
||||
exit 0 ;;
|
||||
-v|--version)
|
||||
html2markdown -v
|
||||
pandoc -v 2>&1 | sed -e 's/pandoc/web2markdown/' 1>&2
|
||||
exit 0 ;;
|
||||
-e)
|
||||
shift
|
||||
|
@ -112,7 +114,7 @@ while [ $# -gt 0 ]; do
|
|||
shift
|
||||
done
|
||||
|
||||
# Unpack options. Now "$@" will hold the html2markdown options.
|
||||
# Unpack options. Now "$@" will hold the pandoc options.
|
||||
oldifs="$IFS"; IFS="$NEWLINE"; set -- $options; IFS="$oldifs"
|
||||
|
||||
inurl=
|
||||
|
@ -162,10 +164,11 @@ else # assume UTF-8
|
|||
fi
|
||||
|
||||
if [ -z "$argument" ]; then
|
||||
tidy -utf8 2>/dev/null | html2markdown "$@"
|
||||
tidy -utf8 2>/dev/null | pandoc -r html -w markdown "$@"
|
||||
else
|
||||
if [ -f "$argument" ]; then
|
||||
to_utf8 "$argument" | tidy -utf8 2>/dev/null | html2markdown "$@"
|
||||
to_utf8 "$argument" |
|
||||
tidy -utf8 2>/dev/null | pandoc -r html -w markdown "$@"
|
||||
else
|
||||
err "File '$argument' not found."
|
||||
exit 1
|
||||
|
|
Loading…
Reference in a new issue