From a1a30d69bd83e57e494cd3ca5077086077731998 Mon Sep 17 00:00:00 2001 From: fiddlosopher Date: Thu, 28 Dec 2006 02:20:09 +0000 Subject: [PATCH] + Removed the convenience symlinks (which don't work on Windows under Cygwin, due to Windows' lack of true symbolic links). + Modified the wrappers to use 'pandoc' instead of the symlinks. + Modified the Makefile to remove all references to the symlinks. + Removed code from Main.hs that made pandoc's behavior depend on the name of the calling program. + Added code to Main.hs that sets default reader and writer based on extensions of input and output filenames (if provided). (Thanks to roktas for the idea.) + Modified README and man pages accordingly. + Removed WINDOWS-README target from Makefile. It is no longer needed now that we don't have the symlinks. git-svn-id: https://pandoc.googlecode.com/svn/trunk@295 788f1e2b-df1e-0410-8736-df70ead52e1b --- Makefile | 19 +---- README | 139 ++++++++++++++----------------- man/man1/html2markdown.1 | 1 - man/man1/latex2markdown.1 | 1 - man/man1/markdown2html.1 | 1 - man/man1/markdown2latex.1 | 1 - man/man1/markdown2pdf.1 | 27 +++--- man/man1/markdown2rst.1 | 1 - man/man1/markdown2rtf.1 | 1 - man/man1/markdown2s5.1 | 1 - man/man1/pandoc.1 | 50 +++++------ man/man1/rst2markdown.1 | 1 - man/man1/web2markdown.1 | 3 +- src/Main.hs | 157 ++++++++++++++++++----------------- src/wrappers/markdown2pdf.in | 11 ++- src/wrappers/web2markdown.in | 15 ++-- 16 files changed, 198 insertions(+), 231 deletions(-) delete mode 100644 man/man1/html2markdown.1 delete mode 100644 man/man1/latex2markdown.1 delete mode 100644 man/man1/markdown2html.1 delete mode 100644 man/man1/markdown2latex.1 delete mode 100644 man/man1/markdown2rst.1 delete mode 100644 man/man1/markdown2rtf.1 delete mode 100644 man/man1/markdown2s5.1 delete mode 100644 man/man1/rst2markdown.1 diff --git a/Makefile b/Makefile index 5ea1ae24d..e6e87cea2 100644 --- a/Makefile +++ b/Makefile @@ -26,8 +26,6 @@ EXECSBASE := $(shell sed -ne 's/^[Ee]xecutable:[[:space:]]*//p' $(CABAL).in) # Install targets #------------------------------------------------------------------------------- WRAPPERS := web2markdown markdown2pdf -SYMLINKS := markdown2html markdown2latex markdown2s5 markdown2rst \ - markdown2rtf html2markdown latex2markdown rst2markdown # Add .exe extensions if we're running Windows/Cygwin. EXTENSION := $(shell uname | tr '[:upper:]' '[:lower:]' | \ sed -ne 's/^cygwin.*$$/\.exe/p') @@ -96,12 +94,6 @@ all: build-program templates: $(SRCDIR)/templates $(MAKE) -C $(SRCDIR)/templates -.PHONY: symlinks -cleanup_files+=$(SYMLINKS) -symlinks: $(SYMLINKS) -$(SYMLINKS): $(MAIN) - ln -sf ./$(MAIN) $@ - define generate-shell-script echo "Generating $@..."; \ awk ' \ @@ -141,7 +133,7 @@ build: configure $(BUILDCMD) build .PHONY: build-exec -build-exec: $(PROGS) $(SYMLINKS) +build-exec: $(PROGS) cleanup_files+=$(EXECS) $(EXECS): build for f in $@; do \ @@ -201,9 +193,8 @@ install-exec: build-exec fi; \ $(INSTALL_PROGRAM) $$f $(BINPATH)/; \ done - cd $(BINPATH); for f in $(SYMLINKS); do ln -sf $(MAIN) $$f; done uninstall-exec: - -for f in $(notdir $(PROGS) $(SYMLINKS)); do rm -f $(BINPATH)/$$f; done ; + -for f in $(notdir $(PROGS)); do rm -f $(BINPATH)/$$f; done ; # Program + user documents installation. .PHONY: install-program uninstall-program @@ -295,15 +286,11 @@ $(osx_dmg_name): $(osx_pkg_name) .PHONY: win-pkg win_pkg_name:=$(RELNAME).zip -win_docs:=COPYING.txt COPYRIGHT.txt BUGS.txt README-WINDOWS.txt README-WINDOWS.html +win_docs:=COPYING.txt COPYRIGHT.txt BUGS.txt README.txt README.html cleanup_files+=$(win_pkg_name) $(win_docs) win-pkg: $(win_pkg_name) $(win_pkg_name): $(THIS).exe $(win_docs) zip -r $(win_pkg_name) $(THIS).exe $(win_docs) -cleanup_files+=README-WINDOWS -README-WINDOWS: README - sed -e '/^Requirements/,/^\[fancyvrb\]:/ d' \ - -e '/^Character encodings/,/mysite.com$$/ d' $< > $@ .PHONY: test test-markdown test: $(MAIN) diff --git a/README b/README index 82537eb6a..6251cb58d 100644 --- a/README +++ b/README @@ -36,14 +36,11 @@ Requirements ============ The `pandoc` program itself does not depend on any external libraries -or programs. The convenience programs `markdown2html`, `markdown2latex`, -`markdown2rst`, `markdown2rtf`, `markdown2s5`, `html2markdown`, -`latex2markdown`, and `rst2markdown` are implemented as symbolic links to -`pandoc`. +or programs. The wrapper script `web2markdown` requires - - `html2markdown` (included with Pandoc) + - `pandoc` (which must be in the PATH) - a POSIX-compliant shell (installed by default on all linux and unix systems, including Mac OS X, and in [Cygwin] for Windows), - `HTML Tidy` @@ -56,7 +53,7 @@ The wrapper script `web2markdown` requires The wrapper script `markdown2pdf` requires - - `markdown2latex` (included with Pandoc) + - `pandoc` (which must be in the PATH) - a POSIX-compliant shell - `pdflatex`, which should be part of any [LaTeX] distribution - the [unicode] and [fancyvrb] LaTeX packages, which are included @@ -80,47 +77,11 @@ Using Pandoc If you run `pandoc` without arguments, it will accept input from STDIN. If you run it with file names as arguments, it will take input -from those files. It accepts several command-line options. For a -list, type - - pandoc -h - -The most important options specify the format of the source file and -the output. The default reader is markdown; the default writer is -HTML. So if you don't specify a reader or writer, `pandoc` will -convert markdown to HTML. For example, - - pandoc hello.txt - -will convert `hello.txt` from markdown to HTML. For other conversions, -you must specify a reader and/or a writer using the `-r` and `-w` -flags. To convert markdown to LaTeX, you would write: - - pandoc -w latex hello.txt - -To convert html to markdown: - - pandoc -r html -w markdown hello.txt - -Supported writers include `markdown`, `latex`, `html`, `rtf` (rich text -format), `rst` (reStructuredText), and `s5` (which produces an HTML -file that acts like powerpoint). Supported readers include `markdown`, -`html`, `latex`, and `rst`. Note that the `rst` reader only parses -a subset of reStructuredText syntax. For example, it doesn't handle -tables, definition lists, option lists, or footnotes. It handles only the -constructs expressible in unextended markdown. But for simple documents -it should be adequate. The `latex` and `html` readers are also limited -in what they can do. Because the `html` reader is picky about the HTML -it parses, it is recommended that you pipe HTML through [HTML Tidy] before -sending it to `pandoc`, or use the `web2markdown` script described below. - -By default, `pandoc` writes its output to STDOUT. If you want to -write to a file, use the `-o` option or shell redirection: +from those files. By default, `pandoc` writes its output to STDOUT. +If you want to write to a file, use the `-o` option: pandoc -o hello.html hello.txt - pandoc hello.txt > hello.html - Note that you can specify multiple input files on the command line. `pandoc` will concatenate them all (with blank lines between them) before parsing: @@ -131,6 +92,44 @@ before parsing: with a proper header, rather than a fragment. For more details on this and many other command-line options, see below.) +The format of the input and output can be specified explicitly using +command-line options. The input format can be specified using the +`-r/--read` or `-f/--from` options, the output format using the +`-w/--write` or `-t/--to` options. Thus, to convert `hello.txt` from +markdown to LaTeX, you could type: + + pandoc -f markdown -t latex hello.txt + +To convert `hello.html` from html to markdown: + + pandoc -f html -t markdown hello.html + +Supported output formats include `markdown`, `latex`, `html`, `rtf` +(rich text format), `rst` (reStructuredText), and `s5` (which produces +an HTML file that acts like powerpoint). Supported input formats +include `markdown`, `html`, `latex`, and `rst`. Note that the `rst` +reader only parses a subset of reStructuredText syntax. For example, +it doesn't handle tables, definition lists, option lists, or footnotes. +It handles only the constructs expressible in unextended markdown. +But for simple documents it should be adequate. The `latex` and `html` +readers are also limited in what they can do. Because the `html` +reader is picky about the HTML it parses, it is recommended that you +pipe HTML through [HTML Tidy] before sending it to `pandoc`, or use the +`web2markdown` script described below. + +If you don't specify a reader or writer explicitly, `pandoc` will +try to determine the input and output format from the extensions of +the input and output filenames. Thus, for example, + + pandoc -o hello.tex hello.txt + +will convert `hello.txt` from markdown to LaTeX. If no output file +is specified (so that output goes to STDOUT), or if the output file's +extension is unknown, the output format will default to HTML. +If no input file is specified (so that input comes from STDIN), or +if the input files' extensions are unknown, the input format will +be assumed to be markdown unless explicitly specified. + Character encodings ------------------- @@ -150,31 +149,16 @@ The shell scripts (described below) automatically convert the input from the local encoding to UTF-8 before running them through `pandoc`, then convert the output back to the local encoding. -Convenience programs and wrapper scripts -======================================== +`markdown2pdf` and `web2markdown` +================================= -For convenience, eight variant programs are included with Pandoc: -`markdown2html` (which is equivalent to `pandoc -w html`), -`markdown2latex` (equivalent to `pandoc -w latex`), `markdown2rst` -(equivalent to `pandoc -w rst`), `markdown2rtf` (equivalent to -`pandoc -w rtf`), `markdown2s5` (equivalent to `pandoc -w s5`), -`html2markdown` (equivalent to `pandoc -r html -w markdown`), -`latex2markdown` (equivalent to `pandoc -r latex -w markdown`), and -`rst2markdown` (equivalent to `pandoc -r rst -w markdown`). These -programs take an appropriately restricted subset of `pandoc`'s -options. (Run them with the `-h` flag for a full list of allowed -options.) - -Like `pandoc`, all of these programs produce fragments by default. -If you want to produce a standalone file, complete with a header -and footer appropriate to the format, use the `-s` option: - - markdown2latex -s sample.txt > sample.tex - -Two shell scripts have also been included: +Two shell scripts, `markdown2pdf` and `web2markdown`, are included in +the standard Pandoc installation. (They are not included in the Windows +binary package, as they require a POSIX shell, but they may be used +in Windows under Cygwin.) 1. `markdown2pdf` produces a PDF file from markdown-formatted - text, using `markdown2latex` and `pdflatex`. The default + text, using `pandoc` and `pdflatex`. The default behavior of `markdown2pdf` is to create a file with the same base name as the first argument and the extension `pdf`; thus, for example, @@ -190,7 +174,7 @@ Two shell scripts have also been included: If no input file is specified, input will be taken from STDIN. 2. `web2markdown` grabs a web page from a file or URL and converts - it to markdown-formatted text, using `tidy` and `html2markdown`. + it to markdown-formatted text, using `tidy` and `pandoc`. Unless input is from STDIN, an attempt is made to determine the character encoding of the page from the "Content-type" meta tag. If this is not present, UTF-8 is assumed. Alternatively, a character @@ -207,9 +191,20 @@ Command-line options ==================== Various command-line options can be used to customize the output. -For a complete list, type - pandoc --help +`-f`, `--from`, `-r`, or `--read` can be used to specify the input +format -- the format Pandoc will be converting *from*. Available +formats are `native`, `markdown`, `rst`, `html`, and `latex`. + +`-t`, `--to`, `-w`, or `--write` can be used to specify the output +format -- the format Pandoc will be converting *to*. Available formats +are `native`, `html`, `s5`, `latex`, `markdown`, `rst`, and `rtf`. + +`-s` or `--standalone` indicates that a standalone document is to be +produced (with appropriate headers and footers), rather than a fragment. + +`-o` or `--output` specifies the name of the output file. If no output +filename is given, output will be sent to STDOUT. `-p` or `--preserve-tabs` causes tabs in the source text to be preserved, rather than converted to spaces (the default). @@ -225,12 +220,6 @@ untranslatable HTML codes and LaTeX environments. (The LaTeX reader does pass through untranslatable LaTeX commands, even if `-R` is not specified.) -`-s` or `--standalone` causes `pandoc` to produce a standalone file, -complete with appropriate document headers. By default, `pandoc` -produces a fragment. - -`-o` or `--output-file` can be used to specify an output file. - `-C` or `--custom-header` can be used to specify a custom document header. To see the headers used by default, use the `-D` option: for example, `pandoc -D html` prints the default HTML header. diff --git a/man/man1/html2markdown.1 b/man/man1/html2markdown.1 deleted file mode 100644 index 7b82576d6..000000000 --- a/man/man1/html2markdown.1 +++ /dev/null @@ -1 +0,0 @@ -.so man1/pandoc.1 diff --git a/man/man1/latex2markdown.1 b/man/man1/latex2markdown.1 deleted file mode 100644 index 7b82576d6..000000000 --- a/man/man1/latex2markdown.1 +++ /dev/null @@ -1 +0,0 @@ -.so man1/pandoc.1 diff --git a/man/man1/markdown2html.1 b/man/man1/markdown2html.1 deleted file mode 100644 index 7b82576d6..000000000 --- a/man/man1/markdown2html.1 +++ /dev/null @@ -1 +0,0 @@ -.so man1/pandoc.1 diff --git a/man/man1/markdown2latex.1 b/man/man1/markdown2latex.1 deleted file mode 100644 index 7b82576d6..000000000 --- a/man/man1/markdown2latex.1 +++ /dev/null @@ -1 +0,0 @@ -.so man1/pandoc.1 diff --git a/man/man1/markdown2pdf.1 b/man/man1/markdown2pdf.1 index c15131a42..423ac6546 100644 --- a/man/man1/markdown2pdf.1 +++ b/man/man1/markdown2pdf.1 @@ -6,14 +6,13 @@ markdown2pdf \- converts markdown-formatted text to PDF, using pdflatex .SH DESCRIPTION \fBmarkdown2pdf\fR converts \fIinput\-file\fR (or text from standard input) from markdown\-formatted plain text to PDF, using \fBpdflatex\fR. -If no output filename is specified, the name of the output file is -derived from the input file; thus, for example, if the input file -is \fIhello.txt\fR, the output file will be \fIhello.pdf\fR. If -the input is read from STDIN and no output filename is -specified, the output file will be named \fIstdin.pdf\fR. If -multiple input files are specified, they will be concatenated before -conversion, and the name of the output file will be derived from -the first input file. +If no output filename is specified (using the \fB\-o\fR option), +the name of the output file is derived from the input file; thus, for +example, if the input file is \fIhello.txt\fR, the output file will be +\fIhello.pdf\fR. If the input is read from STDIN and no output filename +is specified, the output file will be named \fIstdin.pdf\fR. If multiple +input files are specified, they will be concatenated before conversion, +and the name of the output file will be derived from the first input file. .PP Input is assumed to be in the UTF\-8 character encoding. If your local character encoding is not UTF\-8, you should pipe input and @@ -21,11 +20,11 @@ output through \fBiconv\fR: .IP .B iconv \-t utf\-8 input.txt | pandoc | iconv \-f utf\-8 .PP -\fBmarkdown2pdf\fR assumes that the 'unicode' package -is in latex's search path. If this package is not included in your -latex setup, it can be obtained from . +\fBmarkdown2pdf\fR assumes that the 'unicode' and 'fancyvrb' packages +are in latex's search path. If these packages are not included in your +latex setup, they can be obtained from . .PP -\fBmarkdown2pdf\fR is a wrapper around \fBmarkdown2latex\fR. +\fBmarkdown2pdf\fR is a wrapper around \fBpandoc\fR. .SH OPTIONS .TP .B \-o FILE, \-\-output=FILE @@ -37,10 +36,6 @@ Preserve tabs instead of converting them to spaces. .B \-\-tab-stop=\fITABSTOP\fB Specify tab stop (default is 4). .TP -.B \-R, \-\-parse-raw -Parse untranslatable LaTeX environments as raw LaTeX, -instead of ignoring them. -.TP .B \-N, \-\-number-sections Number section headings in LaTeX output. (Default is not to number them.) .TP diff --git a/man/man1/markdown2rst.1 b/man/man1/markdown2rst.1 deleted file mode 100644 index 7b82576d6..000000000 --- a/man/man1/markdown2rst.1 +++ /dev/null @@ -1 +0,0 @@ -.so man1/pandoc.1 diff --git a/man/man1/markdown2rtf.1 b/man/man1/markdown2rtf.1 deleted file mode 100644 index 7b82576d6..000000000 --- a/man/man1/markdown2rtf.1 +++ /dev/null @@ -1 +0,0 @@ -.so man1/pandoc.1 diff --git a/man/man1/markdown2s5.1 b/man/man1/markdown2s5.1 deleted file mode 100644 index 7b82576d6..000000000 --- a/man/man1/markdown2s5.1 +++ /dev/null @@ -1 +0,0 @@ -.so man1/pandoc.1 diff --git a/man/man1/pandoc.1 b/man/man1/pandoc.1 index 82c9ae321..f6280f463 100644 --- a/man/man1/pandoc.1 +++ b/man/man1/pandoc.1 @@ -1,8 +1,6 @@ .TH PANDOC 1 "December 15, 2006" Pandoc "User Manuals" .SH NAME -pandoc, markdown2html, markdown2latex, markdown2rst, markdown2rtf, -markdown2s5, html2markdown2, latex2markdown, rst2markdown \- general -markup converter +pandoc \- general markup converter .SH SYNOPSIS \fBpandoc\fR [\fIoptions\fR] [\fIinput\-file\fR]... .SH DESCRIPTION @@ -13,41 +11,37 @@ slide shows. .PP If no \fIinput\-file\fR is specified, input is read from STDIN. Otherwise, the \fIinput\-files\fR are concatenated (with a blank -line between each) and used as input. Output goes to standard -output. If you want output to a file, use the \fB\-o\fR option or -shell redirection: +line between each) and used as input. Output goes to STDOUT by +default. For output to a file, use the \fB\-o\fR option: .IP .B pandoc \-o output.html input.txt +.PP +The input and output formats may be specified using command-line options +(see \fBOPTIONS\fR, below, for details). If these formats are not +specified explicitly, \fIPandoc\fR will attempt to determine them +from the extensions of the input and output filenames. If input comes +from STDIN or from a file with an unknown extension, the input is assumed +to be markdown. If no output filename is specified using the \fB\-o\fR +option, or if a filename is specified but its extension is unknown, +the output will default to HTML. Thus, for example, .IP -.B pandoc input.txt > output.html +.B pandoc -o chap1.tex chap1.txt .PP -The default behavior of \fIPandoc\fR is to convert the input from -markdown\-formatted plain text to HTML. Different input and output -formats can be specified using command\-line options. For example, +converts \fIchap1.txt\fR from markdown to LaTeX. And .IP -.B pandoc \-f latex \-t markdown chap1.tex > chap1.txt +.B pandoc README .PP -converts \fIchap1.tex\fR from LaTeX to markdown\-formatted plain text. -See below for a detailed list of command\-line options. -.PP -For convenience, eight variant programs are available: -\fBmarkdown2html\fR (same as \fBpandoc \-w html\fR), -\fBmarkdown2latex\fR (same as \fBpandoc \-w latex\fR), -\fBmarkdown2rst\fR (same as \fBpandoc \-w rst\fR), -\fBmarkdown2rtf\fR (same as \fBpandoc \-w rtf\fR), -\fBmarkdown2s5\fR (same as \fBpandoc \-w s5\fR), -\fBhtml2markdown\fR (same as \fBpandoc \-r html \-w markdown\fR), -\fBlatex2markdown\fR (same as \fBpandoc \-r latex \-w markdown\fR), -and \fBrst2markdown\fR (same as \fBpandoc \-r rst \-w markdown\fR). -These programs take an appropriately restricted subset of \fBpandoc\fR's -options. (Run them with the \fB-h\fR flag for a full list of allowed -options.) +converts \fIREADME\fR from markdown to HTML. .PP \fIPandoc\fR uses the UTF\-8 character encoding for both input and output. If your local character encoding is not UTF\-8, you should pipe input and output through \fBiconv\fR: .IP .B iconv \-t utf\-8 input.txt | pandoc | iconv \-f utf\-8 +.PP +\fIPandoc\fR's HTML parser is not very forgiving. If your input is +HTML, consider running it through \fBtidy\fR(1) before passing it +to Pandoc. Or use \fBweb2markdown\fR(1), a wrapper around \fBpandoc\fR. .SH OPTIONS .TP @@ -158,9 +152,7 @@ Show usage message. .SH "SEE ALSO" \fBweb2markdown\fR(1), -\fBmarkdown2pdf\fR(1), -\fBiconv\fR(1) - +\fBmarkdown2pdf\fR(1). The .I README file distributed with Pandoc contains full documentation. diff --git a/man/man1/rst2markdown.1 b/man/man1/rst2markdown.1 deleted file mode 100644 index 7b82576d6..000000000 --- a/man/man1/rst2markdown.1 +++ /dev/null @@ -1 +0,0 @@ -.so man1/pandoc.1 diff --git a/man/man1/web2markdown.1 b/man/man1/web2markdown.1 index a570cfc97..242b50671 100644 --- a/man/man1/web2markdown.1 +++ b/man/man1/web2markdown.1 @@ -16,7 +16,7 @@ option. from STDIN, UTF-8 is assumed. A character encoding may be specified explicitly using the \fB\-e\fR option. .PP -\fBweb2markdown\fR is a wrapper for \fBhtml2markdown\fR. +\fBweb2markdown\fR is a wrapper for \fBpandoc\fR. .SH OPTIONS .TP .B \-s, \-\-standalone @@ -76,7 +76,6 @@ web2markdown \-g 'wget \-\-user=foo \-\-password=bar' mysite.com .SH "SEE ALSO" \fBpandoc\fR(1), -\fBhtml2markdown\fR(1), \fBiconv\fR(1) .SH AUTHOR John MacFarlane and Recai Oktas diff --git a/src/Main.hs b/src/Main.hs index 0f8567517..94be551d3 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -45,7 +45,7 @@ import Text.Pandoc.Writers.DefaultHeaders ( defaultHtmlHeader, defaultRTFHeader, defaultS5Header, defaultLaTeXHeader ) import Text.Pandoc.Definition import Text.Pandoc.Shared -import Text.Regex ( mkRegex, splitRegex ) +import Text.Regex ( mkRegex, matchRegex ) import System ( exitWith, getArgs, getProgName ) import System.Exit import System.Console.GetOpt @@ -94,8 +94,8 @@ data Opt = Opt { optPreserveTabs :: Bool -- ^ If @False@, convert tabs to spaces , optTabStop :: Int -- ^ Number of spaces per tab , optStandalone :: Bool -- ^ If @True@, include header, footer - , optReader :: ParserState -> String -> Pandoc -- ^ Read format - , optWriter :: WriterOptions -> Pandoc -> String -- ^ Write fmt + , optReader :: String -- ^ Reader format + , optWriter :: String -- ^ Writer format , optParseRaw :: Bool -- ^ If @True@, parse unconvertable -- HTML and TeX , optCSS :: String -- ^ CSS file to link to @@ -103,64 +103,55 @@ data Opt = Opt , optIncludeBeforeBody :: String -- ^ File to include at top of body , optIncludeAfterBody :: String -- ^ File to include at end of body , optCustomHeader :: String -- ^ Custom header to use, or "DEFAULT" - , optDefaultHeader :: String -- ^ Default header , optTitlePrefix :: String -- ^ Optional prefix for HTML title , optOutputFile :: String -- ^ Name of output file , optNumberSections :: Bool -- ^ If @True@, number sections in LaTeX , optIncremental :: Bool -- ^ If @True@, incremental lists in S5 , optSmart :: Bool -- ^ If @True@, use smart typography , optASCIIMathML :: Bool -- ^ If @True@, use ASCIIMathML in HTML - , optShowUsage :: Bool -- ^ If @True@, show usage message , optDebug :: Bool -- ^ If @True@, output debug messages } -- | Defaults for command-line options. -startOpt :: Opt -startOpt = Opt +defaultOpts :: Opt +defaultOpts = Opt { optPreserveTabs = False , optTabStop = 4 , optStandalone = False - , optReader = readMarkdown - , optWriter = writeHtml + , optReader = "" -- null for default reader + , optWriter = "" -- null for default writer , optParseRaw = False , optCSS = "" , optIncludeInHeader = "" , optIncludeBeforeBody = "" , optIncludeAfterBody = "" , optCustomHeader = "DEFAULT" - , optDefaultHeader = defaultHtmlHeader , optTitlePrefix = "" , optOutputFile = "" -- null for stdout , optNumberSections = False , optIncremental = False , optSmart = False , optASCIIMathML = False - , optShowUsage = False , optDebug = False } --- | A list of functions, each transforming the options data structure in response --- to a command-line option. -allOptions :: [OptDescr (Opt -> IO Opt)] -allOptions = +-- | A list of functions, each transforming the options data structure +-- in response to a command-line option. +options :: [OptDescr (Opt -> IO Opt)] +options = [ Option "fr" ["from","read"] (ReqArg - (\arg opt -> case (lookup (map toLower arg) readers) of - Just reader -> return opt { optReader = reader } - Nothing -> error ("Unknown reader: " ++ arg) ) + (\arg opt -> return opt { optReader = map toLower arg }) "FORMAT") - ("Source format (" ++ - (concatMap (\(name, fn) -> " " ++ name) readers) ++ " )") + ("Input format (" ++ (joinWithSep ", " (map fst readers)) ++ + ")") , Option "tw" ["to","write"] (ReqArg - (\arg opt -> case (lookup (map toLower arg) writers) of - Just (writer, defaultHeader) -> - return opt { optWriter = writer, - optDefaultHeader = defaultHeader } - Nothing -> error ("Unknown writer: " ++ arg) ) + (\arg opt -> return opt { optWriter = map toLower arg }) "FORMAT") - ("Output format (" ++ (concatMap (\(name, fn) -> " " ++ name) writers) ++ " )") + ("Output format (" ++ (joinWithSep ", " (map fst writers)) ++ + ")") , Option "s" ["standalone"] (NoArg @@ -169,8 +160,7 @@ allOptions = , Option "o" ["output"] (ReqArg - (\arg opt -> do - return opt { optOutputFile = arg }) + (\arg opt -> return opt { optOutputFile = arg }) "FILENAME") "Name of output file" @@ -286,57 +276,66 @@ allOptions = , Option "h" ["help"] (NoArg - (\opt -> return opt { optShowUsage = True })) + (\_ -> do + prg <- getProgName + hPutStr stderr (reformatUsageInfo $ + usageInfo (prg ++ " [OPTIONS] [FILES]") options) + exitWith $ ExitFailure 2)) "Show help" ] --- parse name of calling program and return default reader and writer descriptions -parseProgName name = - case (splitRegex (mkRegex "2") (map toLower name)) of - [from, to] -> (from, to) - _ -> ("markdown", "html") - --- set default options based on reader and writer descriptions; start is starting options -setDefaultOpts from to start = - case ((lookup from readers), (lookup to writers)) of - (Just reader, Just (writer, header)) -> start {optReader = reader, - optWriter = writer, - optDefaultHeader = header} - _ -> start - --- True if single-letter option is in option list -inOptList :: [Char] -> OptDescr (Opt -> IO Opt) -> Bool -inOptList list desc = - let (Option letters _ _ _) = desc in - any (\x -> x `elem` list) letters - -- Reformat usage message so it doesn't wrap illegibly +reformatUsageInfo :: String -> String reformatUsageInfo = gsub " *--" " --" . gsub "(-[A-Za-z0-9]) *--" "\\1, --" . gsub " *([^- ])" "\n\t\\1" +-- Determine default reader based on source file extensions +defaultReaderName :: [String] -> String +defaultReaderName [] = "markdown" +defaultReaderName (x:xs) = + let x' = map toLower x in + case (matchRegex (mkRegex ".*\\.(.*)") x') of + Nothing -> defaultReaderName xs -- no extension + Just ["xhtml"] -> "html" + Just ["html"] -> "html" + Just ["htm"] -> "html" + Just ["tex"] -> "latex" + Just ["latex"] -> "latex" + Just ["ltx"] -> "latex" + Just ["rst"] -> "rst" + Just ["native"] -> "native" + Just _ -> "markdown" + +-- Determine default writer based on output file extension +defaultWriterName :: String -> String +defaultWriterName "" = "html" -- no output file +defaultWriterName x = + let x' = map toLower x in + case (matchRegex (mkRegex ".*\\.(.*)") x') of + Nothing -> "markdown" -- no extension + Just [""] -> "markdown" -- empty extension + Just ["tex"] -> "latex" + Just ["latex"] -> "latex" + Just ["ltx"] -> "latex" + Just ["rtf"] -> "rtf" + Just ["rst"] -> "rst" + Just ["s5"] -> "s5" + Just ["native"] -> "native" + Just ["txt"] -> "markdown" + Just ["text"] -> "markdown" + Just ["md"] -> "markdown" + Just ["markdown"] -> "markdown" + Just _ -> "html" + main = do - name <- getProgName - let (from, to) = parseProgName name - - let irrelevantOptions = if not ('2' `elem` name) - then "" - else "frtwD" ++ - (if (to /= "html" && to /= "s5") then "SmcT" else "") ++ - (if (to /= "latex") then "N" else "") ++ - (if (to /= "s5") then "i" else "") ++ - (if (from /= "html" && from /= "latex") then "R" else "") - - let options = filter (not . inOptList irrelevantOptions) allOptions - - let defaultOpts = setDefaultOpts from to startOpt - args <- getArgs let (actions, sources, errors) = getOpt Permute options args if (not (null errors)) then do + name <- getProgName mapM (\e -> hPutStrLn stderr e) errors hPutStrLn stderr (reformatUsageInfo $ usageInfo (name ++ " [OPTIONS] [FILES]") options) @@ -350,30 +349,39 @@ main = do let Opt { optPreserveTabs = preserveTabs , optTabStop = tabStop , optStandalone = standalone - , optReader = reader - , optWriter = writer + , optReader = readerName + , optWriter = writerName , optParseRaw = parseRaw , optCSS = css , optIncludeInHeader = includeHeader , optIncludeBeforeBody = includeBefore , optIncludeAfterBody = includeAfter , optCustomHeader = customHeader - , optDefaultHeader = defaultHeader , optTitlePrefix = titlePrefix , optOutputFile = outputFile , optNumberSections = numberSections , optIncremental = incremental , optSmart = smart , optASCIIMathML = asciiMathML - , optShowUsage = showUsage , optDebug = debug } = opts - if showUsage - then do - hPutStr stderr (reformatUsageInfo $ usageInfo (name ++ " [OPTIONS] [FILES]") options) - exitWith $ ExitFailure 2 - else return () + -- assign reader and writer based on options and filenames + let readerName' = if null readerName + then defaultReaderName sources + else readerName + + let writerName' = if null writerName + then defaultWriterName outputFile + else writerName + + reader <- case (lookup readerName' readers) of + Just r -> return r + Nothing -> error ("Unknown reader: " ++ readerName') + + (writer, defaultHeader) <- case (lookup writerName' writers) of + Just (w,h) -> return (w, h) + Nothing -> error ("Unknown writer: " ++ writerName') output <- if ((null outputFile) || debug) then return stdout @@ -385,7 +393,6 @@ main = do hPutStr stderr $ concatMap (\s -> "INPUT=" ++ s ++ "\n") sources else return () - let writingS5 = (defaultHeader == defaultS5Header) let tabFilter = if preserveTabs then id else (tabsToSpaces tabStop) let addBlank str = str ++ "\n\n" let removeCRs str = filter (/= '\r') str -- remove DOS-style line endings @@ -407,7 +414,7 @@ main = do writerTitlePrefix = titlePrefix, writerSmart = smart, writerTabStop = tabStop, - writerS5 = writingS5, + writerS5 = (writerName=="s5"), writerIncremental = incremental, writerNumberSections = numberSections, writerIncludeBefore = includeBefore, diff --git a/src/wrappers/markdown2pdf.in b/src/wrappers/markdown2pdf.in index c222c1cbd..71d58a7cd 100644 --- a/src/wrappers/markdown2pdf.in +++ b/src/wrappers/markdown2pdf.in @@ -1,6 +1,6 @@ #!/bin/sh -e -REQUIRED="markdown2latex pdflatex" +REQUIRED="pdflatex" ### common.sh @@ -9,9 +9,12 @@ REQUIRED="markdown2latex pdflatex" texname=output logfile=$THIS_TEMPDIR/log -if ! markdown2latex -s -d "$@" >$THIS_TEMPDIR/$texname.tex 2>$logfile; then - [ -f $logfile ] && sed -e 's/markdown2latex/markdown2pdf/g' \ - -e '/^INPUT=/d' -e '/^OUTPUT=/d' $logfile >&2 +if ! pandoc -s -d -r markdown -w latex "$@" >$THIS_TEMPDIR/$texname.tex \ +2>$logfile; then + [ -f $logfile ] && sed -e 's/^pandoc/markdown2pdf/g' \ + -e '/^INPUT=/d' -e '/^OUTPUT=/d' \ + -e '/^[[:space:]]*\(-f\|-t\|-s\|-R\|-S\|-m\|-i\|-c\|-T\|-D\|-d\)/,/./d'\ + -e 's/(implies -s)//g' $logfile >&2 exit 1 fi diff --git a/src/wrappers/web2markdown.in b/src/wrappers/web2markdown.in index 64ff3db9b..89e884c3d 100644 --- a/src/wrappers/web2markdown.in +++ b/src/wrappers/web2markdown.in @@ -2,7 +2,7 @@ # converts HTML from a URL, file, or stdin to markdown # uses an available program to fetch URL and tidy to normalize it first -REQUIRED="tidy html2markdown" +REQUIRED="tidy" ### common.sh @@ -72,14 +72,16 @@ grabber= while [ $# -gt 0 ]; do case "$1" in -h|--help) - html2markdown -h 2>&1 | sed -e 's/html2markdown/web2markdown/' 1>&2 + pandoc -h 2>&1 | sed -e 's/pandoc/web2markdown/' \ + -e '/^[[:space:]]*\(-f\|-t\|-S\|-N\|-m\|-i\|-c\|-T\|-D\|-d\)/,/./d'\ + 1>&2 err " -e ENCODING, --encoding=ENCODING" err " Specify character encoding of input" err " -g COMMAND, --grabber=COMMAND" err " Specify command to be used to grab contents of URL" exit 0 ;; -v|--version) - html2markdown -v + pandoc -v 2>&1 | sed -e 's/pandoc/web2markdown/' 1>&2 exit 0 ;; -e) shift @@ -112,7 +114,7 @@ while [ $# -gt 0 ]; do shift done -# Unpack options. Now "$@" will hold the html2markdown options. +# Unpack options. Now "$@" will hold the pandoc options. oldifs="$IFS"; IFS="$NEWLINE"; set -- $options; IFS="$oldifs" inurl= @@ -162,10 +164,11 @@ else # assume UTF-8 fi if [ -z "$argument" ]; then - tidy -utf8 2>/dev/null | html2markdown "$@" + tidy -utf8 2>/dev/null | pandoc -r html -w markdown "$@" else if [ -f "$argument" ]; then - to_utf8 "$argument" | tidy -utf8 2>/dev/null | html2markdown "$@" + to_utf8 "$argument" | + tidy -utf8 2>/dev/null | pandoc -r html -w markdown "$@" else err "File '$argument' not found." exit 1