initial import

git-svn-id: https://pandoc.googlecode.com/svn/trunk@2 788f1e2b-df1e-0410-8736-df70ead52e1b
This commit is contained in:
fiddlosopher 2006-10-17 14:22:29 +00:00
parent e7dbfef4d8
commit df7b682251
137 changed files with 19996 additions and 0 deletions

3
BUGS Normal file
View file

@ -0,0 +1,3 @@
# Known Bugs

1
ChangeLog Normal file
View file

@ -0,0 +1 @@
Please see changelog.Debian (debian/changelog in source tree).

74
LICENSE Normal file
View file

@ -0,0 +1,74 @@
(c) 2006 John MacFarlane (jgm At berkeley.edu). Released under the
[GPL][], version 2 or greater.
[GPL]: http://www.gnu.org/copyleft/gpl.html
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
----------------------------------------------------------------------
UTF8.hs
Copyright (c) 2003, OGI School of Science & Engineering, Oregon Health &
Science University, All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
- Neither the name of OGI or OHSU nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
modified by Martin Norbäck
modified to pass illegal utf-8 sequences through unchanged
----------------------------------------------------------------------
ASCIIMathML.js
Copyright 2005, Peter Jipsen, Chapman University
<http://www1.chapman.edu/~jipsen/mathml/asciimath.html>
Released under the [GPL][].
----------------------------------------------------------------------
S5 slides.js and css files
by Eric A. Meyer
<http://meyerweb.com/eric/tools/s5>
Released under an explicit Public Domain License

146
Makefile Normal file
View file

@ -0,0 +1,146 @@
# Makefile for Pandoc.
#-------------------------------------------------------------------------------
# Cabal constants
#-------------------------------------------------------------------------------
# Package metadata is read from the *.cabal file found in this directory,
# so name, version and executable list are maintained in one place only.
CABAL := $(wildcard *.cabal)
# Value of the "Name:" field of the cabal file.
NAME := $(shell sed -ne 's/^[Nn]ame:[[:space:]]*//p' $(CABAL))
# Lower-cased package name; used for the install directories below and as
# the name of the program the test and %.html rules run.
THIS := $(shell echo $(NAME) | tr A-Z a-z)
# Value of the "Version:" field of the cabal file.
VERSION := $(shell sed -ne 's/^[Vv]ersion:[[:space:]]*//p' $(CABAL))
# Value(s) of the "Executable:" field(s) of the cabal file.
EXECUTABLES := $(shell sed -ne 's/^[Ee]xecutable:[[:space:]]*//p' $(CABAL))
#-------------------------------------------------------------------------------
# Variables to setup through environment
#-------------------------------------------------------------------------------
# `?=` only assigns when the variable is not already set, so values coming
# from the environment or the make command line take precedence.
PREFIX ?= /usr/local
DESTDIR ?=
#-------------------------------------------------------------------------------
# Constant names and commands in source tree
#-------------------------------------------------------------------------------
SRCDIR := src
MANDIR := man
BUILDDIR := dist
# File the `configure` target depends on and `clean` tests for.
# NOTE(review): presumably written by the Cabal configure step -- confirm.
BUILDCONF := .setup-config
# Every Cabal action (configure, build, haddock, install, clean) goes
# through this command.
BUILDCMD := runhaskell Setup.hs
#-------------------------------------------------------------------------------
# Installation paths
#-------------------------------------------------------------------------------
# All install locations are rooted at $(DESTDIR)$(PREFIX).
BINPATH := $(DESTDIR)$(PREFIX)/bin
DATAPATH := $(DESTDIR)$(PREFIX)/share
DOCPATH := $(DATAPATH)/doc/$(THIS)
LIBDOCPATH := $(DATAPATH)/doc/$(THIS)-doc
MANPATH := $(DATAPATH)/man
PKGPATH := $(DATAPATH)/$(THIS)
#-------------------------------------------------------------------------------
# Generic Makefile variables
#-------------------------------------------------------------------------------
# Programs are installed with mode 755, data files with mode 644.
INSTALL := install -c
INSTALL_PROGRAM := $(INSTALL) -m 755
INSTALL_DATA := $(INSTALL) -m 644
#-------------------------------------------------------------------------------
# Recipes
#-------------------------------------------------------------------------------
# Default goal: a plain `make` builds the package.
.PHONY: all
all: build

# `templates` delegates to the Makefile inside $(SRCDIR)/templates.
# That path has to be an existing directory for `$(MAKE) -C` to work, and an
# existing target without prerequisites is always considered up to date, so
# as an ordinary file target its recipe would never run.  Declaring the
# directory phony forces the recursive make on every invocation.
.PHONY: templates $(SRCDIR)/templates
templates: $(SRCDIR)/templates
$(SRCDIR)/templates:
	$(MAKE) -C $@
.PHONY: prep configure build build-lib-doc

# Darcs cannot preserve file permissions, so put the executable bit back on
# the scripts that get run directly.  Errors are ignored (leading `-`).
prep:
	-for script in configure debian/rules; do chmod +x $$script; done

# `configure` is also the name of a script in this directory, hence phony.
# The real work hangs off the $(BUILDCONF) file target.
cleanup_files += $(BUILDDIR) $(BUILDCONF) $(patsubst %.cabal,%,$(CABAL)).buildinfo
configure: $(BUILDCONF)
$(BUILDCONF): prep
	$(BUILDCMD) configure --prefix=$(PREFIX)

# Compile library and executables through Cabal.
build: templates configure
	$(BUILDCMD) build

# Library API documentation: generated by haddock under $(BUILDDIR)/doc,
# then moved to ./html in the top directory.
cleanup_files += html
build-lib-doc: html
html: $(BUILDCONF)
	$(BUILDCMD) haddock && mv $(BUILDDIR)/doc/$@ .
cleanup_files+=$(EXECUTABLES)
# Ugly kludge to separate program and library installations: move each built
# executable out of $(BUILDDIR) into the top directory.
# Leave the library installation to Cabal ('install-lib' target).
#
# The recipe runs once per executable, so it matches on "$@" rather than on
# the whole $(EXECUTABLES) list (which only worked for a single name).
# "Any execute bit set" is spelled with three octal `-perm -MODE` tests
# because the `-perm +MODE` form is rejected by current GNU find.
$(EXECUTABLES): build
	find $(BUILDDIR) -type f -name "$@" \
		\( -perm -100 -o -perm -010 -o -perm -001 \) -exec mv {} . \;
# XXX: Note that we don't handle PREFIX correctly at the install-* stages,
# i.e. any PREFIX given at the configuration time is lost, unless it is
# also supplied (via environment) at these stages.
.PHONY: install-exec uninstall-exec
bin_all:=$(EXECUTABLES) html2markdown markdown2latex latex2markdown markdown2pdf
# Create $(BINPATH) and copy every program into it.  The chain stops at the
# first failing command (`&&`, `|| exit 1`) so that a broken installation
# makes the target fail instead of being silently ignored.
# $(BINPATH) is deliberately left unquoted: quoting would suppress tilde
# expansion for invocations such as `make PREFIX=~ install-exec`.
install-exec: $(bin_all)
	$(INSTALL) -d $(BINPATH) && \
	for p in $(bin_all); do $(INSTALL_PROGRAM) $$p $(BINPATH)/ || exit 1; done
# Best effort: errors are ignored (leading `-`).
uninstall-exec:
	-for p in $(bin_all); do rm -f $(BINPATH)/$$p; done
.PHONY: install-doc uninstall-doc
doc_all:=README.html README BUGS TODO
cleanup_files+=README.html
# Install the user documentation into $(DOCPATH) and the contents of
# $(MANDIR) into $(MANPATH).  The two steps are separate recipe lines so
# that a failure of the first one aborts the target; chained with `;` its
# exit status used to be discarded.
install-doc: $(doc_all)
	$(INSTALL) -d $(DOCPATH) && $(INSTALL_DATA) $(doc_all) $(DOCPATH)/
	$(INSTALL) -d $(MANPATH) && cp -a $(MANDIR)/* $(MANPATH)/
# Best effort: errors are ignored (leading `-`).  Man pages are removed by
# replaying the file layout found under $(MANDIR) against $(MANPATH).
uninstall-doc:
	-for d in $(doc_all); do rm -f $(DOCPATH)/$$d; done
	-cd $(MANDIR) && find . -type f -exec rm -f "$(MANPATH)/{}" \;
.PHONY: install uninstall install-lib install-lib-doc

# Programs and documentation are installed by hand, without Cabal
# (due to the deficiencies in Cabal).
install: install-exec install-doc

# FIXME: incomplete support for uninstallation.
uninstall: uninstall-exec uninstall-doc

# Library installation is left to Cabal.  Its exit status is discarded:
# required since we move executable.
install-lib:
	@$(BUILDCMD) install || true

# Copy the generated ./html tree below $(LIBDOCPATH).
install-lib-doc: build-lib-doc
	$(INSTALL) -d $(LIBDOCPATH) && cp -a html $(LIBDOCPATH)/
.PHONY: test test-markdown
# Both suites run from inside their own directory and are handed the path of
# the freshly built program via -s.  $(CURDIR) is used rather than $(PWD):
# PWD is inherited from the environment and still names the caller's
# directory when make is started with `make -C <dir>`, whereas CURDIR is set
# by make itself to the directory it is working in.
test: $(EXECUTABLES)
	@cd tests && perl runtests.pl -s $(CURDIR)/$(THIS)
test-markdown: $(EXECUTABLES)
	@cd tests/MarkdownTest_1.0.3 && perl MarkdownTest.pl -s $(CURDIR)/$(THIS) -tidy
# Render FILE to FILE.html as a standalone document (-s) with the program in
# the top directory.  On failure the partial output is removed AND the
# recipe fails; `|| rm -f $@` alone would exit 0 and make would carry on as
# if the conversion had succeeded.
# NOTE(review): ./$(THIS) is not a prerequisite here, so it must already
# have been built -- confirm whether that is intended.
%.html: %
	./$(THIS) -s $^ >$@ || { rm -f $@; exit 1; }
# Stolen and slightly improved from a GPLed Makefile. Credits to John Meacham.
# `grep -E` replaces the obsolescent `egrep`, which recent GNU grep warns
# about on every invocation.
# NOTE(review): find prints paths that start with $(SRCDIR)/, so the filter
# anchored at '^\./' looks like it can never match -- confirm whether the
# exclusion of _darcs, lib and test is still wanted.
src_all:=$(shell find $(SRCDIR) -type f -name '*hs' | grep -Ev '^\./(_darcs|lib|test)/')
cleanup_files+=$(patsubst %,$(SRCDIR)/%,tags tags.sorted)
# Build a sorted tags file inside $(SRCDIR).  Every step is chained with
# `&&`: with `;` a failed `cd` would still run sort/mv in the top directory.
# NOTE(review): the recipe writes $(SRCDIR)/tags while the target is `tags`,
# so the target file is never created and the rule re-runs every time.
tags: $(src_all)
	cd $(SRCDIR) && hasktags -c $(src_all:$(SRCDIR)/%=%) && \
	LC_ALL=C sort tags >tags.sorted && mv tags.sorted tags
# Build the Debian package.  debuild is used when it is installed;
# otherwise a hint is printed and dpkg-buildpackage is called directly.
# The -i/-I options exclude the _darcs directory.
deb: debian prep
	if test ! -x /usr/bin/debuild; then \
		echo "*** Please install devscripts package. ***"; \
		dpkg-buildpackage -i_darcs -I_darcs -uc -us; \
	else \
		debuild -i_darcs -I_darcs -uc -us; \
	fi
.PHONY: distclean clean

# `clean` plus the Debian packaging clean-up, when a debian directory exists.
distclean: clean prep
	[ ! -d debian ] || fakeroot debian/rules clean

# Let Cabal clean up only if the package was configured, then remove
# everything collected in cleanup_files.  Both steps ignore errors.
clean:
	-[ ! -f $(BUILDCONF) ] || $(BUILDCMD) clean
	-rm -rf $(cleanup_files)

1
Pandoc.buildinfo.in Normal file
View file

@ -0,0 +1 @@
Ghc-Options: @HCFLAGS@

33
Pandoc.cabal Normal file
View file

@ -0,0 +1,33 @@
Name: Pandoc
Version: 0.21
License: GPL
License-File: LICENSE
Author: John MacFarlane <jgm@berkeley.edu>
Homepage: http://sophos.berkeley.edu/macfarlane/pandoc
Category: Text
Synopsis: Utilities for translating between various markup formats
Description: Haskell utilities for translating between markdown, HTML,
reStructuredText, LaTeX, and RTF
Build-Depends: base, haskell98, parsec
Hs-Source-Dir: src
Exposed-Modules: Text.ParserCombinators.Pandoc,
Text.Pandoc.ASCIIMathML,
Text.Pandoc.Definition,
Text.Pandoc.HtmlEntities,
Text.Pandoc.Shared,
Text.Pandoc.UTF8,
Text.Pandoc.Writers.DefaultHeaders,
Text.Pandoc.Writers.HTML,
Text.Pandoc.Writers.Markdown,
Text.Pandoc.Writers.LaTeX,
Text.Pandoc.Writers.RST,
Text.Pandoc.Writers.S5,
Text.Pandoc.Writers.RTF,
Text.Pandoc.Readers.HTML,
Text.Pandoc.Readers.Markdown,
Text.Pandoc.Readers.RST,
Text.Pandoc.Readers.LaTeX
Executable: pandoc
Hs-Source-Dir: src
Main-Is: Main.hs

508
README Normal file
View file

@ -0,0 +1,508 @@
% pandoc
% John MacFarlane
% August 10, 2006
`pandoc` converts files from one markup format to another. It can
read [markdown] and (with some limitations) [reStructuredText], [HTML], and
[LaTeX], and it can write [markdown], [reStructuredText], [HTML],
[LaTeX], [RTF], and [S5] HTML slide shows. It is written in
[Haskell], using the excellent [Parsec] parser combinator library.
[markdown]: http://daringfireball.net/projects/markdown/
[reStructuredText]: http://docutils.sourceforge.net/docs/ref/rst/introduction.html
[S5]: http://meyerweb.com/eric/tools/s5/
[HTML]: http://www.w3.org/TR/html40/
[LaTeX]: http://www.latex-project.org/
[RTF]: http://en.wikipedia.org/wiki/Rich_Text_Format
[Haskell]: http://www.haskell.org/
[Parsec]: http://www.cs.uu.nl/~daan/download/parsec/parsec.html
(c) 2006 John MacFarlane (jgm At berkeley.edu). Released under the
[GPL], version 2 or greater. This software carries no warranty of
any kind. (See LICENSE for full copyright and warranty notices.)
[GPL]: http://www.gnu.org/copyleft/gpl.html
# Installation
## Installing GHC
To compile `pandoc`, you'll need [GHC] version 6.4 or greater.
If you don't have GHC already, you can get it from the
[GHC Download] page.
[GHC]: http://www.haskell.org/ghc/
[GHC Download]: http://www.haskell.org/ghc/download.html
Note: As of this writing, there's no MacOS X installer package for
GHC 6.4.2 (the latest version). There is an installer for
GHC 6.4.1 [here](http://www.haskell.org/ghc/download_ghc_641.html#macosx).
It will work just fine on PPC-based Macs. GHC has not yet been ported
to Intel Macs: see <http://hackage.haskell.org/trac/ghc/wiki/X86OSXGhc>.
You'll also need standard build tools: GNU Make, sed, bash, and perl.
These are standard on unix systems (including MacOS X). If you're
using Windows, you can install [Cygwin].
[Cygwin]: http://www.cygwin.com/
Note: I have tested `pandoc` on MacOS X and Linux systems. I have not
tried it on Windows, and I have no idea whether it will work on Windows.
## Installing `pandoc`
1. Change to the directory containing the `pandoc` distribution.
2. Compile:
make
3. Optional, but recommended:
make test
4. If you want to install the `pandoc` program and the relevant wrappers
and documents (including this file) into `/usr/local` directory, type:
make install
If you only want the `pandoc` program and the shell scripts `latex2markdown`,
`markdown2latex`, `markdown2pdf`, `markdown2html`, `html2markdown` installed
into your `~/bin` directory, type (note the **`-exec`** suffix):
PREFIX=~ make install-exec
5. If you want to install the Pandoc library modules for use in
other Haskell programs, type (as root):
make install-lib
6. To install the library documentation (into `/usr/local/share/doc/pandoc-doc`),
type:
make install-lib-doc
# Using `pandoc`
You can run `pandoc` like this:
./pandoc
If you copy the `pandoc` executable to a directory in your path
(perhaps using `make install`), you can invoke it without the "./":
pandoc
If you run `pandoc` without arguments, it will accept input from
STDIN. If you run it with file names as arguments, it will take input
from those files. It accepts several command-line options. For a
list, type
pandoc -h
The most important options specify the format of the source file and
the output. The default reader is markdown; the default writer is
HTML. So if you don't specify a reader or writer, `pandoc` will
convert markdown to HTML. To convert markdown to LaTeX, you could
write:
pandoc -w latex input.txt
To convert html to markdown:
pandoc -r html -w markdown input.txt
Supported writers include markdown, LaTeX, HTML, RTF,
reStructuredText, and S5 (which produces an HTML file that acts like
powerpoint). Supported readers include markdown, HTML, LaTeX, and
reStructuredText. Note that the rst (reStructuredText) reader only
parses a subset of rst syntax. For example, it doesn't handle tables,
definition lists, option lists, or footnotes. It handles only the
constructs expressible in unextended markdown. But for simple
documents it should be adequate. The LaTeX and HTML readers are also
limited in what they can do.
`pandoc` writes its output to STDOUT. If you want to write to a file,
use redirection:
pandoc input.txt > output.html
Note that you can specify multiple input files on the command line.
`pandoc` will concatenate them all (with blank lines between them)
before parsing:
pandoc -s chapter1.txt chapter2.txt chapter3.txt references.txt > book.html
## Character encoding
Unfortunately, due to limitations in GHC, `pandoc` does not
automatically detect the system's local character encoding. Hence,
all input and output is assumed to be in the UTF-8 encoding. If you
use accented or foreign characters, you should convert the input file
to UTF-8 before processing it with `pandoc`. This can be done by
piping the input through [`iconv`]: for example,
iconv -t utf-8 source.txt | pandoc > output.html
will convert `source.txt` from the local encoding to UTF-8, then
convert it to HTML, putting the output in `output.html`.
[`iconv`]: http://www.gnu.org/software/libiconv/
The shell scripts (described below) automatically convert the source
from the local encoding to UTF-8 before running them through `pandoc`.
## The shell scripts
For convenience, five shell scripts have been included that make it
easy to run `pandoc` without remembering all the command-line options.
All of the scripts presuppose that `pandoc` is in the path, and
`html2markdown` also presupposes that `curl` and `tidy` are in the
path.
1. `markdown2html` converts markdown to HTML, running `iconv` first to
convert the file to UTF-8. (This can be used as a replacement for
`Markdown.pl`.)
2. `html2markdown` can take either a filename or a URL as argument. If
it is given a URL, it uses `curl` to fetch the contents of the
specified URL, then filters this through `tidy` to straighten up the
HTML and convert to UTF-8, and finally passes this HTML to `pandoc` to
produce markdown text:
html2markdown http://www.fsf.org
html2markdown www.fsf.org
html2markdown subdir/mylocalfile.html
3. `latex2markdown` converts a LaTeX file to markdown.
latex2markdown mytexfile.tex
4. `markdown2latex` converts markdown to LaTeX:
markdown2latex mytextfile.txt
5. `markdown2pdf` converts markdown to PDF, using LaTeX, but removing
all the intermediate files created by LaTeX. Example:
markdown2pdf mytextfile.txt
creates a file `mytextfile.pdf` in the working directory.
# Command-line options
Various command-line options can be used to customize the output.
For a complete list, type
pandoc --help
`-p` or `--preserve-tabs` causes tabs in the source text to be
preserved, rather than converted to spaces (the default).
`--tabstop` allows the user to set the tab stop (which defaults to 4).
`-R` or `--parse-raw` causes the HTML and LaTeX readers to parse HTML
codes and LaTeX environments that it can't translate as raw HTML or
LaTeX. Raw HTML can be printed in markdown, reStructuredText, HTML,
and S5 output; raw LaTeX can be printed in markdown, reStructuredText,
and LaTeX output. The default is for the readers to omit
untranslatable HTML codes and LaTeX environments. (The LaTeX reader
does pass through untranslatable LaTeX commands, even if `-R` is not
specified.)
`-s` or `--standalone` causes `pandoc` to produce a standalone file,
complete with appropriate document headers. By default, `pandoc`
produces a fragment.
`--custom-header` can be used to specify a custom document header. To
see the headers used by default, use the `-D` option: for example,
`pandoc -D html` prints the default HTML header.
`-c` or `--css` allows the user to specify a custom stylesheet that
will be linked to in HTML and S5 output.
`-H` or `--include-in-header` specifies a file to be included
(verbatim) at the end of the document header. This can be used, for
example, to include special CSS or javascript in HTML documents.
`-B` or `--include-before-body` specifies a file to be included
(verbatim) at the beginning of the document body (after the `<body>`
tag in HTML, or the `\begin{document}` command in LaTeX). This can be
used to include navigation bars or banners in HTML documents.
`-A` or `--include-after-body` specifies a file to be included
(verbatim) at the end of the document body (before the `</body>` tag in
HTML, or the `\end{document}` command in LaTeX).
`-T` or `--title-prefix` specifies a string to be included as a prefix
at the beginning of the title that appears in the HTML header (but not
in the title as it appears at the beginning of the HTML body). (See
below on Titles.)
`-S` or `--smartypants` causes `pandoc` to produce typographically
correct HTML output, along the lines of John Gruber's [Smartypants].
Straight quotes are converted to curly quotes, `---` to dashes, and
`...` to ellipses.
[Smartypants]: http://daringfireball.net/projects/smartypants/
`-m` or `--asciimathml` will cause LaTeX formulas (between $ signs) in
HTML or S5 to display as formulas rather than as code. The trick will
not work in all browsers, but it works in Firefox. Peter Jipsen's
[ASCIIMathML] script is used to do the magic.
[ASCIIMathML]: http://www1.chapman.edu/~jipsen/mathml/asciimath.html
`-i` or `--incremental` causes all lists in S5 output to be displayed
incrementally by default (one item at a time). The normal default
is for lists to be displayed all at once.
`-N` or `--number-sections` causes sections to be numbered in LaTeX
output. By default, sections are not numbered.
# `pandoc`'s markdown vs. standard markdown
In parsing markdown, `pandoc` departs from and extends [standard markdown]
in a few respects. (To run `pandoc` on the official
markdown test suite, type `make test-markdown`.)
[standard markdown]: http://daringfireball.net/projects/markdown/syntax
## Lists
`pandoc` behaves differently from standard markdown on some "edge
cases" involving lists. Consider this source:
1. First
2. Second:
- Fee
- Fie
- Foe
3. Third
`pandoc` transforms this into a "compact list" (with no `<p>` tags
around "First", "Second", or "Third"), while markdown puts `<p>`
tags around "Second" and "Third" (but not "First"), because of
the blank space around "Third". `pandoc` follows a simple rule:
if the text is followed by a blank line, it is treated as a
paragraph. Since "Second" is followed by a list, and not a blank
line, it isn't treated as a paragraph. The fact that the list
is followed by a blank line is irrelevant.
## Literal quotes in titles
Standard markdown allows unescaped literal quotes in titles, as
in
[foo]: "bar "embedded" baz"
`pandoc` requires all quotes within titles to be escaped:
[foo]: "bar \"embedded\" baz"
## Reference links
`pandoc` allows implicit reference links in either of two styles:
1. Here's my [link]
2. Here's my [link][]
[link]: linky.com
If there's no corresponding reference, the implicit reference link
will appear as regular bracketed text. Note: even `[link][]` will
appear as `[link]` if there's no reference for `link`. If you want
`[link][]`, use a backslash escape: `\[link]\[]`.
## Footnotes
`pandoc`'s markdown allows footnotes, using the following syntax:
here is a footnote reference,^(1) and another.^(longnote)
^(1) Here is the footnote. It can go anywhere in the document,
except in embedded contexts like block quotes or lists.
^(longnote) Here's the other note. This one contains multiple
blocks.
^
^ Caret characters are used to indicate that the blocks all belong
to a single footnote (as with block quotes).
^
^ If you want, you can use a caret at the beginning of every line,
^ as with blockquotes, but all that you need is a caret at the
^ beginning of the first line of the block and any preceding
^ blank lines.
Footnote references may not contain spaces, tabs, or newlines.
## Embedded HTML
`pandoc` treats embedded HTML in markdown a bit differently than
Markdown 1.0. While Markdown 1.0 leaves HTML blocks exactly as they
are, `pandoc` treats text between HTML tags as markdown. Thus, for
example, `pandoc` will turn
<table>
<tr>
<td>*one*</td>
<td>[a link](http://google.com)</td>
</tr>
</table>
into
<table>
<tr>
<td><em>one</em></td>
<td><a href="http://google.com">a link</a></td>
</tr>
</table>
whereas Markdown 1.0 will preserve it as is.
There is one exception to this rule: text between `<script>` and
`</script>` tags is not interpreted as markdown.
This departure from standard markdown should make it easier to mix
markdown with HTML block elements. For example, one can surround
a block of markdown text with `<div>` tags without preventing it
from being interpreted as markdown.
## Title blocks
If the file begins with a title block
% title
% author(s) (separated by commas)
% date
it will be parsed as bibliographic information, not regular text. (It
will be used, for example, in the title of standalone LaTeX or HTML
output.) The block may contain just a title, a title and an author,
or all three lines. Each must begin with a % and fit on one line.
The title may contain standard inline formatting. If you want to
include an author but no title, or a title and a date but no author,
you need a blank line:
% My title
%
% June 15, 2006
Titles will be written only when the `--standalone` (`-s`) option is
chosen. In HTML output, titles will appear twice: once in the
document head -- this is the title that will appear at the top of the
window in a browser -- and once at the beginning of the document body.
The title in the document head can have an optional prefix attached
(`--title-prefix` or `-T` option). The title in the body appears as
an H1 element with class "title", so it can be suppressed or
reformatted with CSS.
If a title prefix is specified with `-T` and no title block appears
in the document, the title prefix will be used by itself as the
HTML title.
## Box-style blockquotes
`pandoc` supports emacs-style boxquote block quotes, in addition to
standard markdown (email-style) boxquotes:
,----
| They look like this.
`----
## Inline LaTeX
Anything between two $ characters will be parsed as LaTeX math. The
opening $ must have a character immediately to its right, while the
closing $ must have a character immediately to its left. Thus,
`$20,000 and $30,000` won't parse as math. The $ character can be
escaped with a backslash if needed.
If you pass the `-m` (`--asciimathml`) option to `pandoc`, it will
include the [ASCIIMathML] script in the resulting HTML. This will
cause LaTeX math to be displayed as formulas in better browsers.
[ASCIIMathML]: http://www1.chapman.edu/~jipsen/mathml/asciimath.html
Inline LaTeX commands will also be preserved and passed unchanged
to the LaTeX writer. Thus, for example, you can use LaTeX to
include BibTeX citations:
This result was proved in \cite{jones.1967}.
You can also use LaTeX environments. For example,
\begin{tabular}{|l|l|}\hline
Age & Frequency \\ \hline
18--25 & 15 \\
26--35 & 33 \\
36--45 & 22 \\ \hline
\end{tabular}
Note, however, that material between the begin and end tags will
be interpreted as raw LaTeX, not as markdown.
## Custom headers
When run with the "standalone" option (`-s`), `pandoc` creates a
standalone file, complete with an appropriate header. To see the
default headers used for html and latex, use the following commands:
pandoc -D html
pandoc -D latex
If you want to use a different header, just create a file containing
it and specify it on the command line as follows:
pandoc --custom-header=MyHeaderFile
# Producing S5 with `pandoc`
Producing an [S5] slide show with `pandoc` is easy. A title page is
constructed automatically from the document's title block (see above).
Each section (with a level-one header) produces a single slide. (Note
that if the section is too big, the slide will not fit on the page; S5
is not smart enough to produce multiple pages.)
Here's the markdown source for a simple slide show, `eating.txt`:
% Eating Habits
% John Doe
% March 22, 2005
# In the morning
- Eat eggs
- Drink coffee
# In the evening
- Eat spaghetti
- Drink wine
To produce the slide show, simply type
pandoc -w s5 -s eating.txt > eating.html
and open up `eating.html` in a browser. The HTML file embeds
all the required javascript and CSS, so no other files are necessary.
Note that by default, the S5 writer produces lists that display
"all at once." If you want your lists to display incrementally
(one item at a time), use the `-i` option. If you want a
particular list to depart from the default (that is, to display
incrementally without the `-i` option and all at once with the
`-i` option), put it in a block quote:
> - Eat spaghetti
> - Drink wine
In this way incremental and nonincremental lists can be mixed in
a single document.

2
Setup.hs Normal file
View file

@ -0,0 +1,2 @@
-- Cabal build script for the package.
import Distribution.Simple
-- Hand control to Cabal's hook-driven entry point with the default user hooks.
-- NOTE(review): presumably chosen so that the ./configure script in this tree
-- is run at configure time -- confirm against the Cabal version in use.
main = defaultMainWithHooks defaultUserHooks

2
TODO Normal file
View file

@ -0,0 +1,2 @@
# TODO

20
configure vendored Normal file
View file

@ -0,0 +1,20 @@
#!/bin/sh
# GHC does not recognize any environment variable, unlike with GCC which honors
# CFLAGS. This hook is a workaround for this flaw and introduces an HCFLAGS
# variable.
#
# Writes <package>.buildinfo from <package>.buildinfo.in, replacing every
# @HCFLAGS@ placeholder with the value of $HCFLAGS.  Exits 0 without writing
# anything when the template is missing or HCFLAGS is not set.

CABAL=Pandoc.cabal
BUILDINFO=${CABAL%%.cabal}.buildinfo

[ -f "$BUILDINFO.in" ] || {
	echo >&2 "==> No $BUILDINFO.in exists; skipping $BUILDINFO creation."
	exit 0
}

# ${HCFLAGS+set} is empty only when HCFLAGS is unset.  An HCFLAGS that is
# set but empty still generates the file, and no sentinel string is needed.
if [ -z "${HCFLAGS+set}" ]; then
	echo >&2 "==> No HCFLAGS defined; skipping $BUILDINFO creation."
	exit 0
fi

# Escape the characters that are special on the replacement side of the
# s#...#...# command below: backslash, '&' and the '#' delimiter itself.
hcflags_escaped=$(printf '%s\n' "$HCFLAGS" | sed -e 's/[\\&#]/\\&/g')

sed -e "s#@HCFLAGS@#${hcflags_escaped}#g" "$BUILDINFO.in" >"$BUILDINFO"

19
debian/changelog vendored Normal file
View file

@ -0,0 +1,19 @@
pandoc (0.21) unstable; urgency=low
* Revamp and split Debian package. Closes: #391666.
* Revamped build system.
* Various fixes in wrapper scripts.
-- Recai Oktaş <roktas@debian.org> Sun, 3 Sep 2006 13:25:18 +0300
pandoc (0.2) unstable; urgency=low
* Fixed unicode/utf-8 translation
-- John MacFarlane <clemens@kclee.de> Mon, 14 Aug 2006 00:00:00 -0400
pandoc (0.1) unstable; urgency=low
* Initial creation of debian package
-- John MacFarlane <clemens@kclee.de> Mon, 14 Aug 2006 00:00:00 -0400

1
debian/compat vendored Normal file
View file

@ -0,0 +1 @@
4

42
debian/control vendored Normal file
View file

@ -0,0 +1,42 @@
Source: pandoc
Section: text
Priority: optional
Maintainer: Recai Oktaş <roktas@debian.org>
Build-Depends: debhelper (>= 4.0.0), haskell-devscripts (>=0.5.11), ghc6 (>=6.4), perl
Build-Depends-Indep: haddock
Standards-Version: 3.7.2.0
Package: pandoc
Section: text
Architecture: any
Depends: ${shlibs:Depends}
Suggests: tetex-bin, tidy, wget | w3m
Description: General markup converter
Pandoc is an implementation of Markdown (and much more) in Haskell. It
can convert Markdown formatted text to HTML, LaTeX, rich text format,
reStructuredText, or an S5 HTML slide show. It can also convert HTML,
LaTeX, and reStructuredText to Markdown.
Package: libghc6-pandoc-dev
Section: libdevel
Architecture: any
Depends: ${haskell:Depends}
Suggests: pandoc-doc
Description: General markup converter
Pandoc is an implementation of Markdown (and much more) in Haskell. It
can convert Markdown formatted text to HTML, LaTeX, rich text format,
reStructuredText, or an S5 HTML slide show. It can also convert HTML,
LaTeX, and reStructuredText to Markdown.
.
This package contains the libraries compiled for GHC 6.
Package: pandoc-doc
Section: doc
Architecture: all
Description: General markup converter
Pandoc is an implementation of Markdown (and much more) in Haskell. It
can convert Markdown formatted text to HTML, LaTeX, rich text format,
reStructuredText, or an S5 HTML slide show. It can also convert HTML,
LaTeX, and reStructuredText to Markdown.
.
This package contains the library documentation for Pandoc.

62
debian/copyright vendored Normal file
View file

@ -0,0 +1,62 @@
Pandoc is copyright 2006 by John MacFarlane <jgm@berkeley.edu>. It is licensed
under the terms of the GPL version 2 or later. On Debian systems, the complete
text of the GPL can be found in /usr/share/common-licenses/GPL.
This package was debianized by Recai Oktaş <roktas@debian.org>. It was
downloaded from http://sophos.berkeley.edu/macfarlane/
----------------------------------------------------------------------
UTF8.hs
Copyright (c) 2003, OGI School of Science & Engineering, Oregon Health &
Science University, All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
- Neither the name of OGI or OHSU nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
modified by Martin Norbäck
modified to pass illegal utf-8 sequences through unchanged
----------------------------------------------------------------------
ASCIIMathML.js
Copyright 2005, Peter Jipsen, Chapman University
<http://www1.chapman.edu/~jipsen/mathml/asciimath.html>
Released under the GPL.
----------------------------------------------------------------------
S5 slides.js and css files
by Eric A. Meyer
<http://meyerweb.com/eric/tools/s5>
Released under an explicit Public Domain License

11
debian/pandoc-doc.doc-base vendored Normal file
View file

@ -0,0 +1,11 @@
Document: pandoc-doc
Title: Pandoc documentation
Author: John MacFarlane
Abstract: This is the documentation of Pandoc, which includes the API
documentation of the Pandoc library and documentation for the Pandoc
tools.
Section: Apps/Programming
Format: html
Index: /usr/share/doc/pandoc-doc/index.html
Files: /usr/share/doc/pandoc-doc/*.html /usr/share/doc/pandoc-doc/pandoc/*.html

1
debian/pandoc-doc.docs vendored Normal file
View file

@ -0,0 +1 @@
html/*

1
debian/pandoc.dirs vendored Normal file
View file

@ -0,0 +1 @@
usr/bin

113
debian/rules vendored Normal file
View file

@ -0,0 +1,113 @@
#!/usr/bin/make -f
#
# debian/rules for pandoc.
# Copyright © 2006 Recai Oktaş <roktasATdebian.org>
#
# This file is based on John Goerzen's Cabal Debian template.
# See http://www.n-heptane.com/nhlab/repos/cabalDebianTemplate/
#
# Licensed under the GNU General Public License, version 2.
# See the file 'http://www.gnu.org/copyleft/gpl.txt'.

THIS := pandoc

# Uncomment this to turn on verbose mode.
#export DH_VERBOSE=1

# Handle noopt in DEB_BUILD_OPTIONS.  Emulate CFLAGS (as HCFLAGS).
ifneq (,$(findstring noopt,$(DEB_BUILD_OPTIONS)))
MAKE_ENVVARS := HCFLAGS=-O0
else
MAKE_ENVVARS := HCFLAGS=-O2
endif
# Variables placed in the environment of the upstream Makefile's
# configure and install goals.
MAKE_ENVVARS += PREFIX=/usr

# Run the upstream configure step once; the stamp file records completion.
configure: configure-stamp
configure-stamp:
	dh_testdir
	$(MAKE_ENVVARS) $(MAKE) configure
	touch configure-stamp

# This recipe runs no compiler itself; see the note in the install target
# about Cabal building the executables together with the libraries.
build: build-stamp
build-stamp: configure-stamp
	dh_testdir
	touch build-stamp

# Remove everything the other targets create, including all stamp files.
clean:
	dh_testdir
	dh_testroot
	$(MAKE) clean
	rm -rf setup Setup.hi Setup.ho Setup.o .*config* dist html
	rm -f build-stamp configure-stamp build-indep-stamp
	dh_clean

# Stage the architecture-dependent files under debian/.
install: build
	dh_testdir
	dh_testroot
	dh_clean -k
	dh_installdirs -a
	dh_haskell -a
	# Hack! Cabal builds executables while building libraries.  Move these
	# files to top dir where the Makefile install target expects to find.
	# See "BUGS" section at the following document:
	# http://www.n-heptane.com/nhlab/repos/cabalDebianTemplate/INSTRUCTIONS.txt
	find debian/libghc6-$(THIS)-dev -type d -name 'bin' -true | \
	  while read -r bin; do mv "$$bin"/* .; rm -rf "$$bin"; done
	DESTDIR=debian/$(THIS) $(MAKE_ENVVARS) $(MAKE) install

# Build the library documentation once.  The stamp is touched so that a
# second invocation does not rebuild it; clean removes the stamp again.
build-indep: build-indep-stamp
build-indep-stamp:
	dh_testdir
	$(MAKE) build-lib-doc
	touch build-indep-stamp

# Stage the architecture-independent files under debian/.
install-indep: build-indep
	dh_testdir
	dh_testroot
	dh_clean -k
	dh_installdirs -i
	dh_haskell -i

# Build architecture-independent files here.
binary-indep: build-indep install-indep
	dh_testdir
	dh_testroot
	dh_installchangelogs -i
	dh_installdocs -i
	dh_installexamples -i
	dh_installman -i
	dh_link -i
	dh_strip -i
	dh_compress -i
	dh_fixperms -i
	dh_installdeb -i
	dh_shlibdeps -i
	dh_gencontrol -i
	dh_md5sums -i
	dh_builddeb -i

# Build architecture-dependent files here.
binary-arch: build install
	dh_testdir
	dh_testroot
	dh_installchangelogs -a
	dh_installdocs -a
	dh_installexamples -a
	dh_installman -a
	dh_link -a
	dh_strip -a -Xhtml2 -Xmarkdown2 -Xlatex2
	dh_compress -a
	dh_fixperms -a
	dh_installdeb -a
	dh_shlibdeps -a
	dh_gencontrol -a
	dh_md5sums -a
	dh_builddeb -a

binary: binary-indep binary-arch

# Every goal here that names an action rather than a file, so that a stray
# file called e.g. "configure" or "build" cannot make the goal look up to date.
.PHONY: configure build clean binary-indep binary-arch binary install build-indep install-indep

39
html2markdown Normal file
View file

@ -0,0 +1,39 @@
#!/bin/sh -e
# converts html to markdown
# uses an available program to fetch URL and tidy to normalize it first
#
# Usage: html2markdown [input-file or URL]
# With no argument the HTML is read from STDIN.

[ -n "$(which pandoc)" ] || {
    echo >&2 "You need 'pandoc' to use this program!"
    exit 1
}

[ -n "$(which tidy)" ] || {
    echo >&2 "You need 'tidy' to use this program!"
    exit 1
}

if [ -z "$1" ] || [ -f "$1" ]; then
    # No argument, or an existing local file.  ${1:+"$1"} expands to nothing
    # when $1 is empty (so tidy reads STDIN) and to one quoted word otherwise
    # (so file names containing blanks survive).
    tidy -utf8 ${1:+"$1"} 2>/dev/null | pandoc -r html -w markdown -s
else
    # Treat given argument as an URL.  Locate a
    # sensible text based browser (note the order).
    DUMPER=     # do not inherit a value from the caller's environment
    for p in wget lynx w3m curl links w3c; do
        if which "$p" >/dev/null; then
            DUMPER=$p
            break
        fi
    done

    # Setup proper options.
    case "$DUMPER" in
        wget)  OPT="-O-"          ;;
        lynx)  OPT="-source"      ;;
        w3m)   OPT="-dump_source" ;;
        curl)  OPT=""             ;;
        links) OPT="-source"      ;;
        w3c)   OPT="-n -get"      ;;
        "")    echo >&2 "Needs a program to fetch the URL (e.g. wget, lynx, w3m or curl)."
               exit 1 ;;
    esac

    # Fetch and feed to pandoc.  $OPT is deliberately unquoted: it may hold
    # several options ("-n -get") or none at all.  The URL is quoted so that
    # characters such as '?' and '*' are not treated as glob patterns.
    $DUMPER $OPT "$1" 2>/dev/null | tidy -utf8 2>/dev/null | pandoc -r html -w markdown -s
fi

7
latex2markdown Normal file
View file

@ -0,0 +1,7 @@
#!/bin/sh -e
# runs pandoc to convert latex to markdown
#
# Usage: latex2markdown [input-file]...
# With no argument iconv reads STDIN.

[ -n "$(which pandoc)" ] || {
    echo >&2 "You need 'pandoc' to use this program!"
    exit 1
}

# "$@" passes every argument as exactly one word, so file names that
# contain blanks or glob characters reach iconv intact.
iconv -t utf-8 "$@" | pandoc -r latex -w markdown -s

15
man/man1/html2markdown.1 Normal file
View file

@ -0,0 +1,15 @@
.TH HTML2MARKDOWN 1 "AUGUST 2006" Linux "User Manuals"
.SH NAME
html2markdown \- converts HTML to markdown-formatted text
.SH SYNOPSIS
.B html2markdown [input-file or URL]
.SH DESCRIPTION
.B html2markdown
converts input-file (or text from STDIN) or URL from HTML to
markdown-formatted plain text. Uses an available program
(e.g. wget, w3m, lynx or curl) to fetch the URL.
.SH AUTHOR
John MacFarlane <jgm at berkeley.edu>
.SH "SEE ALSO"
pandoc (1), markdown2html (1), markdown2latex (1), latex2markdown (1), markdown2pdf (1)

13
man/man1/latex2markdown.1 Normal file
View file

@ -0,0 +1,13 @@
.TH LATEX2MARKDOWN 1 "AUGUST 2006" Linux "User Manuals"
.SH NAME
latex2markdown \- converts LaTeX to markdown-formatted text
.SH SYNOPSIS
.B latex2markdown [input-file]
.SH DESCRIPTION
.B latex2markdown
converts input-file from LaTeX to markdown-formatted plain text.
.SH AUTHOR
John MacFarlane <jgm at berkeley.edu>
.SH "SEE ALSO"
pandoc (1), markdown2html (1), html2markdown (1), markdown2latex (1), markdown2pdf (1)

15
man/man1/markdown2html.1 Normal file
View file

@ -0,0 +1,15 @@
.TH MARKDOWN2HTML 1 "AUGUST 2006" Linux "User Manuals"
.SH NAME
markdown2html \- converts markdown-formatted text to HTML
.SH SYNOPSIS
.B markdown2html [input-files]
.SH DESCRIPTION
.B markdown2html
converts input-files (or text from STDIN) from markdown-formatted
plain text to HTML.
.SH AUTHOR
John MacFarlane <jgm at berkeley.edu>
.SH "SEE ALSO"
pandoc (1), html2markdown (1), markdown2latex (1), latex2markdown (1),
markdown2pdf (1)

15
man/man1/markdown2latex.1 Normal file
View file

@ -0,0 +1,15 @@
.TH MARKDOWN2LATEX 1 "AUGUST 2006" Linux "User Manuals"
.SH NAME
markdown2latex \- converts markdown-formatted text to LaTeX
.SH SYNOPSIS
.B markdown2latex [input-files]
.SH DESCRIPTION
.B markdown2latex
converts input-files (or text from STDIN) from markdown-formatted
plain text to LaTeX.
.SH AUTHOR
John MacFarlane <jgm at berkeley.edu>
.SH "SEE ALSO"
pandoc (1), markdown2html (1), html2markdown (1), latex2markdown (1),
markdown2pdf (1)

16
man/man1/markdown2pdf.1 Normal file
View file

@ -0,0 +1,16 @@
.TH MARKDOWN2PDF 1 "AUGUST 2006" Linux "User Manuals"
.SH NAME
markdown2pdf \- converts markdown-formatted text to PDF, using pdflatex
.SH SYNOPSIS
.B markdown2pdf [input-file]
.SH DESCRIPTION
.B markdown2pdf
converts input-file (or text from STDIN, in which case the
resulting PDF file is named 'stdin.pdf') from markdown-formatted
plain text to PDF, using LaTeX. Cleans up intermediate LaTeX files.
.SH AUTHOR
John MacFarlane <jgm at berkeley.edu>
.SH "SEE ALSO"
pandoc (1), markdown2html (1), html2markdown (1), markdown2latex (1),
latex2markdown (1)

20
man/man1/pandoc.1 Normal file
View file

@ -0,0 +1,20 @@
.TH PANDOC 1 "AUGUST 2006" Linux "User Manuals"
.SH NAME
pandoc \- general markup converter
.SH SYNOPSIS
.B pandoc [options] [input-files]
.SH DESCRIPTION
.B pandoc
converts files from one markup format to another. It can read markdown
and (with some limitations) reStructuredText, HTML, and LaTeX, and it
can write markdown, reStructuredText, HTML, LaTeX, RTF, and S5 HTML
slide shows.
.SH OPTIONS
For full usage information, including command-line options,
type
.B pandoc -h
.SH AUTHOR
John MacFarlane <jgm at berkeley.edu>
.SH "SEE ALSO"
markdown2html (1), html2markdown (1), markdown2latex (1), latex2markdown (1), markdown2pdf (1)

7
markdown2html Normal file
View file

@ -0,0 +1,7 @@
#!/bin/sh -e
# converts markdown to HTML
#
# Usage: markdown2html [input-files]
# With no argument iconv reads STDIN.

[ -n "$(which pandoc)" ] || {
    echo >&2 "You need 'pandoc' to use this program!"
    exit 1
}

# "$@" passes every argument as exactly one word, so file names that
# contain blanks or glob characters reach iconv intact.
iconv -t utf-8 "$@" | pandoc

7
markdown2latex Normal file
View file

@ -0,0 +1,7 @@
#!/bin/sh -e
# converts markdown to latex
#
# Usage: markdown2latex [input-files]
# With no argument iconv reads STDIN.

[ -n "$(which pandoc)" ] || {
    echo >&2 "You need 'pandoc' to use this program!"
    exit 1
}

# "$@" passes every argument as exactly one word, so file names that
# contain blanks or glob characters reach iconv intact.
iconv -t utf-8 "$@" | pandoc -w latex -s

44
markdown2pdf Normal file
View file

@ -0,0 +1,44 @@
#!/bin/sh -e
# converts markdown to latex, then uses latex to make a PDF
#
# Usage: markdown2pdf [input-file]
# With no argument the markdown is read from STDIN and the result is
# written to stdin.pdf in the current directory.

[ -n "$(which pandoc)" ] || {
    echo >&2 "You need 'pandoc' to use this program!"
    exit 1
}

[ -n "$(which pdflatex)" ] || {
    echo >&2 "You need 'pdflatex' to use this program!"
    exit 1
}

TEMP=${TMPDIR-/tmp}/markdown2pdf.$$
# Single quotes are essential: $? and $status must be expanded when the trap
# fires, not when it is installed.
trap 'status=$?; rm -rf "$TEMP"; exit $status' 0 INT

if [ -z "$1" ]; then
    BASE='stdin'   # input is STDIN, since no argument given
else
    filename=${1##*/}       # strip the directory part
    BASE=${filename%\.*}    # strip the last extension
fi

# Plain mkdir (no -p) fails when the path already exists, so a directory or
# symlink planted under this predictable name is never reused; umask 077 keeps
# the directory private.  Run as its own command so that -e aborts on failure.
(umask 077 && mkdir "$TEMP")
iconv -t utf-8 "$@" | pandoc -w latex -s > "$TEMP/$BASE.tex"

# Run pdflatex inside the scratch directory; the subshell keeps the cd local.
(
    cd "$TEMP"
    if ! pdflatex -interaction=batchmode "$BASE.tex" >/dev/null 2>&1; then
        echo >&2 "LaTeX errors:"
        cat >&2 "$BASE.log"
        exit 1
    fi
) || exit $?

# Remember whether a PDF of that name is about to be replaced, so the
# final message can mention the backup copy.
is_target_exists=
if [ -f "$BASE.pdf" ]; then
    is_target_exists=1
fi

cp --suffix='~' --backup "$TEMP/$BASE.pdf" .

echo -n >&2 "Created $BASE.pdf"
[ -z "$is_target_exists" ] || {
    echo -n >&2 " (previous file has been backed up as '$BASE.pdf~')"
}
echo >&2 .

945
src/ASCIIMathML.js Normal file
View file

@ -0,0 +1,945 @@
/*
ASCIIMathML.js
==============
This file contains JavaScript functions to convert ASCII math notation
to Presentation MathML. The conversion is done while the (X)HTML page
loads, and should work with Firefox/Mozilla/Netscape 7+ and Internet
Explorer 6+MathPlayer (http://www.dessci.com/en/products/mathplayer/).
Just add the next line to your (X)HTML page with this file in the same folder:
<script type="text/javascript" src="ASCIIMathML.js"></script>
This is a convenient and inexpensive solution for authoring MathML.
Version 1.4.7 Dec 15, 2005, (c) Peter Jipsen http://www.chapman.edu/~jipsen
Latest version at http://www.chapman.edu/~jipsen/mathml/ASCIIMathML.js
For changes see http://www.chapman.edu/~jipsen/mathml/asciimathchanges.txt
If you use it on a webpage, please send the URL to jipsen@chapman.edu
Modified July 2006 by John MacFarlane (added CODE to list of contexts
in which replacement does not occur, modified AMisMathMLAvailable
to better identify Safari browser).
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License (at http://www.gnu.org/copyleft/gpl.html)
for more details.
*/
// User-adjustable settings, followed by the browser capability probe.
// These are plain globals; the rest of the script reads them by name.
var checkForMathML = true; // check if browser can display MathML
var notifyIfNoMathML = true; // display note if no MathML capability
var alertIfNoMathML = false; // show alert box if no MathML capability
var mathcolor = "red"; // change it to "" (to inherit) or any other color
var mathfontfamily = "serif"; // change to "" to inherit (works in IE)
// or another family (e.g. "arial")
var displaystyle = true; // puts limits above and below large operators
var showasciiformulaonhover = true; // helps students learn ASCIIMath
var decimalsign = "."; // change to "," if you like, beware of `(1,2)`!
var AMdelimiter1 = "`", AMescape1 = "\\\\`"; // can use other characters
var AMdelimiter2 = "$", AMescape2 = "\\\\\\$", AMdelimiter2regexp = "\\$";
var doubleblankmathdelimiter = false; // if true, x+1 is equal to `x+1`
// for IE this works only in <!-- -->
//var separatetokens;// has been removed (email me if this is a problem)
// Any browser without document.createElementNS is treated as Internet
// Explorer; the element factories below branch on this flag.
var isIE = document.createElementNS==null;
// Warn (but carry on) when even document.getElementById is unavailable.
if (document.getElementById==null)
alert("This webpage requires a recent browser such as\
\nMozilla/Netscape 7+ or Internet Explorer 6+MathPlayer")
// all further global variables start with "AM"
// Create an (X)HTML element with tag name t: a plain createElement when the
// isIE flag is set, otherwise a namespaced element in the XHTML namespace.
function AMcreateElementXHTML(t) {
  return isIE
    ? document.createElement(t)
    : document.createElementNS("http://www.w3.org/1999/xhtml",t);
}
// Build and return a centered <h3> notice telling the reader which browsers
// can render the page's math. The heading contains, in order: an empty <p>,
// text, a link to the ASCIIMathML page, text, a link to the MathPlayer
// download, text, and a closing empty <p>.
function AMnoMathMLNote() {
  // Anchor element showing `label` and pointing at `url`.
  function makeLink(label, url) {
    var anchor = AMcreateElementXHTML("a");
    anchor.appendChild(document.createTextNode(label));
    anchor.setAttribute("href", url);
    return anchor;
  }
  var note = AMcreateElementXHTML("h3");
  note.setAttribute("align","center");
  note.appendChild(AMcreateElementXHTML("p"));
  note.appendChild(document.createTextNode("To view the "));
  note.appendChild(makeLink("ASCIIMathML",
    "http://www.chapman.edu/~jipsen/asciimath.html"));
  note.appendChild(document.createTextNode(" notation use Internet Explorer 6+"));
  note.appendChild(makeLink("MathPlayer",
    "http://www.dessci.com/en/products/mathplayer/download.htm"));
  note.appendChild(document.createTextNode(" or Netscape/Mozilla/Firefox"));
  note.appendChild(AMcreateElementXHTML("p"));
  return note;
}
// Decide whether the browser is expected to render MathML.
// Returns null when it is, otherwise the notice node from AMnoMathMLNote().
//  - "Netscape" family: accepted when appVersion starts with "5" or higher
//    and the user agent does not mention KHTML (JM's Safari detection).
//  - "Microsoft" family: accepted when the MathPlayer ActiveX object can be
//    instantiated.
//  - anything else: rejected.
function AMisMathMLavailable() {
  var browser = navigator.appName;
  if (browser.slice(0,8)=="Netscape") {
    if (navigator.appVersion.slice(0,1)>="5" && !/KHTML/.test(navigator.userAgent)) {
      return null;
    }
    return AMnoMathMLNote();
  }
  if (browser.slice(0,9)=="Microsoft") {
    try {
      // Only the success or failure of the construction matters.
      new ActiveXObject("MathPlayer.Factory.1");
      return null;
    } catch (e) {
      return AMnoMathMLNote();
    }
  }
  return AMnoMathMLNote();
}
// character lists for Mozilla/Netscape fonts
var AMcal = [0xEF35,0x212C,0xEF36,0xEF37,0x2130,0x2131,0xEF38,0x210B,0x2110,0xEF39,0xEF3A,0x2112,0x2133,0xEF3B,0xEF3C,0xEF3D,0xEF3E,0x211B,0xEF3F,0xEF40,0xEF41,0xEF42,0xEF43,0xEF44,0xEF45,0xEF46];
var AMfrk = [0xEF5D,0xEF5E,0x212D,0xEF5F,0xEF60,0xEF61,0xEF62,0x210C,0x2111,0xEF63,0xEF64,0xEF65,0xEF66,0xEF67,0xEF68,0xEF69,0xEF6A,0x211C,0xEF6B,0xEF6C,0xEF6D,0xEF6E,0xEF6F,0xEF70,0xEF71,0x2128];
var AMbbb = [0xEF8C,0xEF8D,0x2102,0xEF8E,0xEF8F,0xEF90,0xEF91,0x210D,0xEF92,0xEF93,0xEF94,0xEF95,0xEF96,0x2115,0xEF97,0x2119,0x211A,0x211D,0xEF98,0xEF99,0xEF9A,0xEF9B,0xEF9C,0xEF9D,0xEF9E,0x2124];
// Token type codes. Every symbol record carries one of these in its ttype
// field, and the parser switches on that field.
var CONST = 0, UNARY = 1, BINARY = 2, INFIX = 3, LEFTBRACKET = 4,
RIGHTBRACKET = 5, SPACE = 6, UNDEROVER = 7, DEFINITION = 8,
LEFTRIGHT = 9, TEXT = 10; // token types
// Symbol records that are given names so they can be referenced directly as
// well as listed in AMsymbols. Record fields:
//   input  - the ASCII spelling matched in the source text
//   tag    - name of the MathML element created for the token
//   output - text placed inside that element
//   tex    - alternative input spelling registered by AMinitSymbols, or null
//   ttype  - one of the token type codes above
var AMsqrt = {input:"sqrt", tag:"msqrt", output:"sqrt", tex:null, ttype:UNARY},
AMroot = {input:"root", tag:"mroot", output:"root", tex:null, ttype:BINARY},
AMfrac = {input:"frac", tag:"mfrac", output:"/", tex:null, ttype:BINARY},
AMdiv = {input:"/", tag:"mfrac", output:"/", tex:null, ttype:INFIX},
AMover = {input:"stackrel", tag:"mover", output:"stackrel", tex:null, ttype:BINARY},
AMsub = {input:"_", tag:"msub", output:"_", tex:null, ttype:INFIX},
AMsup = {input:"^", tag:"msup", output:"^", tex:null, ttype:INFIX},
AMtext = {input:"text", tag:"mtext", output:"text", tex:null, ttype:TEXT},
AMmbox = {input:"mbox", tag:"mtext", output:"mbox", tex:null, ttype:TEXT},
AMquote = {input:"\"", tag:"mtext", output:"mbox", tex:null, ttype:TEXT};
var AMsymbols = [
//some greek symbols
{input:"alpha", tag:"mi", output:"\u03B1", tex:null, ttype:CONST},
{input:"beta", tag:"mi", output:"\u03B2", tex:null, ttype:CONST},
{input:"chi", tag:"mi", output:"\u03C7", tex:null, ttype:CONST},
{input:"delta", tag:"mi", output:"\u03B4", tex:null, ttype:CONST},
{input:"Delta", tag:"mo", output:"\u0394", tex:null, ttype:CONST},
{input:"epsi", tag:"mi", output:"\u03B5", tex:"epsilon", ttype:CONST},
{input:"varepsilon", tag:"mi", output:"\u025B", tex:null, ttype:CONST},
{input:"eta", tag:"mi", output:"\u03B7", tex:null, ttype:CONST},
{input:"gamma", tag:"mi", output:"\u03B3", tex:null, ttype:CONST},
{input:"Gamma", tag:"mo", output:"\u0393", tex:null, ttype:CONST},
{input:"iota", tag:"mi", output:"\u03B9", tex:null, ttype:CONST},
{input:"kappa", tag:"mi", output:"\u03BA", tex:null, ttype:CONST},
{input:"lambda", tag:"mi", output:"\u03BB", tex:null, ttype:CONST},
{input:"Lambda", tag:"mo", output:"\u039B", tex:null, ttype:CONST},
{input:"mu", tag:"mi", output:"\u03BC", tex:null, ttype:CONST},
{input:"nu", tag:"mi", output:"\u03BD", tex:null, ttype:CONST},
{input:"omega", tag:"mi", output:"\u03C9", tex:null, ttype:CONST},
{input:"Omega", tag:"mo", output:"\u03A9", tex:null, ttype:CONST},
{input:"phi", tag:"mi", output:"\u03C6", tex:null, ttype:CONST},
{input:"varphi", tag:"mi", output:"\u03D5", tex:null, ttype:CONST},
{input:"Phi", tag:"mo", output:"\u03A6", tex:null, ttype:CONST},
{input:"pi", tag:"mi", output:"\u03C0", tex:null, ttype:CONST},
{input:"Pi", tag:"mo", output:"\u03A0", tex:null, ttype:CONST},
{input:"psi", tag:"mi", output:"\u03C8", tex:null, ttype:CONST},
{input:"Psi", tag:"mi", output:"\u03A8", tex:null, ttype:CONST},
{input:"rho", tag:"mi", output:"\u03C1", tex:null, ttype:CONST},
{input:"sigma", tag:"mi", output:"\u03C3", tex:null, ttype:CONST},
{input:"Sigma", tag:"mo", output:"\u03A3", tex:null, ttype:CONST},
{input:"tau", tag:"mi", output:"\u03C4", tex:null, ttype:CONST},
{input:"theta", tag:"mi", output:"\u03B8", tex:null, ttype:CONST},
{input:"vartheta", tag:"mi", output:"\u03D1", tex:null, ttype:CONST},
{input:"Theta", tag:"mo", output:"\u0398", tex:null, ttype:CONST},
{input:"upsilon", tag:"mi", output:"\u03C5", tex:null, ttype:CONST},
{input:"xi", tag:"mi", output:"\u03BE", tex:null, ttype:CONST},
{input:"Xi", tag:"mo", output:"\u039E", tex:null, ttype:CONST},
{input:"zeta", tag:"mi", output:"\u03B6", tex:null, ttype:CONST},
//binary operation symbols
{input:"*", tag:"mo", output:"\u22C5", tex:"cdot", ttype:CONST},
{input:"**", tag:"mo", output:"\u22C6", tex:"star", ttype:CONST},
{input:"//", tag:"mo", output:"/", tex:null, ttype:CONST},
{input:"\\\\", tag:"mo", output:"\\", tex:"backslash", ttype:CONST},
{input:"setminus", tag:"mo", output:"\\", tex:null, ttype:CONST},
{input:"xx", tag:"mo", output:"\u00D7", tex:"times", ttype:CONST},
{input:"-:", tag:"mo", output:"\u00F7", tex:"divide", ttype:CONST},
{input:"@", tag:"mo", output:"\u2218", tex:"circ", ttype:CONST},
{input:"o+", tag:"mo", output:"\u2295", tex:"oplus", ttype:CONST},
{input:"ox", tag:"mo", output:"\u2297", tex:"otimes", ttype:CONST},
{input:"o.", tag:"mo", output:"\u2299", tex:"odot", ttype:CONST},
{input:"sum", tag:"mo", output:"\u2211", tex:null, ttype:UNDEROVER},
{input:"prod", tag:"mo", output:"\u220F", tex:null, ttype:UNDEROVER},
{input:"^^", tag:"mo", output:"\u2227", tex:"wedge", ttype:CONST},
{input:"^^^", tag:"mo", output:"\u22C0", tex:"bigwedge", ttype:UNDEROVER},
{input:"vv", tag:"mo", output:"\u2228", tex:"vee", ttype:CONST},
{input:"vvv", tag:"mo", output:"\u22C1", tex:"bigvee", ttype:UNDEROVER},
{input:"nn", tag:"mo", output:"\u2229", tex:"cap", ttype:CONST},
{input:"nnn", tag:"mo", output:"\u22C2", tex:"bigcap", ttype:UNDEROVER},
{input:"uu", tag:"mo", output:"\u222A", tex:"cup", ttype:CONST},
{input:"uuu", tag:"mo", output:"\u22C3", tex:"bigcup", ttype:UNDEROVER},
//binary relation symbols
{input:"!=", tag:"mo", output:"\u2260", tex:"ne", ttype:CONST},
{input:":=", tag:"mo", output:":=", tex:null, ttype:CONST},
{input:"lt", tag:"mo", output:"<", tex:null, ttype:CONST},
{input:"<=", tag:"mo", output:"\u2264", tex:"le", ttype:CONST},
{input:"lt=", tag:"mo", output:"\u2264", tex:"leq", ttype:CONST},
{input:">=", tag:"mo", output:"\u2265", tex:"ge", ttype:CONST},
{input:"geq", tag:"mo", output:"\u2265", tex:null, ttype:CONST},
{input:"-<", tag:"mo", output:"\u227A", tex:"prec", ttype:CONST},
{input:"-lt", tag:"mo", output:"\u227A", tex:null, ttype:CONST},
{input:">-", tag:"mo", output:"\u227B", tex:"succ", ttype:CONST},
{input:"-<=", tag:"mo", output:"\u2AAF", tex:"preceq", ttype:CONST},
{input:">-=", tag:"mo", output:"\u2AB0", tex:"succeq", ttype:CONST},
{input:"in", tag:"mo", output:"\u2208", tex:null, ttype:CONST},
{input:"!in", tag:"mo", output:"\u2209", tex:"notin", ttype:CONST},
{input:"sub", tag:"mo", output:"\u2282", tex:"subset", ttype:CONST},
{input:"sup", tag:"mo", output:"\u2283", tex:"supset", ttype:CONST},
{input:"sube", tag:"mo", output:"\u2286", tex:"subseteq", ttype:CONST},
{input:"supe", tag:"mo", output:"\u2287", tex:"supseteq", ttype:CONST},
{input:"-=", tag:"mo", output:"\u2261", tex:"equiv", ttype:CONST},
{input:"~=", tag:"mo", output:"\u2245", tex:"cong", ttype:CONST},
{input:"~~", tag:"mo", output:"\u2248", tex:"approx", ttype:CONST},
{input:"prop", tag:"mo", output:"\u221D", tex:"propto", ttype:CONST},
//logical symbols
{input:"and", tag:"mtext", output:"and", tex:null, ttype:SPACE},
{input:"or", tag:"mtext", output:"or", tex:null, ttype:SPACE},
{input:"not", tag:"mo", output:"\u00AC", tex:"neg", ttype:CONST},
{input:"=>", tag:"mo", output:"\u21D2", tex:"implies", ttype:CONST},
{input:"if", tag:"mo", output:"if", tex:null, ttype:SPACE},
{input:"<=>", tag:"mo", output:"\u21D4", tex:"iff", ttype:CONST},
{input:"AA", tag:"mo", output:"\u2200", tex:"forall", ttype:CONST},
{input:"EE", tag:"mo", output:"\u2203", tex:"exists", ttype:CONST},
{input:"_|_", tag:"mo", output:"\u22A5", tex:"bot", ttype:CONST},
{input:"TT", tag:"mo", output:"\u22A4", tex:"top", ttype:CONST},
{input:"|--", tag:"mo", output:"\u22A2", tex:"vdash", ttype:CONST},
{input:"|==", tag:"mo", output:"\u22A8", tex:"models", ttype:CONST},
//grouping brackets
{input:"(", tag:"mo", output:"(", tex:null, ttype:LEFTBRACKET},
{input:")", tag:"mo", output:")", tex:null, ttype:RIGHTBRACKET},
{input:"[", tag:"mo", output:"[", tex:null, ttype:LEFTBRACKET},
{input:"]", tag:"mo", output:"]", tex:null, ttype:RIGHTBRACKET},
{input:"{", tag:"mo", output:"{", tex:null, ttype:LEFTBRACKET},
{input:"}", tag:"mo", output:"}", tex:null, ttype:RIGHTBRACKET},
{input:"|", tag:"mo", output:"|", tex:null, ttype:LEFTRIGHT},
//{input:"||", tag:"mo", output:"||", tex:null, ttype:LEFTRIGHT},
{input:"(:", tag:"mo", output:"\u2329", tex:"langle", ttype:LEFTBRACKET},
{input:":)", tag:"mo", output:"\u232A", tex:"rangle", ttype:RIGHTBRACKET},
{input:"<<", tag:"mo", output:"\u2329", tex:null, ttype:LEFTBRACKET},
{input:">>", tag:"mo", output:"\u232A", tex:null, ttype:RIGHTBRACKET},
{input:"{:", tag:"mo", output:"{:", tex:null, ttype:LEFTBRACKET, invisible:true},
{input:":}", tag:"mo", output:":}", tex:null, ttype:RIGHTBRACKET, invisible:true},
//miscellaneous symbols
{input:"int", tag:"mo", output:"\u222B", tex:null, ttype:CONST},
{input:"dx", tag:"mi", output:"{:d x:}", tex:null, ttype:DEFINITION},
{input:"dy", tag:"mi", output:"{:d y:}", tex:null, ttype:DEFINITION},
{input:"dz", tag:"mi", output:"{:d z:}", tex:null, ttype:DEFINITION},
{input:"dt", tag:"mi", output:"{:d t:}", tex:null, ttype:DEFINITION},
{input:"oint", tag:"mo", output:"\u222E", tex:null, ttype:CONST},
{input:"del", tag:"mo", output:"\u2202", tex:"partial", ttype:CONST},
{input:"grad", tag:"mo", output:"\u2207", tex:"nabla", ttype:CONST},
{input:"+-", tag:"mo", output:"\u00B1", tex:"pm", ttype:CONST},
{input:"O/", tag:"mo", output:"\u2205", tex:"emptyset", ttype:CONST},
{input:"oo", tag:"mo", output:"\u221E", tex:"infty", ttype:CONST},
{input:"aleph", tag:"mo", output:"\u2135", tex:null, ttype:CONST},
{input:"...", tag:"mo", output:"...", tex:"ldots", ttype:CONST},
{input:":.", tag:"mo", output:"\u2234", tex:"therefore", ttype:CONST},
{input:"/_", tag:"mo", output:"\u2220", tex:"angle", ttype:CONST},
{input:"\\ ", tag:"mo", output:"\u00A0", tex:null, ttype:CONST},
{input:"quad", tag:"mo", output:"\u00A0\u00A0", tex:null, ttype:CONST},
{input:"qquad", tag:"mo", output:"\u00A0\u00A0\u00A0\u00A0", tex:null, ttype:CONST},
{input:"cdots", tag:"mo", output:"\u22EF", tex:null, ttype:CONST},
{input:"vdots", tag:"mo", output:"\u22EE", tex:null, ttype:CONST},
{input:"ddots", tag:"mo", output:"\u22F1", tex:null, ttype:CONST},
{input:"diamond", tag:"mo", output:"\u22C4", tex:null, ttype:CONST},
{input:"square", tag:"mo", output:"\u25A1", tex:null, ttype:CONST},
{input:"|__", tag:"mo", output:"\u230A", tex:"lfloor", ttype:CONST},
{input:"__|", tag:"mo", output:"\u230B", tex:"rfloor", ttype:CONST},
{input:"|~", tag:"mo", output:"\u2308", tex:"lceiling", ttype:CONST},
{input:"~|", tag:"mo", output:"\u2309", tex:"rceiling", ttype:CONST},
{input:"CC", tag:"mo", output:"\u2102", tex:null, ttype:CONST},
{input:"NN", tag:"mo", output:"\u2115", tex:null, ttype:CONST},
{input:"QQ", tag:"mo", output:"\u211A", tex:null, ttype:CONST},
{input:"RR", tag:"mo", output:"\u211D", tex:null, ttype:CONST},
{input:"ZZ", tag:"mo", output:"\u2124", tex:null, ttype:CONST},
{input:"f", tag:"mi", output:"f", tex:null, ttype:UNARY, func:true},
{input:"g", tag:"mi", output:"g", tex:null, ttype:UNARY, func:true},
//standard functions
{input:"lim", tag:"mo", output:"lim", tex:null, ttype:UNDEROVER},
{input:"Lim", tag:"mo", output:"Lim", tex:null, ttype:UNDEROVER},
{input:"sin", tag:"mo", output:"sin", tex:null, ttype:UNARY, func:true},
{input:"cos", tag:"mo", output:"cos", tex:null, ttype:UNARY, func:true},
{input:"tan", tag:"mo", output:"tan", tex:null, ttype:UNARY, func:true},
{input:"sinh", tag:"mo", output:"sinh", tex:null, ttype:UNARY, func:true},
{input:"cosh", tag:"mo", output:"cosh", tex:null, ttype:UNARY, func:true},
{input:"tanh", tag:"mo", output:"tanh", tex:null, ttype:UNARY, func:true},
{input:"cot", tag:"mo", output:"cot", tex:null, ttype:UNARY, func:true},
{input:"sec", tag:"mo", output:"sec", tex:null, ttype:UNARY, func:true},
{input:"csc", tag:"mo", output:"csc", tex:null, ttype:UNARY, func:true},
{input:"log", tag:"mo", output:"log", tex:null, ttype:UNARY, func:true},
{input:"ln", tag:"mo", output:"ln", tex:null, ttype:UNARY, func:true},
{input:"det", tag:"mo", output:"det", tex:null, ttype:UNARY, func:true},
{input:"dim", tag:"mo", output:"dim", tex:null, ttype:CONST},
{input:"mod", tag:"mo", output:"mod", tex:null, ttype:CONST},
{input:"gcd", tag:"mo", output:"gcd", tex:null, ttype:UNARY, func:true},
{input:"lcm", tag:"mo", output:"lcm", tex:null, ttype:UNARY, func:true},
{input:"lub", tag:"mo", output:"lub", tex:null, ttype:CONST},
{input:"glb", tag:"mo", output:"glb", tex:null, ttype:CONST},
{input:"min", tag:"mo", output:"min", tex:null, ttype:UNDEROVER},
{input:"max", tag:"mo", output:"max", tex:null, ttype:UNDEROVER},
//arrows
{input:"uarr", tag:"mo", output:"\u2191", tex:"uparrow", ttype:CONST},
{input:"darr", tag:"mo", output:"\u2193", tex:"downarrow", ttype:CONST},
{input:"rarr", tag:"mo", output:"\u2192", tex:"rightarrow", ttype:CONST},
{input:"->", tag:"mo", output:"\u2192", tex:"to", ttype:CONST},
{input:"|->", tag:"mo", output:"\u21A6", tex:"mapsto", ttype:CONST},
{input:"larr", tag:"mo", output:"\u2190", tex:"leftarrow", ttype:CONST},
{input:"harr", tag:"mo", output:"\u2194", tex:"leftrightarrow", ttype:CONST},
{input:"rArr", tag:"mo", output:"\u21D2", tex:"Rightarrow", ttype:CONST},
{input:"lArr", tag:"mo", output:"\u21D0", tex:"Leftarrow", ttype:CONST},
{input:"hArr", tag:"mo", output:"\u21D4", tex:"Leftrightarrow", ttype:CONST},
//commands with argument
AMsqrt, AMroot, AMfrac, AMdiv, AMover, AMsub, AMsup,
{input:"hat", tag:"mover", output:"\u005E", tex:null, ttype:UNARY, acc:true},
{input:"bar", tag:"mover", output:"\u00AF", tex:"overline", ttype:UNARY, acc:true},
{input:"vec", tag:"mover", output:"\u2192", tex:null, ttype:UNARY, acc:true},
{input:"dot", tag:"mover", output:".", tex:null, ttype:UNARY, acc:true},
{input:"ddot", tag:"mover", output:"..", tex:null, ttype:UNARY, acc:true},
{input:"ul", tag:"munder", output:"\u0332", tex:"underline", ttype:UNARY, acc:true},
AMtext, AMmbox, AMquote,
{input:"bb", tag:"mstyle", atname:"fontweight", atval:"bold", output:"bb", tex:null, ttype:UNARY},
{input:"mathbf", tag:"mstyle", atname:"fontweight", atval:"bold", output:"mathbf", tex:null, ttype:UNARY},
{input:"sf", tag:"mstyle", atname:"fontfamily", atval:"sans-serif", output:"sf", tex:null, ttype:UNARY},
{input:"mathsf", tag:"mstyle", atname:"fontfamily", atval:"sans-serif", output:"mathsf", tex:null, ttype:UNARY},
{input:"bbb", tag:"mstyle", atname:"mathvariant", atval:"double-struck", output:"bbb", tex:null, ttype:UNARY, codes:AMbbb},
{input:"mathbb", tag:"mstyle", atname:"mathvariant", atval:"double-struck", output:"mathbb", tex:null, ttype:UNARY, codes:AMbbb},
{input:"cc", tag:"mstyle", atname:"mathvariant", atval:"script", output:"cc", tex:null, ttype:UNARY, codes:AMcal},
{input:"mathcal", tag:"mstyle", atname:"mathvariant", atval:"script", output:"mathcal", tex:null, ttype:UNARY, codes:AMcal},
{input:"tt", tag:"mstyle", atname:"fontfamily", atval:"monospace", output:"tt", tex:null, ttype:UNARY},
{input:"mathtt", tag:"mstyle", atname:"fontfamily", atval:"monospace", output:"mathtt", tex:null, ttype:UNARY},
{input:"fr", tag:"mstyle", atname:"mathvariant", atval:"fraktur", output:"fr", tex:null, ttype:UNARY, codes:AMfrk},
{input:"mathfrak", tag:"mstyle", atname:"mathvariant", atval:"fraktur", output:"mathfrak", tex:null, ttype:UNARY, codes:AMfrk}
];
// Comparator for Array.prototype.sort that orders symbol records by their
// `input` string.
// Returns 1 when s1 sorts after s2, -1 when it sorts before, and 0 when the
// two inputs are equal. The zero case is needed for a consistent comparator:
// without it compare(a,b) and compare(b,a) would both report "less than" for
// records that share an input.
function compareNames(s1,s2) {
  if (s1.input > s2.input) return 1;
  if (s1.input < s2.input) return -1;
  return 0;
}
var AMnames = []; //list of input symbols
// Prepare the symbol table for lookup:
//  1. for every record that has a `tex` name, add a second record whose
//     input is that name (only tag, output and ttype are copied across);
//  2. sort AMsymbols by input with compareNames;
//  3. fill AMnames so that AMnames[i] is the input of AMsymbols[i].
function AMinitSymbols() {
  var aliases = [], idx, sym;
  for (idx = 0; idx < AMsymbols.length; idx++) {
    sym = AMsymbols[idx];
    if (sym.tex) {
      aliases.push({input:sym.tex, tag:sym.tag, output:sym.output, ttype:sym.ttype});
    }
  }
  AMsymbols = AMsymbols.concat(aliases);
  AMsymbols.sort(compareNames);
  for (idx = 0; idx < AMsymbols.length; idx++) {
    AMnames[idx] = AMsymbols[idx].input;
  }
}
var AMmathml = "http://www.w3.org/1998/Math/MathML";
// Create a MathML element named t: an "m:"-prefixed element when the isIE
// flag is set, otherwise an element in the AMmathml namespace.
function AMcreateElementMathML(t) {
  return isIE
    ? document.createElement("m:"+t)
    : document.createElementNS(AMmathml,t);
}
// Create a MathML element named t and append frag (a node or document
// fragment) as its content. Returns the new element.
function AMcreateMmlNode(t,frag) {
  var wrapper = AMcreateElementMathML(t);
  wrapper.appendChild(frag);
  return wrapper;
}
// Register oldstr as a DEFINITION token whose replacement text is newstr.
// Only AMsymbols is extended here; AMnames is not updated and nothing is
// re-sorted.
// NOTE(review): presumably meant to be called before AMinitSymbols runs, so
// that the new record is sorted and indexed with the rest; confirm.
function newcommand(oldstr,newstr) {
  var definition = {input:oldstr, tag:"mo", output:newstr,
                    tex:null, ttype:DEFINITION};
  AMsymbols = AMsymbols.concat([definition]);
}
// Drop the first n characters of str plus any blanks that follow, and return
// the remainder. "Blank" means any character with code <= 32.
// If the character at position n is a backslash that is not followed by
// another backslash or by a space, that backslash is dropped as well.
function AMremoveCharsAndBlanks(str,n) {
  var following = str.charAt(n+1);
  var dropBackslash = str.charAt(n)=="\\" && following!="\\" && following!=" ";
  var rest = str.slice(dropBackslash ? n+1 : n);
  var pos = 0;
  while (pos<rest.length && rest.charCodeAt(pos)<=32) pos++;
  return rest.slice(pos);
}
// Return the smallest index >= n at which str appears in arr, or at which it
// would have to be inserted to keep arr sorted. arr must already be sorted.
// A start of 0 uses binary search over the whole array; any other start
// scans forward linearly from n.
function AMposition(arr, str, n) {
  if (n!=0) {
    var idx = n;
    while (idx<arr.length && arr[idx]<str) idx++;
    return idx; // idx==arr.length || arr[idx]>=str
  }
  // Invariant: everything at or below lo is < str, everything from hi up is >= str.
  var lo = -1, hi = arr.length, mid;
  while (lo+1<hi) {
    mid = (lo+hi) >> 1;
    if (arr[mid]<str) lo = mid;
    else hi = mid;
  }
  return hi;
}
// Tokenizer: return the symbol record for the token at the start of str.
// Side effects: copies AMcurrentSymbol into AMpreviousSymbol, then sets
// AMcurrentSymbol to the token type of the token just read.
function AMgetSymbol(str) {
//return maximal initial substring of str that appears in names
//return null if there is none
// NOTE(review): no path visible in this function returns null; when no name
// matches, a number or single-character record is returned instead.
var k = 0; //new pos
var j = 0; //old pos
var mk; //match pos
var st;
var tagst;
var match = "";
var more = true;
// Grow the prefix one character at a time and look it up in the sorted
// AMnames list, restarting each search from the previous position.
for (var i=1; i<=str.length && more; i++) {
st = str.slice(0,i); //initial substring of length i
j = k;
k = AMposition(AMnames, st, j);
// AMnames[k] is a candidate; accept it if str really begins with it.
if (k<AMnames.length && str.slice(0,AMnames[k].length)==AMnames[k]){
match = AMnames[k];
mk = k;
i = match.length; // continue with prefixes longer than the match
}
// keep going while a later name could still be a prefix of str
more = k<AMnames.length && str.slice(0,AMnames[k].length)>=AMnames[k];
}
AMpreviousSymbol=AMcurrentSymbol;
if (match!=""){
// mk indexes AMsymbols too: AMinitSymbols builds AMnames in the same order
AMcurrentSymbol=AMsymbols[mk].ttype;
return AMsymbols[mk];
}
// if str[0] is a digit or - return maxsubstring of digits.digits
AMcurrentSymbol=CONST;
k = 1;
st = str.slice(0,1);
var integ = true;
// Skip leading digits; afterwards st is the first non-digit character and
// k-1 is the number of characters consumed so far.
while ("0"<=st && st<="9" && k<=str.length) {
st = str.slice(k,k+1);
k++;
}
// A decimal sign counts only when a digit follows it.
if (st == decimalsign) {
st = str.slice(k,k+1);
if ("0"<=st && st<="9") {
integ = false;
k++;
while ("0"<=st && st<="9" && k<=str.length) {
st = str.slice(k,k+1);
k++;
}
}
}
// integer with at least one digit, or a decimal number
if ((integ && k>1) || k>2) {
st = str.slice(0,k-1);
tagst = "mn";
} else {
k = 2;
st = str.slice(0,1); //take 1 character
// ASCII letters become identifiers (mi), everything else an operator (mo)
tagst = (("A">st || st>"Z") && ("a">st || st>"z")?"mo":"mi");
}
// A "-" directly after an INFIX token is returned as a UNARY record.
if (st=="-" && AMpreviousSymbol==INFIX) {
AMcurrentSymbol = INFIX; //trick "/" into recognizing "-" on second parse
return {input:st, tag:tagst, output:st, ttype:UNARY, func:true};
}
return {input:st, tag:tagst, output:st, ttype:CONST};
}
// Strip one outer bracket pair from an <mrow> in place.
// When node is an "mrow": its first child is removed if that child's text is
// "(", "[" or "{"; then its (possibly new) last child is removed if that
// child's text is ")", "]" or "}". Other nodes are left untouched.
function AMremoveBrackets(node) {
  if (node.nodeName!="mrow") return;
  var edge = node.firstChild;
  var txt = edge.firstChild.nodeValue;
  if (txt=="(" || txt=="[" || txt=="{") node.removeChild(edge);
  edge = node.lastChild;
  txt = edge.firstChild.nodeValue;
  if (txt==")" || txt=="]" || txt=="}") node.removeChild(edge);
}
/*Parsing ASCII math expressions with the following grammar
v ::= [A-Za-z] | greek letters | numbers | other constant symbols
u ::= sqrt | text | bb | other unary symbols for font commands
b ::= frac | root | stackrel binary symbols
l ::= ( | [ | { | (: | {: left brackets
r ::= ) | ] | } | :) | :} right brackets
S ::= v | lEr | uS | bSS Simple expression
I ::= S_S | S^S | S_S^S | S Intermediate expression
E ::= IE | I/I Expression
Each terminal symbol is translated into a corresponding mathml node.*/
// Parser state shared by the functions below: AMnestingDepth counts the
// bracket / "|" groups currently being parsed; AMpreviousSymbol and
// AMcurrentSymbol hold the types of the last two tokens seen by AMgetSymbol.
var AMnestingDepth,AMpreviousSymbol,AMcurrentSymbol;
function AMparseSexpr(str) { //parses str and returns [node,tailstr]
// Parse one simple expression (rule S of the grammar above) from the front of
// str. Returns a two-element array: the MathML node built for it (null when
// there is nothing to parse at this point) and the unparsed remainder of str.
// The shape of the result depends on the ttype of the leading symbol; see the
// individual cases.
var symbol, node, result, i, st,// rightvert = false,
newFrag = document.createDocumentFragment();
str = AMremoveCharsAndBlanks(str,0);
symbol = AMgetSymbol(str); //either a token or a bracket or empty
// A closing bracket inside an open group ends the simple expression.
if (symbol == null || symbol.ttype == RIGHTBRACKET && AMnestingDepth > 0) {
return [null,str];
}
// A definition is a textual macro: splice its output in front of the rest
// and tokenize again.
if (symbol.ttype == DEFINITION) {
str = symbol.output+AMremoveCharsAndBlanks(str,symbol.input.length);
symbol = AMgetSymbol(str);
}
switch (symbol.ttype) {
case UNDEROVER:
case CONST:
str = AMremoveCharsAndBlanks(str,symbol.input.length);
return [AMcreateMmlNode(symbol.tag, //its a constant
document.createTextNode(symbol.output)),str];
case LEFTBRACKET: //read (expr+)
AMnestingDepth++;
str = AMremoveCharsAndBlanks(str,symbol.input.length);
result = AMparseExpr(str,true);
AMnestingDepth--;
// invisible brackets only group; visible ones get their own <mo> first
if (typeof symbol.invisible == "boolean" && symbol.invisible)
node = AMcreateMmlNode("mrow",result[0]);
else {
node = AMcreateMmlNode("mo",document.createTextNode(symbol.output));
node = AMcreateMmlNode("mrow",node);
node.appendChild(result[0]);
}
return [node,result[1]];
case TEXT:
// Literal text: everything up to the matching closing delimiter.
if (symbol!=AMquote) str = AMremoveCharsAndBlanks(str,symbol.input.length);
if (str.charAt(0)=="{") i=str.indexOf("}");
else if (str.charAt(0)=="(") i=str.indexOf(")");
else if (str.charAt(0)=="[") i=str.indexOf("]");
else if (symbol==AMquote) i=str.slice(1).indexOf("\"")+1;
else i = 0;
// NOTE(review): for an unterminated quote the "+1" above turns indexOf's -1
// into 0, so this fallback only applies to the three bracket forms.
if (i==-1) i = str.length;
st = str.slice(1,i);
// leading/trailing blanks inside the text are rendered as explicit spaces
if (st.charAt(0) == " ") {
node = AMcreateElementMathML("mspace");
node.setAttribute("width","1ex");
newFrag.appendChild(node);
}
newFrag.appendChild(
AMcreateMmlNode(symbol.tag,document.createTextNode(st)));
if (st.charAt(st.length-1) == " ") {
node = AMcreateElementMathML("mspace");
node.setAttribute("width","1ex");
newFrag.appendChild(node);
}
str = AMremoveCharsAndBlanks(str,i+1);
return [AMcreateMmlNode("mrow",newFrag),str];
case UNARY:
str = AMremoveCharsAndBlanks(str,symbol.input.length);
result = AMparseSexpr(str);
// no argument available: emit the operator symbol on its own
if (result[0]==null) return [AMcreateMmlNode(symbol.tag,
document.createTextNode(symbol.output)),str];
if (typeof symbol.func == "boolean" && symbol.func) { // functions hack
st = str.charAt(0);
// when one of these characters follows directly, return the function name
// alone and leave the rest of str unparsed for the caller
if (st=="^" || st=="_" || st=="/" || st=="|" || st==",") {
return [AMcreateMmlNode(symbol.tag,
document.createTextNode(symbol.output)),str];
} else {
node = AMcreateMmlNode("mrow",
AMcreateMmlNode(symbol.tag,document.createTextNode(symbol.output)));
node.appendChild(result[0]);
return [node,result[1]];
}
}
AMremoveBrackets(result[0]);
if (symbol.input == "sqrt") { // sqrt
return [AMcreateMmlNode(symbol.tag,result[0]),result[1]];
} else if (typeof symbol.acc == "boolean" && symbol.acc) { // accent
node = AMcreateMmlNode(symbol.tag,result[0]);
node.appendChild(AMcreateMmlNode("mo",document.createTextNode(symbol.output)));
return [node,result[1]];
} else { // font change command
// Outside IE, when the symbol has a codes table, capital letters A-Z in
// <mi> content are replaced by the character codes from that table.
if (!isIE && typeof symbol.codes != "undefined") {
for (i=0; i<result[0].childNodes.length; i++)
if (result[0].childNodes[i].nodeName=="mi" || result[0].nodeName=="mi") {
st = (result[0].nodeName=="mi"?result[0].firstChild.nodeValue:
result[0].childNodes[i].firstChild.nodeValue);
var newst = [];
for (var j=0; j<st.length; j++)
if (st.charCodeAt(j)>64 && st.charCodeAt(j)<91) newst = newst +
String.fromCharCode(symbol.codes[st.charCodeAt(j)-65]);
else newst = newst + st.charAt(j);
// NOTE(review): appendChild returns the appended child, so the value used
// below is the new text node rather than the <mo> element -- confirm intended.
if (result[0].nodeName=="mi")
result[0]=AMcreateElementMathML("mo").
appendChild(document.createTextNode(newst));
else result[0].replaceChild(AMcreateElementMathML("mo").
appendChild(document.createTextNode(newst)),result[0].childNodes[i]);
}
}
node = AMcreateMmlNode(symbol.tag,result[0]);
node.setAttribute(symbol.atname,symbol.atval);
return [node,result[1]];
}
case BINARY:
str = AMremoveCharsAndBlanks(str,symbol.input.length);
result = AMparseSexpr(str);
// if either argument is missing, show the operator name as plain <mo>
if (result[0]==null) return [AMcreateMmlNode("mo",
document.createTextNode(symbol.input)),str];
AMremoveBrackets(result[0]);
var result2 = AMparseSexpr(result[1]);
if (result2[0]==null) return [AMcreateMmlNode("mo",
document.createTextNode(symbol.input)),str];
AMremoveBrackets(result2[0]);
// root and stackrel take their arguments in swapped order; frac keeps them
if (symbol.input=="root" || symbol.input=="stackrel")
newFrag.appendChild(result2[0]);
newFrag.appendChild(result[0]);
if (symbol.input=="frac") newFrag.appendChild(result2[0]);
return [AMcreateMmlNode(symbol.tag,newFrag),result2[1]];
case INFIX:
str = AMremoveCharsAndBlanks(str,symbol.input.length);
return [AMcreateMmlNode("mo",document.createTextNode(symbol.output)),str];
case SPACE:
// symbol surrounded by a 1ex space on each side
str = AMremoveCharsAndBlanks(str,symbol.input.length);
node = AMcreateElementMathML("mspace");
node.setAttribute("width","1ex");
newFrag.appendChild(node);
newFrag.appendChild(
AMcreateMmlNode(symbol.tag,document.createTextNode(symbol.output)));
node = AMcreateElementMathML("mspace");
node.setAttribute("width","1ex");
newFrag.appendChild(node);
return [AMcreateMmlNode("mrow",newFrag),str];
case LEFTRIGHT:
// if (rightvert) return [null,str]; else rightvert = true;
// Tentatively parse what follows as a |...| group and keep that parse only
// if it really ended on a closing "|".
AMnestingDepth++;
str = AMremoveCharsAndBlanks(str,symbol.input.length);
result = AMparseExpr(str,false);
AMnestingDepth--;
var st = "";
if (result[0].lastChild!=null)
st = result[0].lastChild.firstChild.nodeValue;
if (st == "|") { // its an absolute value subterm
node = AMcreateMmlNode("mo",document.createTextNode(symbol.output));
node = AMcreateMmlNode("mrow",node);
node.appendChild(result[0]);
return [node,result[1]];
} else { // the "|" is a \mid
// discard the tentative parse; only the bar itself is consumed
node = AMcreateMmlNode("mo",document.createTextNode(symbol.output));
node = AMcreateMmlNode("mrow",node);
return [node,str];
}
default:
//alert("default");
str = AMremoveCharsAndBlanks(str,symbol.input.length);
return [AMcreateMmlNode(symbol.tag, //its a constant
document.createTextNode(symbol.output)),str];
}
}
function AMparseIexpr(str) {
  // Parse one intermediate expression (rule I of the grammar): a simple
  // expression optionally followed by an infix operator other than "/" and
  // its argument, e.g. S_S, S^S or S_S^S.
  // Returns [node, tailstr]: the MathML node built and the unparsed rest.
  // A missing operand is rendered as a box (U+25A1). This now also applies to
  // the superscript of S_S^S, which previously caused a TypeError when the
  // input ended right after "^" inside a bracket, e.g. "(x_1^)".
  var symbol, sym1, sym2, node, result, res2, underover;
  str = AMremoveCharsAndBlanks(str,0);
  sym1 = AMgetSymbol(str); // leading symbol, needed for the UNDEROVER test below
  result = AMparseSexpr(str);
  node = result[0];
  str = result[1];
  symbol = AMgetSymbol(str);
  if (symbol.ttype == INFIX && symbol.input != "/") {
    str = AMremoveCharsAndBlanks(str,symbol.input.length);
    result = AMparseSexpr(str);
    if (result[0] == null) // show box in place of missing argument
      result[0] = AMcreateMmlNode("mo",document.createTextNode("\u25A1"));
    else AMremoveBrackets(result[0]);
    str = result[1];
    if (symbol.input == "_") {
      sym2 = AMgetSymbol(str);
      // symbols of type UNDEROVER take their limits below/above
      underover = (sym1.ttype == UNDEROVER);
      if (sym2.input == "^") {
        str = AMremoveCharsAndBlanks(str,sym2.input.length);
        res2 = AMparseSexpr(str);
        if (res2[0] == null) // show box in place of missing superscript
          res2[0] = AMcreateMmlNode("mo",document.createTextNode("\u25A1"));
        else AMremoveBrackets(res2[0]);
        str = res2[1];
        node = AMcreateMmlNode((underover?"munderover":"msubsup"),node);
        node.appendChild(result[0]);
        node.appendChild(res2[0]);
        node = AMcreateMmlNode("mrow",node); // so sum does not stretch
      } else {
        node = AMcreateMmlNode((underover?"munder":"msub"),node);
        node.appendChild(result[0]);
      }
    } else {
      // any other infix operator: the element name comes from the symbol
      node = AMcreateMmlNode(symbol.tag,node);
      node.appendChild(result[0]);
    }
  }
  return [node,str];
}
function AMparseExpr(str,rightbracket) {
// Parse a sequence of intermediate expressions (rule E of the grammar) and
// collect the resulting nodes in a document fragment.
//   str          - input text
//   rightbracket - passed as true by the LEFTBRACKET case of AMparseSexpr and
//                  as false by its LEFTRIGHT ("|") case and by AMparseMath;
//                  when false a LEFTRIGHT symbol ends the sequence
// Returns [fragment, tailstr]. If the sequence ended on a closing symbol, that
// symbol is consumed and (unless invisible) appended to the fragment; a
// comma-separated list of equally bracketed rows is first rewritten into an
// <mtable>.
var symbol, node, result, i, nodeList = [],
newFrag = document.createDocumentFragment();
do {
str = AMremoveCharsAndBlanks(str,0);
result = AMparseIexpr(str);
node = result[0];
str = result[1];
symbol = AMgetSymbol(str);
// "/" combines the expression just parsed with the next one
if (symbol.ttype == INFIX && symbol.input == "/") {
str = AMremoveCharsAndBlanks(str,symbol.input.length);
result = AMparseIexpr(str);
if (result[0] == null) // show box in place of missing argument
result[0] = AMcreateMmlNode("mo",document.createTextNode("\u25A1"));
else AMremoveBrackets(result[0]);
str = result[1];
AMremoveBrackets(node);
node = AMcreateMmlNode(symbol.tag,node);
node.appendChild(result[0]);
newFrag.appendChild(node);
symbol = AMgetSymbol(str);
}
else if (node!=undefined) newFrag.appendChild(node);
// Continue until a closing symbol is reached (at nesting depth 0 closing
// symbols do not stop the loop) or the input is exhausted (empty output).
} while ((symbol.ttype != RIGHTBRACKET &&
(symbol.ttype != LEFTRIGHT || rightbracket)
|| AMnestingDepth == 0) && symbol!=null && symbol.output!="");
if (symbol.ttype == RIGHTBRACKET || symbol.ttype == LEFTRIGHT) {
// if (AMnestingDepth > 0) AMnestingDepth--;
var len = newFrag.childNodes.length;
// Matrix candidate: the fragment ends in "<mo>,</mo><mrow>...</mrow>".
if (len>0 && newFrag.childNodes[len-1].nodeName == "mrow" && len>1 &&
newFrag.childNodes[len-2].nodeName == "mo" &&
newFrag.childNodes[len-2].firstChild.nodeValue == ",") { //matrix
var right = newFrag.childNodes[len-1].lastChild.firstChild.nodeValue;
if (right==")" || right=="]") {
var left = newFrag.childNodes[len-1].firstChild.firstChild.nodeValue;
if (left=="(" && right==")" && symbol.output != "}" ||
left=="[" && right=="]") {
var pos = []; // positions of commas
var matrix = true;
var m = newFrag.childNodes.length;
// Rows sit at even indices with commas between them. Every row must be an
// mrow using the same bracket pair and contain the same number of commas.
for (i=0; matrix && i<m; i=i+2) {
pos[i] = [];
node = newFrag.childNodes[i];
if (matrix) matrix = node.nodeName=="mrow" &&
(i==m-1 || node.nextSibling.nodeName=="mo" &&
node.nextSibling.firstChild.nodeValue==",")&&
node.firstChild.firstChild.nodeValue==left &&
node.lastChild.firstChild.nodeValue==right;
if (matrix)
for (var j=0; j<node.childNodes.length; j++)
if (node.childNodes[j].firstChild.nodeValue==",")
pos[i][pos[i].length]=j;
if (matrix && i>1) matrix = pos[i].length == pos[i-2].length;
}
if (matrix) {
var row, frag, n, k, table = document.createDocumentFragment();
// Consume the rows from the front of newFrag, turning each into an <mtr>
// whose cells (<mtd>) are split at the recorded comma positions.
for (i=0; i<m; i=i+2) {
row = document.createDocumentFragment();
frag = document.createDocumentFragment();
node = newFrag.firstChild; // <mrow>(-,-,...,-,-)</mrow>
n = node.childNodes.length;
k = 0;
node.removeChild(node.firstChild); //remove (
for (j=1; j<n-1; j++) {
if (typeof pos[i][k] != "undefined" && j==pos[i][k]){
node.removeChild(node.firstChild); //remove ,
row.appendChild(AMcreateMmlNode("mtd",frag));
k++;
} else frag.appendChild(node.firstChild);
}
row.appendChild(AMcreateMmlNode("mtd",frag));
if (newFrag.childNodes.length>2) {
newFrag.removeChild(newFrag.firstChild); //remove <mrow>)</mrow>
newFrag.removeChild(newFrag.firstChild); //remove <mo>,</mo>
}
table.appendChild(AMcreateMmlNode("mtr",row));
}
node = AMcreateMmlNode("mtable",table);
if (typeof symbol.invisible == "boolean" && symbol.invisible) node.setAttribute("columnalign","left");
// the last row's (now emptied) mrow is still in newFrag; swap it for the table
newFrag.replaceChild(node,newFrag.firstChild);
}
}
}
}
// consume the closing symbol and show it unless it is an invisible bracket
str = AMremoveCharsAndBlanks(str,symbol.input.length);
if (typeof symbol.invisible != "boolean" || !symbol.invisible) {
node = AMcreateMmlNode("mo",document.createTextNode(symbol.output));
newFrag.appendChild(node);
}
}
return [newFrag,str];
}
function AMparseMath(str) {
  // Convert one ASCIIMath formula into a DOM node: a <math> element wrapping
  // an <mstyle> that carries the configured colour, display style and font
  // family. When a font family is configured (and it is not plain "serif"
  // outside IE) the <math> element is returned inside an XHTML <font> element.
  var styleNode = AMcreateElementMathML("mstyle");
  if (mathcolor != "") {
    styleNode.setAttribute("mathcolor", mathcolor);
  }
  if (displaystyle) {
    styleNode.setAttribute("displaystyle", "true");
  }
  if (mathfontfamily != "") {
    styleNode.setAttribute("fontfamily", mathfontfamily);
  }

  // every formula starts parsing outside of any bracket
  AMnestingDepth = 0;
  var parsed = AMparseExpr(str.replace(/^\s+/g, ""), false);
  styleNode.appendChild(parsed[0]);

  var mathNode = AMcreateMmlNode("math", styleNode);
  if (showasciiformulaonhover) {
    // fixed by djhsu: collapse whitespace so a newline does not show in Gecko
    mathNode.setAttribute("title", str.replace(/\s+/g, " "));
  }

  if (mathfontfamily == "" || (!isIE && mathfontfamily == "serif")) {
    return mathNode;
  }
  var fontNode = AMcreateElementXHTML("font");
  fontNode.setAttribute("face", mathfontfamily);
  fontNode.appendChild(mathNode);
  return fontNode;
}
function AMstrarr2docFrag(arr, linebreaks) {
  // Build a document fragment from an array that alternates plain text
  // (even indices) and ASCIIMath formulas (odd indices). Formulas go through
  // AMparseMath; when linebreaks is set, plain text is additionally split at
  // blank lines ("\n\n") and a <p> element is inserted between the pieces.
  var frag = document.createDocumentFragment();
  var idx, p, pieces, textNode;
  for (idx = 0; idx < arr.length; idx++) {
    if (idx % 2 == 1) {
      frag.appendChild(AMparseMath(arr[idx]));
      continue;
    }
    pieces = linebreaks ? arr[idx].split("\n\n") : [arr[idx]];
    for (p = 0; p < pieces.length; p++) {
      if (p > 0) {
        frag.appendChild(AMcreateElementXHTML("p"));
      }
      // appendChild hands back the text node, so it is the text node (not
      // the span) that ends up in the fragment
      textNode = AMcreateElementXHTML("span").
                   appendChild(document.createTextNode(pieces[p]));
      frag.appendChild(textNode);
    }
  }
  return frag;
}
function AMprocessNodeR(n, linebreaks) {
// Recursively translate the ASCIIMath formulas found below node n.
// For a childless node whose parent is not a form, textarea, code or pre
// element (comment nodes, nodeType 8, only when linebreaks is set), the node
// value is split at the math delimiters and the node is replaced by a
// fragment of text and MathML. Returns the number of additional sibling
// nodes this created, so that the calling loop can step over them; returns 0
// in every other case.
var mtch, str, arr, frg, i;
if (n.childNodes.length == 0) {
if ((n.nodeType!=8 || linebreaks) &&
n.parentNode.nodeName!="form" && n.parentNode.nodeName!="FORM" &&
n.parentNode.nodeName!="textarea" && n.parentNode.nodeName!="TEXTAREA" &&
n.parentNode.nodeName!="code" && n.parentNode.nodeName!="CODE" && /* added by JM */
n.parentNode.nodeName!="pre" && n.parentNode.nodeName!="PRE") {
str = n.nodeValue;
if (!(str == null)) {
str = str.replace(/\r\n\r\n/g,"\n\n");
// optionally treat two consecutive blanks as a math delimiter
if (doubleblankmathdelimiter) {
str = str.replace(/\x20\x20\./g," "+AMdelimiter1+".");
str = str.replace(/\x20\x20,/g," "+AMdelimiter1+",");
str = str.replace(/\x20\x20/g," "+AMdelimiter1+" ");
}
str = str.replace(/\x20+/g," ");
str = str.replace(/\s*\r\n/g," ");
// Protect escaped delimiters with placeholder words so the split below does
// not see them; mtch records that at least one escape was found.
mtch = false;
str = str.replace(new RegExp(AMescape2, "g"),
function(st){mtch=true;return "AMescape2"});
str = str.replace(new RegExp(AMescape1, "g"),
function(st){mtch=true;return "AMescape1"});
// normalise the second delimiter to the first, then split on it
str = str.replace(new RegExp(AMdelimiter2regexp, "g"),AMdelimiter1);
arr = str.split(AMdelimiter1);
// turn the placeholders back into literal delimiter characters
for (i=0; i<arr.length; i++)
arr[i]=arr[i].replace(/AMescape2/g,AMdelimiter2).
replace(/AMescape1/g,AMdelimiter1);
if (arr.length>1 || mtch) {
// first formula encountered: check once whether MathML can be displayed
if (checkForMathML) {
checkForMathML = false;
var nd = AMisMathMLavailable();
AMnoMathML = nd != null;
if (AMnoMathML && notifyIfNoMathML)
if (alertIfNoMathML)
alert("To view the ASCIIMathML notation use Internet Explorer 6 +\nMathPlayer (free from www.dessci.com)\n\
or Firefox/Mozilla/Netscape");
else AMbody.insertBefore(nd,AMbody.childNodes[0]);
}
if (!AMnoMathML) {
frg = AMstrarr2docFrag(arr,n.nodeType==8);
var len = frg.childNodes.length;
n.parentNode.replaceChild(frg,n);
// one node was replaced by len nodes
return len-1;
} else return 0;
}
}
} else return 0;
} else if (n.nodeName!="math") {
// existing <math> elements are left alone; everything else is descended into
for (i=0; i<n.childNodes.length; i++)
i += AMprocessNodeR(n.childNodes[i], linebreaks);
}
return 0;
}
function AMprocessNode(n, linebreaks, spanclassAM) {
  // Translate the formulas below node n. When spanclassAM is given, only
  // <span class="AM"> elements of the document are processed; otherwise n is
  // processed if its markup contains a math delimiter (or if its innerHTML
  // cannot be read). In IE every <math> element is updated afterwards.
  var nodes, html, k;
  if (spanclassAM == null) {
    try {
      html = n.innerHTML;
    } catch(err) {}
    if (html == null ||
        html.indexOf(AMdelimiter1) != -1 || html.indexOf(AMdelimiter2) != -1) {
      AMprocessNodeR(n, linebreaks);
    }
  } else {
    nodes = document.getElementsByTagName("span");
    for (k = 0; k < nodes.length; k++) {
      if (nodes[k].className == "AM") {
        AMprocessNodeR(nodes[k], linebreaks);
      }
    }
  }
  if (isIE) { //needed to match size and font of formula to surrounding text
    nodes = document.getElementsByTagName('math');
    for (k = 0; k < nodes.length; k++) {
      nodes[k].update();
    }
  }
}
// AMbody: the document's <body> element, assigned in translate().
var AMbody;
// AMnoMathML: set by AMprocessNodeR when the MathML availability check fails.
// AMtranslated: guards translate() so the page is only processed once.
var AMnoMathML = false, AMtranslated = false;
function translate(spanclassAM) {
  // Entry point: initialise the symbol table and translate the whole page.
  // Only the first call does any work; later calls return immediately.
  if (AMtranslated) return;
  AMtranslated = true;
  AMinitSymbols();
  AMbody = document.getElementsByTagName("body")[0];
  AMprocessNode(AMbody, false, spanclassAM);
}
// In IE, write the MathPlayer <object> element and the import directive for
// the "m" namespace into the page while the script is being loaded.
if (isIE) { // avoid adding MathPlayer info explicitly to each webpage
document.write("<object id=\"mathplayer\"\
classid=\"clsid:32F66A20-7614-11D4-BD11-00104BD3F987\"></object>");
document.write("<?import namespace=\"m\" implementation=\"#mathplayer\"?>");
}
// GO1.1 Generic onload by Brothercake
// http://www.brothercake.com/
//onload function (replaces the onload="translate()" in the <body> tag)
// Called without arguments, so translate() processes the whole body rather
// than only <span class="AM"> elements.
function generic()
{
translate();
};
//setup onload function
// The registration mechanisms are tried in order; exactly one branch runs.
if(typeof window.addEventListener != 'undefined')
{
//.. gecko, safari, konqueror and standard
window.addEventListener('load', generic, false);
}
else if(typeof document.addEventListener != 'undefined')
{
//.. opera 7
document.addEventListener('load', generic, false);
}
else if(typeof window.attachEvent != 'undefined')
{
//.. win/ie
window.attachEvent('onload', generic);
}
//** remove this condition to degrade older browsers
else
{
//.. mac/ie5 and anything else that gets this far
//if there's an existing onload function
if(typeof window.onload == 'function')
{
//store it
var existing = onload;
//add new onload handler
// wrapper runs the previously installed handler first, then ours
window.onload = function()
{
//call existing onload function
existing();
//call generic onload function
generic();
};
}
else
{
//setup onload function
window.onload = generic;
}
}

298
src/Main.hs Normal file
View file

@ -0,0 +1,298 @@
-- | Main Pandoc program. Parses command-line options and calls the
-- appropriate readers and writers.
module Main where
import Text.Pandoc.UTF8 ( decodeUTF8, encodeUTF8 )
import Text.Pandoc.Readers.Markdown ( readMarkdown )
import Text.Pandoc.Readers.HTML ( readHtml )
import Text.Pandoc.Writers.S5 ( s5CSS, s5Javascript, writeS5 )
import Text.Pandoc.Writers.RST ( writeRST )
import Text.Pandoc.Readers.RST ( readRST )
import Text.Pandoc.ASCIIMathML ( asciiMathMLScript )
import Text.Pandoc.Writers.HTML ( writeHtml )
import Text.Pandoc.Writers.LaTeX ( writeLaTeX )
import Text.Pandoc.Readers.LaTeX ( readLaTeX )
import Text.Pandoc.Writers.RTF ( writeRTF )
import Text.Pandoc.Writers.Markdown ( writeMarkdown )
import Text.Pandoc.Writers.DefaultHeaders ( defaultHtmlHeader, defaultRTFHeader, defaultS5Header, defaultLaTeXHeader )
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import System ( exitWith, getArgs, getProgName )
import System.Exit
import System.Console.GetOpt
import IO ( stdout, stderr, hPutStrLn )
import Data.Maybe ( fromMaybe )
import Data.List ( isPrefixOf )
import Char ( toLower )
import Control.Monad ( (>>=) )
-- | Program version string, printed by the @--version@ option.
version :: String
version = "0.2"
-- | Lookup table from input format name to the reader function that parses
-- that format into a 'Pandoc' document.
readers :: [(String, ParserState -> String -> Pandoc)]
readers =
  [ ("native",   readPandoc)
  , ("markdown", readMarkdown)
  , ("rst",      readRST)
  , ("html",     readHtml)
  , ("latex",    readLaTeX)
  ]
-- | Reader for the native format: the input is parsed with the derived
-- 'Read' instance of 'Pandoc'. The parser state is not consulted.
readPandoc :: ParserState -> String -> Pandoc
readPandoc _ = read
-- | Lookup table from output format name to a pair consisting of the writer
-- function for that format and the default header used with it.
writers :: [ ( String, ( WriterOptions -> Pandoc -> String, String ) ) ]
writers =
  [ ("native",   (writeDoc,      ""))
  , ("html",     (writeHtml,     defaultHtmlHeader))
  , ("s5",       (writeS5,       defaultS5Header))
  , ("latex",    (writeLaTeX,    defaultLaTeXHeader))
  , ("markdown", (writeMarkdown, ""))
  , ("rst",      (writeRST,      ""))
  , ("rtf",      (writeRTF,      defaultRTFHeader))
  ]
-- | Writer for the native format: renders the document with 'prettyPandoc'.
-- The writer options are ignored.
writeDoc :: WriterOptions -> Pandoc -> String
writeDoc _ = prettyPandoc
-- | Data structure for command line options. A value of this type is threaded
-- through the actions in 'options', starting from 'startOpt'.
data Opt = Opt
{ optPreserveTabs :: Bool -- ^ If @False@, convert tabs to spaces
, optTabStop :: Int -- ^ Number of spaces per tab
, optStandalone :: Bool -- ^ If @True@, include header and footer
, optReader :: ParserState -> String -> Pandoc -- ^ Reader to use
, optWriter :: WriterOptions -> Pandoc -> String -- ^ Writer to use
, optParseRaw :: Bool -- ^ If @True@, parse unconvertible HTML and TeX
, optCSS :: String -- ^ CSS file to link to
, optIncludeInHeader :: String -- ^ Contents of file to include in header
, optIncludeBeforeBody :: String -- ^ Contents of file to include at beginning of body
, optIncludeAfterBody :: String -- ^ Contents of file to include at end of body
, optCustomHeader :: String -- ^ Custom header text to use, or the literal "DEFAULT"
, optDefaultHeader :: String -- ^ Default header of the selected writer
, optTitlePrefix :: String -- ^ Optional prefix for HTML title
, optNumberSections :: Bool -- ^ If @True@, number sections in LaTeX
, optIncremental :: Bool -- ^ If @True@, show lists incrementally in S5
, optSmartypants :: Bool -- ^ If @True@, use smart quotes, dashes, ...
, optASCIIMathML :: Bool -- ^ If @True@, use ASCIIMathML in HTML or S5
}
-- | Defaults for command-line options: read markdown, write an HTML fragment
-- (not standalone), tabs converted using a tab stop of 4, all extras off.
startOpt :: Opt
startOpt = Opt
{ optPreserveTabs = False
, optTabStop = 4
, optStandalone = False
, optReader = readMarkdown
, optWriter = writeHtml
, optParseRaw = False
, optCSS = ""
, optIncludeInHeader = ""
, optIncludeBeforeBody = ""
, optIncludeAfterBody = ""
-- "DEFAULT" is a sentinel: 'main' substitutes 'optDefaultHeader' for it
, optCustomHeader = "DEFAULT"
-- must correspond to 'optWriter'; the @--to@ option updates both together
, optDefaultHeader = defaultHtmlHeader
, optTitlePrefix = ""
, optNumberSections = False
, optIncremental = False
, optSmartypants = False
, optASCIIMathML = False
}
-- | Command-line option descriptors. Each option carries an action that
-- transforms the options data structure ('Opt') in 'IO'; 'main' threads
-- 'startOpt' through the actions of all options given on the command line.
--
-- Some actions terminate the program (@--version@, @--help@,
-- @--print-default-header@), some read a file given as argument, and several
-- also switch on standalone mode.
--
-- Format names are matched case-insensitively. An unknown format name or a
-- malformed @--tab-stop@ argument aborts with 'error' and a message naming
-- the offending argument.
options :: [OptDescr (Opt -> IO Opt)]
options =
    [ Option "v" ["version"]
                 (NoArg
                  (\_ -> do
                     hPutStrLn stderr ("Version " ++ version)
                     exitWith ExitSuccess))
                 "Print version"

    , Option "h" ["help"]
                 (NoArg
                  (\_ -> do
                     prg <- getProgName
                     hPutStrLn stderr (usageInfo (prg ++ " [OPTIONS] [FILES] - convert FILES from one markup format to another\nIf no OPTIONS specified, converts from markdown to html.\nIf no FILES specified, input is read from STDIN.\nOptions:") options)
                     exitWith ExitSuccess))
                 "Show help"

    , Option "fr" ["from","read"]
                 (ReqArg
                  (\arg opt -> case lookup (map toLower arg) readers of
                                 Just reader -> return opt { optReader = reader }
                                 Nothing     -> error ("Unknown reader: " ++ arg))
                  "FORMAT")
                 ("Source format (" ++ concatMap ((" " ++) . fst) readers ++ " )")

    , Option "tw" ["to","write"]
                 (ReqArg
                  (\arg opt -> case lookup (map toLower arg) writers of
                                 Just (writer, defaultHeader) ->
                                   return opt { optWriter = writer,
                                                optDefaultHeader = defaultHeader }
                                 Nothing -> error ("Unknown writer: " ++ arg))
                  "FORMAT")
                 ("Output format (" ++ concatMap ((" " ++) . fst) writers ++ " )")

    , Option "s" ["standalone"]
                 (NoArg
                  (\opt -> return opt { optStandalone = True }))
                 "Include needed header and footer on output"

    , Option "p" ["preserve-tabs"]
                 (NoArg
                  (\opt -> return opt { optPreserveTabs = True }))
                 "Preserve tabs instead of converting to spaces"

    , Option "" ["tab-stop"]
                 (ReqArg
                  (\arg opt ->
                     -- Accepts exactly what 'read' accepts, but fails right
                     -- away with a message naming the argument instead of a
                     -- late "Prelude.read: no parse".
                     case [n | (n, rest) <- reads arg, ("", "") <- lex rest] of
                       [n] -> return opt { optTabStop = n }
                       _   -> error ("Invalid tab stop: " ++ arg))
                  "TABSTOP")
                 "Tab stop (default 4)"

    , Option "R" ["parse-raw"]
                 (NoArg
                  (\opt -> return opt { optParseRaw = True }))
                 "Parse untranslatable HTML codes and LaTeX environments as raw"

    , Option "S" ["smartypants"]
                 (NoArg
                  (\opt -> return opt { optSmartypants = True }))
                 "Use smartypants for html output"

    , Option "m" ["asciimathml"]
                 (NoArg
                  (\opt -> return opt { optASCIIMathML = True, optStandalone = True }))
                 "Use ASCIIMathML script in html output"

    , Option "i" ["incremental"]
                 (NoArg
                  (\opt -> return opt { optIncremental = True }))
                 "Make list items display incrementally in S5"

    , Option "N" ["number-sections"]
                 (NoArg
                  (\opt -> return opt { optNumberSections = True }))
                 "Number sections in LaTeX"

    , Option "c" ["css"]
                 (ReqArg
                  (\arg opt -> return opt { optCSS = arg, optStandalone = True })
                  "CSS")
                 "Link to CSS style sheet"

    , Option "H" ["include-in-header"]
                 (ReqArg
                  (\arg opt -> do
                     text <- readFile arg
                     return opt { optIncludeInHeader = text, optStandalone = True })
                  "FILENAME")
                 "File to include at end of header (implies -s)"

    , Option "B" ["include-before-body"]
                 (ReqArg
                  (\arg opt -> do
                     text <- readFile arg
                     return opt { optIncludeBeforeBody = text })
                  "FILENAME")
                 "File to include before document body"

    , Option "A" ["include-after-body"]
                 (ReqArg
                  (\arg opt -> do
                     text <- readFile arg
                     return opt { optIncludeAfterBody = text })
                  "FILENAME")
                 "File to include after document body"

    , Option "" ["custom-header"]
                 (ReqArg
                  (\arg opt -> do
                     text <- readFile arg
                     return opt { optCustomHeader = text, optStandalone = True })
                  "FILENAME")
                 "File to use for custom header (implies -s)"

    , Option "T" ["title-prefix"]
                 (ReqArg
                  (\arg opt -> return opt { optTitlePrefix = arg, optStandalone = True })
                  "STRING")
                 "String to prefix to HTML window title"

    , Option "D" ["print-default-header"]
                 (ReqArg
                  (\arg _ -> do
                     -- The lookup is in 'writers', so the message names a
                     -- writer; the name is lower-cased like for --to.
                     let header = case lookup (map toLower arg) writers of
                                    Just (_, hdr) -> hdr
                                    Nothing       -> error ("Unknown writer: " ++ arg)
                     hPutStrLn stdout header
                     exitWith ExitSuccess)
                  "FORMAT")
                 "Print default header for FORMAT"
    ]
-- | Program entry point: parse the command line, read all input sources
-- (standard input when none is given, or for the name @-@), join them, run
-- the selected reader and writer, and print the UTF-8 encoded result.
--
-- Options that getOpt could not process (unrecognised options, missing
-- arguments) are reported on stderr and end the program with exit code 2;
-- they used to be dropped silently.
main :: IO ()
main = do
  args <- getArgs
  let (actions, sources, errors) = getOpt RequireOrder options args

  -- refuse to continue on a malformed command line, before any option
  -- action (some of which read files or exit) gets to run
  if null errors
     then return ()
     else do
       prg <- getProgName
       hPutStrLn stderr (concat errors ++ "Try '" ++ prg ++ " --help' for more information.")
       exitWith (ExitFailure 2)

  -- thread option data structure through all supplied option actions
  opts <- foldl (>>=) (return startOpt) actions

  let Opt { optPreserveTabs      = preserveTabs
          , optTabStop           = tabStop
          , optStandalone        = standalone
          , optReader            = reader
          , optWriter            = writer
          , optParseRaw          = parseRaw
          , optCSS               = css
          , optIncludeInHeader   = includeHeader
          , optIncludeBeforeBody = includeBefore
          , optIncludeAfterBody  = includeAfter
          , optCustomHeader      = customHeader
          , optDefaultHeader     = defaultHeader
          , optTitlePrefix       = titlePrefix
          , optNumberSections    = numberSections
          , optIncremental       = incremental
          , optSmartypants       = smartypants
          , optASCIIMathML       = asciiMathML
          } = opts

  -- the S5 writer is recognised by the default header that was selected
  let writingS5 = (defaultHeader == defaultS5Header)

  -- input preprocessing: strip DOS-style CRs, append a blank line, expand tabs
  let tabFilter = if preserveTabs then id else (tabsToSpaces tabStop)
  let addBlank str = str ++ "\n\n"
  let removeCRs str = filter (/= '\r') str
  let preprocess = tabFilter . addBlank . removeCRs

  let startParserState = defaultParserState { stateParseRaw   = parseRaw,
                                              stateTabStop    = tabStop,
                                              stateStandalone = standalone }
  let csslink = if (css == "")
                   then ""
                   else "<link rel=\"stylesheet\" href=\"" ++ css ++
                        "\" type=\"text/css\" media=\"all\" />\n"
  let asciiMathMLLink = if asciiMathML then asciiMathMLScript else ""
  -- "DEFAULT" is the sentinel meaning "no custom header was supplied"
  let header = (if (customHeader == "DEFAULT") then defaultHeader else customHeader) ++
               csslink ++ asciiMathMLLink ++ includeHeader
  let writerOptions = WriterOptions { writerStandalone     = standalone,
                                      writerHeader         = header,
                                      writerTitlePrefix    = titlePrefix,
                                      writerSmartypants    = smartypants,
                                      writerTabStop        = tabStop,
                                      writerS5             = writingS5,
                                      writerIncremental    = incremental,
                                      writerNumberSections = numberSections,
                                      writerIncludeBefore  = includeBefore,
                                      writerIncludeAfter   = includeAfter }

  (readSources sources) >>= (putStrLn . encodeUTF8 . (writer writerOptions) .
                             (reader startParserState) . preprocess .
                             decodeUTF8 . (joinWithSep "\n"))

  where
    readSources []      = mapM readSource ["-"]
    readSources sources = mapM readSource sources
    readSource "-"      = getContents
    readSource source   = readFile source

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,50 @@
-- | Definition of 'Pandoc' data structure for format-neutral representation
-- of documents.
module Text.Pandoc.Definition where
-- | A document: bibliographic information ('Meta') together with a list of
-- block elements.
data Pandoc = Pandoc Meta [Block] deriving (Eq, Read, Show)
-- | Bibliographic information for the document: title (list of 'Inline'),
-- authors (list of strings), date (string). The fields are positional.
data Meta = Meta [Inline] -- title
[String] -- authors
String -- date
deriving (Eq, Show, Read)
-- | Block element: one of the block-level constructs a document body
-- ('Pandoc') or a nested container (block quote, list item, note) is made of.
data Block
= Plain [Inline] -- ^ Plain text, not a paragraph
| Blank -- ^ A blank line
| Null -- ^ Nothing
| Para [Inline] -- ^ Paragraph
| Key [Inline] Target -- ^ Reference key: name (list of inlines) and 'Target'
| CodeBlock String -- ^ Code block (literal)
| RawHtml String -- ^ Raw HTML block (literal)
| BlockQuote [Block] -- ^ Block quote (list of blocks)
| OrderedList [[Block]] -- ^ Ordered list (list of items, each a list of blocks)
| BulletList [[Block]] -- ^ Bullet list (list of items, each a list of blocks)
| Header Int [Inline] -- ^ Header - level (integer) and text (list of inlines)
| HorizontalRule -- ^ Horizontal rule
| Note String [Block] -- ^ Footnote or endnote - reference (string), text (list of blocks)
deriving (Eq, Read, Show)
-- | Target for a link: either a URL or an indirect (labeled) reference.
-- Used by 'Key' blocks and by 'Link' and 'Image' inlines.
data Target
= Src String String -- ^ First string is URL, second is title
| Ref [Inline] -- ^ Label (list of inlines) for an indirect reference
deriving (Show, Eq, Read)
-- | Inline elements: the content of paragraphs, headers, titles and other
-- constructs that hold a list of inlines.
data Inline
= Str String -- ^ Text (string)
| Emph [Inline] -- ^ Emphasized text (list of inlines)
| Strong [Inline] -- ^ Strongly emphasized text (list of inlines)
| Code String -- ^ Inline code (literal)
| Space -- ^ Inter-word space
| LineBreak -- ^ Hard line break
| TeX String -- ^ LaTeX code (literal)
| HtmlInline String -- ^ HTML code (literal)
| Link [Inline] Target -- ^ Hyperlink: text (list of inlines) and target
| Image [Inline] Target -- ^ Image: alternative text (list of inlines) and target
| NoteRef String -- ^ Footnote or endnote reference
deriving (Show, Eq, Read)

View file

@ -0,0 +1,306 @@
-- | Functions for encoding unicode characters as HTML entity
-- references, and vice versa.
module Text.Pandoc.HtmlEntities (
htmlEntityToChar,
charToHtmlEntity,
decodeEntities,
encodeEntities
) where
import Char ( chr, ord )
import Text.Regex ( mkRegex, matchRegexAll )
import Maybe ( fromMaybe )
-- Regexes for entities. Neither pattern is anchored, so both can match in the
-- middle of a longer string. Hexadecimal references (&#x...;) are not covered.

-- Decimal numeric reference; the digits are captured as the only subgroup.
decimalCodedEntity = mkRegex "&#([0-9]+);"
-- Any entity reference: decimal numeric, or a name of letters and digits.
characterEntity = mkRegex "&#[0-9]+;|&[A-Za-z0-9]+;"
-- | Replace every entity reference in the string that can be decoded by the
-- unicode character it stands for; references that cannot be decoded are
-- left in the text unchanged.
decodeEntities :: String -> String
decodeEntities str = maybe str expand (matchRegexAll characterEntity str)
  where
    -- splice the decoded character (or the untouched reference) between the
    -- text before the match and the decoded remainder
    expand (before, match, rest, _) =
      before ++ maybe match (: []) (htmlEntityToChar match) ++ decodeEntities rest
-- | Replace each character that has an entity reference in the entity table
-- by that reference; all other characters are copied unchanged.
encodeEntities :: String -> String
encodeEntities = concatMap encodeChar
  where
    encodeChar c = maybe [c] id (charToHtmlEntity c)
-- | If the string is a valid entity reference, returns @Just@ the character,
-- otherwise @Nothing@.
--
-- Named references are looked up in 'htmlEntityTable'; anything else is tried
-- as a decimal numeric reference (@&#NNN;@). A numeric reference whose value
-- lies beyond the largest 'Char' yields @Nothing@ instead of crashing in
-- 'chr', so malformed input such as @&#99999999;@ is simply left undecoded by
-- callers.
htmlEntityToChar :: String -> Maybe Char
htmlEntityToChar entity =
  case lookup entity htmlEntityTable of
    Just ch -> Just ch
    Nothing ->
      case matchRegexAll decimalCodedEntity entity of
        Just (_, _, _, [sub]) -> decimalToChar sub
        _                     -> Nothing
  where
    -- The digits are read as an Integer so that arbitrarily long digit
    -- strings cannot wrap around into the valid range.
    decimalToChar digits
      | code <= maxCode = Just (chr (fromInteger code))
      | otherwise       = Nothing
      where
        code    = read digits :: Integer
        maxCode = toInteger (ord (maxBound :: Char))
-- | Look up the entity reference for a character: @Just@ the first reference
-- listed for it in 'htmlEntityTable', or @Nothing@ when the table has none.
charToHtmlEntity :: Char -> Maybe String
charToHtmlEntity char =
  case [entity | (entity, character) <- htmlEntityTable, character == char] of
    []           -> Nothing
    (entity : _) -> Just entity
-- Association list pairing each named entity reference (written in full,
-- including the leading '&' and trailing ';') with the character it denotes,
-- given by decimal code point.  Used for lookups in both directions by
-- 'htmlEntityToChar' and 'charToHtmlEntity'.
htmlEntityTable :: [(String, Char)]
htmlEntityTable = [
("&quot;", chr 34),
("&amp;", chr 38),
("&lt;", chr 60),
("&gt;", chr 62),
("&nbsp;", chr 160),
("&iexcl;", chr 161),
("&cent;", chr 162),
("&pound;", chr 163),
("&curren;", chr 164),
("&yen;", chr 165),
("&brvbar;", chr 166),
("&sect;", chr 167),
("&uml;", chr 168),
("&copy;", chr 169),
("&ordf;", chr 170),
("&laquo;", chr 171),
("&not;", chr 172),
("&shy;", chr 173),
("&reg;", chr 174),
("&macr;", chr 175),
("&deg;", chr 176),
("&plusmn;", chr 177),
("&sup2;", chr 178),
("&sup3;", chr 179),
("&acute;", chr 180),
("&micro;", chr 181),
("&para;", chr 182),
("&middot;", chr 183),
("&cedil;", chr 184),
("&sup1;", chr 185),
("&ordm;", chr 186),
("&raquo;", chr 187),
("&frac14;", chr 188),
("&frac12;", chr 189),
("&frac34;", chr 190),
("&iquest;", chr 191),
("&Agrave;", chr 192),
("&Aacute;", chr 193),
("&Acirc;", chr 194),
("&Atilde;", chr 195),
("&Auml;", chr 196),
("&Aring;", chr 197),
("&AElig;", chr 198),
("&Ccedil;", chr 199),
("&Egrave;", chr 200),
("&Eacute;", chr 201),
("&Ecirc;", chr 202),
("&Euml;", chr 203),
("&Igrave;", chr 204),
("&Iacute;", chr 205),
("&Icirc;", chr 206),
("&Iuml;", chr 207),
("&ETH;", chr 208),
("&Ntilde;", chr 209),
("&Ograve;", chr 210),
("&Oacute;", chr 211),
("&Ocirc;", chr 212),
("&Otilde;", chr 213),
("&Ouml;", chr 214),
("&times;", chr 215),
("&Oslash;", chr 216),
("&Ugrave;", chr 217),
("&Uacute;", chr 218),
("&Ucirc;", chr 219),
("&Uuml;", chr 220),
("&Yacute;", chr 221),
("&THORN;", chr 222),
("&szlig;", chr 223),
("&agrave;", chr 224),
("&aacute;", chr 225),
("&acirc;", chr 226),
("&atilde;", chr 227),
("&auml;", chr 228),
("&aring;", chr 229),
("&aelig;", chr 230),
("&ccedil;", chr 231),
("&egrave;", chr 232),
("&eacute;", chr 233),
("&ecirc;", chr 234),
("&euml;", chr 235),
("&igrave;", chr 236),
("&iacute;", chr 237),
("&icirc;", chr 238),
("&iuml;", chr 239),
("&eth;", chr 240),
("&ntilde;", chr 241),
("&ograve;", chr 242),
("&oacute;", chr 243),
("&ocirc;", chr 244),
("&otilde;", chr 245),
("&ouml;", chr 246),
("&divide;", chr 247),
("&oslash;", chr 248),
("&ugrave;", chr 249),
("&uacute;", chr 250),
("&ucirc;", chr 251),
("&uuml;", chr 252),
("&yacute;", chr 253),
("&thorn;", chr 254),
("&yuml;", chr 255),
("&OElig;", chr 338),
("&oelig;", chr 339),
("&Scaron;", chr 352),
("&scaron;", chr 353),
("&Yuml;", chr 376),
("&fnof;", chr 402),
("&circ;", chr 710),
("&tilde;", chr 732),
("&Alpha;", chr 913),
("&Beta;", chr 914),
("&Gamma;", chr 915),
("&Delta;", chr 916),
("&Epsilon;", chr 917),
("&Zeta;", chr 918),
("&Eta;", chr 919),
("&Theta;", chr 920),
("&Iota;", chr 921),
("&Kappa;", chr 922),
("&Lambda;", chr 923),
("&Mu;", chr 924),
("&Nu;", chr 925),
("&Xi;", chr 926),
("&Omicron;", chr 927),
("&Pi;", chr 928),
("&Rho;", chr 929),
("&Sigma;", chr 931),
("&Tau;", chr 932),
("&Upsilon;", chr 933),
("&Phi;", chr 934),
("&Chi;", chr 935),
("&Psi;", chr 936),
("&Omega;", chr 937),
("&alpha;", chr 945),
("&beta;", chr 946),
("&gamma;", chr 947),
("&delta;", chr 948),
("&epsilon;", chr 949),
("&zeta;", chr 950),
("&eta;", chr 951),
("&theta;", chr 952),
("&iota;", chr 953),
("&kappa;", chr 954),
("&lambda;", chr 955),
("&mu;", chr 956),
("&nu;", chr 957),
("&xi;", chr 958),
("&omicron;", chr 959),
("&pi;", chr 960),
("&rho;", chr 961),
("&sigmaf;", chr 962),
("&sigma;", chr 963),
("&tau;", chr 964),
("&upsilon;", chr 965),
("&phi;", chr 966),
("&chi;", chr 967),
("&psi;", chr 968),
("&omega;", chr 969),
("&thetasym;", chr 977),
("&upsih;", chr 978),
("&piv;", chr 982),
("&ensp;", chr 8194),
("&emsp;", chr 8195),
("&thinsp;", chr 8201),
("&zwnj;", chr 8204),
("&zwj;", chr 8205),
("&lrm;", chr 8206),
("&rlm;", chr 8207),
("&ndash;", chr 8211),
("&mdash;", chr 8212),
("&lsquo;", chr 8216),
("&rsquo;", chr 8217),
("&sbquo;", chr 8218),
("&ldquo;", chr 8220),
("&rdquo;", chr 8221),
("&bdquo;", chr 8222),
("&dagger;", chr 8224),
("&Dagger;", chr 8225),
("&bull;", chr 8226),
("&hellip;", chr 8230),
("&permil;", chr 8240),
("&prime;", chr 8242),
("&Prime;", chr 8243),
("&lsaquo;", chr 8249),
("&rsaquo;", chr 8250),
("&oline;", chr 8254),
("&frasl;", chr 8260),
("&euro;", chr 8364),
("&image;", chr 8465),
("&weierp;", chr 8472),
("&real;", chr 8476),
("&trade;", chr 8482),
("&alefsym;", chr 8501),
("&larr;", chr 8592),
("&uarr;", chr 8593),
("&rarr;", chr 8594),
("&darr;", chr 8595),
("&harr;", chr 8596),
("&crarr;", chr 8629),
("&lArr;", chr 8656),
("&uArr;", chr 8657),
("&rArr;", chr 8658),
("&dArr;", chr 8659),
("&hArr;", chr 8660),
("&forall;", chr 8704),
("&part;", chr 8706),
("&exist;", chr 8707),
("&empty;", chr 8709),
("&nabla;", chr 8711),
("&isin;", chr 8712),
("&notin;", chr 8713),
("&ni;", chr 8715),
("&prod;", chr 8719),
("&sum;", chr 8721),
("&minus;", chr 8722),
("&lowast;", chr 8727),
("&radic;", chr 8730),
("&prop;", chr 8733),
("&infin;", chr 8734),
("&ang;", chr 8736),
("&and;", chr 8743),
("&or;", chr 8744),
("&cap;", chr 8745),
("&cup;", chr 8746),
("&int;", chr 8747),
("&there4;", chr 8756),
("&sim;", chr 8764),
("&cong;", chr 8773),
("&asymp;", chr 8776),
("&ne;", chr 8800),
("&equiv;", chr 8801),
("&le;", chr 8804),
("&ge;", chr 8805),
("&sub;", chr 8834),
("&sup;", chr 8835),
("&nsub;", chr 8836),
("&sube;", chr 8838),
("&supe;", chr 8839),
("&oplus;", chr 8853),
("&otimes;", chr 8855),
("&perp;", chr 8869),
("&sdot;", chr 8901),
("&lceil;", chr 8968),
("&rceil;", chr 8969),
("&lfloor;", chr 8970),
("&rfloor;", chr 8971),
("&lang;", chr 9001),
("&rang;", chr 9002),
("&loz;", chr 9674),
("&spades;", chr 9824),
("&clubs;", chr 9827),
("&hearts;", chr 9829),
("&diams;", chr 9830)
]

View file

@ -0,0 +1,434 @@
-- | Converts HTML to 'Pandoc' document.
module Text.Pandoc.Readers.HTML (
readHtml,
rawHtmlInline,
rawHtmlBlock,
anyHtmlBlockTag,
anyHtmlInlineTag
) where
import Text.Regex ( matchRegex, mkRegex )
import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Pandoc
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Pandoc.HtmlEntities ( decodeEntities, htmlEntityToChar )
import Maybe ( fromMaybe )
import Char ( toUpper, toLower )
-- | Parse an HTML string into a 'Pandoc' document using 'parseHtml'.
readHtml :: ParserState  -- ^ Parser state
         -> String       -- ^ String to parse
         -> Pandoc
readHtml state input = readWith parseHtml state input

-- Test helper: run 'parseHtml' on a string via 'testStringWith'.
testString :: String -> IO ()
testString input = testStringWith parseHtml input
--
-- Constants
--
-- Lower-case names of the tags treated as inline; 'isInline' tests
-- membership in this list, and any other tag is handled as a block tag.
inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big", "br", "cite",
"code", "dfn", "em", "font", "i", "img", "input", "kbd", "label", "q",
"s", "samp", "select", "small", "span", "strike", "strong", "sub",
"sup", "textarea", "tt", "u", "var"]
--
-- HTML utility functions
--
-- | Parse blocks, each followed by optional whitespace, up to and including
-- the closing tag for @tag@.
blocksTilEnd tag = try $ manyTill spacedBlock (htmlEndTag tag)
  where
    spacedBlock = do
      blk <- block
      spaces
      return blk

-- | Parse inlines up to and including the closing tag for @tag@.
inlinesTilEnd tag = try $ manyTill inline (htmlEndTag tag)
-- Lower-cased element name of a tag string, e.g. "br" from "<br>",
-- "< br >" or "</br>".  Yields "" when the string does not look like a tag.
extractTagType tag = lowerName (matchRegex tagNameRegex tag)
  where
    tagNameRegex = mkRegex "<[[:space:]]*/?([A-Za-z0-9]+)"
    lowerName (Just [name]) = map toLower name
    lowerName Nothing       = ""
-- Parse any opening or self-closing tag and return it re-rendered as
-- "<name attrs>" or "<name attrs />".
anyHtmlTag = try $ do
  char '<'
  spaces
  name <- many1 alphaNum
  attrs <- htmlAttributes
  spaces
  slash <- option "" (string "/")
  spaces
  char '>'
  let closing = if null slash then "" else " /"
  return $ concat ["<", name, attrs, closing, ">"]

-- Parse any closing tag and return it re-rendered as "</name>".
anyHtmlEndTag = try $ do
  char '<' >> spaces
  char '/' >> spaces
  name <- many1 alphaNum
  spaces
  char '>'
  return ("</" ++ name ++ ">")
-- Parse an opening (or self-closing) tag with the given name, matched
-- case-insensitively.  Returns the requested name together with the tag's
-- attributes as (name, value) pairs.
htmlTag :: String -> GenParser Char st (String, [(String, String)])
htmlTag tag = try (do
  char '<'
  spaces
  stringAnyCase tag
  -- Require the tag name to end here.  Without this check a short name
  -- matches as a prefix: "b" would accept "<br>" (reading "r" as a
  -- minimized attribute) and "p" would accept "<pre>".
  notFollowedBy alphaNum
  attribs <- many htmlAttribute
  spaces
  option "" (string "/")
  spaces
  char '>'
  -- Drop the raw source text that each attribute parser also returns.
  return (tag, [ (name, content) | (name, content, _) <- attribs ]))
-- Parse an attribute value delimited by the given quote character.
-- Returns the value and the quote (as a string) so the raw text can be
-- rebuilt later.
quoted quoteChar = do
  let delim = char quoteChar
  value <- between delim delim (many (noneOf [quoteChar]))
  return (value, [quoteChar])

-- Parse zero or more attributes and return their concatenated raw text.
htmlAttributes = do
  attrs <- many htmlAttribute
  return $ concat [ raw | (_, _, raw) <- attrs ]

-- A single attribute, yielding (name, value, raw text).
htmlAttribute = choice [htmlRegularAttribute, htmlMinimizedAttribute]

-- Minimized boolean attribute (no "=" and value); the name doubles as value.
htmlMinimizedAttribute = try $ do
  spaces
  name <- many1 (letter <|> oneOf ".-_:")
  spaces
  notFollowedBy (char '=')
  return (name, name, ' ' : name)

-- Attribute of the form name=value, where the value is single-quoted,
-- double-quoted, or a bare run of alphanumerics and "-._:".
htmlRegularAttribute = try $ do
  let bare = do
        value <- many (alphaNum <|> oneOf "-._:")
        return (value, "")
  spaces
  name <- many1 (letter <|> oneOf ".-_:")
  spaces
  char '='
  spaces
  (value, quote) <- choice [quoted '\'', quoted '"', bare]
  return (name, value, concat [" ", name, "=", quote, value, quote])
-- Parse the closing tag for @tag@ (case-insensitive, whitespace tolerated)
-- and return it rendered as "</tag>".
htmlEndTag tag = try $ do
  char '<' >> spaces
  char '/' >> spaces
  stringAnyCase tag
  spaces
  char '>'
  return $ concat ["</", tag, ">"]
-- | @True@ when the tag string names one of the 'inlineHtmlTags'.
isInline tag = extractTagType tag `elem` inlineHtmlTags

-- Parse any opening or closing tag that is not an inline tag.
anyHtmlBlockTag = try $ do
  tag <- choice [anyHtmlTag, anyHtmlEndTag]
  if not (isInline tag)
     then return tag
     else fail "inline tag"

-- Parse any opening or closing tag that is an inline tag.
anyHtmlInlineTag = try $ do
  tag <- choice [anyHtmlTag, anyHtmlEndTag]
  if not (isInline tag)
     then fail "not an inline tag"
     else return tag
-- Script elements get their own parser because their contents may include
-- "<", ">" and similar characters: everything up to the closing script tag
-- is taken literally.
htmlScript = try $ do
  opener <- string "<script"
  body <- manyTill anyChar (htmlEndTag "script")
  return $ concat [opener, body, "</script>"]
-- Parse a script, block-level tag, comment, XML declaration or definition
-- plus trailing whitespace.  The result is a 'RawHtml' block when
-- stateParseRaw is set and 'Null' otherwise.
rawHtmlBlock = do
  -- The guard is built from htmlTag "/body" and htmlTag "/html", as in the
  -- rest of this module.
  notFollowedBy (choice [htmlTag "/body", htmlTag "/html"] >> return ' ')
  raw <- choice [htmlScript, anyHtmlBlockTag, htmlComment, xmlDec, definition]
  trailing <- many space
  st <- getState
  return $ if stateParseRaw st then RawHtml (raw ++ trailing) else Null
-- Parse an HTML comment and return it verbatim, delimiters included.
htmlComment = try $ do
  opener <- string "<!--"
  body <- manyTill anyChar (try (string "-->"))
  return $ concat [opener, body, "-->"]
--
-- parsing documents
--
-- Parse a construct starting with "<?" up to the next '>' and return it
-- verbatim.
xmlDec = try $ do
  opener <- string "<?"
  body <- manyTill anyChar (char '>')
  return $ concat [opener, body, ">"]

-- Parse a construct starting with "<!" up to the next '>' and return it
-- verbatim.
definition = try $ do
  opener <- string "<!"
  body <- manyTill anyChar (char '>')
  return $ concat [opener, body, ">"]
-- Consume one raw HTML block or one character, provided the input is not
-- at a title tag or at the end of the head.
nonTitleNonHead = try $ do
  notFollowedBy' (htmlTag "title")
  notFollowedBy' (htmlTag "/head")
  choice [rawHtmlBlock >> return ' ', anyChar]

-- Parse the title element and return its inline contents.
parseTitle = try $ do
  (name, _) <- htmlTag "title"
  titleInlines <- inlinesTilEnd name
  spaces
  return titleInlines

-- Parse the head element and return meta-information as
-- (title, authors, date); for now only the title is filled in.
parseHead = try $ do
  htmlTag "head" >> spaces
  skipMany nonTitleNonHead
  titleInlines <- option [] parseTitle
  skipMany nonTitleNonHead
  htmlTag "/head"
  return (titleInlines, [], "")

-- Consume the given opening tag if it is present.
skipHtmlTag tag = option ("", []) (htmlTag tag)
-- Parse an h1 element whose class attribute is "title" (the in-body copy of
-- the document title) and discard its contents.
bodyTitle = try $ do
  (_, attrs) <- htmlTag "h1"
  if extractAttribute "class" attrs == Just "title"
     then return ""
     else fail "not title"
  inlinesTilEnd "h1"
  return ""
-- Parse a whole HTML document (or fragment) into a 'Pandoc' value.  The
-- html, head and body wrappers are all optional.
parseHtml = do
  sepEndBy (choice [xmlDec, definition, htmlComment]) spaces
  skipHtmlTag "html" >> spaces
  (title, authors, date) <- option ([], [], "") parseHead
  spaces
  skipHtmlTag "body" >> spaces
  option "" bodyTitle  -- the title is already represented in the metadata
  blocks <- parseBlocks
  spaces
  option "" (htmlEndTag "body") >> spaces
  option "" (htmlEndTag "html")
  many anyChar         -- ignore anything after </html>
  eof
  st <- getState
  let meta = Meta title authors date
  -- Key blocks collected in the state are appended in reverse order.
  return $ Pandoc meta (blocks ++ reverse (stateKeyBlocks st))
--
-- parsing blocks
--
-- Skip leading whitespace, then parse blocks separated by whitespace.
parseBlocks = spaces >> sepEndBy block spaces

-- A single block; the alternatives are tried in the order listed.
block = choice [ codeBlock, header, hrule, list, blockQuote, para, plain
               , rawHtmlBlock ] <?> "block"
--
-- header blocks
--
-- A header element.  HTML defines six heading levels (h1..h6), so all six
-- are recognised; previously h6 was missed and fell through to raw HTML.
header = choice (map headerLevel [1..6]) <?> "header"

-- Parse the header element of level @n@ and return a 'Header' block with
-- its normalised inline contents.
headerLevel n = try (do
  let level = "h" ++ show n
  (tag, attribs) <- htmlTag level
  contents <- inlinesTilEnd level
  return (Header n (normalizeSpaces contents)))
--
-- hrule block
--
-- Parse an hr tag as 'HorizontalRule'.  When raw HTML is being kept and the
-- tag carries attributes, fail so that it is parsed as raw HTML instead.
hrule = try $ do
  (_, attrs) <- htmlTag "hr"
  st <- getState
  if null attrs || not (stateParseRaw st)
     then return HorizontalRule
     else unexpected "attributes in hr"
--
-- code blocks
--
-- A code block: either pre wrapping code, or a bare code element.
codeBlock = choice [ preCodeBlock, bareCodeBlock ] <?> "code block"

-- Code element nested in a pre element; contents are entity-decoded.
preCodeBlock = try $ do
  htmlTag "pre" >> spaces
  htmlTag "code"
  body <- manyTill anyChar (htmlEndTag "code")
  spaces
  htmlEndTag "pre"
  return $ CodeBlock (decodeEntities body)

-- Code element on its own; contents are entity-decoded.
bareCodeBlock = try $ do
  htmlTag "code"
  body <- manyTill anyChar (htmlEndTag "code")
  return $ CodeBlock (decodeEntities body)
--
-- block quotes
--
-- Parse a blockquote element into a 'BlockQuote' of its blocks.
blockQuote = try $ do
  htmlTag "blockquote" >> spaces
  contents <- blocksTilEnd "blockquote"
  return (BlockQuote contents)
--
-- list blocks
--
-- A bullet or ordered list.
list = (bulletList <|> orderedList) <?> "list"

-- ol element containing at least one list item.
orderedList = try $ do
  htmlTag "ol" >> spaces
  items <- sepEndBy1 listItem spaces
  htmlEndTag "ol"
  return (OrderedList items)

-- ul element containing at least one list item.
bulletList = try $ do
  htmlTag "ul" >> spaces
  items <- sepEndBy1 listItem spaces
  htmlEndTag "ul"
  return (BulletList items)

-- li element; yields the blocks it contains.
listItem = try $ htmlTag "li" >> spaces >> blocksTilEnd "li"
--
-- paragraph block
--
-- Parse a p element into a 'Para' of its normalised inlines.
para = try $ do
  htmlTag "p"
  contents <- inlinesTilEnd "p"
  return $ Para (normalizeSpaces contents)
--
-- plain block
--
-- One or more inlines not wrapped in a p element, as a 'Plain' block.
plain = do
  contents <- many1 inline
  return $ Plain (normalizeSpaces contents)
--
-- inline
--
-- An inline element: textual constructs are tried first, then the
-- constructs that begin with a tag.
inline = choice [ text, special ] <?> "inline"
-- Textual inlines, tried in the order listed.
text = choice [ entity, strong, emph, code, str, linebreak, whitespace ] <?> "text"
-- Links, images, and raw inline HTML.
special = choice [ link, image, rawHtmlInline ] <?> "link, inline html, or image"
-- Parse a single entity reference (named, or decimal numeric such as
-- "&#169;") and return the character it denotes as a one-character 'Str'.
-- References that 'htmlEntityToChar' cannot resolve become "?".
entity = try (do
  char '&'
  -- Names may contain digits (e.g. "frac12", "sup2", "there4"), so accept
  -- alphanumerics rather than letters only; this agrees with the
  -- characterEntity regex in Text.Pandoc.HtmlEntities.
  body <- choice [ many1 alphaNum
                 , try (do { char '#'; num <- many1 digit; return ('#' : num) }) ]
  char ';'
  return (Str [fromMaybe '?' (htmlEntityToChar ("&" ++ body ++ ";"))]))
-- Inline code element.  Internal line breaks are replaced by spaces,
-- leading and trailing space is removed, and entities are decoded.
code = try $ do
  htmlTag "code"
  raw <- manyTill anyChar (htmlEndTag "code")
  let oneLine = joinWithSep " " (lines raw)
  return $ Code (decodeEntities (removeLeadingTrailingSpace oneLine))
-- Parse a script or any inline tag.  The result is an 'HtmlInline' when
-- stateParseRaw is set and an empty 'Str' otherwise.
rawHtmlInline = do
  raw <- choice [htmlScript, anyHtmlInlineTag]
  st <- getState
  return $ if stateParseRaw st then HtmlInline raw else Str ""
-- Parse an element with the given tag name and return its inline contents
-- with spaces normalised.
betweenTags tag = try $ do
  htmlTag tag
  contents <- inlinesTilEnd tag
  return $ normalizeSpaces contents
-- Emphasised text: an em or i element.  (The HTML italic tag is "i"; the
-- previous "it" never matched real markup.)
emph = try (do
  result <- choice [betweenTags "em", betweenTags "i"]
  return (Emph result))
-- Strongly emphasised text: a b or strong element.
strong = try $ do
  contents <- betweenTags "b" <|> betweenTags "strong"
  return $ Strong contents
-- A run of whitespace collapses to a single 'Space'.
whitespace = many1 space >> return Space

-- Hard line break (br tag).
linebreak = htmlTag "br" >> return LineBreak

-- A run of characters containing no '<', '&', space, tab or newline.
str = do
  chars <- many1 (noneOf "<& \t\n")
  return $ Str (decodeEntities chars)
--
-- links and images
--
-- Value of the first attribute in the list whose name equals the given
-- name, compared case-insensitively; Nothing when there is no such
-- attribute.
extractAttribute name attrs =
  lookup (lower name) [ (lower key, value) | (key, value) <- attrs ]
  where
    lower = map toLower
-- An a element that has an href attribute; an optional title attribute is
-- passed to generateReference along with the URL.
link = try $ do
  (_, attrs) <- htmlTag "a"
  url <- maybe (fail "no href") return (extractAttribute "href" attrs)
  let title = fromMaybe "" (extractAttribute "title" attrs)
  label <- inlinesTilEnd "a"
  ref <- generateReference url title
  return $ Link (normalizeSpaces label) ref

-- An img element that has a src attribute; the alt attribute (if any)
-- becomes the image's text.
image = try $ do
  (_, attrs) <- htmlTag "img"
  url <- maybe (fail "no src") return (extractAttribute "src" attrs)
  let attrOrEmpty key = fromMaybe "" (extractAttribute key attrs)
  ref <- generateReference url (attrOrEmpty "title")
  return $ Image [Str (attrOrEmpty "alt")] ref

View file

@ -0,0 +1,585 @@
-- | Converts LaTeX to 'Pandoc' document.
module Text.Pandoc.Readers.LaTeX (
readLaTeX,
rawLaTeXInline,
rawLaTeXEnvironment
) where
import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Pandoc
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Maybe ( fromMaybe )
import Char ( chr )
-- | Parse a LaTeX string into a 'Pandoc' document using 'parseLaTeX'.
readLaTeX :: ParserState  -- ^ Parser state, including options for parser
          -> String       -- ^ String to parse
          -> Pandoc
readLaTeX state input = readWith parseLaTeX state input

-- Test helper: run 'parseLaTeX' on a string via 'testStringWith'.
testString input = testStringWith parseLaTeX input
-- Characters with special meaning; the 'str' parser stops at any of them.
-- ('&' is listed twice, which has no effect on membership tests.)
specialChars = "\\$%&^&_~#{}\n \t|<>"
--
-- utility functions
--
-- | Change quotation marks back to "basic" quotes: first every backtick
-- becomes an apostrophe, then every pair of apostrophes becomes a double
-- quote.
normalizeQuotes :: String -> String
normalizeQuotes text = gsub "''" "\"" (gsub "`" "'" text)

-- | Change a LaTeX en dash between two digits to a single hyphen.
normalizeDashes :: String -> String
normalizeDashes text = gsub "([0-9])--([0-9])" "\\1-\\2" text

-- Apply quote normalisation, then dash normalisation.
normalizePunctuation :: String -> String
normalizePunctuation text = normalizeDashes (normalizeQuotes text)
-- | Optional argument of a command (the text between "[" and "]"), or the
-- empty string when there is none.
commandOpt = option "" $ between (char '[') (char ']') (many1 (noneOf "]"))

-- | Text between a "{" and its matching "}".  Escaped braces and nested
-- groups are kept verbatim in the result.
bracketedText = try $ do
  let escapedBrace = try (char '\\' >> oneOf "{}" >>= \b -> return ['\\', b])
      plainChar    = count 1 (noneOf "{}")
      nestedGroup  = bracketedText >>= \inner -> return ("{" ++ inner ++ "}")
  char '{'
  pieces <- many (choice [escapedBrace, plainChar, nestedGroup])
  char '}'
  return (concat pieces)

-- | Zero or more brace-delimited arguments of a command.
commandArgs = many bracketedText

-- | A LaTeX command, returned as (name, star, option, list of arguments).
command = try $ do
  char '\\'
  name <- many1 alphaNum
  star <- option "" (string "*")  -- some commands have starred versions
  opt  <- commandOpt
  args <- commandArgs
  return (name, star, opt, args)
-- Parse "\begin{name}" plus any option, arguments and trailing whitespace;
-- returns the environment name.
begin name = try $ do
  string "\\begin{" >> string name >> char '}'
  option "" commandOpt
  option [] commandArgs
  spaces
  return name

-- Parse "\end{name}" plus trailing whitespace; returns the environment name.
end name = try $ do
  string "\\end{" >> string name >> char '}'
  spaces
  return name
-- | The blocks contained in the named environment.
environment name = try $ do
  begin name
  spaces
  manyTill block (end name)

-- Parse an environment of any name (optionally starred) and wrap its
-- blocks in a 'BlockQuote'.
anyEnvironment = try $ do
  string "\\begin{"
  name <- many alphaNum
  star <- option "" (string "*")  -- some environments have starred variants
  char '}'
  option "" commandOpt
  option [] commandArgs
  spaces
  contents <- manyTill block (end (name ++ star))
  return $ BlockQuote contents
--
-- parsing documents
--
-- | Discard all input up to and including "\begin{document}" and any
-- whitespace that follows.
skipLaTeXHeader = try $ manyTill anyChar (begin "document") >> spaces >> return ""
-- | Parse LaTeX and return 'Pandoc'.
--
-- The document metadata is taken from the parser state, where the 'title',
-- 'authors' and 'date' parsers record what they find.  Previously those
-- values were stored but the result always carried empty metadata.
parseLaTeX = do
  option "" skipLaTeXHeader  -- if parsing a fragment, this might not be present
  blocks <- parseBlocks
  spaces
  option "" (string "\\end{document}")  -- likewise optional for fragments
  spaces
  eof
  state <- getState
  let keyBlocks  = stateKeyBlocks state
  let noteBlocks = stateNoteBlocks state
  -- Null blocks are placeholders left by parsers that only update state.
  let blocks'    = filter (/= Null) blocks
  let meta       = Meta (stateTitle state) (stateAuthors state) (stateDate state)
  return (Pandoc meta (blocks' ++ (reverse noteBlocks) ++ (reverse keyBlocks)))
--
-- parsing blocks
--
-- Skip leading whitespace, then parse zero or more blocks.
parseBlocks = spaces >> many block

-- A single block; the alternatives are tried in the order listed.
block = choice [ hrule, codeBlock, header, list, blockQuote, mathBlock, comment
               , bibliographic, para, specialEnvironment, itemBlock
               , unknownEnvironment, unknownCommand ] <?> "block"
--
-- header blocks
--
-- A sectioning command at one of levels 1 to 5.
header = choice (map headerLevel [1..5]) <?> "header"

-- Level n is "\section" prefixed with (n - 1) copies of "sub"; an optional
-- star is accepted and ignored.
headerLevel n = try $ do
  string ('\\' : concat (replicate (n - 1) "sub") ++ "section")
  option ' ' (char '*')
  char '{'
  title <- manyTill inline (char '}')
  spaces
  return $ Header n (normalizeSpaces title)
--
-- hrule block
--
-- Either the centred-rule idiom or "\newpage" is read as a horizontal rule.
hrule = try $ do
  oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n", "\\newpage" ]
  spaces >> return HorizontalRule
--
-- code blocks
--
-- A verbatim environment.  The 'begin' helper is not used here because it
-- would gobble leading whitespace; only blank lines are skipped.
codeBlock = try $ do
  string "\\begin{verbatim}"
  option "" blanklines
  body <- manyTill anyChar (try (string "\\end{verbatim}"))
  spaces
  return $ CodeBlock (stripTrailingNewlines body)
--
-- block quotes
--
-- A quote or quotation environment.
blockQuote = choice [ blockQuote1, blockQuote2 ] <?> "blockquote"

-- The "quote" environment.
blockQuote1 = try $ do
  contents <- environment "quote"
  spaces
  return $ BlockQuote contents

-- The "quotation" environment.
blockQuote2 = try $ do
  contents <- environment "quotation"
  spaces
  return $ BlockQuote contents
--
-- math block
--
-- Display math: an equation or displaymath environment, or "\[ ... \]".
mathBlock = choice [ mathBlockWith (begin "equation") (end "equation")
                   , mathBlockWith (begin "displaymath") (end "displaymath")
                   , mathBlockWith (string "\\[") (string "\\]") ] <?> "math block"

-- Parse display math between the given delimiters; the contents are
-- re-wrapped in "$" and returned as a block quote holding one raw TeX
-- paragraph.
mathBlockWith opener closer = try $ do
  opener
  spaces
  body <- manyTill anyChar closer
  spaces
  return $ BlockQuote [Para [TeX ("$" ++ body ++ "$")]]
--
-- list blocks
--
-- An itemize or enumerate environment.
list = choice [bulletList, orderedList] <?> "list"

-- An "\item" and the blocks after it.  The parser context is switched to
-- ListItemState while the item's blocks are parsed, then restored.
listItem = try $ do
  ("item", _, _, _) <- command
  spaces
  st <- getState
  let saved = stateParserContext st
  updateState (\s -> s {stateParserContext = ListItemState})
  blocks <- many block
  updateState (\s -> s {stateParserContext = saved})
  return blocks

-- The "enumerate" environment.
orderedList = try $ do
  begin "enumerate" >> spaces
  items <- many listItem
  end "enumerate" >> spaces
  return $ OrderedList items

-- The "itemize" environment.
bulletList = try $ do
  begin "itemize" >> spaces
  items <- many listItem
  end "itemize" >> spaces
  return $ BulletList items
--
-- paragraph block
--
-- One or more inlines followed by optional whitespace, as a paragraph.
para = try $ do
  contents <- many1 inline
  spaces
  return $ Para (normalizeSpaces contents)
--
-- title authors date
--
-- Title-page commands.  Each one yields a Null block; title, authors and
-- date record their argument in the parser state.
bibliographic = choice [ maketitle, title, authors, date ]

-- "\maketitle" is consumed and ignored.
maketitle = try $ string "\\maketitle" >> spaces >> return Null

-- "\title{...}": the argument is parsed as inlines and stored in stateTitle.
title = try $ do
  string "\\title{"
  inlines <- manyTill inline (char '}')
  spaces
  updateState (\st -> st { stateTitle = inlines })
  return Null

-- "\author{...}": the raw argument is rewritten with gsub, split into
-- lines, trimmed, and stored in stateAuthors.
authors = try $ do
  string "\\author{"
  raw <- manyTill anyChar (char '}')
  spaces
  let names = map removeLeadingTrailingSpace (lines (gsub "\\\\" "\n" raw))
  updateState (\st -> st { stateAuthors = names })
  return Null

-- "\date{...}": the raw argument is stored in stateDate.
date = try $ do
  string "\\date{"
  raw <- manyTill anyChar (char '}')
  spaces
  updateState (\st -> st { stateDate = raw })
  return Null
--
-- item block
-- for use in unknown environments that aren't being parsed as raw latex
--
-- this forces items to be parsed in different blocks
-- An "\item" met outside list-item context.  It fails inside a list item
-- (the list parsers handle that case); otherwise it yields the item's
-- option as plain text, or Null when there is no option.
itemBlock = try $ do
  ("item", _, opt, _) <- command
  st <- getState
  case () of
    _ | stateParserContext st == ListItemState -> fail "item should be handled by list block"
      | null opt  -> return Null
      | otherwise -> return (Plain [Str opt])
--
-- raw LaTeX
--
-- Environments that are always kept as raw LaTeX.  The lookahead checks
-- that one of the listed environments begins here before the whole
-- environment is handed to 'rawLaTeXEnvironment'.
-- ("eqnarray" was previously misspelled "eqnarry" and so never matched.)
specialEnvironment = do
  followedBy' (choice (map begin ["tabular", "figure", "tabbing", "eqnarray",
                                  "picture", "table", "verse", "theorem"]))
  rawLaTeXEnvironment
-- | Parse any LaTeX environment and return a Para block containing
-- the whole literal environment as raw TeX.
rawLaTeXEnvironment :: GenParser Char st Block
rawLaTeXEnvironment = try (do
string "\\begin"
char '{'
name <- many1 alphaNum
star <- option "" (string "*") -- for starred variants
let name' = name ++ star
char '}'
opt <- option "" commandOpt
args <- option [] commandArgs
-- Rebuild the source text of the option and arguments, since the parsers
-- above return them without their delimiters.
let optStr = if (null opt) then "" else "[" ++ opt ++ "]"
let argStr = concatMap (\arg -> ("{" ++ arg ++ "}")) args
-- Body: runs of non-backslash text, nested environments (parsed recursively
-- and spliced back in as text), or a lone backslash, until the matching
-- \end{name'} is reached.
contents <- manyTill (choice [(many1 (noneOf "\\")),
(do{ (Para [TeX str]) <- rawLaTeXEnvironment; return str }),
string "\\"]) (end name')
spaces
return (Para [TeX ("\\begin{" ++ name' ++ "}" ++ optStr ++ argStr ++
(concat contents) ++ "\\end{" ++ name' ++ "}")]))
-- An environment not handled elsewhere.  With stateParseRaw set the whole
-- environment is kept as raw TeX; otherwise only its contents are parsed.
unknownEnvironment = try $ do
  st <- getState
  if stateParseRaw st then rawLaTeXEnvironment else anyEnvironment
-- A command not handled by any other block parser.  With stateParseRaw set
-- it is kept as raw TeX; otherwise only its arguments are kept, joined by
-- spaces, as plain text.
unknownCommand = try (do
-- Never consume the terminators that the list and document parsers look for.
notFollowedBy' (string "\\end{itemize}")
notFollowedBy' (string "\\end{enumerate}")
notFollowedBy' (string "\\end{document}")
(name, star, opt, args) <- command
spaces
let optStr = if null opt then "" else "[" ++ opt ++ "]"
let argStr = concatMap (\arg -> ("{" ++ arg ++ "}")) args
state <- getState
-- Inside a list item, \item must be left for the list parsers.
if (name == "item") && ((stateParserContext state) == ListItemState) then
fail "should not be parsed as raw"
else
string ""
if stateParseRaw state then
return (Plain [TeX ("\\" ++ name ++ star ++ optStr ++ argStr)])
else
return (Plain [Str (joinWithSep " " args)]))
-- A LaTeX comment: "%" through the end of the line, plus following
-- whitespace.  Yields a Null block.
comment = try $ char '%' >> manyTill anyChar newline >> spaces >> return Null
--
-- inline
--
-- A single inline element; the alternatives are tried in the order listed.
inline = choice [ strong, emph, ref, lab, code, linebreak, math, ldots, accentedChar,
specialChar, specialInline, escapedChar, unescapedChar, str,
endline, whitespace ] <?> "inline"
-- Inlines built on generic commands: links, images, footnotes, raw TeX.
specialInline = choice [ link, image, footnote, rawLaTeXInline ] <?>
"link, raw TeX, note, or image"
-- "\ldots" becomes three periods.
ldots = try $ string "\\ldots" >> return (Str "...")

-- An accented character written with an accent command.
accentedChar = choice [normalAccentedChar, specialAccentedChar]

-- Backslash, an accent mark, then a base character (optionally in braces).
-- The result is looked up in 'accentTable'; unknown combinations give "?".
normalAccentedChar = try $ do
  char '\\'
  accent <- oneOf "'`^\"~"
  base <- between (char '{') (char '}') anyChar <|> anyChar
  let accents = fromMaybe [] (lookup base accentTable)
  return $ Str [maybe '?' chr (lookup accent accents)]
-- An association list from base letters to association lists from accent
-- marks to decimal character numbers (Latin-1 code points).
-- The 'a' row previously mapped '^' to 227 (a-tilde) and had no '~' entry;
-- a-circumflex is 226 and a-tilde is 227, matching the 'A' row.
accentTable =
  [ ('A', [('`', 192), ('\'', 193), ('^', 194), ('~', 195), ('"', 196)]),
    ('E', [('`', 200), ('\'', 201), ('^', 202), ('"', 203)]),
    ('I', [('`', 204), ('\'', 205), ('^', 206), ('"', 207)]),
    ('N', [('~', 209)]),
    ('O', [('`', 210), ('\'', 211), ('^', 212), ('~', 213), ('"', 214)]),
    ('U', [('`', 217), ('\'', 218), ('^', 219), ('"', 220)]),
    ('a', [('`', 224), ('\'', 225), ('^', 226), ('~', 227), ('"', 228)]),
    ('e', [('`', 232), ('\'', 233), ('^', 234), ('"', 235)]),
    ('i', [('`', 236), ('\'', 237), ('^', 238), ('"', 239)]),
    ('n', [('~', 241)]),
    ('o', [('`', 242), ('\'', 243), ('^', 244), ('~', 245), ('"', 246)]),
    ('u', [('`', 249), ('\'', 250), ('^', 251), ('"', 252)]) ]
-- Accented or special characters that need their own parsers.  'icirc' is
-- defined alongside the others in this module but was missing from this
-- list, so it was never tried.
specialAccentedChar = choice [ ccedil, aring, iuml, icirc, szlig, aelig, oslash,
                               pound, euro, copyright, sect ]
-- "\cc" gives character 231 and "\cC" gives character 199.
ccedil = try $ do
  char '\\'
  name <- try (string "cc") <|> try (string "cC")
  return $ Str [chr (if name == "cc" then 231 else 199)]

-- "\aa" gives character 229 and "\AA" gives character 197.
aring = try $ do
  char '\\'
  name <- try (string "aa") <|> try (string "AA")
  return $ Str [chr (if name == "aa" then 229 else 197)]

-- Dieresis accent on "\i" (braced or not) gives character 239.
iuml = try $ do
  string "\\\""
  try (string "\\i") <|> try (string "{\\i}")
  return $ Str [chr 239]

-- Circumflex accent on "\i" (braced or not) gives character 238.
icirc = try $ do
  string "\\^"
  try (string "\\i") <|> try (string "{\\i}")
  return $ Str [chr 238]

-- "\ss" gives character 223.
szlig = try $ string "\\ss" >> return (Str [chr 223])
-- "\o" gives character 248 and "\O" gives character 216.
oslash = try (do
  char '\\'
  which <- choice [char 'o', char 'O']
  -- The command name must end here; otherwise this parser would swallow the
  -- first two characters of commands such as \omega, \oplus or \Omega.
  notFollowedBy letter
  let num = if which == 'o' then 248 else 216
  return (Str [chr num]))
-- "\ae" gives character 230 and "\AE" gives character 198.
aelig = try $ do
  char '\\'
  name <- try (string "ae") <|> try (string "AE")
  return $ Str [chr (if name == "ae" then 230 else 198)]

-- "\pounds" gives character 163.
pound = try $ string "\\pounds" >> return (Str [chr 163])

-- "\euro" gives character 8364.
euro = try $ string "\\euro" >> return (Str [chr 8364])

-- "\copyright" gives character 169.
copyright = try $ string "\\copyright" >> return (Str [chr 169])
-- "\S" gives character 167 (section sign).
sect = try (do
  string "\\S"
  -- The command name must end here, so that commands which merely start
  -- with "\S" (such as \Sigma) are not truncated.
  notFollowedBy letter
  return (Str [chr 167]))
-- One of the listed characters, parsed via the 'escaped' helper.
escapedChar = escaped $ oneOf " $%^&_#{}"

-- Standalone, non-escaped special characters are dropped (empty 'Str').
unescapedChar = oneOf "$^&_#{}|<>" >> return (Str "")

-- Characters that are written as text commands.
specialChar = choice [ backslash, bar, lt, gt ]

-- "\textbackslash" gives a backslash.
backslash = try $ string "\\textbackslash" >> return (Str "\\")
-- "\textbar" gives a vertical bar.  (It previously produced a backslash,
-- copied from the 'backslash' parser.)
bar = try (do
  string "\\textbar"
  return (Str "|"))
-- "\textless" gives "<".
lt = try $ string "\\textless" >> return (Str "<")

-- "\textgreater" gives ">".
gt = try $ string "\\textgreater" >> return (Str ">")
-- "\verb" followed by a delimiter character, the code, and the same
-- delimiter again.  Leading and trailing space is trimmed from the code.
code = try $ do
  string "\\verb"
  delim <- anyChar
  body <- manyTill anyChar (char delim)
  return $ Code (removeLeadingTrailingSpace body)

-- "\emph{...}" or "\textit{...}".
emph = try $ do
  oneOfStrings [ "\\emph{", "\\textit{" ]
  contents <- manyTill inline (char '}')
  return $ Emph contents

-- "\label{x}" is rendered as the text "(x)".
lab = try $ do
  string "\\label{"
  name <- manyTill anyChar (char '}')
  return $ Str (concat ["(", name, ")"])

-- "\ref{x}" is rendered as the text "x".
ref = try $ do
  string "\\ref{"
  name <- manyTill anyChar (char '}')
  return $ Str name

-- "\textbf{...}".
strong = try $ do
  string "\\textbf{"
  contents <- manyTill inline (char '}')
  return $ Strong contents
-- A run of spaces, tabs or "~" collapses to a single 'Space'.
whitespace = many1 (oneOf "~ \t") >> return Space

-- Hard line break: two backslashes.
linebreak = try $ string "\\\\" >> return LineBreak

-- A run of ordinary characters, with quotes and dashes normalised.
str = do
  chars <- many1 (noneOf specialChars)
  return $ Str (normalizePunctuation chars)

-- A newline inside a paragraph (one not followed by a blank line) counts
-- as a space.
endline = try $ newline >> notFollowedBy blankline >> return Space
-- Inline math in either "$...$" or "\(...\)" form.
math = choice [math1, math2] <?> "math"

-- "$...$": everything between the dollar signs, kept as raw TeX.
math1 = try $ do
  body <- between (char '$') (char '$') (many (noneOf "$"))
  return $ TeX (concat ["$", body, "$"])
-- "\( ... \)": inline math, re-wrapped in dollar signs as raw TeX.
-- The body is read up to the first "\)".  Previously it was read with
-- many (noneOf "$"), which consumed the closing "\)" as well, so this
-- parser could never succeed.  A "$" is still not allowed inside the body.
math2 = try (do
  string "\\("
  result <- manyTill (noneOf "$") (try (string "\\)"))
  return (TeX ("$" ++ result ++ "$")))
--
-- links and images
--
-- "\href{url}{label}".
link = try $ do
  string "\\href{"
  url <- manyTill anyChar (char '}')
  char '{'
  label <- manyTill inline (char '}')
  ref <- generateReference url ""
  return $ Link (normalizeSpaces label) ref

-- "\includegraphics" with at least one argument; the first argument is the
-- image source and the text is always "image".
image = try $ do
  ("includegraphics", _, _, (src:_)) <- command
  return $ Image [Str "image"] (Src src "")
-- "\footnote{...}" with exactly one argument.  The argument is parsed as
-- blocks by a nested parser run, stored in the state as a Note under the
-- next reference number, and a NoteRef with that number is returned.
footnote = try (do
("footnote", _, _, (contents:[])) <- command
-- The nested run starts from defaultParserState, not the current state;
-- a parse failure inside the footnote aborts with 'error'.
let blocks = case runParser parseBlocks defaultParserState "footnote" contents of
Left err -> error $ "Input:\n" ++ show contents ++
"\nError:\n" ++ show err
Right result -> result
state <- getState
let notes = stateNoteBlocks state
-- Notes are stored newest first, so the head holds the highest number.
let nextRef = case notes of
[] -> "1"
(Note ref body):rest -> (show ((read ref) + 1))
setState (state { stateNoteBlocks = (Note nextRef blocks):notes })
return (NoteRef nextRef))
-- | Parse any LaTeX command other than begin, end and item, and return its
-- reconstructed source text as a raw TeX inline.
rawLaTeXInline :: GenParser Char ParserState Inline
rawLaTeXInline = try $ do
  (name, star, opt, args) <- command
  getState
  if name `elem` ["begin", "end", "item"]
     then fail "not an inline command"
     else string ""
  let optStr | null opt  = ""
             | otherwise = "[" ++ opt ++ "]"
      argStr = concat [ "{" ++ arg ++ "}" | arg <- args ]
  return $ TeX (concat ["\\", name, star, optStr, argStr])

View file

@ -0,0 +1,582 @@
-- | Convert markdown to Pandoc document.
module Text.Pandoc.Readers.Markdown (
readMarkdown
) where
import Text.ParserCombinators.Pandoc
import Text.Pandoc.Definition
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment )
import Text.Pandoc.Shared
import Text.Pandoc.Readers.HTML ( rawHtmlInline, rawHtmlBlock, anyHtmlBlockTag,
anyHtmlInlineTag )
import Text.Pandoc.HtmlEntities ( decodeEntities )
import Text.Regex ( matchRegex, mkRegex )
import Text.ParserCombinators.Parsec
-- | Parse a markdown string into a 'Pandoc' document using 'parseMarkdown'.
readMarkdown :: ParserState -> String -> Pandoc
readMarkdown state input = readWith parseMarkdown state input

-- | Test helper: run 'parseMarkdown' on a string via 'testStringWith'.
testString :: String -> IO ()
testString input = testStringWith parseMarkdown input
--
-- Constants and data structure definitions
--
-- Characters and character sets that delimit markdown constructs.
spaceChars = " \t"
endLineChars = "\n"
-- link/image label, separator, and source delimiters
labelStart = '['
labelEnd = ']'
labelSep = ':'
srcStart = '('
srcEnd = ')'
imageStart = '!'
noteStart = '^'
-- inline code, emphasis, autolink, and math delimiters
codeStart = '`'
codeEnd = '`'
emphStart = '*'
emphEnd = '*'
emphStartAlt = '_'
emphEndAlt = '_'
autoLinkStart = '<'
autoLinkEnd = '>'
mathStart = '$'
mathEnd = '$'
-- block-level markers
bulletListMarkers = "*+-"
orderedListDelimiters = "."
escapeChar = '\\'
hruleChars = "*-_"
quoteChars = "'\""
atxHChar = '#'
titleOpeners = "\"'("
-- position in this list gives the setext header level: '=' is 1, '-' is 2
setextHChars = ['=','-']
blockQuoteChar = '>'
hyphenChar = '-'
-- treat these as potentially non-text when parsing inline:
specialChars = [escapeChar, labelStart, labelEnd, emphStart, emphEnd, emphStartAlt,
emphEndAlt, codeStart, codeEnd, autoLinkEnd, autoLinkStart, mathStart,
mathEnd, imageStart, noteStart, hyphenChar]
--
-- auxiliary functions
--
-- | Consume one endline if there is one.
skipEndline = option Space endline

-- One level of indentation: a tab, or as many spaces as the tab stop.
indentSpaces = do
  st <- getState
  oneOfStrings [ "\t", replicate (stateTabStop st) ' ' ] <?> "indentation"

-- Consume the longest run of leading spaces that is shorter than one tab
-- stop (possibly none).
skipNonindentSpaces = do
  st <- getState
  let widths = reverse [0 .. stateTabStop st - 1]
  choice [ try (count n (char ' ')) | n <- widths ]
--
-- document structure
--
-- "% title": the rest of the line, parsed as inlines.
titleLine = try $ char '%' >> skipSpaces >> manyTill inline newline

-- "% author; author, ...": names separated by commas or semicolons, each
-- trimmed of surrounding space.
authorsLine = try $ do
  char '%' >> skipSpaces
  names <- sepEndBy (many1 (noneOf ",;\n")) (oneOf ",;")
  newline
  return $ map removeLeadingTrailingSpace names

-- "% date": the rest of the line, with trailing space removed.
dateLine = try $ do
  char '%' >> skipSpaces
  raw <- many (noneOf "\n")
  newline
  return $ removeTrailingSpace raw

-- Optional title, authors and date lines, in that order, followed by
-- optional blank lines.
titleBlock = try $ do
  title  <- option [] titleLine
  author <- option [] authorsLine
  date   <- option "" dateLine
  option "" blanklines
  return (title, author, date)
-- | Top-level markdown parser.  Reads an optional title block, then all
-- blocks, and appends the reference key blocks collected in the parser
-- state (reversed) after the document body.
parseMarkdown = do
  -- markdown needs raw HTML to be parsed
  updateState (\st -> st { stateParseRaw = True })
  (docTitle, docAuthors, docDate) <- option ([], [], "") titleBlock
  body <- parseBlocks
  finalState <- getState
  let keyBlocks = reverse (stateKeyBlocks finalState)
  return $ Pandoc (Meta docTitle docAuthors docDate) (body ++ keyBlocks)
--
-- parsing blocks
--
-- | Parse blocks until the end of input.
parseBlocks = manyTill block eof

-- | Parse a single block.  Alternatives are attempted in the order listed.
block = choice
  [ codeBlock
  , referenceKey
  , note
  , header
  , hrule
  , list
  , blockQuote
  , rawHtmlBlocks
  , rawLaTeXEnvironment
  , para
  , plain
  , blankBlock
  , nullBlock
  ] <?> "block"
--
-- header blocks
--
-- | A header in either setext (underlined) or ATX (@#@-prefixed) form.
header = (setextHeader <|> atxHeader) <?> "header"

-- | ATX header: the number of leading 'atxHChar's gives the level.
atxHeader = try $ do
  hashes <- many1 (char atxHChar)
  skipSpaces
  contents <- many1 (notFollowedBy' atxClosing >> inline)
  atxClosing
  return $ Header (length hashes) (normalizeSpaces contents)

-- | End of an ATX header: optional closing 'atxHChar's, optional spaces,
-- a newline, and any blank lines after it.
atxClosing = try $ do
  skipMany (char atxHChar)
  skipSpaces
  newline
  option "" blanklines

-- | Setext header of any level; one level per entry in 'setextHChars'.
setextHeader = choice [ setextH level | level <- [1 .. length setextHChars] ]

-- | Setext header of level @n@: a line of inlines followed by a line made
-- of the @n@th character of 'setextHChars'.
setextH n = try $ do
  contents <- many1 (notFollowedBy newline >> inline)
  endline
  many1 (char underline)
  skipSpaces
  newline
  option "" blanklines
  return $ Header n (normalizeSpaces contents)
  where
    underline = setextHChars !! (n - 1)
--
-- hrule block
--
-- | Horizontal rule built from @chr@: at least three occurrences of the
-- character, each optionally preceded by spaces, then any mix of the
-- character and whitespace up to the end of the line.
hruleWith chr = try $ do
  count 3 (skipSpaces >> char chr)
  skipMany (oneOf (chr : spaceChars))
  newline
  option "" blanklines
  return HorizontalRule

-- | Horizontal rule made from any one of 'hruleChars'.
hrule = choice [ hruleWith c | c <- hruleChars ] <?> "hrule"
--
-- code blocks
--
-- | One indented line; returns its text without the indentation and
-- with a newline appended.
indentedLine = try $ do
  indentSpaces
  contents <- manyTill anyChar newline
  return (contents ++ "\n")

-- | Two or more indented lines, possibly separated by blank lines.
indentedBlock = try $ do
  firstLine <- indentedLine
  gap       <- many blankline
  remainder <- indentedBlock <|> indentedLine
  return (firstLine ++ gap ++ remainder)

-- | Indented code block, followed by optional blank lines.
codeBlock = do
  contents <- indentedBlock <|> indentedLine
  option "" blanklines
  return (CodeBlock contents)
--
-- note block
--
-- | Note block: a note reference followed by the note text.  Continuation
-- lines begin with 'noteStart' (plus an optional space).  The collected
-- text is re-parsed as blocks with the parser context set to
-- 'BlockQuoteState'.
note = try $ do
  NoteRef ref <- noteRef
  skipSpaces
  rawLines <- sepBy noteLine continuationStart
  newline
  blanklines
  -- parse the extracted block, which may contain various block elements:
  st <- getState
  let source   = joinWithSep "\n" rawLines ++ "\n\n"
      contents =
        case runParser parseBlocks (st { stateParserContext = BlockQuoteState }) "block" source of
          Left err     -> error $ "Raw block:\n" ++ show rawLines ++ "\nError:\n" ++ show err
          Right result -> result
  return (Note ref contents)
  where
    -- a soft line break that is not followed by a continuation marker
    softBreak = try (do { endline; notFollowedBy (char noteStart); return '\n' })
    noteLine = many (choice [nonEndline, softBreak])
    continuationStart = try (do { newline; char noteStart; option ' ' (char ' ') })
--
-- block quotes
--
-- | Emacs-style box quote: a @,----@ line, lines beginning with @|@, and
-- a closing @`----@ line.  Returns the quoted lines without the leading
-- bar and optional space.
emacsBoxQuote = try $ do
  string ",----"
  manyTill anyChar newline
  quoted <- manyTill boxLine (string "`----")
  manyTill anyChar newline
  option "" blanklines
  return quoted
  where
    boxLine = try $ do
      char '|'
      option ' ' (char ' ')
      manyTill anyChar newline

-- | Start of an email-style quoted line: non-indenting spaces, the
-- 'blockQuoteChar', and an optional space.
emailBlockQuoteStart = try $ do
  skipNonindentSpaces
  char blockQuoteChar
  option ' ' (char ' ')
  return "> "

-- | Email-style block quote.  Returns the raw quoted lines with their
-- quote markers removed; lines without a marker that follow a soft line
-- break are folded into the preceding line.
emailBlockQuote = try $ do
  emailBlockQuoteStart
  quoted <- sepBy quotedLine (try (newline >> emailBlockQuoteStart))
  newline <|> (eof >> return '\n')
  option "" blanklines
  return quoted
  where
    lazyBreak = try (do { endline; notFollowedBy' emailBlockQuoteStart; return '\n' })
    quotedLine = many (choice [nonEndline, lazyBreak])

-- | Block quote in either supported form.  The raw quoted text is
-- re-parsed as blocks with the parser context set to 'BlockQuoteState'.
blockQuote = do
  rawLines <- emailBlockQuote <|> emacsBoxQuote
  -- parse the extracted block, which may contain various block elements:
  st <- getState
  let source   = joinWithSep "\n" rawLines ++ "\n\n"
      contents =
        case runParser parseBlocks (st { stateParserContext = BlockQuoteState }) "block" source of
          Left err     -> error $ "Raw block:\n" ++ show rawLines ++ "\nError:\n" ++ show err
          Right result -> result
  return (BlockQuote contents)
--
-- list blocks
--
-- | A bullet list or an ordered list.
list = (bulletList <|> orderedList) <?> "list"

-- | Start of a bullet list item: optional newline, non-indenting spaces,
-- a bullet marker, and at least one whitespace character.
bulletListStart = try $ do
  -- a newline may be left over when a Plain block precedes in a list context
  option ' ' newline
  skipNonindentSpaces
  -- horizontal rules begin just like bullet list items
  notFollowedBy' hrule
  oneOf bulletListMarkers
  spaceChar
  skipSpaces

-- | Start of an ordered list item: optional newline, non-indenting
-- spaces, digits, a delimiter, and at least one whitespace character.
orderedListStart = try $ do
  -- a newline may be left over when a Plain block precedes in a list context
  option ' ' newline
  skipNonindentSpaces
  many1 digit
  oneOf orderedListDelimiters
  oneOf spaceChars
  skipSpaces
-- | Parse one raw line of a list item (@start@ = parser for the beginning
-- of a list item).  The line is rejected if it begins a new item, is
-- blank, or is an indented line that itself begins a list item (a nested
-- list).  Returns the line text with a newline appended.
listLine start = try (do
  notFollowedBy' start
  notFollowedBy blankline
  -- an indented list marker belongs to a continuation, not to this item
  notFollowedBy' (try (do{ indentSpaces;
                           many (spaceChar);
                           choice [bulletListStart, orderedListStart]}))
  line <- manyTill anyChar newline
  return (line ++ "\n"))
-- | Parse the raw text of one list item, excluding the start marker and
-- any continuations: the marker, one or more item lines, and any blank
-- lines that follow (which are kept in the result).
rawListItem start =
  try (do
    start
    result <- many1 (listLine start)
    blanks <- many blankline
    return ((concat result) ++ blanks))
-- | Continuation of a list item - indented and separated by blankline
-- or (in compact lists) endline.
-- Note: nested lists are parsed as continuations.
listContinuation start =
  try (do
    -- lookahead only: the first line must be indented
    followedBy' indentSpaces
    result <- many1 (listContinuationLine start)
    blanks <- many blankline
    return ((concat result) ++ blanks))
-- | One line of a list continuation: not blank and not the start of a new
-- item.  One level of indentation is removed if present.
listContinuationLine start = try (do
  notFollowedBy blankline
  notFollowedBy' start
  option "" indentSpaces
  result <- manyTill anyChar newline
  return (result ++ "\n"))
-- | Parse a complete list item (raw first part plus continuations) and
-- re-parse its text as blocks.
listItem start = try $ do
  firstPart     <- rawListItem start
  continuations <- many (listContinuation start)
  -- parsing with ListItemState forces markers at beginning of lines to
  -- count as list item markers, even if not separated by blank space.
  -- see definition of "endline"
  st <- getState
  let source    = concat (firstPart : continuations)
      itemState = st { stateParserContext = ListItemState }
      contents  =
        case runParser parseBlocks itemState "block" source of
          Left err     -> error $ "Raw block:\n" ++ source ++ "\nError:\n" ++ show err
          Right result -> result
  return contents

-- | One or more ordered list items, passed through 'compactify'.
orderedList = try $ do
  items <- many1 (listItem orderedListStart)
  return $ OrderedList (compactify items)

-- | One or more bullet list items, passed through 'compactify'.
bulletList = try $ do
  items <- many1 (listItem bulletListStart)
  return $ BulletList (compactify items)
--
-- paragraph block
--
-- | Paragraph: inline elements ended by a newline, followed either by
-- blank lines or (checked by lookahead) by the start of a block quote.
para = try $ do
  inlines <- many1 inline
  newline
  choice [ followedBy' (oneOfStrings [">", ",----"]) >> return ""
         , blanklines ]
  return $ Para (normalizeSpaces inlines)

-- | Inline elements that are not terminated as a paragraph.
plain = do
  inlines <- many1 inline
  return $ Plain (normalizeSpaces inlines)
--
-- raw html
--
-- | Parse one or more consecutive raw HTML blocks and merge them into a
-- single 'RawHtml' block, dropping one trailing newline if present.
rawHtmlBlocks = try $ do
  htmlBlocks <- many1 rawHtmlBlock
  let combined = concatMap (\(RawHtml str) -> str) htmlBlocks
      -- 'last' and 'init' are partial, so the empty string is checked first
      combined' = if not (null combined) && last combined == '\n'
                     then init combined   -- strip extra newline
                     else combined
  return (RawHtml combined')
--
-- reference key
--
-- | Reference key definition: a label, 'labelSep', a source (optionally
-- wrapped in autolink delimiters), an optional title, and blank lines.
referenceKey = try $ do
  skipSpaces
  label <- reference
  char labelSep
  skipSpaces
  option ' ' (char autoLinkStart)
  url <- many (noneOf (titleOpeners ++ [autoLinkEnd] ++ endLineChars))
  option ' ' (char autoLinkEnd)
  linkTitle <- option "" title
  blanklines
  return $ Key label (Src (removeTrailingSpace url) linkTitle)
--
-- inline
--
-- | Textual inline elements; alternatives are tried in the order listed.
text = choice
  [ math, strong, emph, code2, code1, str, linebreak, tabchar
  , whitespace, endline
  ] <?> "text"

-- | Any inline element; alternatives are tried in the order listed.
inline = choice
  [ rawLaTeXInline, escapedChar, special, hyphens, text, ltSign, symbol
  ] <?> "inline"

-- | Links, inline HTML, images and note references.
special = choice
  [ link, referenceLink, rawHtmlInline, autoLink, image, noteRef
  ] <?> "link, inline html, note, or image"

-- | An escaped character (any character may be escaped).
escapedChar = escaped anyChar

-- | A literal @<@ that does not begin raw HTML.
ltSign = do
  notFollowedBy' rawHtmlBlocks -- don't return < if it starts html
  char '<'
  return (Str "<")

-- | 'specialChars' without @<@, which 'ltSign' handles.
specialCharsMinusLt = [ c | c <- specialChars, c /= '<' ]

-- | A special character taken literally.
symbol = do
  c <- oneOf specialCharsMinusLt
  return (Str [c])

-- | A run of hyphens, returned as a string.  After a single hyphen a
-- following soft line break is consumed, so it is not treated as a space.
hyphens = try $ do
  dashes <- many1 (char '-')
  case dashes of
    [_] -> skipEndline
    _   -> return Space
  return (Str dashes)
-- | Inline code between a single 'codeStart' and 'codeEnd'.  Internal
-- newlines become spaces and surrounding space is removed.
code1 = try $ do
  char codeStart
  raw <- many (noneOf [codeEnd])
  char codeEnd
  -- get rid of any internal newlines
  return $ Code (removeLeadingTrailingSpace (joinWithSep " " (lines raw)))

-- | Inline code between doubled 'codeStart's and doubled 'codeEnd's.
-- Internal newlines become spaces and surrounding space is removed.
code2 = try $ do
  string [codeStart, codeStart]
  raw <- manyTill anyChar (try (string [codeEnd, codeEnd]))
  -- get rid of any internal newlines
  return $ Code (removeLeadingTrailingSpace (joinWithSep " " (lines raw)))

-- | One whitespace-free word inside inline math.  A backslash is accepted
-- only when it is not directly followed by 'mathEnd'.
mathWord = many1 (plainChar <|> loneBackslash)
  where
    plainChar = noneOf (" \t\n\\" ++ [mathEnd])
    loneBackslash = try $ do
      backslash <- char '\\'
      notFollowedBy (char mathEnd)
      return backslash

-- | Inline math between 'mathStart' and 'mathEnd'.  The opening delimiter
-- must not be followed by whitespace; runs of whitespace between words
-- collapse to single spaces.
math = try $ do
  char mathStart
  notFollowedBy space
  pieces <- sepBy1 mathWord (many1 space)
  char mathEnd
  return $ TeX ("$" ++ joinWithSep " " pieces ++ "$")
-- | Emphasis delimited by 'emphStart'/'emphEnd' or their alternates.
emph = do
  contents <- enclosed (char emphStart) (char emphEnd) inline
          <|> enclosed (char emphStartAlt) (char emphEndAlt) inline
  return $ Emph (normalizeSpaces contents)

-- | Strong emphasis delimited by doubled emphasis characters.
strong = do
  contents <- enclosed (count 2 (char emphStart)) (count 2 (char emphEnd)) inline
          <|> enclosed (count 2 (char emphStartAlt)) (count 2 (char emphEndAlt)) inline
  return $ Strong (normalizeSpaces contents)

-- | One or more whitespace characters, as a single 'Space'.
whitespace = do
  many1 (oneOf spaceChars) <?> "whitespace"
  return Space

-- | A tab character, kept literally.
tabchar = tab >> return (Str "\t")

-- | Hard line break: two or more whitespace characters before a soft
-- line break.
linebreak = try $ do
  oneOf spaceChars
  many1 (oneOf spaceChars)
  endline
  return LineBreak

-- | Any character that does not end a line.
nonEndline = noneOf endLineChars

-- | A run of ordinary characters (no special characters, whitespace or
-- line endings), passed through 'decodeEntities'.
str = do
  chars <- many1 (noneOf (specialChars ++ spaceChars ++ endLineChars))
  return $ Str (decodeEntities chars)
-- | A newline that can be treated as a space rather than as a structural
-- break.  It must not be followed by a blank line and, when parsing list
-- item contents, must not be followed by the start of a list item.
endline = try $ do
  newline
  -- Also rejecting a following block quote start here would allow block
  -- quotes without a preceding blank line.  Markdown.pl does allow this,
  -- but there's a chance of a wrapped greater-than sign triggering a
  -- block quote by accident, so that check is deliberately not made.
  notFollowedBy blankline
  -- parse potential list starts at beginning of line differently if in a list:
  st <- getState
  if stateParserContext st == ListItemState
     then notFollowedBy' orderedListStart >> notFollowedBy' bulletListStart
     else return ()
  return Space
--
-- links
--
-- | A reference label for a link: inline elements between 'labelStart'
-- and 'labelEnd'.
reference = do
  char labelStart
  contents <- manyTill inline (char labelEnd)
  return (normalizeSpaces contents)

-- | Source for a link, with optional title, between 'srcStart' and
-- 'srcEnd'.  The URL may be wrapped in autolink delimiters.
source = try $ do
  char srcStart
  option ' ' (char autoLinkStart)
  url <- many (noneOf ([srcEnd, autoLinkEnd] ++ titleOpeners))
  option ' ' (char autoLinkEnd)
  linkTitle <- option "" title
  skipSpaces
  char srcEnd
  return $ Src (removeTrailingSpace url) linkTitle

-- | Link title delimited by @startChar@ and @endChar@.  A backslash
-- followed by @endChar@ yields a literal @endChar@.  Double quotes in
-- the result are replaced by @&quot;@.
titleWith startChar endChar = try $ do
  skipSpaces
  skipEndline -- a title can be on the next line from the source
  skipSpaces
  char startChar
  chars <- manyTill titleChar (char endChar)
  return (gsub "\"" "&quot;" chars)
  where
    escapedEnd = try (char '\\' >> char endChar)
    titleChar  = choice [escapedEnd, noneOf (endChar : endLineChars)]

-- | Link title in parentheses, double quotes or single quotes.
title = choice
  [ titleWith '(' ')'
  , titleWith '"' '"'
  , titleWith '\'' '\''
  ] <?> "title"
-- | An explicit or reference-style link.
link = (explicitLink <|> referenceLink) <?> "link"

-- | A link with an inline source: a label immediately followed by a source.
explicitLink = try $ do
  label  <- reference
  target <- source
  return (Link label target)

-- | A reference link with or without a separate reference label.
referenceLink = referenceLinkDouble <|> referenceLinkSingle

-- | A link like [this][/url/]; the two bracketed parts may be separated
-- by spaces and one line break.
referenceLinkDouble = try $ do
  label <- reference
  skipSpaces
  skipEndline
  skipSpaces
  ref <- reference
  return (Link label (Ref ref))

-- | A link like [this]; the reference is left empty.
referenceLinkSingle = try $ do
  label <- reference
  return (Link label (Ref []))

-- | A link <like.this.com>.  Text matching 'emailAddress' gets a
-- @mailto:@ prefix on its source.
autoLink = try $ do
  notFollowedBy (anyHtmlBlockTag >> return ' ')
  target <- between (char autoLinkStart) (char autoLinkEnd)
                    (many (noneOf (spaceChars ++ endLineChars ++ [autoLinkEnd])))
  let url = case matchRegex emailAddress target of
              Just _  -> "mailto:" ++ target
              Nothing -> target
  return $ Link [Str target] (Src url "")

-- | Regular expression for an email address; presupposes no whitespace.
emailAddress = mkRegex "([^@:/]+)@(([^.]+[.]?)*([^.]+))"

-- | An image: 'imageStart' followed by a link.
image = try $ do
  char imageStart
  Link label target <- link
  return (Image label target)

-- | A note reference: 'noteStart' followed by an identifier in parentheses.
noteRef = try $ do
  char noteStart
  ref <- between (char '(') (char ')') (many1 (noneOf " \t\n)"))
  return (NoteRef ref)

View file

@ -0,0 +1,644 @@
-- | Parse reStructuredText and return Pandoc document.
module Text.Pandoc.Readers.RST (
readRST
) where
import Text.Pandoc.Definition
import Text.ParserCombinators.Pandoc
import Text.Pandoc.Shared
import Text.Pandoc.Readers.HTML ( anyHtmlBlockTag, anyHtmlInlineTag )
import Text.Regex ( matchRegex, mkRegex )
import Text.ParserCombinators.Parsec
import Data.Maybe ( fromMaybe )
import List ( findIndex )
import Char ( toUpper )
-- | Convert a reStructuredText source string into a 'Pandoc' document,
-- running the RST parser from the supplied initial parser state.
readRST :: ParserState -> String -> Pandoc
readRST initialState source = readWith parseRST initialState source

-- | Testing aid: run the RST parser on a string and print the result.
testString :: String -> IO ()
testString source = testStringWith parseRST source
--
-- Constants and data structure definitions
--
-- | Characters that may introduce a bullet list item.
bulletListMarkers = "*+-"

-- | Characters accepted as header adornments and in transition lines.
-- reStructuredText allows every non-alphanumeric printable 7-bit ASCII
-- character here, including @%@.
underlineChars = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"

-- | Characters treated as potentially non-text when parsing inline content.
specialChars = "\\`|*_<>$:[-"
--
-- parsing documents
--
-- | True for a key block whose label is the single string @_@
-- (an anonymous link target).
isAnonKeyBlock (Key [Str "_"] _) = True
isAnonKeyBlock _                 = False

-- | Negation of 'isAnonKeyBlock'.
isNotAnonKeyBlock = not . isAnonKeyBlock

-- | True for a level-1 header.
isHeader1 :: Block -> Bool
isHeader1 blk = case blk of
  Header 1 _ -> True
  _          -> False

-- | True for a level-2 header.
isHeader2 :: Block -> Bool
isHeader2 blk = case blk of
  Header 2 _ -> True
  _          -> False
-- | Promote every header in a list of blocks by @num@ levels, leaving
-- all other blocks untouched.  (Part of title transformation for RST.)
promoteHeaders :: Int -> [Block] -> [Block]
promoteHeaders num = map promote
  where
    promote (Header level text) = Header (level - num) text
    promote other               = other
-- | If the block list starts with a level-1 header (optionally followed
-- by a level-2 header) whose level does not occur again later, remove it,
-- return it as the title, and promote the remaining headers.  A
-- title/subtitle pair is joined with a colon and a space.  Otherwise the
-- blocks are returned unchanged with an empty title.
titleTransform :: [Block]              -- ^ list of blocks
               -> ([Block], [Inline])  -- ^ modified list of blocks, title
titleTransform blocks@(Header 1 head1 : Header 2 head2 : rest)  -- title and subtitle
  | any isHeader1 rest || any isHeader2 rest = (blocks, [])
  | otherwise = (promoteHeaders 2 rest, head1 ++ [Str ":", Space] ++ head2)
titleTransform blocks@(Header 1 head1 : rest)                    -- title, no subtitle
  | any isHeader1 rest = (blocks, [])
  | otherwise          = (promoteHeaders 1 rest, head1)
titleTransform blocks = (blocks, [])
-- | Top-level RST parser.  If the first pass finds anonymous key blocks,
-- the input is parsed a second time from the initial state with those
-- keys loaded, so that anonymous links can be filled in.  In standalone
-- mode a leading title header is extracted with 'titleTransform'.
-- Authors, date and fallback title are read from the parser state after
-- the first pass.
parseRST = do
  initialState <- getState
  source       <- getInput
  firstPass    <- parseBlocks
  let anonymousKeys = filter isAnonKeyBlock firstPass
      resolved
        | null anonymousKeys = firstPass
        | otherwise =
            -- run parser again to fill in anonymous links...
            case runParser parseBlocks (initialState { stateKeyBlocks = anonymousKeys })
                           "RST source, second pass" source of
              Left err     -> error $ "\nError:\n" ++ show err
              Right result -> filter isNotAnonKeyBlock result
      (body, title) = if stateStandalone initialState
                         then titleTransform resolved
                         else (resolved, [])
  finalState <- getState
  let title' = if null title then stateTitle finalState else title
  return $ Pandoc (Meta title' (stateAuthors finalState) (stateDate finalState)) body
--
-- parsing blocks
--
-- | Parse blocks until the end of input.
parseBlocks = manyTill block eof

-- | Parse a single block.  Alternatives are attempted in the order listed.
block = choice
  [ codeBlock
  , rawHtmlBlock
  , rawLaTeXBlock
  , blockQuote
  , referenceKey
  , imageBlock
  , unknownDirective
  , header
  , hrule
  , list
  , fieldList
  , lineBlock
  , para
  , plain
  , blankBlock
  , nullBlock
  ] <?> "block"
--
-- field list
--
-- | One field list item: @:name: value@, where the value may continue on
-- following lines that are neither blank nor begin with a colon.  Returns
-- the field name and the value lines joined with single spaces.
fieldListItem = try $ do
  char ':'
  name <- many1 alphaNum
  string ": "
  skipSpaces
  firstLine <- manyTill anyChar newline
  moreLines <- many continuationLine
  return (name, joinWithSep " " (firstLine : moreLines))
  where
    continuationLine = do
      notFollowedBy (char ':')
      notFollowedBy blankline
      skipSpaces
      manyTill anyChar newline
-- | Parse a field list.  The @Authors@ (or, failing that, every @Author@),
-- @Date@ and @Title@ fields are stored in the parser state; all other
-- fields are returned as paragraphs inside a 'BlockQuote'.
fieldList = try $ do
  items <- many1 fieldListItem
  blanklines
  let authors = case lookup "Authors" items of
                  Just auth -> [auth]
                  Nothing   -> [ val | (key, val) <- items, key == "Author" ]
      date    = fromMaybe "" (lookup "Date" items)
      title   = maybe [] (\tit -> [Str tit]) (lookup "Title" items)
      bibliographic = ["Authors", "Author", "Date", "Title"]
      remaining = [ item | item@(key, _) <- items, key `notElem` bibliographic ]
      result    = [ Para [Strong [Str key], Str ":", Space, Str val]
                  | (key, val) <- remaining ]
  -- Only overwrite metadata that this field list actually supplies, so
  -- that a field list without bibliographic fields does not erase values
  -- recorded by an earlier one.
  updateState (\st -> st
    { stateAuthors = if null authors then stateAuthors st else authors
    , stateDate    = if null date    then stateDate st    else date
    , stateTitle   = if null title   then stateTitle st   else title })
  return (BlockQuote result)
--
-- line block
--
-- | One line of a line block: @| @, then inline content to the end of the
-- line.  Extra leading whitespace is kept as a string, and a 'LineBreak'
-- is appended.
lineBlockLine = try $ do
  string "| "
  leading  <- many (oneOf " \t")
  contents <- manyTill inline newline
  let indent = if null leading then [] else [Str leading]
  return (indent ++ contents ++ [LineBreak])

-- | A line block: one or more line block lines followed by blank lines,
-- returned as a single paragraph.
lineBlock = try $ do
  rows <- many1 lineBlockLine
  blanklines
  return $ Para (concat rows)
--
-- paragraph block
--
-- | A paragraph, either introducing a code block or ordinary.
para = (paraBeforeCodeBlock <|> paraNormal) <?> "paragraph"

-- | The @::@ marker followed by two blank lines.
codeBlockStart = try $ do
  string "::"
  blankline
  blankline

-- | Paragraph that ends in a @::@ starting a code block.  The @::@ itself
-- is only checked by lookahead.  If the text before it ends in a space
-- the marker is dropped from the paragraph; otherwise a single colon is
-- appended.
paraBeforeCodeBlock = try $ do
  inlines <- many1 (notFollowedBy' codeBlockStart >> inline)
  followedBy' (string "::")
  let normalized = normalizeSpaces inlines
  return $ Para (if last inlines == Space
                    then normalized
                    else normalized ++ [Str ":"])

-- | Regular paragraph: inline elements, a newline, then blank lines.
paraNormal = try $ do
  inlines <- many1 inline
  newline
  blanklines
  return $ Para (normalizeSpaces inlines)

-- | Inline elements that are not terminated as a paragraph.
plain = do
  inlines <- many1 inline
  return $ Plain (normalizeSpaces inlines)
--
-- image block
--
-- | An @.. image:: @ directive.  The rest of the line is the image
-- source; the label is the fixed string @image@.
imageBlock = try $ do
  string ".. image:: "
  target <- manyTill anyChar newline
  return $ Plain [Image [Str "image"] (Src target "")]
--
-- header blocks
--
-- | A header with adornment lines above and below, or below only.
header = (doubleHeader <|> singleHeader) <?> "header"

-- | A header with lines on top and bottom.  The title must not be longer
-- than the top line, and the bottom line must have the same length as the
-- top.  The level is the position of this adornment style in the header
-- table kept in the parser state; unseen styles are appended.
doubleHeader = try $ do
  c    <- oneOf underlineChars
  more <- many (char c)            -- the top line
  let lenTop = 1 + length more
  skipSpaces
  newline
  txt <- many1 (notFollowedBy blankline >> inline)
  pos <- getPosition
  let len = sourceColumn pos - 1
  if len > lenTop then fail "title longer than border" else return ()
  blankline                        -- spaces and newline
  count lenTop (char c)            -- the bottom line
  blanklines
  -- check to see if we've had this kind of header before.
  -- if so, get appropriate level.  if not, add to list.
  st <- getState
  let table = stateHeaderTable st
      (table', level) =
        case findIndex (== DoubleHeader c) table of
          Just ind -> (table, ind + 1)
          Nothing  -> (table ++ [DoubleHeader c], length table + 1)
  setState (st { stateHeaderTable = table' })
  return $ Header level (normalizeSpaces txt)

-- | A header with a line on the bottom only.  The underline must be at
-- least as long as the title.  The level is determined from the header
-- table in the same way as for 'doubleHeader'.
singleHeader = try $ do
  notFollowedBy' whitespace
  txt <- many1 (notFollowedBy blankline >> inline)
  pos <- getPosition
  let len = sourceColumn pos - 1
  blankline
  c <- oneOf underlineChars
  count (len - 1) (char c)
  many (char c)
  blanklines
  st <- getState
  let table = stateHeaderTable st
      (table', level) =
        case findIndex (== SingleHeader c) table of
          Just ind -> (table, ind + 1)
          Nothing  -> (table ++ [SingleHeader c], length table + 1)
  setState (st { stateHeaderTable = table' })
  return $ Header level (normalizeSpaces txt)
--
-- hrule block
--
-- | Horizontal rule built from @chr@: four or more occurrences of the
-- character, optional trailing spaces, a newline, and blank lines.
hruleWith chr = try $ do
  count 4 (char chr)
  skipMany (char chr)
  skipSpaces
  newline
  blanklines
  return HorizontalRule

-- | Horizontal rule made from any one of 'underlineChars'.
hrule = choice [ hruleWith c | c <- underlineChars ] <?> "hrule"
--
-- code blocks
--
-- | Read a line indented by the given string; returns the rest of the
-- line with a newline appended.
indentedLine indents = try $ do
  string indents
  contents <- manyTill anyChar newline
  return (contents ++ "\n")

-- | An indented block: lines sharing the indentation of the first line,
-- possibly separated by blank lines, followed by optional blank lines.
-- If @variable@ is True, any indent will work, but it must be consistent
-- through the block.  If @variable@ is False, the indent must be one tab
-- or the equivalent in spaces.
indentedBlock variable = try $ do
  st <- getState
  indents <- if variable
                then many1 (oneOf " \t")
                else oneOfStrings ["\t", replicate (stateTabStop st) ' ']
  firstline <- manyTill anyChar newline
  rest <- many (indentedLine indents <|> lineAfterGap indents)
  option "" blanklines
  return (firstline ++ "\n" ++ concat rest)
  where
    -- blank lines followed by another line at the same indentation
    lineAfterGap indents = try $ do
      gap      <- blanklines
      nextLine <- indentedLine indents
      return (gap ++ nextLine)

-- | A code block: the @::@ marker followed by a block indented by one
-- tab stop.
codeBlock = try $ do
  codeBlockStart
  contents <- indentedBlock False
  return (CodeBlock contents)
--
-- raw html
--
-- | A @.. raw:: html@ directive; its indented body is returned as raw HTML.
rawHtmlBlock = try $ do
  string ".. raw:: html"
  blanklines
  body <- indentedBlock True
  return (RawHtml body)

-- | A @.. raw:: latex@ directive; its indented body is returned as a
-- paragraph containing a single TeX element.
rawLaTeXBlock = try $ do
  string ".. raw:: latex"
  blanklines
  body <- indentedBlock True
  return $ Para [TeX body]
--
-- block quotes
--
-- | A block quote: any consistently indented block, whose text is
-- re-parsed as blocks with the parser context set to 'BlockQuoteState'.
blockQuote = try $ do
  raw <- indentedBlock True
  -- parse the extracted block, which may contain various block elements:
  st <- getState
  let quoteState = st { stateParserContext = BlockQuoteState }
      contents   =
        case runParser parseBlocks quoteState "block" (raw ++ "\n\n") of
          Left err     -> error $ "Raw block:\n" ++ show raw ++ "\nError:\n" ++ show err
          Right result -> result
  return (BlockQuote contents)
--
-- list blocks
--
-- | A bullet list or an ordered list.
list = (bulletList <|> orderedList) <?> "list"

-- | Parse a bullet list start and return its length, including the
-- whitespace that follows the marker.
bulletListStart = try $ do
  notFollowedBy' hrule -- because hrules start out just like lists
  marker  <- oneOf bulletListMarkers
  padding <- many1 spaceChar
  return (length (marker : padding))
-- | Run @parser@ and require a following period; the period is appended
-- to the result.
withPeriodSuffix parser = try $ do
  body <- parser
  dot  <- char '.'
  return (body ++ [dot])

-- | Run @parser@ between parentheses; both parentheses are kept in the
-- result.
withParentheses parser = try $ do
  open  <- char '('
  body  <- parser
  close <- char ')'
  return (open : body ++ [close])

-- | Run @parser@ and require a following right parenthesis, which is
-- appended to the result.
withRightParen parser = try $ do
  body  <- parser
  close <- char ')'
  return (body ++ [close])
-- | Upper-case every character of a word.
upcaseWord = map toUpper

-- | Parse a roman numeral from 1 to 24, all lower case or all upper case,
-- and return it as written.  The longest matching numeral wins.
romanNumeral = choice (map (try . string) (candidates ++ map upcaseWord candidates))
  where
    lowerNumerals = [ "i", "ii", "iii", "iiii", "iv", "v", "vi", "vii", "viii"
                    , "ix", "x", "xi", "xii", "xiii", "xiv", "xv", "xvi", "xvii"
                    , "xviii", "xix", "xx", "xxi", "xxii", "xxiii", "xxiv" ]
    -- Within this table every numeral that is a prefix of another has the
    -- smaller value, so trying them in descending order guarantees that a
    -- numeral is never attempted before a longer one it is a prefix of.
    -- Each alternative is wrapped in 'try' because 'string' consumes
    -- input on a partial match.
    candidates = reverse lowerNumerals

-- | Enumerator of an ordered list item: digits, the auto-enumerator @#@,
-- a roman numeral, or a single letter.  Roman numerals are tried before
-- single letters so that multi-character numerals such as @ii@ or @iv@
-- are recognized as a whole.
orderedListEnumerator = choice [ many1 digit,
                                 string "#",
                                 romanNumeral,
                                 count 1 letter ]
-- | Parse an ordered list start and return its length, including the
-- whitespace that follows the marker.  The enumerator may be followed by
-- a period, wrapped in parentheses, or followed by a right parenthesis.
orderedListStart = try $ do
  marker <- choice
    [ withPeriodSuffix orderedListEnumerator
    , withParentheses orderedListEnumerator
    , withRightParen orderedListEnumerator ]
  padding <- many1 spaceChar
  return (length (marker ++ padding))
-- | Parse a non-blank line of a list item that is indented by the width
-- of the item's marker; returns the line text with a newline appended.
listLine markerLength = try $ do
  notFollowedBy blankline
  indentWith markerLength
  contents <- manyTill anyChar newline
  return (contents ++ "\n")

-- | Indent by the specified number of spaces.  When the width is at least
-- one tab stop, a tab followed by the remaining spaces is also accepted.
indentWith num = do
  st <- getState
  let tabStop       = stateTabStop st
      spacesOnly    = count num (char ' ')
      tabThenSpaces = char '\t' >> count (num - tabStop) (char ' ')
  if num < tabStop
     then spacesOnly
     else try spacesOnly <|> try tabThenSpaces
-- | Parse raw text for one list item, excluding the start marker and
-- continuations.  Returns the marker length together with the text.
rawListItem start = try $ do
  markerLength <- start
  firstLine    <- manyTill anyChar newline
  restLines    <- many (listLine markerLength)
  return (markerLength, firstLine ++ "\n" ++ concat restLines)

-- | Continuation of a list item: one or more blank lines followed by
-- lines indented by the marker length.  Note: nested lists are parsed as
-- continuations.
listContinuation markerLength = try $ do
  gap       <- many1 blankline
  moreLines <- many1 (listLine markerLength)
  return (gap ++ concat moreLines)

-- | Parse a complete list item and re-parse its text as blocks.  The
-- item must be followed either by another item start (after optional
-- blank lines) or by at least one blank line.
listItem start = try $ do
  (markerLength, firstPart) <- rawListItem start
  continuations <- many (listContinuation markerLength)
  trailing <- choice
    [ try (do { b <- many blankline; followedBy' start; return b })
    , many1 blankline ]  -- whole list must end with blank
  -- parsing with ListItemState forces markers at beginning of lines to
  -- count as list item markers, even if not separated by blank space.
  -- see definition of "endline"
  st <- getState
  let source    = concat (firstPart : continuations) ++ trailing
      itemState = st { stateParserContext = ListItemState }
      contents  =
        case runParser parseBlocks itemState "list item" source of
          Left err     -> error $ "Raw:\n" ++ source ++ "\nError:\n" ++ show err
          Right result -> result
  return contents
-- | One or more ordered list items, passed through 'compactify'.
orderedList = try $ do
  items <- many1 (listItem orderedListStart)
  return $ OrderedList (compactify items)

-- | One or more bullet list items, passed through 'compactify'.
bulletList = try $ do
  items <- many1 (listItem bulletListStart)
  return $ BulletList (compactify items)
--
-- unknown directive (e.g. comment)
--
-- | An unrecognized directive (e.g. a comment): a @.. @ line, any
-- @:name: value@ option lines that follow, and optional blank lines.
-- Everything is discarded and 'Null' is returned.
unknownDirective = try $ do
  string ".. "
  manyTill anyChar newline
  many optionLine
  option "" blanklines
  return Null
  where
    optionLine = do
      string " "
      char ':'
      many1 (noneOf "\n:")
      char ':'
      many1 (noneOf "\n")
      newline
--
-- reference key
--
-- | Any kind of reference key; alternatives are tried in the order listed.
referenceKey = choice [ imageKey, anonymousKey, regularKeyQuoted, regularKey ]

-- | Key for an image substitution: @.. |label| image:: source@.
imageKey = try $ do
  string ".. |"
  label <- manyTill inline (char '|')
  skipSpaces
  string "image::"
  target <- manyTill anyChar newline
  return $ Key (normalizeSpaces label) (Src (removeLeadingTrailingSpace target) "")

-- | Anonymous key: @.. __:@ or @__@ followed by the target.  The label
-- is always the single string @_@.
anonymousKey = try $ do
  choice [ string ".. __:", string "__" ]
  skipSpaces
  target <- manyTill anyChar newline
  return $ Key [Str "_"] (Src (removeLeadingTrailingSpace target) "")

-- | Key whose label is quoted with backticks: @.. _`label`: target@.
regularKeyQuoted = try $ do
  string ".. _`"
  label <- manyTill inline (string "`:")
  skipSpaces
  target <- manyTill anyChar newline
  return $ Key (normalizeSpaces label) (Src (removeLeadingTrailingSpace target) "")

-- | Ordinary key: @.. _label: target@.
regularKey = try $ do
  string ".. _"
  label <- manyTill inline (char ':')
  skipSpaces
  target <- manyTill anyChar newline
  return $ Key (normalizeSpaces label) (Src (removeLeadingTrailingSpace target) "")
--
-- inline
--
-- | Textual inline elements; alternatives are tried in the order listed.
text = choice
  [ strong, emph, code, str, tabchar, whitespace, endline
  ] <?> "text"

-- | Any inline element; alternatives are tried in the order listed.
inline = choice
  [ escapedChar, special, hyphens, text, symbol
  ] <?> "inline"

-- | Links and images.
special = (link <|> image) <?> "link, inline html, or image"

-- | A run of hyphens, returned as a string.  A following soft line break
-- is consumed, so an endline after a hyphen or dash is not treated as a
-- space.
hyphens = try $ do
  dashes <- many1 (char '-')
  option Space endline
  return (Str dashes)

-- | An escaped character (any character may be escaped).
escapedChar = escaped anyChar

-- | A special character taken literally.
symbol = do
  c <- oneOf specialChars
  return (Str [c])

-- | Inline code between double backticks.  Internal newlines become
-- spaces and surrounding space is removed.
code = try $ do
  string "``"
  raw <- manyTill anyChar (string "``")
  return $ Code (removeLeadingTrailingSpace (joinWithSep " " (lines raw)))

-- | Emphasis between single asterisks.
emph = do
  contents <- enclosed (char '*') (char '*') inline
  return $ Emph (normalizeSpaces contents)

-- | Strong emphasis between double asterisks.
strong = do
  contents <- enclosed (string "**") (string "**") inline
  return $ Strong (normalizeSpaces contents)

-- | One or more whitespace characters, as a single 'Space'.
whitespace = do
  many1 spaceChar <?> "whitespace"
  return Space

-- | A tab character, kept literally.
tabchar = tab >> return (Str "\t")

-- | A run of ordinary characters that does not begin a one-word
-- reference link.
str = do
  notFollowedBy' oneWordReferenceLink
  chars <- many1 (noneOf (specialChars ++ "\t\n "))
  return (Str chars)
-- | A newline that can be treated as a space rather than as a structural
-- break.  It must not be followed by a blank line and, when parsing list
-- item contents, must not be followed by the start of a list item.
endline = try $ do
  newline
  notFollowedBy blankline
  -- parse potential list starts at beginning of line differently if in a list:
  st <- getState
  if stateParserContext st == ListItemState
     then notFollowedBy' (orderedListStart <|> bulletListStart)
     else return ()
  return Space
--
-- links
--
-- | Any kind of link; alternatives are tried in the order listed.
link = choice
  [ explicitLink, referenceLink, autoLink, oneWordReferenceLink
  ] <?> "link"

-- | A link with an embedded target, written as backtick, label,
-- @<target>@, backtick, underscore.
explicitLink = try $ do
  char '`'
  label  <- manyTill inline (try (spaces >> char '<'))
  target <- manyTill (noneOf ">\n ") (char '>')
  skipSpaces
  string "`_"
  return $ Link (normalizeSpaces label) (Src (removeLeadingTrailingSpace target) "")

-- | The extra underscore that marks an anonymous link.  If the parser
-- state holds anonymous key blocks (from a previous pass), the first one
-- is popped and its target returned; otherwise an anonymous reference is
-- returned for the next pass to take care of.
anonymousLinkEnding = try $ do
  char '_'
  st <- getState
  case stateKeyBlocks st of
    (Key [Str "_"] target : rest) -> do
      setState (st { stateKeyBlocks = rest })
      return target
    _ -> return (Ref [Str "_"])

-- | A reference link whose label is quoted with backticks, optionally
-- anonymous.
referenceLink = try $ do
  char '`'
  label  <- manyTill inline (string "`_")
  target <- option (Ref []) anonymousLinkEnding
  return $ Link (normalizeSpaces label) target

-- | A reference link consisting of a single alphanumeric word followed
-- by an underscore, optionally anonymous.
oneWordReferenceLink = try $ do
  label <- many1 alphaNum
  char '_'
  target <- option (Ref []) anonymousLinkEnding
  notFollowedBy alphaNum -- because this_is_not a link
  return $ Link [Str label] target
-- | One of the URI schemes recognized for automatic links.
uriScheme = oneOfStrings
  [ "http://", "https://", "ftp://", "file://", "mailto:"
  , "news:", "telnet:" ]

-- | A URI: a recognized scheme followed by one or more non-whitespace
-- characters.
uri = try $ do
  scheme <- uriScheme
  rest   <- many1 (noneOf " \t\n")
  return (scheme ++ rest)

-- | A bare URI turned into a link to itself.
autoURI = try $ do
  target <- uri
  return $ Link [Str target] (Src target "")

-- | A character allowed in the local part of an email address (after the
-- first character).
emailChar = alphaNum <|> oneOf "-+_."

-- | An email address: an alphanumeric first character, further
-- 'emailChar's, @\@@, and a domain.
emailAddress = try $ do
  firstLetter <- alphaNum
  restAddr    <- many emailChar
  char '@'
  dom <- domain
  return (firstLetter : restAddr ++ '@' : dom)

-- | A character allowed in a domain label.
domainChar = alphaNum <|> char '-'

-- | A domain name: at least two labels separated by periods.
domain = try $ do
  firstLabel <- many1 domainChar
  moreLabels <- many1 (try (char '.' >> many1 domainChar))
  return (joinWithSep "." (firstLabel : moreLabels))

-- | A bare email address turned into a @mailto:@ link.
autoEmail = try $ do
  addr <- emailAddress
  return $ Link [Str addr] (Src ("mailto:" ++ addr) "")

-- | A bare URI or email address.
autoLink = autoURI <|> autoEmail

-- | A substitution reference between vertical bars.  For now, all
-- substitution references are assumed to be for images.
image = try $ do
  char '|'
  ref <- manyTill inline (char '|')
  return $ Image (normalizeSpaces ref) (Ref ref)

417
src/Text/Pandoc/Shared.hs Normal file
View file

@ -0,0 +1,417 @@
-- | Utility functions and definitions used by the various Pandoc modules.
module Text.Pandoc.Shared (
-- * Text processing
gsub,
joinWithSep,
tabsToSpaces,
backslashEscape,
escapePreservingRegex,
endsWith,
stripTrailingNewlines,
removeLeadingTrailingSpace,
removeLeadingSpace,
removeTrailingSpace,
-- * Parsing
readWith,
testStringWith,
HeaderType (..),
ParserContext (..),
ParserState (..),
defaultParserState,
-- * Native format prettyprinting
prettyPandoc,
-- * Pandoc block list processing
consolidateList,
isNoteBlock,
splitBySpace,
normalizeSpaces,
compactify,
generateReference,
WriterOptions (..),
KeyTable,
keyTable,
lookupKeySrc,
refsMatch,
replaceReferenceLinks,
replaceRefLinksBlockList
) where
import Text.Pandoc.Definition
import Text.ParserCombinators.Parsec
import Text.Pandoc.HtmlEntities ( decodeEntities )
import Text.Regex ( matchRegexAll, mkRegex, subRegex, Regex )
import Char ( toLower )
import List ( find, groupBy )
-- | Run a parser over an input string, starting from the given state.
-- A parse failure is raised as an error carrying the parser's message.
readWith :: GenParser Char ParserState a      -- ^ parser
         -> ParserState                       -- ^ initial state
         -> String                            -- ^ input string
         -> a
readWith parser state input =
  either parseFailure id (runParser parser state "source" input)
  where
    parseFailure err = error $ "\nError:\n" ++ show err

-- | Run @parser@ on a string from the default parser state and print
-- the result (for testing).
testStringWith :: (Show a) =>
                  GenParser Char ParserState a
               -> String
               -> IO ()
testStringWith parser str = print (readWith parser defaultParserState str)
-- | Style of header adornment, tagged with the adornment character.
data HeaderType
    = SingleHeader Char  -- ^ Single line of characters underneath
    | DoubleHeader Char  -- ^ Lines of characters above and below
    deriving (Eq, Show)
-- | What kind of content the parser is currently running on.
data ParserContext
    = BlockQuoteState  -- ^ Used when running parser on contents of blockquote
    | ListItemState    -- ^ Used when running parser on list item contents
    | NullState        -- ^ Default state
    deriving (Eq, Show)
-- | Parser state
data ParserState = ParserState
    { stateParseRaw      :: Bool,          -- ^ Parse untranslatable HTML and LaTeX?
      stateParserContext :: ParserContext, -- ^ What are we parsing?
      stateKeyBlocks     :: [Block],       -- ^ List of reference key blocks
      stateKeysUsed      :: [[Inline]],    -- ^ List of references used so far
      stateNoteBlocks    :: [Block],       -- ^ List of note blocks
      stateTabStop       :: Int,           -- ^ Tab stop
      stateStandalone    :: Bool,          -- ^ If @True@, parse bibliographic info
      stateTitle         :: [Inline],      -- ^ Title of document
      stateAuthors       :: [String],      -- ^ Authors of document
      stateDate          :: String,        -- ^ Date of document
      stateHeaderTable   :: [HeaderType] } -- ^ List of header types used, in what order (for reStructuredText only)
    deriving Show
-- | Default parser state: raw parsing off, null context, no keys, notes
-- or metadata recorded, a tab stop of 4, and standalone mode off.
defaultParserState :: ParserState
defaultParserState = ParserState
  { stateParseRaw      = False
  , stateParserContext = NullState
  , stateKeyBlocks     = []
  , stateKeysUsed      = []
  , stateNoteBlocks    = []
  , stateTabStop       = 4
  , stateStandalone    = False
  , stateTitle         = []
  , stateAuthors       = []
  , stateDate          = ""
  , stateHeaderTable   = []
  }
-- | Merge neighbouring @Str@s and @Space@s in an inline list into single
-- @Str@s, so that adjacent @Space@s collapse into one space.  Other
-- inline elements are left in place.
consolidateList :: [Inline] -> [Inline]
consolidateList inlines = case inlines of
  (Str a : Str b : rest) -> consolidateList (Str (a ++ b) : rest)
  (Str a : Space : rest) -> consolidateList (Str (a ++ " ") : rest)
  (Space : Str a : rest) -> consolidateList (Str (' ' : a) : rest)
  (Space : Space : rest) -> consolidateList (Str " " : rest)
  (first : rest)         -> first : consolidateList rest
  []                     -> []
-- | Indent a string as a block.  The first line is followed by a
-- newline; the remaining lines are joined with newlines.  An empty
-- string stays empty.
indentBy :: Int    -- ^ Number of spaces to indent the block
         -> Int    -- ^ Number of spaces to indent first line, relative to block
         -> String -- ^ Contents of block to indent
         -> String
indentBy _ _ "" = ""
indentBy num first str =
  pad (num + first) ++ firstLine ++ "\n" ++ joinWithSep "\n" (map (pad num ++) restLines)
  where
    (firstLine : restLines) = lines str
    pad width = replicate width ' '
-- | Pretty-print a list of Pandoc block elements as a bracketed list, one
-- block per line, indented as a unit.
prettyBlockList :: Int     -- ^ Number of spaces to indent list of blocks
                -> [Block] -- ^ List of blocks
                -> String
prettyBlockList indent blocks
    | null blocks = indentBy indent 0 "[]"
    | otherwise   = indentBy indent (-2) ("[ " ++ joinWithSep "\n, " rendered ++ " ]")
  where
    rendered = map prettyBlock blocks
-- | Pretty-print a single Pandoc block element.  Container blocks (block
-- quotes, notes and lists) are laid out over several lines; every other
-- block falls back to its 'show' form.
prettyBlock :: Block -> String
prettyBlock block = case block of
    BlockQuote blocks      -> "BlockQuote\n " ++ prettyBlockList 2 blocks
    Note ref blocks        -> "Note " ++ show ref ++ "\n " ++ prettyBlockList 2 blocks
    OrderedList blockLists -> "OrderedList\n" ++ prettyItems blockLists
    BulletList blockLists  -> "BulletList\n" ++ prettyItems blockLists
    _                      -> show block
  where
    -- shared layout for the items of ordered and bullet lists
    prettyItems blockLists =
        indentBy 2 0 ("[ " ++ joinWithSep ", " (map (prettyBlockList 2) blockLists)) ++ " ]"
-- | Pretty-print a whole Pandoc document: the metadata in 'show' form on
-- the first line, followed by the pretty-printed block list.
prettyPandoc :: Pandoc -> String
prettyPandoc (Pandoc meta blocks) =
    concat ["Pandoc ", "(", show meta, ")\n", prettyBlockList 0 blocks]
-- | Convert tabs to spaces (with adjustable tab stop).  The input is
-- processed line by line and re-joined with 'unlines'.
tabsToSpaces :: Int    -- ^ Tabstop
             -> String -- ^ String to convert
             -> String
tabsToSpaces tabstop = unlines . map expandLine . lines
  where
    -- every line starts a full tab stop away from the next stop
    expandLine = tabsInLine tabstop tabstop
-- | Convert tabs to spaces in one line.  A tab is replaced by exactly as
-- many spaces as are needed to reach the next tab stop; any other character
-- is copied and moves the position one column closer to that stop.
tabsInLine :: Int    -- ^ Number of spaces to next tab stop
           -> Int    -- ^ Tabstop
           -> String -- ^ Line to convert
           -> String
tabsInLine _ _ [] = []
tabsInLine num tabstop (c:cs) = replacement ++ tabsInLine nextnum tabstop cs
  where
    replacement
      | c == '\t' = replicate num ' '
      | otherwise = [c]
    remaining = num - length replacement
    -- once a stop has been reached, start counting towards the following one
    nextnum
      | remaining < 1 = remaining + tabstop
      | otherwise     = remaining
-- | Substitute a string for every occurrence of a regular expression.
-- The pattern is compiled with 'mkRegex' and applied with 'subRegex'.
gsub :: String -- ^ Regular expression (as string) to substitute for
     -> String -- ^ String to substitute for the regex
     -> String -- ^ String to be substituted in
     -> String
gsub regex replacement str =
    let compiled = mkRegex regex
    in subRegex compiled str replacement
-- | Escape designated characters with a backslash: every character of the
-- input that occurs in the list of special characters is prefixed with
-- @\\@; all other characters are copied unchanged.
backslashEscape :: [Char] -- ^ list of special characters to escape
                -> String -- ^ string input
                -> String
backslashEscape special = concatMap escapeChar
  where
    escapeChar x
      | x `elem` special = ['\\', x]
      | otherwise        = [x]
-- | Escape a string by applying a function, but leave every substring that
-- matches the regex untouched.  The text before each match is escaped, the
-- match itself is copied verbatim, and processing continues after it.
escapePreservingRegex :: (String -> String) -- ^ Escaping function
                      -> Regex              -- ^ Regular expression
                      -> String             -- ^ String to be escaped
                      -> String
escapePreservingRegex escapeFunction regex = go
  where
    go str = maybe (escapeFunction str) keepMatch (matchRegexAll regex str)
    keepMatch (before, matched, after, _) =
        escapeFunction before ++ matched ++ go after
-- | Returns @True@ if the string is non-empty and its last character is
-- the given character.
endsWith :: Char -> [Char] -> Bool
endsWith char str = not (null str) && last str == char
-- | Returns @True@ exactly when the block is a @Note@ block.
isNoteBlock :: Block -> Bool
isNoteBlock block = case block of
    Note _ _ -> True
    _        -> False
-- | Join a list of lists into one list, inserting the separator between
-- each pair of neighbours (never before the first or after the last).
joinWithSep :: [a]   -- ^ List to use as separator
            -> [[a]] -- ^ Lists to join
            -> [a]
joinWithSep _   []       = []
joinWithSep _   [x]      = x
joinWithSep sep (x:rest) = x ++ sep ++ joinWithSep sep rest
-- | Strip all trailing newline characters from a string.
--
-- Implemented as a single reverse / 'dropWhile' / reverse pass, so the cost
-- is linear in the length of the string no matter how many newlines are
-- stripped (repeatedly taking 'last' and 'init' re-walks the whole string
-- once per stripped newline).
stripTrailingNewlines :: String -> String
stripTrailingNewlines = reverse . dropWhile (== '\n') . reverse
-- | Remove leading and trailing whitespace (as defined by
-- 'removeLeadingSpace' and 'removeTrailingSpace') from a string.
removeLeadingTrailingSpace :: String -> String
removeLeadingTrailingSpace str = removeLeadingSpace (removeTrailingSpace str)
-- | Remove leading spaces, newlines and tabs from a string.
removeLeadingSpace :: String -> String
removeLeadingSpace = dropWhile (`elem` " \n\t")
-- | Remove trailing whitespace from a string, by stripping the leading
-- whitespace of the reversed string.
removeTrailingSpace :: String -> String
removeTrailingSpace str = reverse (removeLeadingSpace (reverse str))
-- | Split a list of inlines into the groups of consecutive non-@Space@
-- elements; the @Space@ elements themselves are dropped.
splitBySpace :: [Inline] -> [[Inline]]
splitBySpace lst =
    [ grp | grp <- groupBy bothNonSpace lst, head grp /= Space ]
  where
    -- two neighbours belong to the same group only if neither is a Space
    bothNonSpace a b = a /= Space && b /= Space
-- | Normalize a list of inline elements: collapse runs of @Space@ elements
-- into a single @Space@, then drop one @Space@ from the end and one from
-- the beginning if present.
normalizeSpaces :: [Inline] -> [Inline]
normalizeSpaces [] = []
normalizeSpaces list = dropLeading (dropTrailing (collapse list))
  where
    collapse (Space:Space:rest) = collapse (Space:rest)
    collapse (x:rest)           = x : collapse rest
    collapse []                 = []

    dropLeading [] = []
    dropLeading lst
      | head lst == Space = tail lst
      | otherwise         = lst

    dropTrailing [] = []
    dropTrailing lst
      | last lst == Space = init lst
      | otherwise         = lst
-- | Change the final list item from @Para@ to @Plain@ if the list should be
-- compact: that is, when the final item is a single @Para@ and none of the
-- other items contains a @Para@.  Otherwise the items are returned as is.
compactify :: [[Block]] -- ^ List of list items (each a list of blocks)
           -> [[Block]]
compactify [] = []
compactify items = case last items of
    [Para a] | not (any containsPara others) -> others ++ [[Plain a]]
    _                                        -> items
  where
    others = init items
-- | Returns @True@ if the block list contains a @Para@, either directly or
-- inside the items of a nested bullet or ordered list.
containsPara :: [Block] -> Bool
containsPara blocks = case blocks of
    []                       -> False
    Para _ : _               -> True
    BulletList items : rest  -> any containsPara items || containsPara rest
    OrderedList items : rest -> any containsPara items || containsPara rest
    _ : rest                 -> containsPara rest
-- | Options for writers.  A single record is shared by all output formats;
-- some fields are only consulted by particular writers, as noted below.
data WriterOptions = WriterOptions
  { writerStandalone :: Bool -- ^ If @True@, write header and footer
  , writerTitlePrefix :: String -- ^ Prefix for HTML titles
  , writerHeader :: String -- ^ Header for the document
  , writerIncludeBefore :: String -- ^ String to include before the document body
  , writerIncludeAfter :: String -- ^ String to include after the document body
  , writerSmartypants :: Bool -- ^ If @True@, use smart quotes, dashes, and ellipses
  , writerS5 :: Bool -- ^ @True@ if we're writing S5 instead of normal HTML
  , writerIncremental :: Bool -- ^ If @True@, display S5 lists incrementally
  , writerNumberSections :: Bool -- ^ If @True@, number sections in LaTeX
  , writerTabStop :: Int } -- ^ Tabstop for conversion between spaces and tabs
  deriving Show
--
-- Functions for constructing lists of reference keys
--
-- | Search a list of blocks for a @Key@ block whose label is a single
-- @Str@ and whose target equals the given one.  Returns @Just@ that label
-- for the first such block, otherwise @Nothing@.
keyFoundIn :: [Block] -- ^ List of key blocks to search
           -> Target  -- ^ Target to search for
           -> Maybe String
keyFoundIn [] _ = Nothing
keyFoundIn (Key [Str num] src1 : _) src
    | src1 == src = Just num
keyFoundIn (_ : rest) src = keyFoundIn rest src
-- | Return the next unique numerical key, given the list of keys used so
-- far: the smallest positive integer @n@ such that @[Str (show n)]@ is not
-- already in the list.
--
-- The candidates are drawn from an unbounded range, so a fresh key is found
-- for any finite list of used keys (there is no fixed upper limit on the
-- number of generated references).
nextUniqueKey :: [[Inline]] -> String
nextUniqueKey keys = show (head (filter notAKey [1 :: Integer ..]))
  where
    notAKey n = [Str (show n)] `notElem` keys
-- | Generate a reference for a URL.  If the parser state already holds a
-- key block for the same (entity-decoded) URL and title, that key is
-- reused; otherwise a fresh numerical key is created and recorded in the
-- parser state, both as a key block and as a used key.
generateReference :: String -- ^ URL
                  -> String -- ^ Title
                  -> GenParser tok ParserState Target
generateReference url title = do
    state <- getState
    let src       = Src (decodeEntities url) (decodeEntities title)
        keyBlocks = stateKeyBlocks state
        keysUsed  = stateKeysUsed state
    case keyFoundIn keyBlocks src of
      Just num -> return (Ref [Str num])
      Nothing  -> do
        let newKey = [Str (nextUniqueKey keysUsed)]
        updateState (\st -> st { stateKeyBlocks = Key newKey src : keyBlocks
                               , stateKeysUsed  = newKey : keysUsed })
        return (Ref newKey)
--
-- code to replace reference links with real links and remove unneeded key blocks
--
-- | Association list from reference keys (as inline lists) to the targets
-- they stand for; built by 'keyTable' and searched by 'lookupKeySrc'.
type KeyTable = [([Inline], Target)]
-- | Returns @True@ exactly when the block is a @Key@ block.
isRefBlock :: Block -> Bool
isRefBlock block = case block of
    Key _ _ -> True
    _       -> False
-- | Collect all @Key@ blocks of a block list into a key table and return it
-- together with the block list with those @Key@ blocks removed.  @Null@
-- and @Blank@ blocks are dropped as well.  Block quotes, notes and both
-- kinds of list are searched recursively; their keys come before the keys
-- of the blocks that follow them.
keyTable :: [Block] -> (KeyTable, [Block])
keyTable [] = ([], [])
keyTable (block:lst) = case block of
    Key ref target ->
        ((ref, target) : table, rest)
    Null  -> remainder -- get rid of Nulls
    Blank -> remainder -- get rid of Blanks
    BlockQuote blocks ->
        let (inner, kept) = keyTable blocks
        in (inner ++ table, BlockQuote kept : rest)
    Note ref blocks ->
        let (inner, kept) = keyTable blocks
        in (inner ++ table, Note ref kept : rest)
    OrderedList blockLists ->
        let (inner, kept) = itemTables blockLists
        in (inner ++ table, OrderedList kept : rest)
    BulletList blockLists ->
        let (inner, kept) = itemTables blockLists
        in (inner ++ table, BulletList kept : rest)
    other ->
        (table, other : rest)
  where
    remainder     = keyTable lst
    (table, rest) = remainder
    -- key tables and purged contents of all items of a list block
    itemTables blockLists =
        let results = map keyTable blockLists
        in (concatMap fst results, map snd results)
-- | Look up a key in the key table and return the target of the first
-- entry whose key matches according to 'refsMatch', if any.
lookupKeySrc :: KeyTable -- ^ Key table
             -> [Inline] -- ^ Key
             -> Maybe Target
lookupKeySrc [] _ = Nothing
lookupKeySrc ((k, src) : rest) key
    | refsMatch k key = Just src
    | otherwise       = lookupKeySrc rest key
-- | Returns @True@ if two keys match.  The keys are compared element by
-- element: text-carrying elements are compared case-insensitively,
-- emphasis and strong emphasis are compared recursively, and anything else
-- must be equal.  Keys of different length never match.
refsMatch :: [Inline] -> [Inline] -> Bool
refsMatch (x:restx) (y:resty) = headsMatch && refsMatch restx resty
  where
    headsMatch = case (x, y) of
        (Str a, Str b)               -> sameText a b
        (Code a, Code b)             -> sameText a b
        (TeX a, TeX b)               -> sameText a b
        (HtmlInline a, HtmlInline b) -> sameText a b
        (NoteRef a, NoteRef b)       -> sameText a b
        (Emph a, Emph b)             -> refsMatch a b
        (Strong a, Strong b)         -> refsMatch a b
        _                            -> x == y
    -- case-insensitive string comparison
    sameText a b = map toLower a == map toLower b
refsMatch [] ys = null ys
refsMatch xs [] = null xs
-- | Replace reference links with explicit links in a list of blocks.  The
-- key blocks are first extracted into a key table (and thereby removed);
-- the remaining blocks are then rewritten using that table.
replaceReferenceLinks :: [Block] -> [Block]
replaceReferenceLinks blocks = replaceRefLinksBlockList keytable purged
  where
    (keytable, purged) = keyTable blocks
-- | Apply 'replaceRefLinksBlock' with the given key table to every block
-- of a list.
replaceRefLinksBlockList :: KeyTable -> [Block] -> [Block]
replaceRefLinksBlockList keytable = map (replaceRefLinksBlock keytable)
-- | Use the key table to replace reference links with explicit links in a
-- single block.  Blocks holding inlines have each inline rewritten; blocks
-- holding other blocks are rewritten recursively; all other blocks are
-- returned unchanged.
replaceRefLinksBlock :: KeyTable -> Block -> Block
replaceRefLinksBlock keytable block = case block of
    Plain lst       -> Plain (inlines lst)
    Para lst        -> Para (inlines lst)
    Header lvl lst  -> Header lvl (inlines lst)
    BlockQuote lst  -> BlockQuote (blocks lst)
    Note ref lst    -> Note ref (blocks lst)
    OrderedList lst -> OrderedList (map (replaceRefLinksBlockList keytable) lst)
    BulletList lst  -> BulletList (map (replaceRefLinksBlockList keytable) lst)
    other           -> other
  where
    inlines = map (replaceRefLinksInline keytable)
    blocks  = map (replaceRefLinksBlock keytable)
-- | Use the key table to replace reference links with explicit links in an
-- inline element.  For links and images with a reference target, the key
-- looked up is the reference itself, or the link text when the reference
-- is empty; if no entry is found the reference is kept.  The link text and
-- the contents of emphasis are rewritten recursively.
replaceRefLinksInline :: KeyTable -> Inline -> Inline
replaceRefLinksInline keytable inline = case inline of
    Link text (Ref ref)  -> Link (recurse text) (resolve text ref)
    Image text (Ref ref) -> Image (recurse text) (resolve text ref)
    Emph lst             -> Emph (recurse lst)
    Strong lst           -> Strong (recurse lst)
    other                -> other
  where
    recurse = map (replaceRefLinksInline keytable)
    resolve text ref =
        let key = if null ref then text else ref
        in case lookupKeySrc keytable key of
             Just src -> src
             Nothing  -> Ref ref

43
src/Text/Pandoc/UTF8.hs Normal file
View file

@ -0,0 +1,43 @@
-- | Functions for converting Unicode strings to UTF-8 and vice versa.
--
-- Taken from <http://www.cse.ogi.edu/~hallgren/Talks/LHiH/base/lib/UTF8.hs>.
-- (c) 2003, OGI School of Science & Engineering, Oregon Health and
-- Science University.
--
-- Modified by Martin Norbaeck to pass illegal UTF-8 sequences through unchanged.
module Text.Pandoc.UTF8 (
decodeUTF8,
encodeUTF8
) where
-- From the Char module supplied with HBC.
-- | Take a UTF-8 string (one 'Char' per byte) and decode it into a Unicode
-- string.
--
-- Two-, three- and four-byte sequences are decoded.  Anything that is not a
-- well-formed multi-byte sequence (a lead byte without the required
-- continuation bytes, a stray continuation byte, or a four-byte sequence
-- denoting a value above U+10FFFF) is passed through unchanged, one
-- character at a time.
decodeUTF8 :: String -> String
decodeUTF8 "" = ""
decodeUTF8 (c:c':cs)
  | '\xc0' <= c && c <= '\xdf' && isContinuationByte c' =
      toEnum ((fromEnum c `mod` 0x20) * 0x40 + continuationBits c') : decodeUTF8 cs
decodeUTF8 (c:c':c'':cs)
  | '\xe0' <= c && c <= '\xef' &&
    isContinuationByte c' && isContinuationByte c'' =
      toEnum ((fromEnum c `mod` 0x10) * 0x1000 +
              continuationBits c' * 0x40 +
              continuationBits c'') : decodeUTF8 cs
decodeUTF8 (c:c':c'':c''':cs)
  | '\xf0' <= c && c <= '\xf4' &&
    isContinuationByte c' && isContinuationByte c'' && isContinuationByte c''' &&
    code <= 0x10ffff =          -- 'toEnum' is undefined for larger values
      toEnum code : decodeUTF8 cs
  where
    code = (fromEnum c `mod` 0x08) * 0x40000 +
           continuationBits c' * 0x1000 +
           continuationBits c'' * 0x40 +
           continuationBits c'''
decodeUTF8 (c:cs) = c : decodeUTF8 cs

-- | @True@ for bytes of the form @10xxxxxx@ (0x80 to 0xBF).
isContinuationByte :: Char -> Bool
isContinuationByte c = '\x80' <= c && c <= '\xbf'

-- | The six payload bits carried by a continuation byte.
continuationBits :: Char -> Int
continuationBits c = fromEnum c `mod` 0x40
-- | Take a Unicode string and encode it as a UTF-8 string (one 'Char' per
-- byte).
--
-- Characters from U+0001 to U+007F are copied as a single byte, characters
-- below U+0800 become two bytes, characters below U+10000 three bytes, and
-- all remaining characters four bytes.  As a consequence of the first range
-- starting at U+0001, the NUL character is written as the two-byte sequence
-- @C0 80@ (which 'decodeUTF8' turns back into NUL).
encodeUTF8 :: String -> String
encodeUTF8 "" = ""
encodeUTF8 (c:cs)
  | c > '\x0000' && c < '\x0080' = c : rest
  | i < 0x0800 =
      lead 0xc0 (i `div` 0x40) : cont i : rest
  | i < 0x10000 =
      lead 0xe0 (i `div` 0x1000) : cont (i `div` 0x40) : cont i : rest
  | otherwise =
      lead 0xf0 (i `div` 0x40000) : cont (i `div` 0x1000) : cont (i `div` 0x40) : cont i : rest
  where
    i    = fromEnum c
    rest = encodeUTF8 cs
    -- lead byte: length marker plus the most significant payload bits
    lead marker bits = toEnum (marker + bits)
    -- continuation byte carrying the six least significant bits of n
    cont n = toEnum (0x80 + n `mod` 0x40)

View file

@ -0,0 +1,27 @@
----------------------------------------------------
-- Do not edit this file by hand. Edit
-- 'templates/DefaultHeaders.hs'
-- and run ./fillTemplates.pl Text/Pandoc/Writers/DefaultHeaders.hs
----------------------------------------------------
-- | Default headers for Pandoc writers.
module Text.Pandoc.Writers.DefaultHeaders (
defaultLaTeXHeader,
defaultHtmlHeader,
defaultS5Header,
defaultRTFHeader
) where
import Text.Pandoc.Writers.S5
-- | Default LaTeX preamble: document class, package imports and paragraph
-- spacing settings.  It does not contain @\\begin{document}@.
defaultLaTeXHeader :: String
defaultLaTeXHeader = "\\documentclass{article}\n\\usepackage{hyperref}\n\\usepackage{ucs}\n\\usepackage[utf8x]{inputenc}\n\\usepackage{graphicx}\n\\setlength{\\parindent}{0pt}\n\\setlength{\\parskip}{6pt plus 2pt minus 1pt}\n% This is needed for code blocks in footnotes:\n\\usepackage{fancyvrb}\n\\VerbatimFootnotes\n"
-- | Default HTML header: doctype, opening @html@ and @head@ tags, content
-- type and generator meta tags, and the style sheet for notes.  The @head@
-- element is left open.
defaultHtmlHeader :: String
defaultHtmlHeader = "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"\n \"http://www.w3.org/TR/html4/loose.dtd\">\n<html>\n<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n<meta name=\"generator\" content=\"pandoc\" />\n<style type=\"text/css\">\ndiv.pandocNote { border-left: 1px solid grey; padding-left: 1em; }\nspan.pandocNoteRef { vertical-align: super; font-size: 80%; }\nspan.pandocNoteMarker { }\n</style>\n"
-- | Default S5 header: XHTML doctype, opening @html@ and @head@ tags and S5
-- configuration meta tags, followed by the S5 style sheet and Javascript
-- ('s5CSS' and 's5Javascript').  The @head@ element is left open.
defaultS5Header :: String
defaultS5Header = "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\n<head>\n<!-- configuration parameters -->\n<meta name=\"defaultView\" content=\"slideshow\" />\n<meta name=\"controlVis\" content=\"hidden\" />\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n<meta name=\"generator\" content=\"pandoc\" />\n" ++ s5CSS ++ s5Javascript
-- | Default RTF header: opens the RTF group and declares the font table
-- and colour table.  The outermost group is left open.
defaultRTFHeader :: String
defaultRTFHeader = "{\\rtf1\\ansi\\deff0{\\fonttbl{\\f0 Times New Roman;}{\\f1 Courier;}}\n{\\colortbl;\\red255\\green0\\blue0;\\red0\\green0\\blue255;}\n\\widowctrl\\hyphauto\n\n"

View file

@ -0,0 +1,197 @@
-- | Converts Pandoc to HTML.
module Text.Pandoc.Writers.HTML (
writeHtml
) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Html ( stringToHtmlString )
import Text.Regex ( mkRegex )
import Numeric ( showHex )
import Char ( ord )
import List ( isPrefixOf )
-- | Convert a Pandoc document to a string in HTML format.  In standalone
-- mode the output is wrapped in an HTML header and footer, and (except for
-- S5 output) a non-empty document title is also emitted as a level-1
-- heading at the top of the body.
writeHtml :: WriterOptions -> Pandoc -> String
writeHtml options (Pandoc (Meta title authors date) blocks) =
    header ++ body ++ footer
  where
    standalone  = writerStandalone options
    titlePrefix = writerTitlePrefix options
    -- title used in the header: prefix and document title joined by " - "
    topTitle
      | null titlePrefix = title
      | null title       = [Str titlePrefix]
      | otherwise        = [Str titlePrefix] ++ [Str " - "] ++ title
    header
      | standalone = htmlHeader options (Meta topTitle authors date)
      | otherwise  = ""
    titleBlocks
      | standalone && not (null title) && not (writerS5 options) =
          [RawHtml "<h1 class=\"title\">", Plain title, RawHtml "</h1>\n"]
      | otherwise = []
    footer
      | standalone = "</body>\n</html>\n"
      | otherwise  = ""
    body = writerIncludeBefore options ++
           concatMap (blockToHtml options) (replaceReferenceLinks (titleBlocks ++ blocks)) ++
           writerIncludeAfter options
-- | Obfuscate a "mailto:" link using Javascript.  The address is split at
-- the @\@@ and reassembled by a script; a @noscript@ fallback shows it in
-- "name [at] host" form.  When the link text is the address itself, the
-- script prints the reassembled address instead of the literal text.
obfuscateLink :: WriterOptions -> [Inline] -> String -> String
obfuscateLink options text src =
    gsub "mailto:([^@]*)@(.*)" ("<script type=\"text/javascript\">h='\\2';n='\\1';e=n+'@'+h;document.write('<a href=\"mailto:'+e+'\">'+" ++ linkText ++ "+'<\\/a>');</script><noscript>" ++ altText ++ "</noscript>") src
  where
    text' = inlineListToHtml options text
    -- is the visible text just the address?
    textIsAddress = src == ("mailto:" ++ text')
    linkText
      | textIsAddress = "e"
      | otherwise     = "'" ++ text' ++ "'"
    altText
      | textIsAddress = "\\1 [at] \\2"
      | otherwise     = text' ++ " (\\1 [at] \\2)"
-- | Obfuscate a character as a numeric character reference: decimal when
-- the character code is even, hexadecimal when it is odd.
obfuscateChar :: Char -> String
obfuscateChar char = "&#" ++ numstr ++ ";"
  where
    num = ord char
    numstr
      | even num  = show num
      | otherwise = "x" ++ showHex num ""
-- | Escape a string for HTML with 'stringToHtmlString', leaving double
-- quotes and anything that looks like a character entity untouched.
stringToHtml :: String -> String
stringToHtml = escapePreservingRegex stringToHtmlString preserved
  where
    preserved = mkRegex "\"|(&[[:alnum:]]*;)"
-- | Escape string as in 'stringToHtml' but add smartypants filter.
--
-- The string is first escaped with 'stringToHtml'; then ellipses, dashes,
-- double quotes and single quotes are replaced by HTML entities, in that
-- order.  Within each group the substitutions are chained with @(.)@, so
-- they are applied from the last one listed to the first: the specific
-- patterns at the bottom of a group run before the catch-all at its top.
stringToSmartHtml :: String -> String
stringToSmartHtml =
  let escapeDoubleQuotes =
        gsub "(\"|&quot;|'')" "&rdquo;" . -- rest are right quotes
        gsub "([[:space:]])(\"|&quot;)" "\\1&ldquo;" . -- never right quo after space
        gsub "(\"|&quot;|``)('|`|&lsquo;)([^[:punct:][:space:]])" "&ldquo;&lsquo;\\3" . -- "'word left
        gsub "(\"|&quot;|``)([^[:punct:][:space:]])" "&ldquo;\\2" -- "word left
      escapeSingleQuotes =
        gsub "'" "&rsquo;" . -- otherwise right
        gsub "([[:space:]])'" "\\1&lsquo;" . -- never right quo after space
        gsub "`" "&lsquo;" . -- ` is left
        gsub "([^[:punct:][:space:]])'" "\\1&rsquo;" . -- word' right
        gsub "('|`)(\"|&quot;|&ldquo;|``)" "&lsquo;&ldquo;" . -- '"word left
        gsub "^('|`)([^[:punct:][:space:]])" "&lsquo;\\2" . -- 'word left
        gsub "([^[:punct:][:space:]])'(s|S)" "\\1&rsquo;\\2" . -- possessive
        gsub "([[:space:]])'([^[:punct:][:space:]])" "\\1&lsquo;\\2" . -- 'word left
        gsub "'([0-9][0-9](s|S))" "&rsquo;\\1" -- '80s - decade abbrevs.
      -- number ranges are handled first, then triple and double hyphens
      escapeDashes = gsub " ?-- ?" "&mdash;" .
                     gsub " ?--- ?" "&mdash;" .
                     gsub "([0-9])--?([0-9])" "\\1&ndash;\\2"
      escapeEllipses = gsub "\\.\\.\\.|\\. \\. \\." "&hellip;" in
  escapeSingleQuotes . escapeDoubleQuotes . escapeDashes . escapeEllipses . stringToHtml
-- | Escape a code string as needed for HTML: @&@ and @<@ are replaced by
-- entities, every other character is copied unchanged.
codeStringToHtml :: String -> String
codeStringToHtml = concatMap escapeChar
  where
    escapeChar '&' = "&amp;"
    escapeChar '<' = "&lt;"
    escapeChar x   = [x]
-- | Escape a string for use inside a double-quoted HTML attribute by
-- replacing every double quote with @&quot;@.
attributeStringToHtml :: String -> String
attributeStringToHtml str = gsub "\"" "&quot;" str
-- | Returns an HTML header with bibliographic information: the configured
-- header text, followed by author and date meta tags and the title element
-- (each only when the corresponding information is present), the end of
-- the @head@ element and the opening @body@ tag.
htmlHeader :: WriterOptions -> Meta -> String
htmlHeader options (Meta title authors date) =
    writerHeader options ++ authortext ++ datetext ++ titletext ++ "</head>\n<body>\n"
  where
    titletext
      | null title = ""
      | otherwise  = "<title>" ++ inlineListToHtml options title ++ "</title>\n"
    authortext
      | null authors = ""
      | otherwise    = "<meta name=\"author\" content=\"" ++
                       joinWithSep ", " (map stringToHtml authors) ++ "\" />\n"
    datetext
      | date == "" = ""
      | otherwise  = "<meta name=\"date\" content=\"" ++ stringToHtml date ++ "\" />\n"
-- | Convert a Pandoc block element to HTML.
--
-- In S5 mode a block quote containing nothing but a single list is not
-- rendered as a quote: the list is rendered directly with the incremental
-- setting inverted.  Headers outside levels 1 to 6 are rendered as plain
-- paragraphs.  Key blocks produce no output.
blockToHtml :: WriterOptions -> Block -> String
blockToHtml options block = case block of
    Blank     -> "\n"
    Null      -> ""
    Plain lst -> inlines lst
    Para lst  -> "<p>" ++ inlines lst ++ "</p>\n"
    BlockQuote blocks
      | writerS5 options -> -- in S5, treat list in blockquote specially
          case blocks of
            [BulletList lst]  -> blockToHtml toggled (BulletList lst)
            [OrderedList lst] -> blockToHtml toggled (OrderedList lst)
            _                 -> quoted blocks
      | otherwise -> quoted blocks
    Note ref lst ->
        let marker = "<span class=\"pandocNoteMarker\"><a name=\"note_" ++ ref ++
                     "\" href=\"#ref_" ++ ref ++ "\">(" ++ ref ++ ")</a></span> "
            contents = concatMap (blockToHtml options) lst
            -- put the marker inside a leading paragraph, if there is one
            contents' = case contents of
                '<':'p':'>':rest -> "<p class=\"first\">" ++ marker ++ rest ++ "\n"
                _                -> marker ++ contents ++ "\n"
        in "<div class=\"pandocNote\">\n" ++ contents' ++ "</div>\n"
    Key _ _        -> ""
    CodeBlock str  -> "<pre><code>" ++ codeStringToHtml str ++ "</code></pre>\n"
    RawHtml str    -> str
    BulletList lst ->
        "<ul" ++ attribs ++ ">\n" ++ concatMap (listItemToHtml options) lst ++ "</ul>\n"
    OrderedList lst ->
        "<ol" ++ attribs ++ ">\n" ++ concatMap (listItemToHtml options) lst ++ "</ol>\n"
    HorizontalRule -> "<hr />\n"
    Header level lst
      | level > 0 && level <= 6 ->
          "<h" ++ show level ++ ">" ++ inlines lst ++ "</h" ++ show level ++ ">\n"
      | otherwise -> "<p>" ++ inlines lst ++ "</p>\n"
  where
    inlines = inlineListToHtml options
    quoted blocks =
        "<blockquote>\n" ++ concatMap (blockToHtml options) blocks ++ "</blockquote>\n"
    -- if default is incremental, make it nonincremental; otherwise incremental
    toggled = options { writerIncremental = not (writerIncremental options) }
    attribs
      | writerIncremental options = " class=\"incremental\""
      | otherwise                 = ""
-- | Convert one list item (a list of blocks) to an HTML @li@ element.
listItemToHtml options list = concat ["<li>", contents, "</li>\n"]
  where
    contents = concatMap (blockToHtml options) list
-- | Convert a list of Pandoc inline elements to HTML.  Adjacent @Str@ and
-- @Space@ elements are consolidated first, so that the smartypants filter
-- sees whole runs of text rather than isolated words.
inlineListToHtml :: WriterOptions -> [Inline] -> String
inlineListToHtml options = concatMap (inlineToHtml options) . consolidateList
-- | Convert a Pandoc inline element to HTML.
--
-- Links whose source starts with @mailto:@ are obfuscated with
-- 'obfuscateLink'.  Links and images whose reference was never resolved to
-- a source are written back in their bracketed source form.
--
-- Image attributes are escaped like link attributes: the source goes
-- through 'codeStringToHtml' (as the @href@ of a link does), and the
-- alternate text goes through 'attributeStringToHtml', because
-- 'stringToHtml' deliberately leaves double quotes alone and a raw quote
-- would end the @alt@ attribute early.
inlineToHtml :: WriterOptions -> Inline -> String
inlineToHtml options (Emph lst) = "<em>" ++ (inlineListToHtml options lst) ++ "</em>"
inlineToHtml options (Strong lst) = "<strong>" ++ (inlineListToHtml options lst) ++ "</strong>"
inlineToHtml options (Code str) = "<code>" ++ (codeStringToHtml str) ++ "</code>"
inlineToHtml options (Str str) = if (writerSmartypants options) then
                                   stringToSmartHtml str
                                 else
                                   stringToHtml str
inlineToHtml options (TeX str) = (codeStringToHtml str)
inlineToHtml options (HtmlInline str) = str
inlineToHtml options (LineBreak) = "<br />\n"
inlineToHtml options Space = " "
inlineToHtml options (Link text (Src src tit)) =
  let title = attributeStringToHtml tit in
  if (isPrefixOf "mailto:" src) then
    obfuscateLink options text src
  else
    "<a href=\"" ++ (codeStringToHtml src) ++ "\"" ++
    (if tit /= "" then " title=\"" ++ title ++ "\">" else ">") ++
    (inlineListToHtml options text) ++ "</a>"
inlineToHtml options (Link text (Ref [])) = "[" ++ (inlineListToHtml options text) ++ "]"
inlineToHtml options (Link text (Ref ref)) = "[" ++ (inlineListToHtml options text) ++ "][" ++
  (inlineListToHtml options ref) ++ "]" -- this is what markdown does, for better or worse
inlineToHtml options (Image alt (Src source tit)) =
  let title = attributeStringToHtml tit
      -- escape quotes so the text cannot terminate the attribute
      alternate = attributeStringToHtml (inlineListToHtml options alt) in
  "<img src=\"" ++ (codeStringToHtml source) ++ "\"" ++
  (if tit /= "" then " title=\"" ++ title ++ "\"" else "") ++
  (if alternate /= "" then " alt=\"" ++ alternate ++ "\"" else "") ++ ">"
inlineToHtml options (Image alternate (Ref [])) =
  "![" ++ (inlineListToHtml options alternate) ++ "]"
inlineToHtml options (Image alternate (Ref ref)) =
  "![" ++ (inlineListToHtml options alternate) ++ "][" ++ (inlineListToHtml options ref) ++ "]"
inlineToHtml options (NoteRef ref) =
  "<span class=\"pandocNoteRef\"><a name=\"ref_" ++ ref ++ "\" href=\"#note_" ++ ref ++
  "\">(" ++ ref ++ ")</a></span>"

View file

@ -0,0 +1,164 @@
-- | Convert Pandoc to LaTeX.
module Text.Pandoc.Writers.LaTeX (
writeLaTeX
) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import List ( (\\) )
-- | Convert a Pandoc document to LaTeX.  Note blocks found at the top
-- level of the document are collected up front and handed to the block
-- and inline converters, which use them to resolve note references.  In
-- standalone mode the body is wrapped in a header and @\\end{document}@.
writeLaTeX :: WriterOptions -> Pandoc -> String
writeLaTeX options (Pandoc meta blocks) = header ++ body ++ footer
  where
    standalone = writerStandalone options
    notes = filter isNoteBlock blocks -- assumes all notes are at outer level
    body = writerIncludeBefore options ++
           concatMap (blockToLaTeX notes) (replaceReferenceLinks blocks) ++
           writerIncludeAfter options
    header
      | standalone = latexHeader notes options meta
      | otherwise  = ""
    footer
      | standalone = "\n\\end{document}\n"
      | otherwise  = ""
-- | Build the LaTeX header: the configured preamble, a line switching off
-- section numbering unless numbering was requested, title, author and date
-- commands for whatever bibliographic information is present, then
-- @\\begin{document}@ and (only if there is a title) @\\maketitle@.
latexHeader :: [Block]       -- ^ List of note blocks to use in resolving note refs
            -> WriterOptions -- ^ Options, including LaTeX header
            -> Meta          -- ^ Meta with bibliographic information
            -> String
latexHeader notes options (Meta title authors date) =
    writerHeader options ++ secnumline ++ titletext ++ authorstext ++ datetext ++
    "\\begin{document}\n" ++ maketitle
  where
    hasTitle = not (null title)
    titletext
      | hasTitle  = "\\title{" ++ inlineListToLaTeX notes title ++ "}\n"
      | otherwise = ""
    authorstext
      | null authors = ""
      | otherwise    = "\\author{" ++ joinWithSep "\\\\" (map stringToLaTeX authors) ++ "}\n"
    datetext
      | date == "" = ""
      | otherwise  = "\\date{" ++ stringToLaTeX date ++ "}\n"
    maketitle
      | hasTitle  = "\\maketitle\n"
      | otherwise = ""
    secnumline
      | writerNumberSections options = ""
      | otherwise                    = "\\setcounter{secnumdepth}{0}\n"
-- Escape things as needed for LaTeX (also ldots, dashes, quotes, etc.).
-- Each helper handles one class of special character; 'stringToLaTeX'
-- chains them in the order required for them not to interfere.

-- | Backslash-escape braces.
escapeBrackets :: String -> String
escapeBrackets = backslashEscape "{}"

-- | Escape characters that are special in LaTeX text.  @$ % & _ #@ are
-- backslash-escaped.  A tilde is written as @\\textasciitilde{}@ instead:
-- @\\~@ is LaTeX's tilde-accent command and would put a tilde over the
-- following character rather than print one.
escapeSpecial :: String -> String
escapeSpecial = gsub "~" "\\\\textasciitilde{}" . backslashEscape "$%&_#"

-- | Replace each backslash with @\\textbackslash{}@.
escapeBackslash :: String -> String
escapeBackslash = gsub "\\\\" "\\\\textbackslash{}"

-- | Undo the brace escaping that 'escapeBrackets' applies to the @{}@ of
-- a @\\textbackslash{}@ produced by 'escapeBackslash'.
fixBackslash :: String -> String
fixBackslash = gsub "\\\\textbackslash\\\\\\{\\\\\\}" "\\\\textbackslash{}"

-- | Replace @^@ with @\\^{}@.
escapeHat :: String -> String
escapeHat = gsub "\\^" "\\\\^{}"

-- | Replace @|@ with @\\textbar{}@.
escapeBar :: String -> String
escapeBar = gsub "\\|" "\\\\textbar{}"

-- | Replace @<@ with @\\textless{}@.
escapeLt :: String -> String
escapeLt = gsub "<" "\\\\textless{}"

-- | Replace @>@ with @\\textgreater{}@.
escapeGt :: String -> String
escapeGt = gsub ">" "\\\\textgreater{}"
-- Smart typography for LaTeX.  Within each definition the substitutions
-- are chained with (.), so they are applied from the last one listed to the
-- first: the specific patterns at the bottom run before the catch-all at
-- the top.

-- Turn straight double quotes into LaTeX opening (``) and closing ('')
-- quotes.
escapeDoubleQuotes =
  gsub "\"" "''" . -- rest are right quotes
  gsub "([[:space:]])\"" "\\1``" . -- never right quote after space
  gsub "\"('|`)([^[:punct:][:space:]])" "``{}`\\2" . -- "'word left
  gsub "\"([^[:punct:][:space:]])" "``\\1" -- "word left
-- Turn single quotes that open a word or phrase into LaTeX opening quotes
-- (`); other single quotes are left as they are.
escapeSingleQuotes =
  gsub "('|`)(\"|``)" "`{}``" . -- '"word left
  gsub "([^[:punct:][:space:]])`(s|S)" "\\1'\\2" . -- catch possessives
  gsub "^'([^[:punct:][:space:]])" "`\\1" . -- 'word left
  gsub "([[:space:]])'" "\\1`" . -- never right quote after space
  gsub "([[:space:]])'([^[:punct:][:space:]])" "\\1`\\2" -- 'word left (leave possessives)
-- Replace three dots (with or without single spaces between them) by
-- \ldots{}.
escapeEllipses = gsub "\\.\\.\\.|\\. \\. \\." "\\ldots{}"
-- Double hyphens between words or surrounded by spaces become ---, and a
-- single hyphen between two digits becomes --.
escapeDashes = gsub "([0-9])-([0-9])" "\\1--\\2" .
               gsub " -- " "---" .
               gsub "([^[:punct:][:space:]])--([^[:punct:][:space:]])" "\\1---\\2"
-- All smart-typography filters: ellipses first, then dashes, double quotes
-- and single quotes.
escapeSmart = escapeSingleQuotes . escapeDoubleQuotes . escapeDashes . escapeEllipses
-- | Escape a string for LaTeX (including smart quotes, dashes, ellipses).
-- The filters are listed outermost first: the last entry of the list is
-- applied to the input first, the first entry last.
stringToLaTeX :: String -> String
stringToLaTeX str = foldr ($) str
    [ escapeSmart
    , escapeGt
    , escapeLt
    , escapeBar
    , escapeHat
    , escapeSpecial
    , fixBackslash
    , escapeBrackets
    , escapeBackslash
    ]
-- | Turn every top-level @Code@ element of an inline list into a plain
-- @Str@ with the same text (because it's illegal to have a \\verb inside a
-- command argument).  Other elements are left alone.
deVerb :: [Inline] -> [Inline]
deVerb = map unVerb
  where
    unVerb (Code str) = Str str
    unVerb other      = other
-- | Convert a Pandoc block element to LaTeX.  Note, key and raw HTML
-- blocks produce no output here (notes are emitted where they are
-- referenced).  Headers of level 1 to 3 become (sub)section commands;
-- other header levels are written as ordinary paragraphs.
blockToLaTeX :: [Block] -- ^ List of note blocks to use in resolving note refs
             -> Block   -- ^ Block to convert
             -> String
blockToLaTeX notes block = case block of
    Blank           -> "\n"
    Null            -> ""
    Plain lst       -> inlines lst ++ "\n"
    Para lst        -> inlines lst ++ "\n\n"
    BlockQuote lst  -> "\\begin{quote}\n" ++ concatMap (blockToLaTeX notes) lst ++ "\\end{quote}\n"
    Note _ _        -> ""
    Key _ _         -> ""
    CodeBlock str   -> "\\begin{verbatim}\n" ++ str ++ "\\end{verbatim}\n"
    RawHtml _       -> ""
    BulletList lst  -> "\\begin{itemize}\n" ++ items lst ++ "\\end{itemize}\n"
    OrderedList lst -> "\\begin{enumerate}\n" ++ items lst ++ "\\end{enumerate}\n"
    HorizontalRule  -> "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n"
    Header level lst
      | level > 0 && level <= 3 ->
          "\\" ++ concat (replicate (level - 1) "sub") ++ "section{" ++
          inlines (deVerb lst) ++ "}\n\n"
      | otherwise -> inlines lst ++ "\n\n"
  where
    inlines = inlineListToLaTeX notes
    items   = concatMap (listItemToLaTeX notes)
-- | Convert one list item (a list of blocks) to a LaTeX @\\item@.
listItemToLaTeX notes list = "\\item " ++ contents
  where
    contents = concatMap (blockToLaTeX notes) list
-- | Convert a list of inline elements to LaTeX.  @Str@ and @Space@
-- elements are consolidated first, for more effective smart quotes.
inlineListToLaTeX :: [Block]  -- ^ List of note blocks to use in resolving note refs
                  -> [Inline] -- ^ Inlines to convert
                  -> String
inlineListToLaTeX notes = concatMap (inlineToLaTeX notes) . consolidateList
-- | Convert an inline element to LaTeX.
--
-- Inline code is written with @\\verb@, delimited by the first character
-- between @!@ and @~@ that does not occur in the code.  Inline HTML is
-- dropped and inline TeX is passed through verbatim.  A note reference is
-- replaced by a footnote holding the contents of the first note block with
-- the same reference, or by nothing if there is no such block.
inlineToLaTeX :: [Block] -- ^ List of note blocks to use in resolving note refs
              -> Inline  -- ^ Inline to convert
              -> String
inlineToLaTeX notes inline = case inline of
    Emph lst     -> "\\emph{" ++ inlines (deVerb lst) ++ "}"
    Strong lst   -> "\\textbf{" ++ inlines (deVerb lst) ++ "}"
    Code str     ->
        let delim = (enumFromTo '!' '~' \\ str) !! 0
        in "\\verb" ++ [delim] ++ str ++ [delim]
    Str str      -> stringToLaTeX str
    TeX str      -> str
    HtmlInline _ -> ""
    LineBreak    -> "\\\\\n"
    Space        -> " "
    Link text (Src src _) ->
        "\\href{" ++ src ++ "}{" ++ inlines (deVerb text) ++ "}"
    Link text (Ref []) -> "[" ++ inlines text ++ "]"
    Link text (Ref ref) -> -- this is what markdown does, for better or worse
        "[" ++ inlines text ++ "][" ++ inlines ref ++ "]"
    Image _ (Src source _) -> "\\includegraphics{" ++ source ++ "}"
    Image alternate (Ref []) -> "![" ++ inlines alternate ++ "]"
    Image alternate (Ref ref) ->
        "![" ++ inlines alternate ++ "][" ++ inlines ref ++ "]"
    NoteRef ref -> footnoteFor notes ref
  where
    inlines = inlineListToLaTeX notes
    -- search the note blocks in order; the contents of the note found are
    -- converted with only the notes that follow it available
    footnoteFor [] _ = ""
    footnoteFor (Note firstref firstblocks : rest) ref
      | firstref == ref =
          "\\footnote{" ++ stripTrailingNewlines (concatMap (blockToLaTeX rest) firstblocks) ++ "}"
      | otherwise = footnoteFor rest ref

View file

@ -0,0 +1,149 @@
-- | Converts Pandoc to Markdown.
module Text.Pandoc.Writers.Markdown (
writeMarkdown
) where
import Text.Regex ( matchRegex, mkRegex )
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.PrettyPrint.HughesPJ hiding ( Str )
-- | Render a Pandoc document as Markdown text.
writeMarkdown :: WriterOptions -> Pandoc -> String
writeMarkdown options (Pandoc meta blocks) = render (preamble <> body)
  where
    tabStop = writerTabStop options
    -- blanks are inserted between keys and non-keys before rendering
    content = vcat (map (blockToMarkdown tabStop) (formatKeys blocks))
    body = (text (writerIncludeBefore options) <> content)
             $$ text (writerIncludeAfter options)
    -- the title block and header text are only emitted in standalone mode
    preamble
      | writerStandalone options = metaToMarkdown meta $$ text (writerHeader options)
      | otherwise                = empty
-- | Backslash-escape the characters that are special in Markdown.
escapeString :: String -> String
escapeString str = backslashEscape markdownSpecials str
  where markdownSpecials = "`<\\*_^"
-- | Escape double quotes embedded in a link title.
escapeLinkTitle :: String -> String
escapeLinkTitle title = gsub quote escapedQuote title
  where
    quote        = "\""
    escapedQuote = "\\\\\""
-- | Render a list of inline elements as a wrapped document: the list is
-- split at spaces and the resulting chunks are laid out with 'fsep'.
wrappedMarkdown :: [Inline] -> Doc
wrappedMarkdown lst =
  fsep [ fcat (map inlineToMarkdown chunk) | chunk <- splitBySpace lst ]
-- | Insert a 'Blank' block at every boundary between a 'Key' block and a
-- non-'Key' block; runs of consecutive keys are left together.
formatKeys :: [Block] -> [Block]
formatKeys (x : rest@(y : _))
  | isKey x /= isKey y = x : Blank : formatKeys rest   -- key/non-key boundary
  | otherwise          = x : formatKeys rest
  where
    isKey (Key _ _) = True
    isKey _         = False
formatKeys shortList = shortList                       -- empty or singleton list
-- | Render bibliographic information as a Markdown header block.
-- Only the leading fields are emitted: the title alone when there are no
-- authors and no date, title and authors when there is no date, and all
-- three lines otherwise.
metaToMarkdown :: Meta -> Doc
metaToMarkdown (Meta title authors date)
  | noDate && noAuthors && null title = empty
  | noDate && noAuthors               = titleLine
  | noDate                            = titleLine <> authorLine
  | otherwise                         = titleLine <> authorLine <> dateLine
  where
    noDate     = null date
    noAuthors  = null authors
    newline    = text "\n"
    titleLine  = titleToMarkdown title <> newline
    authorLine = authorsToMarkdown authors <> newline
    dateLine   = dateToMarkdown date <> newline
-- | Title line of the Markdown header block.
titleToMarkdown :: [Inline] -> Doc
titleToMarkdown title = headerPrefix <> inlineListToMarkdown title
  where headerPrefix = text "% "

-- | Author line of the Markdown header block: escaped names joined by commas.
authorsToMarkdown :: [String] -> Doc
authorsToMarkdown authors = text "% " <> text (joinWithSep ", " escapedNames)
  where escapedNames = map escapeString authors

-- | Date line of the Markdown header block.
dateToMarkdown :: String -> Doc
dateToMarkdown date = text "% " <> text (escapeString date)
-- | Render one block element as Markdown.
blockToMarkdown :: Int    -- ^ Tab stop
                -> Block  -- ^ Block element to render
                -> Doc
blockToMarkdown _ Blank = text ""
blockToMarkdown _ Null = empty
blockToMarkdown _ (Plain lst) = wrappedMarkdown lst
blockToMarkdown _ (Para lst) = wrappedMarkdown lst <> text "\n"
blockToMarkdown tabStop (BlockQuote lst) =
  vcat (map quoteLine renderedLines) <> text "\n"
  where
    -- render the quoted blocks first, then prefix every resulting line
    renderedLines = lines (render (vcat (map (blockToMarkdown tabStop) lst)))
    quoteLine ln  = text "> " <> text ln
blockToMarkdown tabStop (Note ref lst) =
  case lines (render (vcat (map (blockToMarkdown tabStop) lst))) of
    []                   -> empty
    (firstLine : others) ->
      -- the first line carries the note marker, later lines a continuation marker
      (text ("^(" ++ escapeString ref ++ ") ") <> text firstLine)
        $$ (vcat (map continuation others) <> text "\n")
  where
    continuation ln = text "^ " <> text ln
blockToMarkdown _ (Key txt (Src src tit)) =
  text " " <> char '[' <> inlineListToMarkdown txt <> char ']' <> text ": " <>
    text src <> titlePart
  where
    titlePart
      | tit /= ""  = text (" \"" ++ escapeLinkTitle tit ++ "\"")
      | otherwise  = empty
blockToMarkdown tabStop (CodeBlock str) =
  nest tabStop (vcat (map text (lines str))) <> finalNewline <> text "\n"
  where
    finalNewline = if endsWith '\n' str then empty else text "\n"
blockToMarkdown _ (RawHtml str) = text str
blockToMarkdown tabStop (BulletList lst) =
  vcat (map (bulletListItemToMarkdown tabStop) lst) <> text "\n"
blockToMarkdown tabStop (OrderedList lst) =
  vcat (zipWith (orderedListItemToMarkdown tabStop) [1 ..] lst) <> text "\n"
blockToMarkdown _ HorizontalRule = text "\n* * * * *\n"
blockToMarkdown _ (Header level lst) =
  text (replicate level '#' ++ " ") <> inlineListToMarkdown lst <> text "\n"
-- | Render a bullet list item (a list of blocks) as Markdown.
bulletListItemToMarkdown :: Int      -- ^ tab stop
                         -> [Block]  -- ^ list item (list of blocks)
                         -> Doc
bulletListItemToMarkdown tabStop items = hang (text "- ") tabStop contents
  where contents = vcat (map (blockToMarkdown tabStop) items)
-- | Render an ordered list item (a list of blocks) as Markdown.
orderedListItemToMarkdown :: Int      -- ^ tab stop
                          -> Int      -- ^ ordinal number of list item
                          -> [Block]  -- ^ list item (list of blocks)
                          -> Doc
orderedListItemToMarkdown tabStop num items = hang marker tabStop contents
  where
    -- numbers below 10 get one extra character of padding after the period
    padding  = if num < 10 then " " else ""
    marker   = text (show num ++ "." ++ padding)
    contents = vcat (map (blockToMarkdown tabStop) items)
-- | Render a list of inline elements as Markdown, concatenated horizontally.
inlineListToMarkdown :: [Inline] -> Doc
inlineListToMarkdown = hcat . map inlineToMarkdown
-- | Convert Pandoc inline element to markdown.
inlineToMarkdown :: Inline -> Doc
inlineToMarkdown (Emph lst) = text "*" <> inlineListToMarkdown lst <> text "*"
inlineToMarkdown (Strong lst) = text "**" <> inlineListToMarkdown lst <> text "**"
inlineToMarkdown (Code str) = text (fence ++ pad ++ str ++ pad ++ fence)
  where
    -- A code span is closed by the next backtick run of the same length as
    -- its opening delimiter, so the delimiter must be a length that does
    -- not occur anywhere in the code.  Choose the shortest such length.
    runs  = backtickRuns str
    fence = replicate (head [n | n <- [1 ..], n `notElem` runs]) '`'
    -- pad with a space whenever the code contains backticks, so that a
    -- leading or trailing backtick is not merged into the delimiter
    pad   = if null runs then "" else " "
    -- lengths of all maximal runs of backticks in the string
    backtickRuns [] = []
    backtickRuns s@('`' : _) =
      let (ticks, rest) = span (== '`') s
      in  length ticks : backtickRuns rest
    backtickRuns s = backtickRuns (dropWhile (/= '`') s)
inlineToMarkdown (Str str) = text $ escapeString str
inlineToMarkdown (TeX str) = text str
inlineToMarkdown (HtmlInline str) = text str
-- Markdown requires at least two trailing spaces for a hard line break.
inlineToMarkdown (LineBreak) = text "  \n"
inlineToMarkdown Space = char ' '
inlineToMarkdown (Link txt (Src src tit)) =
  let linktext = if (null txt) || (txt == [Str ""])
                   then text "link"   -- placeholder for links without text
                   else inlineListToMarkdown txt
  in  char '[' <> linktext <> char ']' <> char '(' <> text src <>
      (if tit /= "" then text (" \"" ++ escapeLinkTitle tit ++ "\"") else empty) <>
      char ')'
inlineToMarkdown (Link txt (Ref [])) =
  char '[' <> inlineListToMarkdown txt <> text "][]"
inlineToMarkdown (Link txt (Ref ref)) =
  char '[' <> inlineListToMarkdown txt <> char ']' <>
  char '[' <> inlineListToMarkdown ref <> char ']'
inlineToMarkdown (Image alternate (Src source tit)) =
  let alt = if (null alternate) || (alternate == [Str ""])
              then text "image"       -- placeholder for images without alt text
              else inlineListToMarkdown alternate
  in  char '!' <> char '[' <> alt <> char ']' <> char '(' <> text source <>
      (if tit /= "" then text (" \"" ++ escapeLinkTitle tit ++ "\"") else empty) <>
      char ')'
inlineToMarkdown (Image alternate (Ref [])) =
  char '!' <> char '[' <> inlineListToMarkdown alternate <> char ']'
inlineToMarkdown (Image alternate (Ref ref)) =
  char '!' <> char '[' <> inlineListToMarkdown alternate <> char ']' <>
  char '[' <> inlineListToMarkdown ref <> char ']'
inlineToMarkdown (NoteRef ref) =
  char '^' <> char '(' <> text (escapeString ref) <> char ')'

View file

@ -0,0 +1,188 @@
-- | Converts Pandoc to reStructuredText.
module Text.Pandoc.Writers.RST (
writeRST
) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import List ( nubBy )
import Text.PrettyPrint.HughesPJ hiding ( Str )
-- | Render a Pandoc document as reStructuredText.
writeRST :: WriterOptions -> Pandoc -> String
writeRST options (Pandoc meta blocks) =
  render ((top <> body) $$ vcat uniqueRefs)
  where
    prepared = reformatBlocks (replaceReferenceLinks blocks)
    (mainDocs, refDocs) = unzip (map (blockToRST (writerTabStop options)) prepared)
    -- remove duplicate keys: two references are equal if they render alike
    uniqueRefs = nubBy sameRendering refDocs
    sameRendering a b = render a == render b
    -- title block and header text are only emitted in standalone mode
    top
      | writerStandalone options = metaToRST meta $$ text (writerHeader options)
      | otherwise                = empty
    body = (text (writerIncludeBefore options) <> vcat mainDocs)
             $$ text (writerIncludeAfter options)
-- | Backslash-escape the characters that are special in reStructuredText.
escapeString :: String -> String
escapeString str = backslashEscape rstSpecials str
  where rstSpecials = "`\\|*_"
-- | Render a list of inline elements as a pair of documents: the wrapped
-- text, and the references collected from the elements.
wrappedRST :: [Inline] -> (Doc, Doc)
wrappedRST lst = (wrappedText, references)
  where
    renderChunk = fcat . map (fst . inlineToRST)
    wrappedText = fsep (map renderChunk (splitBySpace lst))
    references  = vcat (map (snd . inlineToRST) lst)
-- | Drop reference keys and turn a 'Plain' block that directly precedes a
-- list into a 'Para', so that there is a blank before each list.  Lists,
-- block quotes and notes are processed recursively.
reformatBlocks :: [Block] -> [Block]
reformatBlocks [] = []
reformatBlocks (Plain inlines : rest@(next : _))
  | isList next = Para inlines : reformatBlocks rest
  where
    isList (OrderedList _) = True
    isList (BulletList _)  = True
    isList _               = False
reformatBlocks (block : rest) =
  case block of
    Key _ _           -> reformatBlocks rest
    OrderedList items -> OrderedList (map reformatBlocks items) : reformatBlocks rest
    BulletList items  -> BulletList (map reformatBlocks items) : reformatBlocks rest
    BlockQuote inner  -> BlockQuote (reformatBlocks inner) : reformatBlocks rest
    Note ref inner    -> Note ref (reformatBlocks inner) : reformatBlocks rest
    other             -> other : reformatBlocks rest
-- | Render bibliographic information: title, then authors, then date.
metaToRST :: Meta -> Doc
metaToRST (Meta title authors date) = titleDoc <> authorsDoc <> dateDoc
  where
    titleDoc   = titleToRST title
    authorsDoc = authorsToRST authors
    dateDoc    = dateToRST date
-- | Render the document title with a border of @=@ characters above and
-- below it; an empty title renders as nothing.
titleToRST :: [Inline] -> Doc
titleToRST [] = empty
titleToRST lst =
  border <> char '\n' <> title <> char '\n' <> border <> text "\n\n"
  where
    title  = fst (inlineListToRST lst)
    -- the border is exactly as long as the rendered title
    border = text (replicate (length (render title)) '=')
-- | Convert author list to 'Doc': one @:Author:@ field per author.
-- Author names are raw strings, so RST-special characters in them are
-- escaped, as is done for the date field.
authorsToRST :: [String] -> Doc
authorsToRST = hcat . map authorField
  where
    authorField name = text ":Author: " <> text (escapeString name) <> char '\n'
-- | Render the date as a @:Date:@ field; an empty date renders as nothing.
dateToRST :: String -> Doc
dateToRST date
  | null date = empty
  | otherwise = text ":Date: " <> text (escapeString date) <> char '\n'
-- | Convert Pandoc block element to a 'Doc' containing the main text and
-- another one containing any references.
blockToRST :: Int         -- ^ tab stop
           -> Block       -- ^ block element to convert
           -> (Doc, Doc)  -- ^ first element is text, second is references for end of file
blockToRST _ Blank = (text "\n", empty)
blockToRST _ Null = (empty, empty)
blockToRST _ (Plain lst) = wrappedRST lst
blockToRST _ (Para [TeX str]) = -- raw latex block
  let str' = if endsWith '\n' str then str ++ "\n" else str ++ "\n\n"
  in  (hang (text "\n.. raw:: latex\n") 3 (vcat $ map text (lines str')), empty)
blockToRST _ (Para lst) =
  let (main, refs) = wrappedRST lst   -- compute once, use both halves
  in  (main <> text "\n", refs)
blockToRST tabStop (BlockQuote lst) =
  let (main, refs) = unzip $ map (blockToRST tabStop) lst
  in  ((nest tabStop $ vcat main) <> text "\n", vcat refs)
blockToRST tabStop (Note ref blocks) =
  let (main, refs) = unzip $ map (blockToRST tabStop) blocks
  in  (hang (text ".. [" <> text (escapeString ref) <> text "] ") 3 (vcat main), vcat refs)
blockToRST _ (Key _ (Src _ _)) =
  (text "ERROR - KEY FOUND", empty) -- shouldn't have a key here
blockToRST tabStop (CodeBlock str) =
  (hang (text "::\n") tabStop (vcat $ map text (lines ('\n' : (str ++ "\n")))), empty)
blockToRST _ (RawHtml str) =
  let str' = if endsWith '\n' str then str ++ "\n" else str ++ "\n\n"
  in  (hang (text "\n.. raw:: html\n") 3 (vcat $ map text (lines str')), empty)
blockToRST tabStop (BulletList lst) =
  let (main, refs) = unzip $ map (bulletListItemToRST tabStop) lst
  in  (vcat main <> text "\n", vcat refs)
blockToRST tabStop (OrderedList lst) =
  let (main, refs) = unzip $ zipWith (orderedListItemToRST tabStop) [1 ..] lst
  in  (vcat main <> text "\n", vcat refs)
blockToRST _ HorizontalRule = (text "--------------\n", empty)
blockToRST _ (Header level lst) =
  let (headerText, refs) = inlineListToRST lst
      headerLength = length $ render headerText
      -- One underline character per header level.  The first five match the
      -- historical output; further punctuation characters cover deeper
      -- levels so that the underline is never made of spaces.  Out-of-range
      -- levels are clamped to the nearest valid entry.
      adornments = "=-~^'\"`:_*+#<>"
      headerChar = adornments !! max 0 (min (level - 1) (length adornments - 1))
      border = text $ replicate headerLength headerChar
  in  (headerText <> char '\n' <> border <> char '\n', refs)
-- | Render a bullet list item (a list of blocks) as reStructuredText.
-- The result pairs the item text with the references found inside it.
bulletListItemToRST :: Int         -- ^ tab stop
                    -> [Block]     -- ^ list item (list of blocks)
                    -> (Doc, Doc)
bulletListItemToRST tabStop items =
  (hang (text "- ") tabStop (vcat mainDocs), vcat refDocs)
  where
    (mainDocs, refDocs) = unzip (map (blockToRST tabStop) items)
-- | Render an ordered list item (a list of blocks) as reStructuredText.
-- The result pairs the item text with the references found inside it.
orderedListItemToRST :: Int         -- ^ tab stop
                     -> Int         -- ^ ordinal number of list item
                     -> [Block]     -- ^ list item (list of blocks)
                     -> (Doc, Doc)
orderedListItemToRST tabStop num items =
  (hang marker tabStop (vcat mainDocs), vcat refDocs)
  where
    (mainDocs, refDocs) = unzip (map (blockToRST tabStop) items)
    numeral = show num
    -- single-digit numbers get one extra character of padding
    padding = if length numeral < 2 then " " else ""
    marker  = text (numeral ++ "." ++ padding)
-- | Convert a list of inline elements to reStructuredText.
-- Returns a pair of 'Doc', the first the main text, the second references.
-- The main text is concatenated horizontally; each reference is kept on
-- its own line, since several reference definitions run together on one
-- line would not be valid.
inlineListToRST :: [Inline] -> (Doc, Doc)
inlineListToRST lst = (hcat mainDocs, vcat refDocs)
  where
    (mainDocs, refDocs) = unzip (map inlineToRST lst)
-- | Render one inline element as reStructuredText.
-- The first 'Doc' of the result is the inline text; the second holds any
-- references to be placed at the end of the file.
inlineToRST :: Inline -> (Doc, Doc)
inlineToRST (Emph lst) = (text "*" <> body <> text "*", refs)
  where (body, refs) = inlineListToRST lst
inlineToRST (Strong lst) = (text "**" <> body <> text "**", refs)
  where (body, refs) = inlineListToRST lst
inlineToRST (Code str) = (text ("``" ++ str ++ "``"), empty)
inlineToRST (Str str) = (text (escapeString str), empty)
inlineToRST (TeX str) = (text str, empty)
inlineToRST (HtmlInline _) = (empty, empty)
inlineToRST LineBreak = inlineToRST Space -- RST doesn't have line breaks
inlineToRST Space = (char ' ', empty)
--
-- Note: can assume reference links have been replaced where possible with
-- explicit links.
--
inlineToRST (Link txt (Src src _)) =
  (char '`' <> label <> text "`_", labelRefs $$ target)
  where
    (label, labelRefs)
      | null txt || txt == [Str ""] = (text "link", empty)
      | otherwise                   = inlineListToRST (normalizeSpaces txt)
    rendered = render label
    -- a reference name containing a colon is wrapped in backquotes
    refName
      | ':' `elem` rendered = "`" ++ rendered ++ "`"
      | otherwise           = rendered
    target = text ".. _" <> text refName <> text ": " <> text src
inlineToRST (Link txt (Ref [])) = (char '[' <> label <> char ']', refs)
  where (label, refs) = inlineListToRST txt
inlineToRST (Link txt (Ref ref)) =
  (char '[' <> label <> text "][" <> refLabel <> char ']', labelRefs $$ refRefs)
  where
    (label, labelRefs)  = inlineListToRST txt
    (refLabel, refRefs) = inlineListToRST ref
inlineToRST (Image alternate (Src source _)) =
  (substitution, altRefs $$ definition)
  where
    (alt, altRefs)
      | null alternate || alternate == [Str ""] = (text "image", empty)
      | otherwise = inlineListToRST (normalizeSpaces alternate)
    substitution = char '|' <> alt <> char '|'
    definition   = text ".. " <> substitution <> text " image:: " <> text source
inlineToRST (Image alternate (Ref [])) = (char '|' <> alt <> char '|', refs)
  where (alt, refs) = inlineListToRST alternate
-- The following case won't normally occur...
inlineToRST (Image alternate (Ref ref)) =
  (char '|' <> alt <> char '|', altRefs $$ refRefs)
  where
    (alt, altRefs) = inlineListToRST alternate
    (_, refRefs)   = inlineListToRST ref
inlineToRST (NoteRef ref) =
  (text " [" <> text (escapeString ref) <> char ']' <> char '_', empty)

View file

@ -0,0 +1,194 @@
-- | Convert Pandoc to rich text format.
module Text.Pandoc.Writers.RTF (
writeRTF
) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import List ( isSuffixOf )
import Char ( ord, chr )
-- | Render a Pandoc document as a string in rich text format.
writeRTF :: WriterOptions -> Pandoc -> String
writeRTF options (Pandoc meta blocks) = header ++ body ++ footer
  where
    standalone = writerStandalone options
    -- assumes all notes are at outer level
    notes = filter isNoteBlock blocks
    -- header and closing brace are only emitted in standalone mode
    header
      | standalone = rtfHeader notes (writerHeader options) meta
      | otherwise  = ""
    footer
      | standalone = "\n}\n"
      | otherwise  = ""
    body = concat
      [ writerIncludeBefore options
      , concatMap (blockToRTF notes 0) (replaceReferenceLinks blocks)
      , writerIncludeAfter options
      ]
-- | Convert unicode characters (> 127) into rich text format representation.
--
-- Each such character becomes @\\uN?@, where @N@ is a signed 16-bit decimal
-- number and @?@ is the fallback shown by readers without Unicode support.
-- Code units above 32767 are therefore written as negative numbers, and
-- characters outside the Basic Multilingual Plane are written as a UTF-16
-- surrogate pair (two @\\uN?@ groups).
handleUnicode :: String -> String
handleUnicode = concatMap encode
  where
    encode c
      | code <= 127    = [c]
      | code <= 0xFFFF = unit code
      | otherwise      = unit high ++ unit low
      where
        code   = ord c
        offset = code - 0x10000
        high   = 0xD800 + offset `div` 0x400
        low    = 0xDC00 + offset `mod` 0x400
    -- one 16-bit code unit, expressed as a signed value
    unit n = "\\u" ++ show (if n > 32767 then n - 65536 else n) ++ "?"
-- | Backslash-escape braces and backslashes.
escapeSpecial :: String -> String
escapeSpecial str = backslashEscape "{\\}" str

-- | Substitute the pattern @\\\\t@ with @\\\\tab @ via 'gsub'.
-- NOTE(review): if 'gsub' is a regex substitution, the pattern matches a
-- literal backslash followed by @t@ rather than a tab character — confirm
-- that this is the intended input.
escapeTab :: String -> String
escapeTab str = gsub "\\\\t" "\\\\tab " str
-- | Escape a string for rich text format: tab handling first, then special
-- characters, then non-ASCII characters.
stringToRTF :: String -> String
stringToRTF str = handleUnicode (escapeSpecial (escapeTab str))
-- | Escape a raw LaTeX string for RTF.  Unlike 'stringToRTF' this leaves
-- @\\t@ alone, since it might be the start of a LaTeX command.
latexStringToRTF :: String -> String
latexStringToRTF str = handleUnicode (escapeSpecial str)
-- | Escape a string for use as code in RTF: the escaped text is split into
-- lines, which are rejoined with an explicit @\\line@ between them.
codeStringToRTF :: String -> String
codeStringToRTF = joinWithSep "\\line\n" . lines . stringToRTF
-- | Render raw LaTeX in RTF: the escaped text is wrapped in a group that
-- switches to colour 1 and back to colour 0.
latexToRTF :: String -> String
latexToRTF str = concat ["{\\cf1 ", latexStringToRTF str, "\\cf0 } "]
-- | Build an RTF paragraph with the given space after, block indent and
-- first-line indent.
rtfParSpaced :: Int     -- ^ space after (in twips)
             -> Int     -- ^ block indent (in twips)
             -> Int     -- ^ first line indent (relative to block) (in twips)
             -> String  -- ^ string with content
             -> String
rtfParSpaced spaceAfter indent firstLineIndent content = concat
  [ "{\\pard \\sa", show spaceAfter
  , " \\li", show indent
  , " \\fi", show firstLineIndent
  , " ", content
  , "\\par}\n"
  ]
-- | Default paragraph: 180 twips of space after it.
rtfPar :: Int     -- ^ block indent (in twips)
       -> Int     -- ^ first line indent (relative to block) (in twips)
       -> String  -- ^ string with content
       -> String
rtfPar indent firstLineIndent content =
  rtfParSpaced 180 indent firstLineIndent content
-- | Compact paragraph (e.g. for compact list items): no space after it.
rtfCompact :: Int     -- ^ block indent (in twips)
           -> Int     -- ^ first line indent (relative to block) (in twips)
           -> String  -- ^ string with content
           -> String
rtfCompact indent firstLineIndent content =
  rtfParSpaced 0 indent firstLineIndent content
-- | Number of twips added to the indent for each level of block quoting.
indentIncrement :: Int
indentIncrement = 720

-- | Number of twips by which list items are indented.
listIncrement :: Int
listIncrement = 360
-- | Bullet list marker for an indent level: a bullet when the indent is a
-- multiple of 720 twips, an en dash otherwise.
bulletMarker :: Int -> String
bulletMarker indent
  | indent `mod` 720 == 0 = "\\bullet "
  | otherwise             = "\\endash "
-- | Returns appropriate (list of) ordered list markers for indent level:
-- @1.@, @2.@, ... when the indent is a multiple of 720 twips, and the
-- letters @a.@ .. @z.@ (repeating) otherwise.  The list is infinite.
orderedMarkers :: Int -> [String]
orderedMarkers indent
  | indent `mod` 720 == 0 = [show n ++ "." | n <- [1 :: Integer ..]]
    -- build the marker from the letter itself: 'show' on a 'Char' would
    -- wrap it in quote characters
  | otherwise             = [[letter, '.'] | letter <- cycle ['a' .. 'z']]
-- | Returns RTF header: the header text followed by centered title,
-- author and date paragraphs (each omitted when empty) and, if any of them
-- is present, an empty spacer paragraph.
rtfHeader :: [Block]  -- ^ list of note blocks
          -> String   -- ^ header text
          -> Meta     -- ^ bibliographic information
          -> String
rtfHeader notes headerText (Meta title authors date) =
  headerText ++ titletext ++ authorstext ++ datetext ++ spacer
  where
    centered content = rtfPar 0 0 ("\\qc " ++ content)
    titletext
      | null title = ""
      | otherwise  = centered ("\\b \\fs36 " ++ inlineListToRTF notes title)
    -- Authors are separated by explicit line breaks.  A bare backslash
    -- between names would fuse with the following name into a control word.
    authorstext
      | null authors = ""
      | otherwise    = centered (joinWithSep "\\line " (map stringToRTF authors))
    datetext
      | date == "" = ""
      | otherwise  = centered (stringToRTF date)
    spacer
      | null (titletext ++ authorstext ++ datetext) = ""
      | otherwise                                   = rtfPar 0 0 ""
-- | Render one block element as RTF.
blockToRTF :: [Block]  -- ^ list of note blocks
           -> Int      -- ^ indent level
           -> Block    -- ^ block to convert
           -> String
blockToRTF _ indent Blank = rtfPar indent 0 ""
blockToRTF _ _ Null = ""
blockToRTF notes indent (Plain lst) = rtfCompact indent 0 (inlineListToRTF notes lst)
blockToRTF notes indent (Para lst) = rtfPar indent 0 (inlineListToRTF notes lst)
blockToRTF notes indent (BlockQuote lst) = concatMap quoted lst
  where quoted = blockToRTF notes (indent + indentIncrement)
blockToRTF _ _ (Note _ _) = ""   -- there shouldn't be any after filtering
blockToRTF _ _ (Key _ _) = ""
blockToRTF _ indent (CodeBlock str) = rtfPar indent 0 ("\\f1 " ++ codeStringToRTF str)
blockToRTF _ _ (RawHtml _) = ""
blockToRTF notes indent (BulletList lst) = spaceAtEnd (concatMap renderItem lst)
  where renderItem = listItemToRTF notes indent (bulletMarker indent)
blockToRTF notes indent (OrderedList lst) = spaceAtEnd (concat renderedItems)
  where renderedItems = zipWith (listItemToRTF notes indent) (orderedMarkers indent) lst
blockToRTF _ indent HorizontalRule =
  rtfPar indent 0 "\\qc \\emdash\\emdash\\emdash\\emdash\\emdash"
blockToRTF notes indent (Header level lst) =
  rtfPar indent 0 (concat ["\\b \\fs", show fontSize, " ", inlineListToRTF notes lst])
  where
    -- the \fs value shrinks by 4 for each header level
    fontSize = 40 - (level * 4)
-- | Give the last paragraph of a list the regular space-after value, so
-- that compact lists are followed by the same amount of space as regular
-- lists.  Strings that do not end in a paragraph are returned unchanged.
spaceAtEnd :: String -> String
spaceAtEnd str
  | closing `isSuffixOf` str =
      take (length str - length closing) str ++ "\\sa180" ++ closing
  | otherwise = str
  where
    closing = "\\par}\n"
-- | Render a list item (a list of blocks) as RTF.
listItemToRTF :: [Block]  -- ^ list of note blocks
              -> Int      -- ^ indent level
              -> String   -- ^ list start marker
              -> [Block]  -- ^ list item (list of blocks)
              -> [Char]
listItemToRTF notes indent marker items =
  case map (blockToRTF notes itemIndent) items of
    -- an empty item still shows its marker
    [] -> rtfCompact itemIndent hanging (marker ++ "\\tx" ++ tabWidth ++ "\\tab ")
    -- otherwise the marker is spliced into the first rendered block by
    -- rewriting its first-line-indent setting
    (firstBlock : others) ->
      gsub "\\\\fi-?[0-9]+" replacement firstBlock ++ concat others
  where
    itemIndent  = indent + listIncrement
    hanging     = 0 - listIncrement
    tabWidth    = show listIncrement
    replacement = "\\\\fi" ++ show hanging ++ " " ++ marker ++
                  "\\\\tx" ++ tabWidth ++ "\\\\tab"
-- | Render a list of inline elements as RTF.
inlineListToRTF :: [Block]   -- ^ list of note blocks
                -> [Inline]  -- ^ list of inlines to convert
                -> String
inlineListToRTF notes = concatMap (inlineToRTF notes)
-- | Render one inline element as RTF.
inlineToRTF :: [Block]  -- ^ list of note blocks
            -> Inline   -- ^ inline to convert
            -> String
inlineToRTF notes (Emph lst) = concat ["{\\i ", inlineListToRTF notes lst, "} "]
inlineToRTF notes (Strong lst) = concat ["{\\b ", inlineListToRTF notes lst, "} "]
inlineToRTF _ (Code str) = concat ["{\\f1 ", codeStringToRTF str, "} "]
inlineToRTF _ (Str str) = stringToRTF str
inlineToRTF _ (TeX str) = latexToRTF str
inlineToRTF _ (HtmlInline _) = ""
inlineToRTF _ LineBreak = "\\line "
inlineToRTF _ Space = " "
inlineToRTF notes (Link label (Src src _)) = concat
  [ "{\\field{\\*\\fldinst{HYPERLINK \"", codeStringToRTF src
  , "\"}}{\\fldrslt{\\ul\n", inlineListToRTF notes label, "\n}}}\n"
  ]
-- Reference-style links and images are written out in bracket form
-- (this is what markdown does, for better or worse).
inlineToRTF notes (Link label (Ref [])) =
  concat ["[", inlineListToRTF notes label, "]"]
inlineToRTF notes (Link label (Ref ref)) =
  concat ["[", inlineListToRTF notes label, "][", inlineListToRTF notes ref, "]"]
inlineToRTF _ (Image _ (Src source _)) =
  concat ["{\\cf1 [image: ", source, "]\\cf0}"]
inlineToRTF notes (Image alternate (Ref [])) =
  concat ["![", inlineListToRTF notes alternate, "]"]
inlineToRTF notes (Image alternate (Ref ref)) =
  concat ["![", inlineListToRTF notes alternate, "][", inlineListToRTF notes ref, "]"]
-- A note reference is resolved by walking the note list; a reference with
-- no matching note produces nothing.
inlineToRTF [] (NoteRef _) = ""
inlineToRTF (Note noteId body : later) target@(NoteRef ref)
  | noteId == ref =
      "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++
      concatMap (blockToRTF later 0) body ++ "}"
  | otherwise = inlineToRTF later target

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,109 @@
-- | Special parser combinators for Pandoc readers.
module Text.ParserCombinators.Pandoc (
many1Till,
followedBy',
notFollowedBy',
oneOfStrings,
spaceChar,
skipSpaces,
blankline,
blanklines,
escaped,
enclosed,
blankBlock,
nullBlock,
stringAnyCase
) where
import Text.ParserCombinators.Parsec
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Char ( toUpper, toLower )
-- | Consumes any single character and yields 'Null', so that the parser
-- can move on if it gets stuck.
nullBlock :: GenParser Char st Block
nullBlock = anyChar >> return Null
-- | Consumes one or more blank lines and yields 'Blank'.
blankBlock :: GenParser Char st Block
blankBlock = blanklines >> return Blank
-- | Parses a single space or tab character.
spaceChar :: CharParser st Char
spaceChar = satisfy (`elem` " \t")
-- | Discards any run (possibly empty) of spaces and tabs.
skipSpaces :: GenParser Char st ()
skipSpaces = skipMany horizontalSpace
  where horizontalSpace = spaceChar
-- | Parses a line containing only spaces and tabs, returning its newline.
-- Backtracks if the line turns out not to be blank.
blankline :: GenParser Char st Char
blankline = try (skipSpaces >> newline)
-- | Parses one or more blank lines, returning one newline per line.
blanklines :: GenParser Char st [Char]
blanklines = try (many1 blankline)
-- | Parses a backslash followed by a character accepted by the given
-- parser, yielding that character as a 'Str'.
escaped :: GenParser Char st Char  -- ^ Parser for character to escape
        -> GenParser Char st Inline
escaped parser = try (char '\\' >> parser >>= wrap)
  where wrap c = return (Str [c])
-- | Parses material enclosed between a start and an end parser.  The start
-- must not be followed directly by whitespace, and at least one item of
-- content is required.
enclosed :: GenParser Char st t    -- ^ start parser
         -> GenParser Char st end  -- ^ end parser
         -> GenParser Char st a    -- ^ content parser (to be used repeatedly)
         -> GenParser Char st [a]
enclosed start end parser =
  try (start >> notFollowedBy space >> many1Till parser (try end))
-- | Variant of @manyTill@ that requires at least one item before the end.
many1Till :: GenParser tok st a
          -> GenParser tok st end
          -> GenParser tok st [a]
many1Till p end = try $
  p >>= \firstItem ->
  manyTill p end >>= \others ->
  return (firstItem : others)
-- | A more general form of @notFollowedBy@.  This one allows any type of
-- parser to be specified, and succeeds only if that parser fails.  It does
-- not consume any input.
--
-- The argument is wrapped in its own @try@: without it, a parser that
-- fails /after/ consuming input would prevent the @return ()@ alternative
-- from being tried, and the combinator would fail although the parser did
-- not match.
notFollowedBy' :: Show b => GenParser a st b -> GenParser a st ()
notFollowedBy' parser = try (do { c <- try parser; unexpected (show c) }
                             <|> return ())
-- | The inverse of @notFollowedBy'@: succeeds with @()@ only if the given
-- parser would succeed here, and fails otherwise.  Consumes no input.
followedBy' :: (Show b) => GenParser a st b -> GenParser a st ()
followedBy' parser =
  option False (notFollowedBy' parser >> return True) >>= \absent ->
    if absent
      then fail "not followed by parser"
      else return ()
-- | Parses the first string of the list that matches at this position;
-- candidates are tried in order, backtracking between attempts.
oneOfStrings :: [String] -> GenParser Char st String
oneOfStrings listOfStrings = choice [ try (string s) | s <- listOfStrings ]
-- | Parses the given string, accepting each character in upper or lower
-- case.  Returns the text as it appeared in the input.
stringAnyCase :: [Char] -> CharParser st String
stringAnyCase [] = string ""
stringAnyCase (x:xs) = try $
  (char (toUpper x) <|> char (toLower x)) >>= \matched ->
  stringAnyCase xs >>= \others ->
  return (matched : others)

11
src/headers/HtmlHeader Normal file
View file

@ -0,0 +1,11 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta name="generator" content="pandoc" />
<style type="text/css">
div.pandocNote { border-left: 1px solid gray; padding-left: 1em; }
span.pandocNoteRef { vertical-align: super; font-size: 80%; }
span.pandocNoteMarker { }
</style>

10
src/headers/LaTeXHeader Normal file
View file

@ -0,0 +1,10 @@
\documentclass{article}
% hyperref provides \href, which is used for links:
\usepackage{hyperref}
% UTF-8 encoded input:
\usepackage{ucs}
\usepackage[utf8x]{inputenc}
% graphicx provides \includegraphics, which is used for images:
\usepackage{graphicx}
% No first-line indent; paragraphs are separated by vertical space instead:
\setlength{\parindent}{0pt}
\setlength{\parskip}{6pt plus 2pt minus 1pt}
% This is needed for code blocks in footnotes:
\usepackage{fancyvrb}
\VerbatimFootnotes

4
src/headers/RTFHeader Normal file
View file

@ -0,0 +1,4 @@
{\rtf1\ansi\deff0{\fonttbl{\f0 Times New Roman;}{\f1 Courier;}}
{\colortbl;\red255\green0\blue0;\red0\green0\blue255;}
\widowctrl\hyphauto

8
src/headers/S5Header Normal file
View file

@ -0,0 +1,8 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<!-- configuration parameters -->
<meta name="defaultView" content="slideshow" />
<meta name="controlVis" content="hidden" />
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta name="generator" content="pandoc" />

View file

@ -0,0 +1,7 @@
-- | Definitions for use of Pandoc.ASCIIMathML in HTML.
-- (See <http://www1.chapman.edu/~jipsen/mathml/asciimath.html>.)
module Text.Pandoc.ASCIIMathML ( asciiMathMLScript ) where
-- | String containing Pandoc.ASCIIMathML javascript.
-- This file is a template: the @<ASCIIMathML.js>@ marker inside the literal
-- is a placeholder, so the literal must be kept exactly as written.
-- NOTE(review): presumably fillTemplates.pl replaces the marker with the
-- escaped contents of ASCIIMathML.js — confirm against the processor.
asciiMathMLScript :: String
asciiMathMLScript = "<script type=\"text/javascript\">\n<ASCIIMathML.js></script>\n"

View file

@ -0,0 +1,20 @@
-- | Default headers for Pandoc writers.
module Text.Pandoc.Writers.DefaultHeaders (
defaultLaTeXHeader,
defaultHtmlHeader,
defaultS5Header,
defaultRTFHeader
) where
import Text.Pandoc.Writers.S5
-- The string literals below are template placeholders and must be kept
-- exactly as written.
-- NOTE(review): presumably fillTemplates.pl replaces each marker with the
-- escaped contents of the header file of the same name — confirm against
-- the processor.

-- | Default header for LaTeX output.
defaultLaTeXHeader :: String
defaultLaTeXHeader = "<LaTeXHeader>"
-- | Default header for HTML output.
defaultHtmlHeader :: String
defaultHtmlHeader = "<HtmlHeader>"
-- | Default header for S5 output: the S5 header followed by the embedded
-- S5 style sheets and javascript.
defaultS5Header :: String
defaultS5Header = "<S5Header>" ++ s5CSS ++ s5Javascript
-- | Default header for RTF output.
defaultRTFHeader :: String
defaultRTFHeader = "<RTFHeader>"

16
src/templates/Makefile Normal file
View file

@ -0,0 +1,16 @@
# Fills the Haskell templates in this directory with the contents of the
# files they embed and writes the results into the source tree one level up.

# Directory searched for targets and prerequisites; also passed to the
# processor as its second argument.
VPATH := ..
PROCESSOR := ./fillTemplates.pl
TARGETS := Text/Pandoc/ASCIIMathML.hs \
           Text/Pandoc/Writers/S5.hs \
           Text/Pandoc/Writers/DefaultHeaders.hs

.PHONY: all
all: $(TARGETS)

Text/Pandoc/ASCIIMathML.hs: ASCIIMathML.hs $(PROCESSOR) $(VPATH)/ASCIIMathML.js
	perl $(PROCESSOR) $@ $(VPATH)

# S5.hs embeds the S5 javascript and style sheets from ui/default.
Text/Pandoc/Writers/S5.hs: S5.hs $(PROCESSOR) $(VPATH)/ui/default/*
	perl $(PROCESSOR) $@ $(VPATH)

# DefaultHeaders.hs embeds the per-format header files from headers.
Text/Pandoc/Writers/DefaultHeaders.hs: DefaultHeaders.hs $(PROCESSOR) $(VPATH)/headers/*
	perl $(PROCESSOR) $@ $(VPATH)

88
src/templates/S5.hs Normal file
View file

@ -0,0 +1,88 @@
-- | Definitions for creation of S5 powerpoint-like HTML.
-- (See <http://meyerweb.com/eric/tools/s5/>.)
module Text.Pandoc.Writers.S5 (
-- * Strings
s5Javascript,
s5CSS,
s5Links,
-- * Functions
writeS5,
insertS5Structure
) where
import Text.Pandoc.Shared ( joinWithSep, WriterOptions )
import Text.Pandoc.Writers.HTML ( writeHtml )
import Text.Pandoc.Definition
-- The @<name>@ markers in the string literals below are template
-- placeholders: fillTemplates.pl substitutes each one with the escaped
-- contents of the corresponding file under ui/default.  The literals must
-- therefore be kept exactly as written.

-- | Script element embedding the S5 javascript (slides.js).
s5Javascript :: String
s5Javascript = "<script type=\"text/javascript\">\n<slides.js></script>\n"
-- | Contents of s5-core.css.
s5CoreCSS :: String
s5CoreCSS = "<s5-core.css>"
-- | Contents of framing.css.
s5FramingCSS :: String
s5FramingCSS = "<framing.css>"
-- | Contents of pretty.css.
s5PrettyCSS :: String
s5PrettyCSS = "<pretty.css>"
-- | Contents of opera.css.
s5OperaCSS :: String
s5OperaCSS = "<opera.css>"
-- | Contents of outline.css.
s5OutlineCSS :: String
s5OutlineCSS = "<outline.css>"
-- | Contents of print.css.
s5PrintCSS :: String
s5PrintCSS = "<print.css>"
-- | Style elements embedding all S5 style sheets: core, framing and pretty
-- for projection, then the opera fix, the outline (screen) style and the
-- print style.
s5CSS :: String
s5CSS = "<style type=\"text/css\" media=\"projection\" id=\"slideProj\">\n" ++ s5CoreCSS ++ "\n" ++ s5FramingCSS ++ "\n" ++ s5PrettyCSS ++ "\n</style>\n<style type=\"text/css\" media=\"projection\" id=\"operaFix\">\n" ++ s5OperaCSS ++ "\n</style>\n<style type=\"text/css\" media=\"screen\" id=\"outlineStyle\">\n" ++ s5OutlineCSS ++ "\n</style>\n<style type=\"text/css\" media=\"print\" id=\"slidePrint\">\n" ++ s5PrintCSS ++ "\n</style>\n"
-- | Link and script elements that refer to the S5 files under ui/default
-- instead of embedding them.
s5Links :: String
s5Links = "<!-- style sheet links -->\n<link rel=\"stylesheet\" href=\"ui/default/slides.css\" type=\"text/css\" media=\"projection\" id=\"slideProj\" />\n<link rel=\"stylesheet\" href=\"ui/default/outline.css\" type=\"text/css\" media=\"screen\" id=\"outlineStyle\" />\n<link rel=\"stylesheet\" href=\"ui/default/print.css\" type=\"text/css\" media=\"print\" id=\"slidePrint\" />\n<link rel=\"stylesheet\" href=\"ui/default/opera.css\" type=\"text/css\" media=\"projection\" id=\"operaFix\" />\n<!-- S5 JS -->\n<script src=\"ui/default/slides.js\" type=\"text/javascript\"></script>\n"
-- | Renders a 'Pandoc' document as an S5 HTML presentation: the slide
-- structure is inserted first, then the result is written as HTML.
writeS5 :: WriterOptions -> Pandoc -> String
writeS5 options doc = writeHtml options (insertS5Structure doc)
-- | Blocks for the S5 layout division: the controls, current-slide and
-- header placeholders, and a footer holding the date and the title.
layoutDiv :: [Inline]  -- ^ Title of document (for header or footer)
          -> String    -- ^ Date of document (for header or footer)
          -> [Block]   -- ^ List of block elements returned
layoutDiv title date =
  [ RawHtml opening
  , Header 1 [Str date]
  , Header 2 title
  , RawHtml "</div>\n</div>\n"
  ]
  where
    opening = "<div class=\"layout\">\n" ++
              "<div id=\"controls\"></div>\n" ++
              "<div id=\"currentSlide\"></div>\n" ++
              "<div id=\"header\"></div>\n" ++
              "<div id=\"footer\">\n"
-- | Opening tag of the division that holds all slides.
presentationStart :: Block
presentationStart = RawHtml "<div class=\"presentation\">\n\n"

-- | Closing tag of the presentation division.
presentationEnd :: Block
presentationEnd = RawHtml "</div>\n"

-- | Opening tag of a single slide.
slideStart :: Block
slideStart = RawHtml "<div class=\"slide\">\n"

-- | Closing tag of a single slide.
slideEnd :: Block
slideEnd = RawHtml "</div>\n"
-- | Tests whether a block is a level-1 header.
isH1 :: Block -> Bool
isH1 block = case block of
  Header 1 _ -> True
  _          -> False
-- | Wrap sections in slide markup: every level-1 header starts a new slide.
-- The flag tells whether no slide has been opened yet; when it is 'False' a
-- slide is open and is closed before the next one starts and at the end of
-- the input.
insertSlides :: Bool -> [Block] -> [Block]
insertSlides beginning blocks =
  case break isH1 blocks of
    -- no further level-1 header: close the open slide, if any
    (beforeHead, [])
      | beginning -> beforeHead
      | otherwise -> beforeHead ++ [slideEnd]
    -- a level-1 header: open a new slide (closing the previous one first)
    (beforeHead, heading : following) ->
      beforeHead ++ opener ++ heading : insertSlides False following
  where
    opener
      | beginning = [slideStart]
      | otherwise = [slideEnd, slideStart]
-- | Add the S5 slide structure to a document: the layout division, the
-- presentation wrapper, a title slide (when the document has a title) and
-- slide markup around the sections.  A document without blocks is returned
-- unchanged.
insertS5Structure :: Pandoc -> Pandoc
insertS5Structure doc@(Pandoc _ []) = doc
insertS5Structure (Pandoc (Meta title authors date) blocks) =
  Pandoc (Meta title authors date) newBlocks
  where
    slides = insertSlides True blocks
    titleSlide
      | null title = []
      | otherwise  =
          [ slideStart
          , Header 1 title
          , Header 3 [Str (joinWithSep ", " authors)]
          , Header 4 [Str date]
          , slideEnd
          ]
    newBlocks = concat
      [ layoutDiv title date
      , [presentationStart]
      , titleSlide
      , slides
      , [presentationEnd]
      ]

View file

@ -0,0 +1,138 @@
#!/usr/bin/env perl
# fills templates in templates directory with haskell-escaped strings
# slurped from input files
use strict;
use warnings;
# Utility routines:
# Returns the whole contents of the file named by the first argument as a
# single string; dies with the file name and OS error if it cannot be opened.
sub slurp {
    my ($path) = @_;
    # Three-argument open with a lexical handle: the name is always treated
    # as a plain file to read, even if it starts with '>' or '|' or carries
    # leading/trailing whitespace, and the handle cannot clash with other code.
    open my $fh, '<', $path or die "couldn't open file '$path': $!";
    # Undefine the record separator locally so one read returns everything.
    my $contents = do { local $/; <$fh> };
    close $fh;
    return $contents;
}
# Escapes a string so it can be pasted between the double quotes of a
# Haskell string literal: backslashes, tabs, carriage returns, double quotes
# and newlines are replaced by their backslash escapes.
sub escape_for_haskell {
    my ($contents) = @_;
    # Backslashes first, so the escapes added below are not escaped again.
    $contents =~ s/\\/\\\\/g;
    $contents =~ s/\t/\\t/g;
    # A raw carriage return (CRLF input) is not allowed in a Haskell literal.
    $contents =~ s/\r/\\r/g;
    $contents =~ s/"/\\"/g;
    $contents =~ s/\n/\\n/g;
    return $contents;
}
# Template processors.
#
# Maps each generated file (path relative to the root directory) to a hash:
#   proc     => code ref that takes the template text and returns it with
#               the first occurrence of each <placeholder> filled in;
#   template => optional template file name (looked up by the main program).
my %processor = (
    # S5 writer: embeds the S5 script and style sheets.
    'Text/Pandoc/Writers/S5.hs' => {
        proc => sub {
            my $filled = shift;
            # Read and escape every asset up front, in this order.
            my @assets = qw(slides.js s5-core.css framing.css pretty.css
                            opera.css outline.css print.css);
            my %escaped = map { ($_, escape_for_haskell(slurp("ui/default/$_"))) } @assets;
            $filled =~ s/<slides\.js>/$escaped{'slides.js'}/;
            $filled =~ s/<s5-core\.css>/$escaped{'s5-core.css'}/;
            $filled =~ s/<framing\.css>/$escaped{'framing.css'}/;
            $filled =~ s/<pretty\.css>/$escaped{'pretty.css'}/;
            $filled =~ s/<opera\.css>/$escaped{'opera.css'}/;
            $filled =~ s/<outline\.css>/$escaped{'outline.css'}/;
            $filled =~ s/<print\.css>/$escaped{'print.css'}/;
            return $filled;
        },
    },
    # ASCIIMathML: embeds the script, with every /* ... */ comment replaced
    # by a short acknowledgement.
    'Text/Pandoc/ASCIIMathML.hs' => {
        proc => sub {
            my $filled = shift;
            my $js = escape_for_haskell(slurp("ASCIIMathML.js"));
            my $credit = join '',
                " ASCIIMathML.js - copyright Peter Jipsen,",
                " released under the GPL\\nSee ",
                "http://www1.chapman.edu/~jipsen/mathml/asciimath.html/ ";
            $js =~ s/\/\*.*?\*\//\/\*$credit\*\//g;
            $filled =~ s/<ASCIIMathML\.js>/$js/;
            return $filled;
        },
    },
    # Default headers for the HTML, LaTeX, RTF and S5 writers.
    'Text/Pandoc/Writers/DefaultHeaders.hs' => {
        proc => sub {
            my $filled = shift;
            my %header = map { ($_, escape_for_haskell(slurp("headers/$_"))) }
                         qw(HtmlHeader LaTeXHeader RTFHeader S5Header);
            $filled =~ s/<HtmlHeader>/$header{HtmlHeader}/;
            $filled =~ s/<LaTeXHeader>/$header{LaTeXHeader}/;
            $filled =~ s/<RTFHeader>/$header{RTFHeader}/;
            $filled =~ s/<S5Header>/$header{S5Header}/;
            return $filled;
        },
    },
    # Skeleton for a new entry:
    # 'foo/bar/baz' => {
    #     template => 'optional-template-filename-defaults-to-baz',
    #     proc => sub {
    #         my ($template) = @_;
    #         # Process.
    #         return $template;
    #     },
    # },
);
# Main.
#
# Usage: <script> TARGET [ROOTDIR]
#   TARGET  - one of the keys of %processor (the file to generate)
#   ROOTDIR - directory to chdir into before doing anything (default '..')
my $target = shift @ARGV;
if (!defined $target || !length $target) {
    print STDERR "Available targets:\n\n" . join "\n", sort keys %processor;
    die "\n\nYou must supply a target!\n";
}
die "No processor exists for '$target'!\n" if ! exists $processor{$target};

my $rootdir = shift @ARGV || '..';
chdir $rootdir or die "Couldn't chdir to '$rootdir': $!";

# The template is either named explicitly or defaults to the target's basename.
my $template;
if (exists $processor{$target}->{template}) {
    $template = $processor{$target}->{template};
}
else {
    ($template = $target) =~ s!.*/+!!;
}
$template = "templates/$template";
die "No template exists for '$target'!\n" if ! -f "$template";

# Generate the contents BEFORE touching the target: if an input file is
# missing the processor dies, and the existing target must not be left
# truncated (a header-only file would look up to date to make).
my $generated = $processor{$target}->{proc}->(slurp($template));

open my $out, '>', $target or die "couldn't open file '$target': $!";
print {$out} <<END;
----------------------------------------------------
-- Do not edit this file by hand. Edit
-- '$template'
-- and run $0 $target
----------------------------------------------------
END
print {$out} $generated;
print {$out} "\n";
# Buffered write errors (e.g. a full disk) only surface on close.
close $out or die "couldn't write file '$target': $!";

BIN
src/ui/default/blank.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 49 B

BIN
src/ui/default/bodybg.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.9 KiB

View file

@ -0,0 +1,23 @@
/* The following styles size, place, and layer the slide components.
Edit these if you want to change the overall slide layout.
The commented lines can be uncommented (and modified, if necessary)
to help you with the rearrangement process. */
/* target = 1024x768 */
/* Header, footer and slides all start from the top-left corner at full width. */
div#header, div#footer, .slide {width: 100%; top: 0; left: 0;}
div#header {top: 0; height: 3em; z-index: 1;}
/* The footer is pinned to the bottom edge instead of the top. */
div#footer {top: auto; bottom: 0; height: 2.5em; z-index: 5;}
.slide {top: 0; width: 92%; padding: 3.5em 4% 4%; z-index: 2; list-style: none;}
/* Controls occupy the right half of the bottom edge, above everything else. */
div#controls {left: 50%; bottom: 0; width: 50%; z-index: 100;}
div#controls form {position: absolute; bottom: 0; right: 0; width: 100%;
margin: 0;}
#currentSlide {position: absolute; width: 10%; left: 45%; bottom: 1em; z-index: 10;}
html>body #currentSlide {position: fixed;}
/* Debugging aid: uncomment to give each component a distinct background. */
/*
div#header {background: #FCC;}
div#footer {background: #CCF;}
div#controls {background: #BBD;}
div#currentSlide {background: #FFC;}
*/

View file

@ -0,0 +1,42 @@
<public:component>
<public:attach event="onpropertychange" onevent="doFix()" />
<script>
// IE5.5+ PNG Alpha Fix v1.0 by Angus Turnbull http://www.twinhelix.com
// Free usage permitted as long as this notice remains intact.
// This must be a path to a blank image. That's all the configuration you need here.
var blankImg = 'ui/default/blank.gif';
// Name of the filter used to render the PNG.
var f = 'DXImageTransform.Microsoft.AlphaImageLoader';
// Turns the filter on with source s and sizing method m, or turns an
// existing filter off when s is not given.  filters and style are unqualified
// names; presumably they resolve to the element this behavior is attached to.
function filt(s, m) {
if (filters[f]) {
filters[f].enabled = s ? true : false;
if (s) with (filters[f]) { src = s; sizingMethod = m }
} else if (s) style.filter = 'progid:'+f+'(src="'+s+'",sizingMethod="'+m+'")';
}
// Applies the fix.  Called once when the component loads and again on every
// property change (see the attach declaration above).
function doFix() {
// Do nothing on IE older than 5.5, or when the changed property is neither
// a background nor src.
if ((parseFloat(navigator.userAgent.match(/MSIE (\S+)/)[1]) < 5.5) ||
(event && !/(background|src)/.test(event.propertyName))) return;
if (tagName == 'IMG') {
if ((/\.png$/i).test(src)) {
// Render the PNG through the filter and show the blank image in its place.
filt(src, 'image'); // was 'scale'
src = blankImg;
} else if (src.indexOf(blankImg) < 0) filt();
} else if (style.backgroundImage) {
if (style.backgroundImage.match(/^url[("']+(.*\.png)[)"']+$/i)) {
// Move a PNG background into the filter and clear the CSS background.
var s = RegExp.$1;
style.backgroundImage = '';
filt(s, 'crop');
} else filt();
}
}
doFix();
</script>
</public:component>

7
src/ui/default/opera.css Normal file
View file

@ -0,0 +1,7 @@
/* DO NOT CHANGE THESE unless you really want to break Opera Show */
/* Every slide is forced visible, kept in normal flow and started on a new page. */
.slide {
visibility: visible !important;
position: static !important;
page-break-before: always;
}
/* No page break before the first slide. */
#slide0 {page-break-before: avoid;}

View file

@ -0,0 +1,15 @@
/* don't change this unless you want the layout stuff to show up in the outline view! */
.layout div, #footer *, #controlForm * {display: none;}
/* ...but keep the containers needed for the view toggle displayed. */
#footer, #controls, #controlForm, #navLinks, #toggle {
display: block; visibility: visible; margin: 0; padding: 0;}
#toggle {float: right; padding: 0.5em;}
html>body #toggle {position: fixed; top: 0; right: 0;}
/* making the outline look pretty-ish */
#slide0 h1, #slide0 h2, #slide0 h3, #slide0 h4 {border: none; margin: 0;}
#slide0 h1 {padding-top: 1.5em;}
/* Slide titles are separated by rules above and below. */
.slide h1 {margin: 1.5em 0 0; padding-top: 0.25em;
border-top: 1px solid #888; border-bottom: 1px solid #AAA;}
#toggle {border: 1px solid; border-width: 0 0 1px 1px; background: #FFF;}

86
src/ui/default/pretty.css Normal file
View file

@ -0,0 +1,86 @@
/* Following are the presentation styles -- edit away! */
/* --- Page and base element styles --- */
body {background: #FFF url(bodybg.gif) -16px 0 no-repeat; color: #000; font-size: 2em;}
:link, :visited {text-decoration: none; color: #00C;}
#controls :active {color: #88A !important;}
#controls :focus {outline: 1px dotted #227;}
h1, h2, h3, h4 {font-size: 100%; margin: 0; padding: 0; font-weight: inherit;}
ul, pre {margin: 0; line-height: 1em;}
html, body {margin: 0; padding: 0;}
blockquote, q {font-style: italic;}
blockquote {padding: 0 2em 0.5em; margin: 0 1.5em 0.5em; text-align: center; font-size: 1em;}
blockquote p {margin: 0;}
blockquote i {font-style: normal;}
blockquote b {display: block; margin-top: 0.5em; font-weight: normal; font-size: smaller; font-style: normal;}
blockquote b i {font-style: italic;}
kbd {font-weight: bold; font-size: 1em;}
sup {font-size: smaller; line-height: 1px;}
/* --- Slide content: code, lists, images --- */
.slide code {padding: 2px 0.25em; font-weight: bold; color: #533;}
.slide code.bad, code del {color: red;}
.slide code.old {color: silver;}
.slide pre {padding: 0; margin: 0.25em 0 0.5em 0.5em; color: #533; font-size: 90%;}
.slide pre code {display: block;}
.slide ul {margin-left: 5%; margin-right: 7%; list-style: disc;}
.slide li {margin-top: 0.75em; margin-right: 0;}
.slide ul ul {line-height: 1;}
.slide ul ul li {margin: .2em; font-size: 85%; list-style: square;}
.slide img.leader {display: block; margin: 0 auto;}
/* --- Header and footer bars --- */
div#header, div#footer {background: #005; color: #AAB;
font-family: Verdana, Helvetica, sans-serif;}
div#header {background: #005 url(bodybg.gif) -16px 0 no-repeat;
line-height: 1px;}
div#footer {font-size: 0.5em; font-weight: bold; padding: 1em 0;}
#footer h1, #footer h2 {display: block; padding: 0 1em;}
#footer h2 {font-style: italic;}
div.long {font-size: 0.75em;}
/* --- Slide titles --- */
.slide h1 {position: absolute; top: 0.7em; left: 87px; z-index: 1;
margin: 0; padding: 0.3em 0 0 50px; white-space: nowrap;
font: bold 150%/1em Helvetica, sans-serif; text-transform: capitalize;
color: #DDE; background: #005;}
.slide h3 {font-size: 130%;}
h1 abbr {font-variant: small-caps;}
/* --- Navigation controls and slide counter --- */
div#controls {position: absolute; left: 50%; bottom: 0;
width: 50%;
text-align: right; font: bold 0.9em Verdana, Helvetica, sans-serif;}
html>body div#controls {position: fixed; padding: 0 0 1em 0;
top: auto;}
div#controls form {position: absolute; bottom: 0; right: 0; width: 100%;
margin: 0; padding: 0;}
#controls #navLinks a {padding: 0; margin: 0 0.5em;
background: #005; border: none; color: #779;
cursor: pointer;}
#controls #navList {height: 1em;}
#controls #navList #jumplist {position: absolute; bottom: 0; right: 0; background: #DDD; color: #227;}
#currentSlide {text-align: center; font-size: 0.5em; color: #449;}
/* --- First slide (#slide0) --- */
#slide0 {padding-top: 3.5em; font-size: 90%;}
#slide0 h1 {position: static; margin: 1em 0 0; padding: 0;
font: bold 2em Helvetica, sans-serif; white-space: normal;
color: #000; background: transparent;}
#slide0 h2 {font: bold italic 1em Helvetica, sans-serif; margin: 0.25em;}
#slide0 h3 {margin-top: 1.5em; font-size: 1.5em;}
#slide0 h4 {margin-top: 0; font-size: 1em;}
/* --- Miscellaneous helper classes --- */
ul.urls {list-style: none; display: inline; margin: 0;}
.urls li {display: inline; margin: 0;}
.note {display: none;}
.external {border-bottom: 1px dotted gray;}
html>body .external {border-bottom: none;}
.external:after {content: " \274F"; font-size: smaller; color: #77B;}
/* --- Incremental display: pending items are shown in a pale colour,
   the current item is highlighted --- */
.incremental, .incremental *, .incremental *:after {color: #DDE; visibility: visible;}
img.incremental {visibility: hidden;}
.slide .current {color: #B02;}
/* diagnostics
li:after {content: " [" attr(class) "]"; color: #F88;}
*/

1
src/ui/default/print.css Normal file
View file

@ -0,0 +1 @@
/* Print style sheet: all slides are made visible, text is black on white, and the layout/controls are hidden. */
/* The following rule is necessary to have all slides appear in print! DO NOT REMOVE IT! */ .slide, ul {page-break-inside: avoid; visibility: visible !important;} h1 {page-break-after: avoid;} body {font-size: 12pt; background: white;} * {color: black;} #slide0 h1 {font-size: 200%; border: none; margin: 0.5em 0 0.25em;} #slide0 h3 {margin: 0; padding: 0;} #slide0 h4 {margin: 0 0 0.5em; padding: 0;} #slide0 {margin-bottom: 3em;} h1 {border-top: 2pt solid gray; border-bottom: 1px dotted silver;} .extra {background: transparent !important;} div.extra, pre.extra, .example {font-size: 10pt; color: #333;} ul.extra a {font-weight: bold;} p.example {display: none;} #header {display: none;} #footer h1 {margin: 0; border-bottom: 1px solid; color: gray; font-style: italic;} #footer h2, #controls {display: none;} /* The following rule keeps the layout stuff out of print. Remove at your own risk! */ .layout, .layout * {display: none !important;}

View file

@ -0,0 +1,9 @@
/* Do not edit or override these styles! The system will likely break if you do. */
div#header, div#footer, div#controls, .slide {position: absolute;}
/* Same elements, fixed positioning, via a child selector (html>body). */
html>body div#header, html>body div#footer,
html>body div#controls, html>body .slide {position: fixed;}
.handout {display: none;}
.layout {display: block;}
/* Slides start hidden; only the first slide (#slide0) is visible initially. */
.slide, .hideme, .incremental {visibility: hidden;}
#slide0 {visibility: visible;}

View file

@ -0,0 +1,3 @@
/* Entry point of the slide-show theme: pulls in the three style sheets below, in this order. */
@import url(s5-core.css); /* required to make the slide show run at all */
@import url(framing.css); /* sets basic placement and size of slide components */
@import url(pretty.css); /* stuff that makes the slides look better than blah */

553
src/ui/default/slides.js Normal file
View file

@ -0,0 +1,553 @@
// S5 v1.1 slides.js -- released into the Public Domain
//
// Please see http://www.meyerweb.com/eric/tools/s5/credits.html for information
// about all the wonderful and talented contributors to this code!
// Shared presentation state and browser-detection flags.
// Never assigned: serves as the "no value" sentinel.
var undef;
// href of the projection style sheet, saved by notOperaFix().
var slideCSS = '';
// Index of the slide currently shown.
var snum = 0;
// Number of slides; recomputed by slideLabel().
var smax = 1;
// Position within the current slide's list of incremental elements.
var incpos = 0;
// Number accumulated from digit keys in keys(), or undef.
var number = undef;
// true in slide-show view, false in outline view (see toggle()).
var s5mode = true;
// Both can be overridden by <meta> elements (see defaultCheck()).
var defaultView = 'slideshow';
var controlVis = 'visible';
// Browser flags, 1 or 0, derived from navigator.appName and navigator.userAgent.
var isIE = navigator.appName == 'Microsoft Internet Explorer' && navigator.userAgent.indexOf('Opera') < 1 ? 1 : 0;
var isOp = navigator.userAgent.indexOf('Opera') > -1 ? 1 : 0;
var isGe = navigator.userAgent.indexOf('Gecko') > -1 && navigator.userAgent.indexOf('Safari') < 1 ? 1 : 0;
// Returns true when className is one of the whitespace-separated names in
// object.className, false otherwise (including when className is empty/unset).
function hasClass(object, className) {
  var classes = object.className;
  if (!classes) return false;
  var pattern = '(^|\\s)' + className + '(\\s|$)';
  return (classes.search(pattern) != -1);
}
// Returns true when value is one of the whitespace-separated words of the
// string object (e.g. a rel attribute); false for an empty or missing string.
function hasValue(object, value) {
  if (object) {
    var pattern = '(^|\\s)' + value + '(\\s|$)';
    return (object.search(pattern) != -1);
  }
  return false;
}
// Removes the first occurrence of className from object.className.
// Does nothing when object is missing or has no class names.
function removeClass(object, className) {
  if (!object || !object.className) return;
  // '$1$2' refers to the groups of THIS match.  The previous code passed
  // RegExp.$1+RegExp.$2, which is evaluated before replace() runs and so
  // spliced in whatever an earlier, unrelated match had captured.
  object.className = object.className.replace(
    new RegExp('(^|\\s)' + className + '(\\s|$)'), '$1$2');
}
// Appends className to object.className unless the object is missing or
// already carries that class.
function addClass(object, className) {
  if (!object || hasClass(object, className)) return;
  object.className = object.className
    ? object.className + ' ' + className
    : className;
}
// Returns an array of all elements with tag name elementName ('*' for any)
// that carry the class className, in document order.
function GetElementsWithClassName(elementName, className) {
  var candidates = document.getElementsByTagName(elementName);
  var matches = new Array();
  for (var idx = 0; idx < candidates.length; idx++) {
    var candidate = candidates[idx];
    if (!hasClass(candidate, className)) continue;
    matches[matches.length] = candidate;
  }
  return matches;
}
// Returns true when element, or one of its ancestors below BODY, has the
// given id.  The walk stops (returning false) at BODY or at a missing parent.
function isParentOrSelf(element, id) {
  for (var node = element; node != null && node.nodeName != 'BODY'; node = node.parentNode) {
    if (node.id == id) return true;
  }
  return false;
}
// Returns the text of a DOM node: the value of a text node (nodeType 3),
// the concatenated text of all descendants of an element (nodeType 1),
// and the empty string for any other node type.
function nodeValue(node) {
  if (node.nodeType == 3) return node.nodeValue;
  if (node.nodeType != 1) return "";
  var text = "";
  var kids = node.childNodes;
  for (var k = 0; k < kids.length; ++k) {
    text += nodeValue(kids[k]);
  }
  return text;
}
// Numbers the slides and fills the jump list.
// Every element of class 'slide' gets the id 'slide<n>' and smax is set to
// the slide count.  Except in Opera, each slide also gets an entry
// "<n> : <text>" in the 'jumplist' select, where <text> is the text of the
// slide's first non-text child node.
function slideLabel() {
  var slides = GetElementsWithClassName('*','slide');
  var jumpList = document.getElementById('jumplist');
  smax = slides.length;
  for (var n = 0; n < smax; n++) {
    var slide = slides[n];
    slide.setAttribute('id', 'slide' + n.toString());
    if (isOp) continue;
    var heading = slide.firstChild;
    if (!heading) continue; // to cope with empty slides
    while (heading && heading.nodeType == 3) {
      heading = heading.nextSibling;
    }
    if (!heading) continue; // to cope with slides with only text nodes
    var label = '';
    var parts = heading.childNodes;
    for (var p = 0; p < parts.length; p++) {
      label += nodeValue(parts[p]);
    }
    jumpList.options[jumpList.length] = new Option(n + ' : ' + label, n);
  }
}
// Refreshes the "current / total" counter in the #currentSlide element and
// hides it while the first slide (number 0) is shown.
function currentSlide() {
  var counter = document.getElementById
    ? document.getElementById('currentSlide')
    : document.currentSlide;
  counter.innerHTML = '<span id="csHere">' + snum + '<\/span> ' +
    '<span id="csSep">\/<\/span> ' +
    '<span id="csTotal">' + (smax-1) + '<\/span>';
  counter.style.visibility = (snum == 0) ? 'hidden' : 'visible';
}
// Moves the slide show to another slide.
//   step - a signed number of slides to move by, or the string 'j' to jump
//          to the slide selected in the jump list.
// Does nothing in outline view (projection style sheet disabled) or when
// step is 0.  The destination is clamped to the valid range; an id that
// cannot be found falls back to slide 0.
function go(step) {
  if (document.getElementById('slideProj').disabled || step == 0) return;
  var jl = document.getElementById('jumplist');
  var cid = 'slide' + snum;
  var ce = document.getElementById(cid);
  // Clear the incremental state of the slide being left.
  if (incrementals[snum].length > 0) {
    for (var i = 0; i < incrementals[snum].length; i++) {
      removeClass(incrementals[snum][i], 'current');
      removeClass(incrementals[snum][i], 'incremental');
    }
  }
  if (step != 'j') {
    snum += step;
    // Declared locally; this used to leak as an implicit global.
    var lmax = smax - 1;
    if (snum > lmax) snum = lmax;
    if (snum < 0) snum = 0;
  } else {
    snum = parseInt(jl.value, 10);
  }
  var nid = 'slide' + snum;
  var ne = document.getElementById(nid);
  if (!ne) {
    ne = document.getElementById('slide0');
    snum = 0;
  }
  // Moving backwards lands on a fully revealed slide, otherwise start at 0.
  if (step < 0) {incpos = incrementals[snum].length} else {incpos = 0;}
  if (incrementals[snum].length > 0 && incpos == 0) {
    for (var i = 0; i < incrementals[snum].length; i++) {
      if (hasClass(incrementals[snum][i], 'current'))
        incpos = i + 1;
      else
        addClass(incrementals[snum][i], 'incremental');
    }
  }
  if (incrementals[snum].length > 0 && incpos > 0)
    addClass(incrementals[snum][incpos - 1], 'current');
  ce.style.visibility = 'hidden';
  ne.style.visibility = 'visible';
  jl.selectedIndex = snum;
  currentSlide();
  number = 0;
}
// Jumps to the slide with absolute index target.  Ignored when the index is
// past the last slide or is the slide already shown.
function goTo(target) {
  var pastEnd = target >= smax;
  var alreadyThere = target == snum;
  if (pastEnd || alreadyThere) return;
  go(target - snum);
}
// Steps through the incremental elements of the current slide.
// A positive step reveals the next element and marks it 'current';
// any other step hides the current element again and marks the previous one.
function subgo(step) {
  var items = incrementals[snum];
  if (step > 0) {
    removeClass(items[incpos - 1], 'current');
    removeClass(items[incpos], 'incremental');
    addClass(items[incpos], 'current');
    incpos++;
    return;
  }
  incpos--;
  removeClass(items[incpos], 'current');
  addClass(items[incpos], 'incremental');
  addClass(items[incpos - 1], 'current');
}
// Switches between slide-show view and outline view by enabling one of the
// two style sheets (#slideProj / #outlineStyle), updating s5mode, resetting
// the font size and adjusting the visibility of every slide.
function toggle() {
  var slideColl = GetElementsWithClassName('*','slide');
  var projSheet = document.getElementById('slideProj');
  var outlineSheet = document.getElementById('outlineStyle');
  var toOutline = !projSheet.disabled;

  projSheet.disabled = toOutline;
  outlineSheet.disabled = !toOutline;
  s5mode = !toOutline;
  if (toOutline) {
    fontSize('1em');
  } else {
    fontScale();
  }

  // Outline view shows every slide; slide-show view shows only the current one.
  var visibility = toOutline ? 'visible' : 'hidden';
  for (var n = 0; n < smax; n++) {
    slideColl[n].style.visibility = visibility;
  }
  if (!toOutline) {
    slideColl[snum].style.visibility = 'visible';
  }
}
// Shows or hides the first element of class 'hideme'.
//   action - 's' to show, 'h' to hide, 'k' to toggle.
// Does nothing when no such element exists (for example when the controls
// were never created); previously that case threw a TypeError.
function showHide(action) {
  var obj = GetElementsWithClassName('*','hideme')[0];
  if (!obj) return;
  switch (action) {
    case 's': obj.style.visibility = 'visible'; break;
    case 'h': obj.style.visibility = 'hidden'; break;
    case 'k':
      if (obj.style.visibility != 'visible') {
        obj.style.visibility = 'visible';
      } else {
        obj.style.visibility = 'hidden';
      }
      break;
  }
}
// 'keys' code adapted from MozPoint (http://mozpoint.mozdev.org/)
// Keyboard handler (installed as document.onkeyup by startup()).
// 84 toggles the view in any mode; the remaining keys only act in slide-show
// mode.  Digit keys accumulate a number in the global `number`, which the
// navigation keys then use as a slide number (Return/Enter) or step count.
// Returns undefined after a toggle or when a key came from the controls,
// false otherwise.
function keys(key) {
// No event argument: fall back to the global event object and copy its key code.
if (!key) {
key = event;
key.which = key.keyCode;
}
if (key.which == 84) {
toggle();
return;
}
if (s5mode) {
switch (key.which) {
case 10: // return
case 13: // enter
// Ignore Return/Enter pressed inside the navigation controls.
if (window.event && isParentOrSelf(window.event.srcElement, 'controls')) return;
if (key.target && isParentOrSelf(key.target, 'controls')) return;
if(number != undef) {
goTo(number);
break;
}
// No number typed: deliberately falls through to "advance".
case 32: // spacebar
case 34: // page down
case 39: // rightkey
case 40: // downkey
if(number != undef) {
go(number);
} else if (!incrementals[snum] || incpos >= incrementals[snum].length) {
go(1);
} else {
subgo(1);
}
break;
case 33: // page up
case 37: // leftkey
case 38: // upkey
if(number != undef) {
go(-1 * number);
} else if (!incrementals[snum] || incpos <= 0) {
go(-1);
} else {
subgo(-1);
}
break;
case 36: // home
goTo(0);
break;
case 35: // end
goTo(smax-1);
break;
case 67: // c
showHide('k');
break;
}
// Codes 48-57 are the digit keys: extend the pending number.  Any other key
// discards it.
if (key.which < 48 || key.which > 57) {
number = undef;
} else {
if (window.event && isParentOrSelf(window.event.srcElement, 'controls')) return;
if (key.target && isParentOrSelf(key.target, 'controls')) return;
number = (((number != undef) ? number : 0) * 10) + (key.which - 48);
}
}
return false;
}
// Mouse handler (installed as document.onclick by startup()).
// Clicks on links, on rel="external" elements, inside the controls or inside
// an element with id 'embed' or 'object' are left alone (returns true).
// Any other click with the primary button advances the presentation.
function clicker(e) {
  number = undef;
  var target;
  if (window.event) {
    target = window.event.srcElement;
    e = window.event;
  } else {
    target = e.target;
  }
  var leaveAlone = target.getAttribute('href') != null
    || hasValue(target.rel, 'external')
    || isParentOrSelf(target, 'controls')
    || isParentOrSelf(target,'embed')
    || isParentOrSelf(target,'object');
  if (leaveAlone) return true;
  var primaryButton = !e.which || e.which == 1;
  if (!primaryButton) return;
  var slideDone = !incrementals[snum] || incpos >= incrementals[snum].length;
  if (slideDone) {
    go(1);
  } else {
    subgo(1);
  }
}
// Returns the number of the slide whose name or id equals hash, or null
// when there is none.  The number is parsed from the 'slide<n>' id of the
// nearest element of class 'slide' at or above the match.
function findSlide(hash) {
  var slides = GetElementsWithClassName('*','slide');
  var node = null;
  for (var i = 0; i < slides.length && node == null; i++) {
    var candidate = slides[i];
    var byName = candidate.name && candidate.name == hash;
    var byId = candidate.id && candidate.id == hash;
    if (byName || byId) node = candidate;
  }
  for (; node != null && node.nodeName != 'BODY'; node = node.parentNode) {
    if (hasClass(node, 'slide')) {
      return parseInt(node.id.slice(5));
    }
  }
  return null;
}
// Jumps to the slide named by the URL fragment, if any.
// '#slide<n>' selects slide n directly; any other fragment is looked up
// among the slides' names and ids with findSlide().
function slideJump() {
  if (window.location.hash == null) return;
  var sregex = /^#slide(\d+)$/;
  var matches = sregex.exec(window.location.hash);
  var dest = null;
  if (matches != null) {
    // Explicit radix: a hand-typed '#slide08' must mean slide 8, not be
    // parsed as (invalid) octal by older engines.
    dest = parseInt(matches[1], 10);
  } else {
    dest = findSlide(window.location.hash.slice(1));
  }
  if (dest != null)
    go(dest - snum);
}
// Makes links to '#slide<n>' switch slides instead of scrolling.
// Every anchor whose href contains such a fragment, and whose target slide
// exists, gets a click handler that calls go() and cancels the default
// action; the handler does nothing in outline view.
function fixLinks() {
  var aelements = document.getElementsByTagName('A');
  for (var i = 0; i < aelements.length; i++) {
    // Match all the digits: the old '{1,2}' limit turned '#slide100' into
    // '#slide10' and linked to the wrong slide.
    var slideID = aelements[i].href.match(/#slide[0-9]+/);
    if (!slideID) continue;
    var dest = findSlide(slideID[0].slice(1));
    if (dest == null) continue;
    if (aelements[i].addEventListener) {
      aelements[i].addEventListener("click", new Function("e",
        "if (document.getElementById('slideProj').disabled) return;" +
        "go("+dest+" - snum); " +
        "if (e.preventDefault) e.preventDefault();"), true);
    } else if (aelements[i].attachEvent) {
      aelements[i].attachEvent("onclick", new Function("",
        "if (document.getElementById('slideProj').disabled) return;" +
        "go("+dest+" - snum); " +
        "event.returnValue = false;"));
    }
  }
}
// Makes every link that has an href and rel="external" open in a new window
// and tags it with the class 'external'.
function externalLinks() {
  if (!document.getElementsByTagName) return;
  var links = document.getElementsByTagName('a');
  for (var i = 0; i < links.length; i++) {
    var link = links[i];
    var isExternal = link.getAttribute('href') && hasValue(link.rel, 'external');
    if (!isExternal) continue;
    link.target = '_blank';
    addClass(link,'external');
  }
}
// Builds the navigation controls (view toggle, previous/next links and the
// jump list) inside the #controls element; does nothing if it is missing.
// When controlVis is 'hidden' the whole link bar is shown only on mouse-over,
// otherwise only the jump list is.
function createControls() {
  var controlsDiv = document.getElementById("controls");
  if (!controlsDiv) return;
  var hider = ' onmouseover="showHide(\'s\');" onmouseout="showHide(\'h\');"';
  // Both start out empty: an unset hideDiv used to be concatenated into the
  // markup as the literal text "undefined".
  var hideDiv = '', hideList = '';
  if (controlVis == 'hidden') {
    hideDiv = hider;
  } else {
    hideList = hider;
  }
  controlsDiv.innerHTML = '<form action="#" id="controlForm"' + hideDiv + '>' +
    '<div id="navLinks">' +
    '<a accesskey="t" id="toggle" href="javascript:toggle();">&#216;<\/a>' +
    '<a accesskey="z" id="prev" href="javascript:go(-1);">&laquo;<\/a>' +
    '<a accesskey="x" id="next" href="javascript:go(1);">&raquo;<\/a>' +
    '<div id="navList"' + hideList + '><select id="jumplist" onchange="go(\'j\');"><\/select><\/div>' +
    '<\/div><\/form>';
  // Mark the element that showHide() operates on.
  var hidden = document.getElementById(controlVis == 'hidden' ? 'navLinks' : 'jumplist');
  addClass(hidden,'hideme');
}
// Scales the body font to the window size (slide-show view only).
// Returns false without doing anything in outline view.
// Note: causes layout problems in FireFox that get fixed if browser's Reload
// is used; same may be true of other Gecko-based browsers.
function fontScale() {
  if (!s5mode) return false;
  var vScale = 22; // both yield 32 (after rounding) at 1024x768
  var hScale = 32; // perhaps should auto-calculate based on theme's declared value?
  var vSize, hSize;
  if (window.innerHeight) {
    vSize = window.innerHeight;
    hSize = window.innerWidth;
  } else if (document.documentElement.clientHeight) {
    vSize = document.documentElement.clientHeight;
    hSize = document.documentElement.clientWidth;
  } else if (document.body.clientHeight) {
    vSize = document.body.clientHeight;
    hSize = document.body.clientWidth;
  } else {
    vSize = 700; // assuming 1024x768, minus chrome and such
    hSize = 1024; // these do not account for kiosk mode or Opera Show
  }
  var byHeight = Math.round(vSize/vScale);
  var byWidth = Math.round(hSize/hScale);
  fontSize(Math.min(byHeight, byWidth) + 'px');
  if (isGe) { // hack to counter incremental reflow bugs
    var body = document.getElementsByTagName('body')[0];
    body.style.display = 'none';
    body.style.display = 'block';
  }
}
// Forces the body font size to value (any CSS length, e.g. '1em' or '32px')
// through a dedicated style sheet that is created on first use.
function fontSize(value) {
  var declaration = 'font-size: ' + value + ' !important;';
  if (isIE) {
    // Create the IE style sheet once and remember it on the document; it
    // used to be re-created on every call (i.e. on every resize).
    if (!document.s5ss) {
      document.createStyleSheet();
      document.s5ss = document.styleSheets[document.styleSheets.length - 1];
    }
    document.s5ss.addRule('body', declaration);
    return;
  }
  // Local variable; this used to leak as an implicit global.
  var s5ss = document.getElementById('s5ss');
  if (!s5ss) {
    s5ss = document.createElement('style');
    document.getElementsByTagName('head')[0].appendChild(s5ss);
    s5ss.setAttribute('media','screen, projection');
    s5ss.setAttribute('id','s5ss');
  }
  // Replace whatever rule is currently in the sheet.
  while (s5ss.lastChild) s5ss.removeChild(s5ss.lastChild);
  s5ss.appendChild(document.createTextNode('body {' + declaration + '}'));
}
// Set-up for browsers other than Opera: remembers the projection style
// sheet's href in slideCSS, switches that sheet to the 'screen' medium,
// disables the outline sheet, re-applies the href in Gecko, and attaches the
// PNG behavior file to img, div and .slide in IE.
function notOperaFix() {
  var projSheet = document.getElementById('slideProj');
  var outlineSheet = document.getElementById('outlineStyle');
  slideCSS = projSheet.href;
  projSheet.setAttribute('media','screen');
  outlineSheet.disabled = true;
  if (isGe) {
    projSheet.setAttribute('href','null'); // Gecko fix
    projSheet.setAttribute('href',slideCSS); // Gecko fix
  }
  if (isIE && document.styleSheets && document.styleSheets[0]) {
    var firstSheet = document.styleSheets[0];
    var selectors = ['img', 'div', '.slide'];
    for (var s = 0; s < selectors.length; s++) {
      firstSheet.addRule(selectors[s], 'behavior: url(ui/default/iepngfix.htc)');
    }
  }
}
// Collects, depth-first and in document order, the descendants of obj that
// are revealed step by step.  Returns an empty array when obj is missing.
// Side effect: the 'incremental' / 'show-first' marker classes are moved or
// removed while walking (see below).
function getIncrementals(obj) {
var incrementals = new Array();
if (!obj)
return incrementals;
var children = obj.childNodes;
for (var i = 0; i < children.length; i++) {
var child = children[i];
if (hasClass(child, 'incremental')) {
if (child.nodeName == 'OL' || child.nodeName == 'UL') {
// A marked list is not a step itself: push the marker down to its element
// children, which are picked up by the recursive call at the end of the loop.
removeClass(child, 'incremental');
for (var j = 0; j < child.childNodes.length; j++) {
if (child.childNodes[j].nodeType == 1) {
addClass(child.childNodes[j], 'incremental');
}
}
} else {
incrementals[incrementals.length] = child;
removeClass(child,'incremental');
}
}
if (hasClass(child, 'show-first')) {
if (child.nodeName == 'OL' || child.nodeName == 'UL') {
removeClass(child, 'show-first');
// Index isGe (1 in Gecko, 0 elsewhere) -- presumably skips a leading
// whitespace text node in Gecko to reach the first list item; TODO confirm.
if (child.childNodes[isGe].nodeType == 1) {
removeClass(child.childNodes[isGe], 'incremental');
}
} else {
incrementals[incrementals.length] = child;
}
}
incrementals = incrementals.concat(getIncrementals(child));
}
return incrementals;
}
// Returns an array with one entry per slide (index = slide number), each
// being the list of incremental elements found in that slide.
function createIncrementals() {
  var perSlide = new Array();
  for (var i = 0; i < smax; i++) {
    var slide = document.getElementById('slide'+i);
    perSlide[i] = getIncrementals(slide);
  }
  return perSlide;
}
// Reads presentation defaults from the document's <meta> elements:
// name="defaultView" sets defaultView, name="controlVis" sets controlVis.
// When a name occurs several times the last occurrence wins.
function defaultCheck() {
  var metas = document.getElementsByTagName('meta');
  for (var i = 0; i < metas.length; i++) {
    var meta = metas[i];
    switch (meta.name) {
      case 'defaultView':
        defaultView = meta.content;
        break;
      case 'controlVis':
        controlVis = meta.content;
        break;
    }
  }
}
// Key trap fix, new function body for trap()
// Keypress handler (installed as document.onkeypress by startup()).
// Returns a truthy value when a modifier key (Ctrl, Alt or Meta) is held or
// when e.which is 0, and false otherwise.
function trap(e) {
  if (!e) {
    e = event;
    e.which = e.keyCode;
  }
  // Declared locally; this used to leak as an implicit global.
  var modifierKey;
  try {
    modifierKey = e.ctrlKey || e.altKey || e.metaKey;
  }
  catch (err) {
    modifierKey = false;
  }
  return modifierKey || e.which == 0;
}
// Page initialisation, run from window.onload.  The order matters:
// the controls must exist before slideLabel() fills the jump list, and the
// slides must be numbered before the incrementals are collected.
function startup() {
defaultCheck();
if (!isOp)
createControls();
slideLabel();
fixLinks();
externalLinks();
fontScale();
// Everything below is skipped in Opera: no key or click handlers are installed.
if (!isOp) {
notOperaFix();
// incrementals is intentionally global: go(), subgo(), keys() and clicker() read it.
incrementals = createIncrementals();
slideJump();
if (defaultView == 'outline') {
toggle();
}
document.onkeyup = keys;
document.onkeypress = trap;
document.onclick = clicker;
}
}
window.onload = startup;
// Rescale the font 50 ms after a resize event.
window.onresize = function(){setTimeout('fontScale()', 50);}

View file

@ -0,0 +1,176 @@
#!/usr/bin/perl
#
# MarkdownTester -- Run tests for Markdown implementations
#
# Copyright (c) 2004-2005 John Gruber
# <http://daringfireball.net/projects/markdown/>
#
use strict;
use warnings;
use Getopt::Long;
use Benchmark;
our $VERSION = '1.0.2';
# Sat 24 Dec 2005

# Start timing; the elapsed time is reported after all tests have run.
my $time_start = Benchmark->new;

# Defaults, overridable from the command line.
my $test_dir = "Tests";
my $script = "./Markdown.pl";
my $use_tidy = 0;
my $flag_version;

GetOptions(
    "script=s"  => \$script,
    "testdir=s" => \$test_dir,
    "tidy"      => \$use_tidy,
    "version"   => \$flag_version,
);

# --version: print "<program name> version <VERSION>" and stop.
if ($flag_version) {
    (my $progname = $0) =~ s{.*/}{};
    die "$progname version $VERSION\n";
}

# Refuse to run without a test directory and an executable script.
die "'$test_dir' is not a directory.\n" unless -d $test_dir;
die "$script does not exist.\n" unless -f $script;
die "$script is not executable.\n" unless -x $script;

my $tests_passed = 0;
my $tests_failed = 0;
# Run every "<name>.text" file in the test directory through the script and
# compare its output with the expected "<name>.html"; a test whose .html
# file is missing is reported and skipped.
TEST:
foreach my $testfile (glob "$test_dir/*.text") {
    my $testname = $testfile;
    $testname =~ s{.*/(.+)\.text$}{$1}i;
    print "$testname ... ";

    # Look for a corresponding .html file for each .text file:
    my $resultfile = $testfile;
    $resultfile =~ s{\.text$}{\.html}i;
    unless (-f $resultfile) {
        print "'$resultfile' does not exist.\n\n";
        next TEST;
    }

    # Read the expected result in one go.  The record separator is changed
    # only for this read, and the handle is closed again (it used to stay
    # open, and $/ used to be undefined for the rest of the program).
    open(my $result_fh, '<', $resultfile) || die("Can't open resultfile: $!");
    my $t_result = do { local $/; <$result_fh> };
    close $result_fh;

    my $t_output = `'$script' '$testfile'`;

    # Normalize the output and expected result strings:
    $t_result =~ s/\s+\z//; # trim trailing whitespace
    $t_output =~ s/\s+\z//; # trim trailing whitespace
    if ($use_tidy) {
        # Escape the strings, pass them through to CLI tidy tool for tag-level equivalency
        $t_result =~ s{'}{'\\''}g; # escape ' chars for shell
        $t_output =~ s{'}{'\\''}g;
        $t_result = `echo '$t_result' | tidy --show-body-only 1 --quiet 1 --show-warnings 0`;
        $t_output = `echo '$t_output' | tidy --show-body-only 1 --quiet 1 --show-warnings 0`;
    }

    if ($t_output eq $t_result) {
        print "OK\n";
        $tests_passed++;
    }
    else {
        print "FAILED\n\n";
        # This part added by JM to print diffs
        # Both files are closed (and therefore flushed) before diff reads them.
        open(my $out_fh, '>', 'tmp1') or die $!;
        print {$out_fh} $t_output or die $!;
        close $out_fh or die $!;
        open(my $res_fh, '>', 'tmp2') or die $!;
        print {$res_fh} $t_result or die $!;
        close $res_fh or die $!;
        print `diff tmp1 tmp2`;
        print "\n";
        # Remove only the two files created above, not everything matching tmp?.
        unlink 'tmp1', 'tmp2';
        # End of added part
        $tests_failed++;
    }
}
# Report the totals and the time taken since $time_start.
print "\n\n", "$tests_passed passed; $tests_failed failed.\n";
my $time_end = Benchmark->new;
my $elapsed = timediff($time_end, $time_start);
print "Benchmark: ", timestr($elapsed), "\n";
__END__
=pod
=head1 NAME
B<MarkdownTest>
=head1 SYNOPSIS
B<MarkdownTest.pl> [ B<--options> ] [ I<file> ... ]
=head1 DESCRIPTION
=head1 OPTIONS
Use "--" to end switch parsing. For example, to open a file named "-z", use:
MarkdownTest.pl -- -z
=over 4
=item B<--script>
Specify the path to the Markdown script to test. Defaults to
"./Markdown.pl". Example:
./MarkdownTest.pl --script ./PHP-Markdown/php-markdown
=item B<--testdir>
Specify the path to a directory containing test data. Defaults to "Tests".
=item B<--tidy>
Flag to turn on using the command line 'tidy' tool to normalize HTML
output before comparing script output to the expected test result.
Assumes that the 'tidy' command is available in your PATH. Defaults to
off.
=back
=head1 BUGS
=head1 VERSION HISTORY
1.0 Mon 13 Dec 2004-2005
1.0.1 Mon 19 Sep 2005
+ Better handling of case when foo.text exists, but foo.html doesn't.
It now prints a message and moves on, rather than dying.
=head1 COPYRIGHT AND LICENSE
Copyright (c) 2004-2005 John Gruber
<http://daringfireball.net/>
All rights reserved.
This is free software; you may redistribute it and/or modify it under
the same terms as Perl itself.
=cut

View file

@ -0,0 +1,17 @@
<p>AT&amp;T has an ampersand in their name.</p>
<p>AT&amp;T is another way to write it.</p>
<p>This &amp; that.</p>
<p>4 &lt; 5.</p>
<p>6 > 5.</p>
<p>Here's a <a href="http://example.com/?foo=1&amp;bar=2">link</a> with an ampersand in the URL.</p>
<p>Here's a link with an amersand in the link text: <a href="http://att.com/" title="AT&amp;T">AT&amp;T</a>.</p>
<p>Here's an inline <a href="/script?foo=1&amp;bar=2">link</a>.</p>
<p>Here's an inline <a href="/script?foo=1&amp;bar=2">link</a>.</p>

View file

@ -0,0 +1,21 @@
AT&T has an ampersand in their name.
AT&amp;T is another way to write it.
This & that.
4 < 5.
6 > 5.
Here's a [link] [1] with an ampersand in the URL.
Here's a link with an amersand in the link text: [AT&T] [2].
Here's an inline [link](/script?foo=1&bar=2).
Here's an inline [link](</script?foo=1&bar=2>).
[1]: http://example.com/?foo=1&bar=2
[2]: http://att.com/ "AT&T"

View file

@ -0,0 +1,18 @@
<p>Link: <a href="http://example.com/">http://example.com/</a>.</p>
<p>With an ampersand: <a href="http://example.com/?foo=1&amp;bar=2">http://example.com/?foo=1&amp;bar=2</a></p>
<ul>
<li>In a list?</li>
<li><a href="http://example.com/">http://example.com/</a></li>
<li>It should.</li>
</ul>
<blockquote>
<p>Blockquoted: <a href="http://example.com/">http://example.com/</a></p>
</blockquote>
<p>Auto-links should not occur here: <code>&lt;http://example.com/&gt;</code></p>
<pre><code>or here: &lt;http://example.com/&gt;
</code></pre>

View file

@ -0,0 +1,13 @@
Link: <http://example.com/>.
With an ampersand: <http://example.com/?foo=1&bar=2>
* In a list?
* <http://example.com/>
* It should.
> Blockquoted: <http://example.com/>
Auto-links should not occur here: `<http://example.com/>`
or here: <http://example.com/>

View file

@ -0,0 +1,118 @@
<p>These should all get escaped:</p>
<p>Backslash: \</p>
<p>Backtick: `</p>
<p>Asterisk: *</p>
<p>Underscore: _</p>
<p>Left brace: {</p>
<p>Right brace: }</p>
<p>Left bracket: [</p>
<p>Right bracket: ]</p>
<p>Left paren: (</p>
<p>Right paren: )</p>
<p>Greater-than: ></p>
<p>Hash: #</p>
<p>Period: .</p>
<p>Bang: !</p>
<p>Plus: +</p>
<p>Minus: -</p>
<p>These should not, because they occur within a code block:</p>
<pre><code>Backslash: \\
Backtick: \`
Asterisk: \*
Underscore: \_
Left brace: \{
Right brace: \}
Left bracket: \[
Right bracket: \]
Left paren: \(
Right paren: \)
Greater-than: \&gt;
Hash: \#
Period: \.
Bang: \!
Plus: \+
Minus: \-
</code></pre>
<p>Nor should these, which occur in code spans:</p>
<p>Backslash: <code>\\</code></p>
<p>Backtick: <code>\`</code></p>
<p>Asterisk: <code>\*</code></p>
<p>Underscore: <code>\_</code></p>
<p>Left brace: <code>\{</code></p>
<p>Right brace: <code>\}</code></p>
<p>Left bracket: <code>\[</code></p>
<p>Right bracket: <code>\]</code></p>
<p>Left paren: <code>\(</code></p>
<p>Right paren: <code>\)</code></p>
<p>Greater-than: <code>\&gt;</code></p>
<p>Hash: <code>\#</code></p>
<p>Period: <code>\.</code></p>
<p>Bang: <code>\!</code></p>
<p>Plus: <code>\+</code></p>
<p>Minus: <code>\-</code></p>
<p>These should get escaped, even though they're matching pairs for
other Markdown constructs:</p>
<p>*asterisks*</p>
<p>_underscores_</p>
<p>`backticks`</p>
<p>This is a code span with a literal backslash-backtick sequence: <code>\`</code></p>
<p>This is a tag with unescaped backticks <span attr='`ticks`'>bar</span>.</p>
<p>This is a tag with backslashes <span attr='\\backslashes\\'>bar</span>.</p>

View file

@ -0,0 +1,120 @@
These should all get escaped:
Backslash: \\
Backtick: \`
Asterisk: \*
Underscore: \_
Left brace: \{
Right brace: \}
Left bracket: \[
Right bracket: \]
Left paren: \(
Right paren: \)
Greater-than: \>
Hash: \#
Period: \.
Bang: \!
Plus: \+
Minus: \-
These should not, because they occur within a code block:
Backslash: \\
Backtick: \`
Asterisk: \*
Underscore: \_
Left brace: \{
Right brace: \}
Left bracket: \[
Right bracket: \]
Left paren: \(
Right paren: \)
Greater-than: \>
Hash: \#
Period: \.
Bang: \!
Plus: \+
Minus: \-
Nor should these, which occur in code spans:
Backslash: `\\`
Backtick: `` \` ``
Asterisk: `\*`
Underscore: `\_`
Left brace: `\{`
Right brace: `\}`
Left bracket: `\[`
Right bracket: `\]`
Left paren: `\(`
Right paren: `\)`
Greater-than: `\>`
Hash: `\#`
Period: `\.`
Bang: `\!`
Plus: `\+`
Minus: `\-`
These should get escaped, even though they're matching pairs for
other Markdown constructs:
\*asterisks\*
\_underscores\_
\`backticks\`
This is a code span with a literal backslash-backtick sequence: `` \` ``
This is a tag with unescaped backticks <span attr='`ticks`'>bar</span>.
This is a tag with backslashes <span attr='\\backslashes\\'>bar</span>.

View file

@ -0,0 +1,15 @@
<blockquote>
<p>Example:</p>
<pre><code>sub status {
print "working";
}
</code></pre>
<p>Or:</p>
<pre><code>sub status {
return "working";
}
</code></pre>
</blockquote>

View file

@ -0,0 +1,11 @@
> Example:
>
> sub status {
> print "working";
> }
>
> Or:
>
> sub status {
> return "working";
> }

View file

@ -0,0 +1,18 @@
<pre><code>code block on the first line
</code></pre>
<p>Regular text.</p>
<pre><code>code block indented by spaces
</code></pre>
<p>Regular text.</p>
<pre><code>the lines in this block
all contain trailing spaces
</code></pre>
<p>Regular Text.</p>
<pre><code>code block on the last line
</code></pre>

View file

@ -0,0 +1,14 @@
code block on the first line
Regular text.
code block indented by spaces
Regular text.
the lines in this block
all contain trailing spaces
Regular Text.
code block on the last line

View file

@ -0,0 +1,6 @@
<p><code>&lt;test a="</code> content of attribute <code>"&gt;</code></p>
<p>Fix for backticks within HTML tag: <span attr='`ticks`'>like this</span></p>
<p>Here's how you put <code>`backticks`</code> in a code span.</p>

View file

@ -0,0 +1,6 @@
`<test a="` content of attribute `">`
Fix for backticks within HTML tag: <span attr='`ticks`'>like this</span>
Here's how you put `` `backticks` `` in a code span.

View file

@ -0,0 +1,8 @@
<p>In Markdown 1.0.0 and earlier. Version
8. This line turns into a list item.
Because a hard-wrapped line in the
middle of a paragraph looked like a
list item.</p>
<p>Here's one with a bullet.
* criminey.</p>

View file

@ -0,0 +1,8 @@
In Markdown 1.0.0 and earlier. Version
8. This line turns into a list item.
Because a hard-wrapped line in the
middle of a paragraph looked like a
list item.
Here's one with a bullet.
* criminey.

View file

@ -0,0 +1,71 @@
<p>Dashes:</p>
<hr />
<hr />
<hr />
<hr />
<pre><code>---
</code></pre>
<hr />
<hr />
<hr />
<hr />
<pre><code>- - -
</code></pre>
<p>Asterisks:</p>
<hr />
<hr />
<hr />
<hr />
<pre><code>***
</code></pre>
<hr />
<hr />
<hr />
<hr />
<pre><code>* * *
</code></pre>
<p>Underscores:</p>
<hr />
<hr />
<hr />
<hr />
<pre><code>___
</code></pre>
<hr />
<hr />
<hr />
<hr />
<pre><code>_ _ _
</code></pre>

View file

@ -0,0 +1,67 @@
Dashes:
---
---
---
---
---
- - -
- - -
- - -
- - -
- - -
Asterisks:
***
***
***
***
***
* * *
* * *
* * *
* * *
* * *
Underscores:
___
___
___
___
___
_ _ _
_ _ _
_ _ _
_ _ _
_ _ _

View file

@ -0,0 +1,15 @@
<p>Simple block on one line:</p>
<div>foo</div>
<p>And nested without indentation:</p>
<div>
<div>
<div>
foo
</div>
<div style=">"/>
</div>
<div>bar</div>
</div>

View file

@ -0,0 +1,15 @@
Simple block on one line:
<div>foo</div>
And nested without indentation:
<div>
<div>
<div>
foo
</div>
<div style=">"/>
</div>
<div>bar</div>
</div>

View file

@ -0,0 +1,72 @@
<p>Here's a simple block:</p>
<div>
foo
</div>
<p>This should be a code block, though:</p>
<pre><code>&lt;div&gt;
foo
&lt;/div&gt;
</code></pre>
<p>As should this:</p>
<pre><code>&lt;div&gt;foo&lt;/div&gt;
</code></pre>
<p>Now, nested:</p>
<div>
<div>
<div>
foo
</div>
</div>
</div>
<p>This should just be an HTML comment:</p>
<!-- Comment -->
<p>Multiline:</p>
<!--
Blah
Blah
-->
<p>Code block:</p>
<pre><code>&lt;!-- Comment --&gt;
</code></pre>
<p>Just plain comment, with trailing spaces on the line:</p>
<!-- foo -->
<p>Code:</p>
<pre><code>&lt;hr /&gt;
</code></pre>
<p>Hr's:</p>
<hr>
<hr/>
<hr />
<hr>
<hr/>
<hr />
<hr class="foo" id="bar" />
<hr class="foo" id="bar"/>
<hr class="foo" id="bar" >

View file

@ -0,0 +1,69 @@
Here's a simple block:
<div>
foo
</div>
This should be a code block, though:
<div>
foo
</div>
As should this:
<div>foo</div>
Now, nested:
<div>
<div>
<div>
foo
</div>
</div>
</div>
This should just be an HTML comment:
<!-- Comment -->
Multiline:
<!--
Blah
Blah
-->
Code block:
<!-- Comment -->
Just plain comment, with trailing spaces on the line:
<!-- foo -->
Code:
<hr />
Hr's:
<hr>
<hr/>
<hr />
<hr>
<hr/>
<hr />
<hr class="foo" id="bar" />
<hr class="foo" id="bar"/>
<hr class="foo" id="bar" >

View file

@ -0,0 +1,13 @@
<p>Paragraph one.</p>
<!-- This is a simple comment -->
<!--
This is another comment.
-->
<p>Paragraph two.</p>
<!-- one comment block -- -- with two comments -->
<p>The end.</p>

View file

@ -0,0 +1,13 @@
Paragraph one.
<!-- This is a simple comment -->
<!--
This is another comment.
-->
Paragraph two.
<!-- one comment block -- -- with two comments -->
The end.

View file

@ -0,0 +1,11 @@
<p>Just a <a href="/url/">URL</a>.</p>
<p><a href="/url/" title="title">URL and title</a>.</p>
<p><a href="/url/" title="title preceded by two spaces">URL and title</a>.</p>
<p><a href="/url/" title="title preceded by a tab">URL and title</a>.</p>
<p><a href="/url/" title="title has spaces afterward">URL and title</a>.</p>
<p><a href="">Empty</a>.</p>

View file

@ -0,0 +1,12 @@
Just a [URL](/url/).
[URL and title](/url/ "title").
[URL and title](/url/ "title preceded by two spaces").
[URL and title](/url/ "title preceded by a tab").
[URL and title](/url/ "title has spaces afterward" ).
[Empty]().

View file

@ -0,0 +1,52 @@
<p>Foo <a href="/url/" title="Title">bar</a>.</p>
<p>Foo <a href="/url/" title="Title">bar</a>.</p>
<p>Foo <a href="/url/" title="Title">bar</a>.</p>
<p>With <a href="/url/">embedded [brackets]</a>.</p>
<p>Indented <a href="/url">once</a>.</p>
<p>Indented <a href="/url">twice</a>.</p>
<p>Indented <a href="/url">thrice</a>.</p>
<p>Indented [four][] times.</p>
<pre><code>[four]: /url
</code></pre>
<hr />
<p><a href="foo">this</a> should work</p>
<p>So should <a href="foo">this</a>.</p>
<p>And <a href="foo">this</a>.</p>
<p>And <a href="foo">this</a>.</p>
<p>And <a href="foo">this</a>.</p>
<p>But not [that] [].</p>
<p>Nor [that][].</p>
<p>Nor [that].</p>
<p>[Something in brackets like <a href="foo">this</a> should work]</p>
<p>[Same with <a href="foo">this</a>.]</p>
<p>In this case, <a href="/somethingelse/">this</a> points to something else.</p>
<p>Backslashing should suppress [this] and [this].</p>
<hr />
<p>Here's one where the <a href="/url/">link
breaks</a> across lines.</p>
<p>Here's another where the <a href="/url/">link
breaks</a> across lines, but with a line-ending space.</p>

View file

@ -0,0 +1,71 @@
Foo [bar] [1].
Foo [bar][1].
Foo [bar]
[1].
[1]: /url/ "Title"
With [embedded [brackets]] [b].
Indented [once][].
Indented [twice][].
Indented [thrice][].
Indented [four][] times.
[once]: /url
[twice]: /url
[thrice]: /url
[four]: /url
[b]: /url/
* * *
[this] [this] should work
So should [this][this].
And [this] [].
And [this][].
And [this].
But not [that] [].
Nor [that][].
Nor [that].
[Something in brackets like [this][] should work]
[Same with [this].]
In this case, [this](/somethingelse/) points to something else.
Backslashing should suppress \[this] and [this\].
[this]: foo
* * *
Here's one where the [link
breaks] across lines.
Here's another where the [link
breaks] across lines, but with a line-ending space.
[link breaks]: /url/

View file

@ -0,0 +1,9 @@
<p>This is the <a href="/simple">simple case</a>.</p>
<p>This one has a <a href="/foo">line
break</a>.</p>
<p>This one has a <a href="/foo">line
break</a> with a line-ending space.</p>
<p><a href="/that">this</a> and the <a href="/other">other</a></p>

View file

@ -0,0 +1,20 @@
This is the [simple case].
[simple case]: /simple
This one has a [line
break].
This one has a [line
break] with a line-ending space.
[line break]: /foo
[this] [that] and the [other]
[this]: /this
[that]: /that
[other]: /other

View file

@ -0,0 +1,3 @@
<p>Foo <a href="/url/" title="Title with &quot;quotes&quot; inside">bar</a>.</p>
<p>Foo <a href="/url/" title="Title with &quot;quotes&quot; inside">bar</a>.</p>

View file

@ -0,0 +1,7 @@
Foo [bar][].
Foo [bar](/url/ "Title with "quotes" inside").
[bar]: /url/ "Title with "quotes" inside"

View file

@ -0,0 +1,314 @@
<h1>Markdown: Basics</h1>
<ul id="ProjectSubmenu">
<li><a href="/projects/markdown/" title="Markdown Project Page">Main</a></li>
<li><a class="selected" title="Markdown Basics">Basics</a></li>
<li><a href="/projects/markdown/syntax" title="Markdown Syntax Documentation">Syntax</a></li>
<li><a href="/projects/markdown/license" title="Pricing and License Information">License</a></li>
<li><a href="/projects/markdown/dingus" title="Online Markdown Web Form">Dingus</a></li>
</ul>
<h2>Getting the Gist of Markdown's Formatting Syntax</h2>
<p>This page offers a brief overview of what it's like to use Markdown.
The <a href="/projects/markdown/syntax" title="Markdown Syntax">syntax page</a> provides complete, detailed documentation for
every feature, but Markdown should be very easy to pick up simply by
looking at a few examples of it in action. The examples on this page
are written in a before/after style, showing example syntax and the
HTML output produced by Markdown.</p>
<p>It's also helpful to simply try Markdown out; the <a href="/projects/markdown/dingus" title="Markdown Dingus">Dingus</a> is a
web application that allows you type your own Markdown-formatted text
and translate it to XHTML.</p>
<p><strong>Note:</strong> This document is itself written using Markdown; you
can <a href="/projects/markdown/basics.text">see the source for it by adding '.text' to the URL</a>.</p>
<h2>Paragraphs, Headers, Blockquotes</h2>
<p>A paragraph is simply one or more consecutive lines of text, separated
by one or more blank lines. (A blank line is any line that looks like a
blank line -- a line containing nothing spaces or tabs is considered
blank.) Normal paragraphs should not be intended with spaces or tabs.</p>
<p>Markdown offers two styles of headers: <em>Setext</em> and <em>atx</em>.
Setext-style headers for <code>&lt;h1&gt;</code> and <code>&lt;h2&gt;</code> are created by
"underlining" with equal signs (<code>=</code>) and hyphens (<code>-</code>), respectively.
To create an atx-style header, you put 1-6 hash marks (<code>#</code>) at the
beginning of the line -- the number of hashes equals the resulting
HTML header level.</p>
<p>Blockquotes are indicated using email-style '<code>&gt;</code>' angle brackets.</p>
<p>Markdown:</p>
<pre><code>A First Level Header
====================
A Second Level Header
---------------------
Now is the time for all good men to come to
the aid of their country. This is just a
regular paragraph.
The quick brown fox jumped over the lazy
dog's back.
### Header 3
&gt; This is a blockquote.
&gt;
&gt; This is the second paragraph in the blockquote.
&gt;
&gt; ## This is an H2 in a blockquote
</code></pre>
<p>Output:</p>
<pre><code>&lt;h1&gt;A First Level Header&lt;/h1&gt;
&lt;h2&gt;A Second Level Header&lt;/h2&gt;
&lt;p&gt;Now is the time for all good men to come to
the aid of their country. This is just a
regular paragraph.&lt;/p&gt;
&lt;p&gt;The quick brown fox jumped over the lazy
dog's back.&lt;/p&gt;
&lt;h3&gt;Header 3&lt;/h3&gt;
&lt;blockquote&gt;
&lt;p&gt;This is a blockquote.&lt;/p&gt;
&lt;p&gt;This is the second paragraph in the blockquote.&lt;/p&gt;
&lt;h2&gt;This is an H2 in a blockquote&lt;/h2&gt;
&lt;/blockquote&gt;
</code></pre>
<h3>Phrase Emphasis</h3>
<p>Markdown uses asterisks and underscores to indicate spans of emphasis.</p>
<p>Markdown:</p>
<pre><code>Some of these words *are emphasized*.
Some of these words _are emphasized also_.
Use two asterisks for **strong emphasis**.
Or, if you prefer, __use two underscores instead__.
</code></pre>
<p>Output:</p>
<pre><code>&lt;p&gt;Some of these words &lt;em&gt;are emphasized&lt;/em&gt;.
Some of these words &lt;em&gt;are emphasized also&lt;/em&gt;.&lt;/p&gt;
&lt;p&gt;Use two asterisks for &lt;strong&gt;strong emphasis&lt;/strong&gt;.
Or, if you prefer, &lt;strong&gt;use two underscores instead&lt;/strong&gt;.&lt;/p&gt;
</code></pre>
<h2>Lists</h2>
<p>Unordered (bulleted) lists use asterisks, pluses, and hyphens (<code>*</code>,
<code>+</code>, and <code>-</code>) as list markers. These three markers are
interchangable; this:</p>
<pre><code>* Candy.
* Gum.
* Booze.
</code></pre>
<p>this:</p>
<pre><code>+ Candy.
+ Gum.
+ Booze.
</code></pre>
<p>and this:</p>
<pre><code>- Candy.
- Gum.
- Booze.
</code></pre>
<p>all produce the same output:</p>
<pre><code>&lt;ul&gt;
&lt;li&gt;Candy.&lt;/li&gt;
&lt;li&gt;Gum.&lt;/li&gt;
&lt;li&gt;Booze.&lt;/li&gt;
&lt;/ul&gt;
</code></pre>
<p>Ordered (numbered) lists use regular numbers, followed by periods, as
list markers:</p>
<pre><code>1. Red
2. Green
3. Blue
</code></pre>
<p>Output:</p>
<pre><code>&lt;ol&gt;
&lt;li&gt;Red&lt;/li&gt;
&lt;li&gt;Green&lt;/li&gt;
&lt;li&gt;Blue&lt;/li&gt;
&lt;/ol&gt;
</code></pre>
<p>If you put blank lines between items, you'll get <code>&lt;p&gt;</code> tags for the
list item text. You can create multi-paragraph list items by indenting
the paragraphs by 4 spaces or 1 tab:</p>
<pre><code>* A list item.
With multiple paragraphs.
* Another item in the list.
</code></pre>
<p>Output:</p>
<pre><code>&lt;ul&gt;
&lt;li&gt;&lt;p&gt;A list item.&lt;/p&gt;
&lt;p&gt;With multiple paragraphs.&lt;/p&gt;&lt;/li&gt;
&lt;li&gt;&lt;p&gt;Another item in the list.&lt;/p&gt;&lt;/li&gt;
&lt;/ul&gt;
</code></pre>
<h3>Links</h3>
<p>Markdown supports two styles for creating links: <em>inline</em> and
<em>reference</em>. With both styles, you use square brackets to delimit the
text you want to turn into a link.</p>
<p>Inline-style links use parentheses immediately after the link text.
For example:</p>
<pre><code>This is an [example link](http://example.com/).
</code></pre>
<p>Output:</p>
<pre><code>&lt;p&gt;This is an &lt;a href="http://example.com/"&gt;
example link&lt;/a&gt;.&lt;/p&gt;
</code></pre>
<p>Optionally, you may include a title attribute in the parentheses:</p>
<pre><code>This is an [example link](http://example.com/ "With a Title").
</code></pre>
<p>Output:</p>
<pre><code>&lt;p&gt;This is an &lt;a href="http://example.com/" title="With a Title"&gt;
example link&lt;/a&gt;.&lt;/p&gt;
</code></pre>
<p>Reference-style links allow you to refer to your links by names, which
you define elsewhere in your document:</p>
<pre><code>I get 10 times more traffic from [Google][1] than from
[Yahoo][2] or [MSN][3].
[1]: http://google.com/ "Google"
[2]: http://search.yahoo.com/ "Yahoo Search"
[3]: http://search.msn.com/ "MSN Search"
</code></pre>
<p>Output:</p>
<pre><code>&lt;p&gt;I get 10 times more traffic from &lt;a href="http://google.com/"
title="Google"&gt;Google&lt;/a&gt; than from &lt;a href="http://search.yahoo.com/"
title="Yahoo Search"&gt;Yahoo&lt;/a&gt; or &lt;a href="http://search.msn.com/"
title="MSN Search"&gt;MSN&lt;/a&gt;.&lt;/p&gt;
</code></pre>
<p>The title attribute is optional. Link names may contain letters,
numbers and spaces, but are <em>not</em> case sensitive:</p>
<pre><code>I start my morning with a cup of coffee and
[The New York Times][NY Times].
[ny times]: http://www.nytimes.com/
</code></pre>
<p>Output:</p>
<pre><code>&lt;p&gt;I start my morning with a cup of coffee and
&lt;a href="http://www.nytimes.com/"&gt;The New York Times&lt;/a&gt;.&lt;/p&gt;
</code></pre>
<h3>Images</h3>
<p>Image syntax is very much like link syntax.</p>
<p>Inline (titles are optional):</p>
<pre><code>![alt text](/path/to/img.jpg "Title")
</code></pre>
<p>Reference-style:</p>
<pre><code>![alt text][id]
[id]: /path/to/img.jpg "Title"
</code></pre>
<p>Both of the above examples produce the same output:</p>
<pre><code>&lt;img src="/path/to/img.jpg" alt="alt text" title="Title" /&gt;
</code></pre>
<h3>Code</h3>
<p>In a regular paragraph, you can create code span by wrapping text in
backtick quotes. Any ampersands (<code>&amp;</code>) and angle brackets (<code>&lt;</code> or
<code>&gt;</code>) will automatically be translated into HTML entities. This makes
it easy to use Markdown to write about HTML example code:</p>
<pre><code>I strongly recommend against using any `&lt;blink&gt;` tags.
I wish SmartyPants used named entities like `&amp;mdash;`
instead of decimal-encoded entites like `&amp;#8212;`.
</code></pre>
<p>Output:</p>
<pre><code>&lt;p&gt;I strongly recommend against using any
&lt;code&gt;&amp;lt;blink&amp;gt;&lt;/code&gt; tags.&lt;/p&gt;
&lt;p&gt;I wish SmartyPants used named entities like
&lt;code&gt;&amp;amp;mdash;&lt;/code&gt; instead of decimal-encoded
entites like &lt;code&gt;&amp;amp;#8212;&lt;/code&gt;.&lt;/p&gt;
</code></pre>
<p>To specify an entire block of pre-formatted code, indent every line of
the block by 4 spaces or 1 tab. Just like with code spans, <code>&amp;</code>, <code>&lt;</code>,
and <code>&gt;</code> characters will be escaped automatically.</p>
<p>Markdown:</p>
<pre><code>If you want your page to validate under XHTML 1.0 Strict,
you've got to put paragraph tags in your blockquotes:
&lt;blockquote&gt;
&lt;p&gt;For example.&lt;/p&gt;
&lt;/blockquote&gt;
</code></pre>
<p>Output:</p>
<pre><code>&lt;p&gt;If you want your page to validate under XHTML 1.0 Strict,
you've got to put paragraph tags in your blockquotes:&lt;/p&gt;
&lt;pre&gt;&lt;code&gt;&amp;lt;blockquote&amp;gt;
&amp;lt;p&amp;gt;For example.&amp;lt;/p&amp;gt;
&amp;lt;/blockquote&amp;gt;
&lt;/code&gt;&lt;/pre&gt;
</code></pre>

Some files were not shown because too many files have changed in this diff Show more