From ddaec05d8f49308363c694718240bdaf58466659 Mon Sep 17 00:00:00 2001 From: fiddlosopher Date: Sun, 27 Jul 2008 03:25:51 +0000 Subject: [PATCH] Added MediaWiki writer. + Added Text/Pandoc/Writers/MediaWiki.hs + Added to pandoc.cabal + Added to Main.hs and Text/Pandoc.hs exports. + Added tests for mediawiki writer & table writer. + Added information on MediaWiki writer to README. + Added mediawiki markup to list of formats in pandoc(1) man page. + Updated debian/control with mediawiki output format. + Added mediawiki markup to description in macports portfile. + Updated freebsd package description to include mediawiki format. + Mention MediaWiki output format in web page index. + Added mediawiki demo to website. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1337 788f1e2b-df1e-0410-8736-df70ead52e1b --- Main.hs | 1 + README | 28 +- Text/Pandoc.hs | 2 + Text/Pandoc/Writers/MediaWiki.hs | 394 ++++++++++++++++++++ debian/control | 15 +- freebsd/pkg-descr | 4 +- macports/Portfile.in | 4 +- man/man1/pandoc.1.md | 10 +- pandoc.cabal | 4 +- tests/generate.sh | 1 + tests/runtests.pl | 2 +- tests/tables.mediawiki | 123 +++++++ tests/writer.mediawiki | 605 +++++++++++++++++++++++++++++++ web/demos | 4 + web/index.txt.in | 5 +- 15 files changed, 1172 insertions(+), 30 deletions(-) create mode 100644 Text/Pandoc/Writers/MediaWiki.hs create mode 100644 tests/tables.mediawiki create mode 100644 tests/writer.mediawiki diff --git a/Main.hs b/Main.hs index a1871106c..87ae17ce6 100644 --- a/Main.hs +++ b/Main.hs @@ -88,6 +88,7 @@ writers = [("native" , (writeDoc, "")) ,("man" , (writeMan, "")) ,("markdown" , (writeMarkdown, "")) ,("rst" , (writeRST, "")) + ,("mediawiki" , (writeMediaWiki, "")) ,("rtf" , (writeRTF, defaultRTFHeader)) ] diff --git a/README b/README index 5728b1b5e..1fbdf199b 100644 --- a/README +++ b/README @@ -6,9 +6,9 @@ Pandoc is a [Haskell] library for converting from one markup format to another, and a command-line tool that uses this library. 
It can read [markdown] and (subsets of) [reStructuredText], [HTML], and [LaTeX], and it can write [markdown], [reStructuredText], [HTML], [LaTeX], [ConTeXt], -[RTF], [DocBook XML], [OpenDocument XML], [GNU Texinfo], [groff man] -pages, and [S5] HTML slide shows. Pandoc's version of markdown contains -some enhancements, like footnotes and embedded LaTeX. +[RTF], [DocBook XML], [OpenDocument XML], [GNU Texinfo], [MediaWiki markup], +[groff man] pages, and [S5] HTML slide shows. Pandoc's version of +markdown contains some enhancements, like footnotes and embedded LaTeX. In contrast to existing tools for converting markdown to HTML, which use regex substitutions, Pandoc has a modular design: it consists of a @@ -26,6 +26,7 @@ or output format requires only adding a reader or writer. [RTF]: http://en.wikipedia.org/wiki/Rich_Text_Format [DocBook XML]: http://www.docbook.org/ [OpenDocument XML]: http://opendocument.xml.org/ +[MediaWiki markup]: http://www.mediawiki.org/wiki/Help:Formatting [groff man]: http://developer.apple.com/DOCUMENTATION/Darwin/Reference/ManPages/man7/groff_man.7.html [Haskell]: http://www.haskell.org/ [GNU Texinfo]: http://www.gnu.org/software/texinfo/ @@ -73,15 +74,16 @@ To convert `hello.html` from html to markdown: Supported output formats include `markdown`, `latex`, `context` (ConTeXt), `html`, `rtf` (rich text format), `rst` (reStructuredText), `docbook` (DocBook XML), `opendocument` (OpenDocument XML), `texinfo`, -`man` (groff man), and `s5` (which produces an HTML file that acts -like powerpoint). Supported input formats include `markdown`, `html`, -`latex`, and `rst`. Note that the `rst` reader only parses a subset of -reStructuredText syntax. For example, it doesn't handle tables, option -lists, or footnotes. But for simple documents it should be adequate. The -`latex` and `html` readers are also limited in what they can do. 
 Because -the `html` reader is picky about the HTML it parses, it is recommended -that you pipe HTML through [HTML Tidy] before sending it to `pandoc`, or -use the `html2markdown` script described below. +`mediawiki` (MediaWiki markup), `man` (groff man), and `s5` (which +produces an HTML file that acts like powerpoint). Supported input +formats include `markdown`, `html`, `latex`, and `rst`. Note that the +`rst` reader only parses a subset of reStructuredText syntax. For +example, it doesn't handle tables, option lists, or footnotes. But for +simple documents it should be adequate. The `latex` and `html` readers +are also limited in what they can do. Because the `html` reader is picky +about the HTML it parses, it is recommended that you pipe HTML through +[HTML Tidy] before sending it to `pandoc`, or use the `html2markdown` +script described below. If you don't specify a reader or writer explicitly, `pandoc` will try to determine the input and output format from the extensions of @@ -927,6 +929,8 @@ In Texinfo output, it will be rendered inside a `@math` command. In groff man output, it will be rendered verbatim without $'s. +In MediaWiki output, it will be rendered inside `<math>` tags. + In RTF, Docbook, and OpenDocument output, it will be rendered, as far as possible, using unicode characters, and will otherwise appear verbatim. 
Unknown commands and symbols, and commands that cannot be dealt with diff --git a/Text/Pandoc.hs b/Text/Pandoc.hs index 336e2bd7c..d5026587d 100644 --- a/Text/Pandoc.hs +++ b/Text/Pandoc.hs @@ -76,6 +76,7 @@ module Text.Pandoc , writeDocbook , writeOpenDocument , writeMan + , writeMediaWiki , writeRTF , prettyPandoc -- * Writer options used in writers @@ -105,6 +106,7 @@ import Text.Pandoc.Writers.Docbook import Text.Pandoc.Writers.OpenDocument import Text.Pandoc.Writers.Man import Text.Pandoc.Writers.RTF +import Text.Pandoc.Writers.MediaWiki import Text.Pandoc.DefaultHeaders import Text.Pandoc.UTF8 import Text.Pandoc.Shared diff --git a/Text/Pandoc/Writers/MediaWiki.hs b/Text/Pandoc/Writers/MediaWiki.hs new file mode 100644 index 000000000..14df15bfe --- /dev/null +++ b/Text/Pandoc/Writers/MediaWiki.hs @@ -0,0 +1,394 @@ +{- +Copyright (C) 2008 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.MediaWiki + Copyright : Copyright (C) 2006-7 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to MediaWiki markup. 
+ +MediaWiki: +-} +module Text.Pandoc.Writers.MediaWiki ( writeMediaWiki ) where +import Text.Pandoc.Definition +import Text.Pandoc.Shared +import Text.Pandoc.XML ( escapeStringForXML ) +import Data.List ( intersect ) +import Network.URI ( isURI ) +import Control.Monad.State + +data WriterState = WriterState { + stNotes :: Bool -- True if there are notes + , stListLevel :: [Char] -- String at beginning of list items, e.g. "**" + , stUseTags :: Bool -- True if we should use HTML tags because we're in a complex list + } + +-- | Convert Pandoc to MediaWiki. +writeMediaWiki :: WriterOptions -> Pandoc -> String +writeMediaWiki opts document = + evalState (pandocToMediaWiki opts document) + (WriterState { stNotes = False, stListLevel = [], stUseTags = False }) + +-- | Return MediaWiki representation of document. +pandocToMediaWiki :: WriterOptions -> Pandoc -> State WriterState String +pandocToMediaWiki opts (Pandoc _ blocks) = do + let before = writerIncludeBefore opts + let after = writerIncludeAfter opts + let head' = if writerStandalone opts + then writerHeader opts + else "" + let toc = if writerTableOfContents opts + then "__TOC__\n" + else "" + body <- blockListToMediaWiki opts blocks + notesExist <- get >>= return . stNotes + let notes = if notesExist + then "\n== Notes ==\n" + else "" + return $ head' ++ before ++ toc ++ body ++ after ++ notes + +-- | Escape special characters for MediaWiki. +escapeString :: String -> String +escapeString = escapeStringForXML + +-- | Convert Pandoc block element to MediaWiki. +blockToMediaWiki :: WriterOptions -- ^ Options + -> Block -- ^ Block element + -> State WriterState String + +blockToMediaWiki _ Null = return "" + +blockToMediaWiki opts (Plain inlines) = + inlineListToMediaWiki opts inlines + +blockToMediaWiki opts (Para inlines) = do + useTags <- get >>= return . stUseTags + listLevel <- get >>= return . stListLevel + contents <- inlineListToMediaWiki opts inlines + return $ if useTags + then "

" ++ contents ++ "

" + else contents ++ if null listLevel then "\n" else "" + +blockToMediaWiki _ (RawHtml str) = return str + +blockToMediaWiki _ HorizontalRule = return "\n-----\n" + +blockToMediaWiki opts (Header level inlines) = do + contents <- inlineListToMediaWiki opts inlines + let eqs = replicate (level + 1) '=' + return $ eqs ++ " " ++ contents ++ " " ++ eqs ++ "\n" + +blockToMediaWiki _ (CodeBlock (_,classes,_) str) = do + let at = classes `intersect` ["actionscript", "ada", "apache", "applescript", "asm", "asp", + "autoit", "bash", "blitzbasic", "bnf", "c", "c_mac", "caddcl", "cadlisp", "cfdg", "cfm", + "cpp", "cpp-qt", "csharp", "css", "d", "delphi", "diff", "div", "dos", "eiffel", "fortran", + "freebasic", "gml", "groovy", "html4strict", "idl", "ini", "inno", "io", "java", "java5", + "javascript", "latex", "lisp", "lua", "matlab", "mirc", "mpasm", "mysql", "nsis", "objc", + "ocaml", "ocaml-brief", "oobas", "oracle8", "pascal", "perl", "php", "php-brief", "plsql", + "python", "qbasic", "rails", "reg", "robots", "ruby", "sas", "scheme", "sdlbasic", + "smalltalk", "smarty", "sql", "tcl", "", "thinbasic", "tsql", "vb", "vbnet", "vhdl", + "visualfoxpro", "winbatch", "xml", "xpp", "z80"] + let (beg, end) = if null at + then ("
", "
") + else ("", "") + return $ beg ++ escapeString str ++ end + +blockToMediaWiki opts (BlockQuote blocks) = do + contents <- blockListToMediaWiki opts blocks + return $ "
" ++ contents ++ "
" + +blockToMediaWiki opts (Table caption aligns widths headers rows) = do + let alignStrings = map alignmentToString aligns + captionDoc <- if null caption + then return "" + else do + c <- inlineListToMediaWiki opts caption + return $ "" ++ c ++ "" + colHeads <- colHeadsToMediaWiki opts alignStrings widths headers + rows' <- mapM (tableRowToMediaWiki opts alignStrings) rows + return $ "\n" ++ captionDoc ++ colHeads ++ vcat rows' ++ "\n
" + +blockToMediaWiki opts x@(BulletList items) = do + oldUseTags <- get >>= return . stUseTags + let useTags = oldUseTags || not (isSimpleList x) + if useTags + then do + modify $ \s -> s { stUseTags = True } + contents <- mapM (listItemToMediaWiki opts) items + modify $ \s -> s { stUseTags = oldUseTags } + return $ "\n" + else do + modify $ \s -> s { stListLevel = stListLevel s ++ "*" } + contents <- mapM (listItemToMediaWiki opts) items + modify $ \s -> s { stListLevel = init (stListLevel s) } + return $ vcat contents + +blockToMediaWiki opts x@(OrderedList attribs items) = do + oldUseTags <- get >>= return . stUseTags + let useTags = oldUseTags || not (isSimpleList x) + if useTags + then do + modify $ \s -> s { stUseTags = True } + contents <- mapM (listItemToMediaWiki opts) items + modify $ \s -> s { stUseTags = oldUseTags } + return $ "\n" ++ vcat contents ++ "\n" + else do + modify $ \s -> s { stListLevel = stListLevel s ++ "#" } + contents <- mapM (listItemToMediaWiki opts) items + modify $ \s -> s { stListLevel = init (stListLevel s) } + return $ vcat contents + +blockToMediaWiki opts x@(DefinitionList items) = do + oldUseTags <- get >>= return . stUseTags + let useTags = oldUseTags || not (isSimpleList x) + if useTags + then do + modify $ \s -> s { stUseTags = True } + contents <- mapM (definitionListItemToMediaWiki opts) items + modify $ \s -> s { stUseTags = oldUseTags } + return $ "
\n" ++ vcat contents ++ "
\n" + else do + modify $ \s -> s { stListLevel = stListLevel s ++ ";" } + contents <- mapM (definitionListItemToMediaWiki opts) items + modify $ \s -> s { stListLevel = init (stListLevel s) } + return $ vcat contents + +-- Auxiliary functions for lists: + +-- | Convert ordered list attributes to HTML attribute string +listAttribsToString :: ListAttributes -> String +listAttribsToString (startnum, numstyle, _) = + let numstyle' = camelCaseToHyphenated $ show numstyle + in (if startnum /= 1 + then " start=\"" ++ show startnum ++ "\"" + else "") ++ + (if numstyle /= DefaultStyle + then " style=\"list-style-type: " ++ numstyle' ++ ";\"" + else "") + +-- | Convert bullet or ordered list item (list of blocks) to MediaWiki. +listItemToMediaWiki :: WriterOptions -> [Block] -> State WriterState String +listItemToMediaWiki opts items = do + contents <- blockListToMediaWiki opts items + useTags <- get >>= return . stUseTags + if useTags + then return $ "
  • " ++ contents ++ "
  • " + else do + marker <- get >>= return . stListLevel + return $ marker ++ " " ++ contents + +-- | Convert definition list item (label, list of blocks) to MediaWiki. +definitionListItemToMediaWiki :: WriterOptions + -> ([Inline],[Block]) + -> State WriterState String +definitionListItemToMediaWiki opts (label, items) = do + labelText <- inlineListToMediaWiki opts label + contents <- blockListToMediaWiki opts items + useTags <- get >>= return . stUseTags + if useTags + then return $ "
    " ++ labelText ++ "
    \n
    " ++ contents ++ "
    " + else do + marker <- get >>= return . stListLevel + return $ marker ++ " " ++ labelText ++ "\n" ++ (init marker ++ ": ") ++ contents + +-- | True if the list can be handled by simple wiki markup, False if HTML tags will be needed. +isSimpleList :: Block -> Bool +isSimpleList x = + case x of + BulletList items -> all isSimpleListItem items + OrderedList (num, sty, _) items -> all isSimpleListItem items && + num == 1 && sty `elem` [DefaultStyle, Decimal] + DefinitionList items -> all isSimpleListItem $ map snd items + _ -> False + +-- | True if list item can be handled with the simple wiki syntax. False if +-- HTML tags will be needed. +isSimpleListItem :: [Block] -> Bool +isSimpleListItem [] = True +isSimpleListItem [x] = + case x of + Plain _ -> True + Para _ -> True + BulletList _ -> isSimpleList x + OrderedList _ _ -> isSimpleList x + DefinitionList _ -> isSimpleList x + _ -> False +isSimpleListItem [x, y] | isPlainOrPara x = + case y of + BulletList _ -> isSimpleList y + OrderedList _ _ -> isSimpleList y + DefinitionList _ -> isSimpleList y + _ -> False +isSimpleListItem _ = False + +isPlainOrPara :: Block -> Bool +isPlainOrPara (Plain _) = True +isPlainOrPara (Para _) = True +isPlainOrPara _ = False + +tr :: String -> String +tr x = "\n" ++ x ++ "\n" + +-- | Concatenates strings with line breaks between them. 
+vcat :: [String] -> String +vcat [] = "" +vcat [x] = x +vcat (x:xs) = x ++ "\n" ++ vcat xs + +-- Auxiliary functions for tables: + +colHeadsToMediaWiki :: WriterOptions + -> [[Char]] + -> [Float] + -> [[Block]] + -> State WriterState String +colHeadsToMediaWiki opts alignStrings widths headers = do + heads <- sequence $ zipWith3 + (\alignment columnwidth item -> tableItemToMediaWiki opts "th" alignment columnwidth item) + alignStrings widths headers + return $ tr $ vcat heads + +alignmentToString :: Alignment -> [Char] +alignmentToString alignment = case alignment of + AlignLeft -> "left" + AlignRight -> "right" + AlignCenter -> "center" + AlignDefault -> "left" + +tableRowToMediaWiki :: WriterOptions + -> [[Char]] + -> [[Block]] + -> State WriterState String +tableRowToMediaWiki opts aligns columns = + (sequence $ zipWith3 (tableItemToMediaWiki opts "td") aligns (repeat 0) columns) >>= + return . tr . vcat + +tableItemToMediaWiki :: WriterOptions + -> [Char] + -> [Char] + -> Float + -> [Block] + -> State WriterState String +tableItemToMediaWiki opts tag' align' width' item = do + contents <- blockListToMediaWiki opts item + let attrib = " align=\"" ++ align' ++ "\"" ++ + if width' /= 0 + then " style=\"width: " ++ (show (truncate (100 * width') :: Integer)) ++ "%;\"" + else "" + return $ "<" ++ tag' ++ attrib ++ ">" ++ contents ++ "<" ++ tag' ++ ">" + +-- | Convert list of Pandoc block elements to MediaWiki. +blockListToMediaWiki :: WriterOptions -- ^ Options + -> [Block] -- ^ List of block elements + -> State WriterState String +blockListToMediaWiki opts blocks = + mapM (blockToMediaWiki opts) blocks >>= return . vcat + +-- | Convert list of Pandoc inline elements to MediaWiki. +inlineListToMediaWiki :: WriterOptions -> [Inline] -> State WriterState String +inlineListToMediaWiki opts lst = + mapM (inlineToMediaWiki opts) lst >>= return . concat + +-- | Convert Pandoc inline element to MediaWiki. 
+inlineToMediaWiki :: WriterOptions -> Inline -> State WriterState String + +inlineToMediaWiki opts (Emph lst) = do + contents <- inlineListToMediaWiki opts lst + return $ "''" ++ contents ++ "''" + +inlineToMediaWiki opts (Strong lst) = do + contents <- inlineListToMediaWiki opts lst + return $ "'''" ++ contents ++ "'''" + +inlineToMediaWiki opts (Strikeout lst) = do + contents <- inlineListToMediaWiki opts lst + return $ "" ++ contents ++ "" + +inlineToMediaWiki opts (Superscript lst) = do + contents <- inlineListToMediaWiki opts lst + return $ "" ++ contents ++ "" + +inlineToMediaWiki opts (Subscript lst) = do + contents <- inlineListToMediaWiki opts lst + return $ "" ++ contents ++ "" + +inlineToMediaWiki opts (SmallCaps lst) = inlineListToMediaWiki opts lst + +inlineToMediaWiki opts (Quoted SingleQuote lst) = do + contents <- inlineListToMediaWiki opts lst + return $ "‘" ++ contents ++ "’" + +inlineToMediaWiki opts (Quoted DoubleQuote lst) = do + contents <- inlineListToMediaWiki opts lst + return $ "“" ++ contents ++ "”" + +inlineToMediaWiki _ EmDash = return "—" + +inlineToMediaWiki _ EnDash = return "–" + +inlineToMediaWiki _ Apostrophe = return "’" + +inlineToMediaWiki _ Ellipses = return "…" + +inlineToMediaWiki _ (Code str) = + return $ "" ++ (escapeString str) ++ "" + +inlineToMediaWiki _ (Str str) = return $ escapeString str + +inlineToMediaWiki _ (Math str) = return $ "" ++ str ++ "" + -- note: str should NOT be escaped + +inlineToMediaWiki _ (TeX _) = return "" + +inlineToMediaWiki _ (HtmlInline str) = return str + +inlineToMediaWiki _ (LineBreak) = return "
    \n" + +inlineToMediaWiki _ Space = return " " + +inlineToMediaWiki opts (Link txt (src, _)) = do + link <- inlineListToMediaWiki opts txt + let useAuto = txt == [Code src] + let src' = if isURI src + then src + else if take 1 src == "/" + then "http://{{SERVERNAME}}" ++ src + else "http://{{SERVERNAME}}/" ++ src + return $ if useAuto + then src' + else "[" ++ src' ++ " " ++ link ++ "]" + +inlineToMediaWiki opts (Image alt (source, tit)) = do + alt' <- inlineListToMediaWiki opts alt + let txt = if (null tit) + then if null alt + then "" + else "|" ++ alt' + else "|" ++ tit + return $ "[[Image:" ++ source ++ txt ++ "]]" + +inlineToMediaWiki opts (Note contents) = do + contents' <- blockListToMediaWiki opts contents + modify (\s -> s { stNotes = True }) + return $ "" ++ contents' ++ "" + -- note - may not work for notes with multiple blocks diff --git a/debian/control b/debian/control index 01271f88c..cd5c39884 100644 --- a/debian/control +++ b/debian/control @@ -18,8 +18,9 @@ Description: general markup converter Pandoc is a Haskell library for converting from one markup format to another, and a command-line tool that uses this library. It can read markdown and (subsets of) reStructuredText, HTML, and LaTeX, and it - can write markdown, reStructuredText, HTML, LaTeX, ConTeXt, DocBook, - OpenDocument XML, RTF, GNU Texinfo, groff man pages, and S5 HTML slide shows. + can write markdown, reStructuredText, HTML, LaTeX, ConTeXt, DocBook XML, + OpenDocument XML, RTF, GNU Texinfo, MediaWiki markup, groff man pages, + and S5 HTML slide shows. . Pandoc extends standard markdown syntax with footnotes, embedded LaTeX, tables, definition lists, and other features. A compatibility mode is @@ -43,8 +44,9 @@ Description: general markup converter Pandoc is a Haskell library for converting from one markup format to another, and a command-line tool that uses this library. 
It can read markdown and (subsets of) reStructuredText, HTML, and LaTeX, and it - can write markdown, reStructuredText, HTML, LaTeX, ConTeXt, DocBook, - OpenDocument XML, RTF, GNU Texinfo, groff man pages, and S5 HTML slide shows. + can write markdown, reStructuredText, HTML, LaTeX, ConTeXt, DocBook XML, + OpenDocument XML, RTF, GNU Texinfo, MediaWiki markup, groff man pages, + and S5 HTML slide shows. . Pandoc extends standard markdown syntax with footnotes, embedded LaTeX, tables, definition lists, and other features. A compatibility mode is @@ -68,8 +70,9 @@ Description: general markup converter Pandoc is a Haskell library for converting from one markup format to another, and a command-line tool that uses this library. It can read markdown and (subsets of) reStructuredText, HTML, and LaTeX, and it - can write markdown, reStructuredText, HTML, LaTeX, ConTeXt, DocBook, - OpenDocument XML, RTF, GNU Texinfo, groff man pages, and S5 HTML slide shows. + can write markdown, reStructuredText, HTML, LaTeX, ConTeXt, DocBook XML, + OpenDocument XML, RTF, GNU Texinfo, MediaWiki markup, groff man pages, + and S5 HTML slide shows. . Pandoc extends standard markdown syntax with footnotes, embedded LaTeX, tables, definition lists, and other features. A compatibility mode is diff --git a/freebsd/pkg-descr b/freebsd/pkg-descr index dfef9a3d8..344c5adcf 100644 --- a/freebsd/pkg-descr +++ b/freebsd/pkg-descr @@ -1,8 +1,8 @@ Pandoc is a command-line tool for converting from one markup format to another. It can read markdown and (subsets of) reStructuredText, HTML, and LaTeX, and it can write markdown, reStructuredText, HTML, -LaTeX, ConTeXt, DocBook, OpenDocument XML, RTF, GNU Texinfo, groff man -pages, and S5 HTML slide shows. +LaTeX, ConTeXt, DocBook XML, OpenDocument XML, RTF, GNU Texinfo, +MediaWiki markup, groff man pages, and S5 HTML slide shows. Pandoc extends standard markdown syntax with footnotes, embedded LaTeX, and other features. 
A compatibility mode is provided for those who diff --git a/macports/Portfile.in b/macports/Portfile.in index 41a03dd83..9c588ee24 100644 --- a/macports/Portfile.in +++ b/macports/Portfile.in @@ -11,8 +11,8 @@ long_description \ Pandoc is a command-line tool for converting from one markup format \ to another. It can read markdown and (subsets of) reStructuredText, \ HTML, and LaTeX, and it can write markdown, reStructuredText, HTML, \ - LaTeX, ConTeXt, DocBook, OpenDocument XML, RTF, Texinfo, groff man, \ - and S5 HTML slide shows. + LaTeX, ConTeXt, DocBook XML, OpenDocument XML, RTF, Texinfo, \ + MediaWiki markup, groff man, and S5 HTML slide shows. homepage http://johnmacfarlane.net/pandoc/ platforms darwin diff --git a/man/man1/pandoc.1.md b/man/man1/pandoc.1.md index 144e23bbd..513fb00e1 100644 --- a/man/man1/pandoc.1.md +++ b/man/man1/pandoc.1.md @@ -15,7 +15,8 @@ pandoc [*options*] [*input-file*]... Pandoc converts files from one markup format to another. It can read markdown and (subsets of) reStructuredText, HTML, and LaTeX, and it can write markdown, reStructuredText, HTML, LaTeX, ConTeXt, Texinfo, -groff man, RTF, OpenDocument XML, DocBook XML, and S5 HTML slide shows. +groff man, MediaWiki markup, RTF, OpenDocument XML, DocBook XML, +and S5 HTML slide shows. If no *input-file* is specified, input is read from STDIN. Otherwise, the *input-files* are concatenated (with a blank @@ -67,9 +68,10 @@ to Pandoc. Or use `html2markdown`(1), a wrapper around `pandoc`. : Specify output format. *FORMAT* can be `native` (native Haskell), `markdown` (markdown or plain text), `rst` (reStructuredText), `html` (HTML), `latex` (LaTeX), `context` (ConTeXt), `man` (groff man), - `texinfo` (GNU Texinfo), `docbook` (DocBook XML), - `opendocument` (OpenDocument XML), `s5` (S5 HTML and javascript slide - show), or `rtf` (rich text format). 
+ `mediawiki` (MediaWiki markup), `texinfo` (GNU Texinfo), + `docbook` (DocBook XML), `opendocument` (OpenDocument XML), + `s5` (S5 HTML and javascript slide show), + or `rtf` (rich text format). -s, \--standalone : Produce output with an appropriate header and footer (e.g. a diff --git a/pandoc.cabal b/pandoc.cabal index 4e3ba5fe8..911252e11 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -17,7 +17,8 @@ Description: Pandoc is a Haskell library for converting from one markup this library. It can read markdown and (subsets of) reStructuredText, HTML, and LaTeX, and it can write markdown, reStructuredText, HTML, LaTeX, ConTeXt, Docbook, - OpenDocument, RTF, groff man pages, and S5 HTML slide shows. + OpenDocument, RTF, MediaWiki, groff man pages, and + S5 HTML slide shows. . Pandoc extends standard markdown syntax with footnotes, embedded LaTeX, definition lists, tables, and other @@ -71,6 +72,7 @@ Library Text.Pandoc.Writers.Man, Text.Pandoc.Writers.Markdown, Text.Pandoc.Writers.RST, + Text.Pandoc.Writers.MediaWiki, Text.Pandoc.Writers.RTF, Text.Pandoc.Writers.S5 Other-Modules: Text.Pandoc.XML diff --git a/tests/generate.sh b/tests/generate.sh index 70e23969f..fd99caa33 100755 --- a/tests/generate.sh +++ b/tests/generate.sh @@ -8,6 +8,7 @@ ../pandoc -r native -s -w texinfo testsuite.native > writer.texinfo ../pandoc -r native -s -w rtf testsuite.native > writer.rtf ../pandoc -r native -s -w man testsuite.native > writer.man +../pandoc -r native -s -w mediawiki testsuite.native > writer.mediawiki sed -e '/^, Header 1 \[Str "HTML",Space,Str "Blocks"\]/,/^, HorizontalRule/d' testsuite.native | ../pandoc -r native -w docbook -s > writer.docbook sed -e '/^, Header 1 \[Str "LaTeX"\]/,/^, HorizontalRule/d' testsuite.native | ../pandoc -r native -w context -s > writer.context diff --git a/tests/runtests.pl b/tests/runtests.pl index 38ce00fdc..9457b3eb3 100644 --- a/tests/runtests.pl +++ b/tests/runtests.pl @@ -14,7 +14,7 @@ unless (-x $script) { die "$script is not 
executable.\n"; } print "Writer tests:\n"; -my @writeformats = ("html", "latex", "texinfo", "rst", "rtf", "markdown", "man", "native"); # docbook, context, and s5 handled separately +my @writeformats = ("html", "latex", "texinfo", "rst", "rtf", "markdown", "mediawiki", "man", "native"); # docbook, context, and s5 handled separately my $source = "testsuite.native"; sub test_results diff --git a/tests/tables.mediawiki b/tests/tables.mediawiki new file mode 100644 index 000000000..abe1cf530 --- /dev/null +++ b/tests/tables.mediawiki @@ -0,0 +1,123 @@ +Simple table with caption: + + + + + + + + + +
    Demonstration of simple table syntax.
    Right +Left +Center +Default +
    12 +12 +12 +12 +
    123 +123 +123 +123 +
    1 +1 +1 +1 +
    +Simple table without caption: + + + + + + + + + +
    Right +Left +Center +Default +
    12 +12 +12 +12 +
    123 +123 +123 +123 +
    1 +1 +1 +1 +
    +Simple table indented two spaces: + + + + + + + + + +
    Demonstration of simple table syntax.
    Right +Left +Center +Default +
    12 +12 +12 +12 +
    123 +123 +123 +123 +
    1 +1 +1 +1 +
    +Multiline table with caption: + + + + + + + +
    Here's the caption. It may span multiple lines.
    Centered Header +Left Aligned +Right Aligned +Default aligned +
    First +row +12.0 +Example of a row that spans multiple lines. +
    Second +row +5.0 +Here's another one. Note the blank line between rows. +
    +Multiline table without caption: + + + + + + + +
    Centered Header +Left Aligned +Right Aligned +Default aligned +
    First +row +12.0 +Example of a row that spans multiple lines. +
    Second +row +5.0 +Here's another one. Note the blank line between rows. +
    diff --git a/tests/writer.mediawiki b/tests/writer.mediawiki new file mode 100644 index 000000000..39def56d8 --- /dev/null +++ b/tests/writer.mediawiki @@ -0,0 +1,605 @@ +This is a set of tests for pandoc. Most of them are adapted from John Gruber’s markdown test suite. + + +----- + +== Headers == + +=== Level 2 with an [http://{{SERVERNAME}}/url embedded link] === + +==== Level 3 with ''emphasis'' ==== + +===== Level 4 ===== + +====== Level 5 ====== + +== Level 1 == + +=== Level 2 with ''emphasis'' === + +==== Level 3 ==== + +with no blank line + +=== Level 2 === + +with no blank line + + +----- + +== Paragraphs == + +Here’s a regular paragraph. + +In Markdown 1.0.0 and earlier. Version 8. This line turns into a list item. Because a hard-wrapped line in the middle of a paragraph looked like a list item. + +Here’s one with a bullet. * criminey. + +There should be a hard line break
    +here. + + +----- + +== Block Quotes == + +E-mail style: + +
    This is a block quote. It is pretty short. +
    +
    Code in a block quote: + +
    sub status {
    +    print "working";
    +}
    +A list: + +# item one +# item two +Nested block quotes: + +
    nested +
    +
    nested +
    +This should not be a block quote: 2 > 1. + +And a following paragraph. + + +----- + +== Code Blocks == + +Code: + +
    ---- (should be four hyphens)
    +
    +sub status {
    +    print "working";
    +}
    +
    +this code block is indented by one tab
    +And: + +
        this code block is indented by two tabs
    +
    +These should not be escaped:  \$ \\ \> \[ \{
    + +----- + +== Lists == + +=== Unordered === + +Asterisks tight: + +* asterisk 1 +* asterisk 2 +* asterisk 3 +Asterisks loose: + +* asterisk 1 +* asterisk 2 +* asterisk 3 +Pluses tight: + +* Plus 1 +* Plus 2 +* Plus 3 +Pluses loose: + +* Plus 1 +* Plus 2 +* Plus 3 +Minuses tight: + +* Minus 1 +* Minus 2 +* Minus 3 +Minuses loose: + +* Minus 1 +* Minus 2 +* Minus 3 +=== Ordered === + +Tight: + +# First +# Second +# Third +and: + +# One +# Two +# Three +Loose using tabs: + +# First +# Second +# Third +and using spaces: + +# One +# Two +# Three +Multiple paragraphs: + +
      +
    1. Item 1, graf one.

      +

      Item 1. graf two. The quick brown fox jumped over the lazy dog’s back.

    2. +
    3. Item 2.

    4. +
    5. Item 3.

    + +=== Nested === + +* Tab +** Tab +*** Tab +Here’s another: + +# First +# Second: +#* Fee +#* Fie +#* Foe +# Third +Same thing but with paragraphs: + +# First +# Second: +#* Fee +#* Fie +#* Foe +# Third +=== Tabs and spaces === + +* this is a list item indented with tabs +* this is a list item indented with spaces +** this is an example list item indented with tabs +** this is an example list item indented with spaces +=== Fancy list markers === + +
      +
    1. begins with 2
    2. +
    3. and now 3

      +

      with a continuation

      +
        +
      1. sublist with roman numerals, starting with 4
      2. +
      3. more items +
          +
        1. a subsublist
        2. +
        3. a subsublist
        +
      +
    + +Nesting: + +
      +
    1. Upper Alpha +
        +
      1. Upper Roman. +
          +
        1. Decimal start with 6 +
            +
          1. Lower alpha with paren
          +
        +
      +
    + +Autonumbering: + +# Autonumber. +# More. +## Nested. +Should not be a list item: + +M.A. 2007 + +B. Williams + + +----- + +== Definition Lists == + +Tight using spaces: + +; apple +: red fruit +; orange +: orange fruit +; banana +: yellow fruit +Tight using tabs: + +; apple +: red fruit +; orange +: orange fruit +; banana +: yellow fruit +Loose: + +; apple +: red fruit +; orange +: orange fruit +; banana +: yellow fruit +Multiple blocks with italics: + +
    +
    ''apple''
    +

    red fruit

    +

    contains seeds, crisp, pleasant to taste

    +
    ''orange''
    +

    orange fruit

    +
    { orange code block }
    +

    orange block quote

    + +== HTML Blocks == + +Simple block on one line: + +
    +foo +
    + +And nested without indentation: + +
    +
    +
    +foo +
    +
    +
    +bar +
    +
    + +Interpreted markdown in a table: + + + + + + +
    +This is ''emphasized'' + +And this is '''strong''' +
    + + + +Here’s a simple block: + +
    + +foo +
    + +This should be a code block, though: + +
    <div>
    +    foo
    +</div>
    +As should this: + +
    <div>foo</div>
    +Now, nested: + +
    +
    +
    + +foo +
    +
    +
    + +This should just be an HTML comment: + + + +Multiline: + + + + + +Code block: + +
    <!-- Comment -->
    +Just plain comment, with trailing spaces on the line: + + + +Code: + +
    <hr />
    +Hr’s: + +
    + +
    + +
    + +
    + +
    + +
    + +
    + +
    + +
    + + +----- + +== Inline Markup == + +This is ''emphasized'', and so ''is this''. + +This is '''strong''', and so '''is this'''. + +An ''[http://{{SERVERNAME}}/url emphasized link]''. + +'''''This is strong and em.''''' + +So is '''''this''''' word. + +'''''This is strong and em.''''' + +So is '''''this''''' word. + +This is code: >, $, \, \$, <html>. + +This is ''strikeout''. + +Superscripts: abcd a''hello'' ahello there. + +Subscripts: H2O, H23O, Hmany of themO. + +These should not be superscripts or subscripts, because of the unescaped spaces: a^b c^d, a~b c~d. + + +----- + +== Smart quotes, ellipses, dashes == + +“Hello,” said the spider. “‘Shelob’ is my name.” + +‘A’, ‘B’, and ‘C’ are letters. + +‘Oak,’ ‘elm,’ and ‘beech’ are names of trees. So is ‘pine.’ + +‘He said, “I want to go.”’ Were you alive in the 70’s? + +Here is some quoted ‘code’ and a “[http://example.com/?foo=1&bar=2 quoted link]”. + +Some dashes: one—two — three—four — five. + +Dashes between numbers: 5–7, 255–66, 1987–1999. + +Ellipses…and…and…. + + +----- + +== LaTeX == + +* +* +* 2+2=4 +* x \in y +* \alpha \wedge \omega +* 223 +* p-Tree +* \frac{d}{dx}f(x)=\lim_{h\to 0}\frac{f(x+h)-f(x)}{h} +* Here’s one that has a line break in it: \alpha + \omega \times x^2. +These shouldn’t be math: + +* To get the famous equation, write $e = mc^2$. +* $22,000 is a ''lot'' of money. So is $34,000. (It worked if “lot” is emphasized.) +* Shoes ($20) and socks ($5). +* Escaped $: $73 ''this should be emphasized'' 23$. +Here’s a LaTeX table: + + + + +----- + +== Special Characters == + +Here is some unicode: + +* I hat: Î +* o umlaut: ö +* section: § +* set membership: ∈ +* copyright: © +AT&T has an ampersand in their name. + +AT&T is another way to write it. + +This & that. + +4 < 5. + +6 > 5. + +Backslash: \ + +Backtick: ` + +Asterisk: * + +Underscore: _ + +Left brace: { + +Right brace: } + +Left bracket: [ + +Right bracket: ] + +Left paren: ( + +Right paren: ) + +Greater-than: > + +Hash: # + +Period: . 
+ +Bang: ! + +Plus: + + +Minus: - + + +----- + +== Links == + +=== Explicit === + +Just a [http://{{SERVERNAME}}/url/ URL]. + +[http://{{SERVERNAME}}/url/ URL and title]. + +[http://{{SERVERNAME}}/url/ URL and title]. + +[http://{{SERVERNAME}}/url/ URL and title]. + +[http://{{SERVERNAME}}/url/ URL and title] + +[http://{{SERVERNAME}}/url/ URL and title] + +[http://{{SERVERNAME}}/url/with_underscore with_underscore] + +[mailto:nobody@nowhere.net Email link] + +[http://{{SERVERNAME}}/ Empty]. + +=== Reference === + +Foo [http://{{SERVERNAME}}/url/ bar]. + +Foo [http://{{SERVERNAME}}/url/ bar]. + +Foo [http://{{SERVERNAME}}/url/ bar]. + +With [http://{{SERVERNAME}}/url/ embedded [brackets]]. + +[http://{{SERVERNAME}}/url/ b] by itself should be a link. + +Indented [http://{{SERVERNAME}}/url once]. + +Indented [http://{{SERVERNAME}}/url twice]. + +Indented [http://{{SERVERNAME}}/url thrice]. + +This should [not][] be a link. + +
    [not]: /url
    +Foo [http://{{SERVERNAME}}/url/ bar]. + +Foo [http://{{SERVERNAME}}/url/ biz]. + +=== With ampersands === + +Here’s a [http://example.com/?foo=1&bar=2 link with an ampersand in the URL]. + +Here’s a link with an amersand in the link text: [http://att.com/ AT&T]. + +Here’s an [http://{{SERVERNAME}}/script?foo=1&bar=2 inline link]. + +Here’s an [http://{{SERVERNAME}}/script?foo=1&bar=2 inline link in pointy braces]. + +=== Autolinks === + +With an ampersand: http://example.com/?foo=1&bar=2 + +* In a list? +* http://example.com/ +* It should. +An e-mail address: [mailto:nobody@nowhere.net nobody@nowhere.net] + +
    Blockquoted: http://example.com/ +
    +Auto-links should not occur here: <http://example.com/> + +
    or here: <http://example.com/>
    + +----- + +== Images == + +From “Voyage dans la Lune” by Georges Melies (1902): + +[[Image:lalune.jpg|Voyage dans la Lune]] + +Here is a movie [[Image:movie.jpg|movie]] icon. + + +----- + +== Footnotes == + +Here is a footnote reference,Here is the footnote. It can go anywhere after the footnote reference. It need not be placed at the end of the document. + and another.Here’s the long note. This one contains multiple blocks. + +Subsequent blocks are indented to show that they belong to the footnote (as with list items). + +
      { <code> }
    +If you want, you can indent every line, but you can also be lazy and just indent the first line of each block. +
    This should ''not'' be a footnote reference, because it contains a space.[^my note] Here is an inline note.This is ''easier'' to type. Inline notes may contain [http://google.com links] and ] verbatim characters, as well as [bracketed text]. + + +
    Notes can go in quotes.In quote. + +
    +# And in list items.In list. +This paragraph should not be part of the note, as it is not indented. + +== Notes == + diff --git a/web/demos b/web/demos index 2d9ab4340..127242b40 100644 --- a/web/demos +++ b/web/demos @@ -102,3 +102,7 @@ click on the name of the output file: @ markdown2odt @@README@@ -o @@example21.odt@@ +22. MediaWiki markup: + +@ pandoc -s -S -w mediawiki --toc @@README@@ -o @@example22.wiki@@ + diff --git a/web/index.txt.in b/web/index.txt.in index cf9048225..d4c91c586 100644 --- a/web/index.txt.in +++ b/web/index.txt.in @@ -6,8 +6,8 @@ Pandoc is a [Haskell] library for converting from one markup format to another, and a command-line tool that uses this library. It can read [markdown] and (subsets of) [reStructuredText], [HTML], and [LaTeX], and it can write [markdown], [reStructuredText], [HTML], [LaTeX], [ConTeXt], -[RTF], [DocBook XML], [OpenDocument XML], [GNU Texinfo], [groff man] -pages, and [S5] HTML slide shows. +[RTF], [DocBook XML], [OpenDocument XML], [GNU Texinfo], [MediaWiki markup], +[groff man] pages, and [S5] HTML slide shows. Pandoc features @@ -170,6 +170,7 @@ kind. [RTF]: http://en.wikipedia.org/wiki/Rich_Text_Format [DocBook XML]: http://www.docbook.org/ [OpenDocument XML]: http://opendocument.xml.org/ +[MediaWiki markup]: http://www.mediawiki.org/wiki/Help:Formatting [groff man]: http://developer.apple.com/DOCUMENTATION/Darwin/Reference/ManPages/man7/groff_man.7.html [GNU Texinfo]: http://www.gnu.org/software/texinfo/ [Haskell]: http://www.haskell.org/