pandoc/src/Text/Pandoc/Readers/MediaWiki.hs

{-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7
{-
  Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-}

{- |
   Module      : Text.Pandoc.Readers.MediaWiki
   Copyright   : Copyright (C) 2012 John MacFarlane
   License     : GNU GPL, version 2 or above

   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

Conversion of mediawiki text to 'Pandoc' document.
-}
{-
TODO:
_ support HTML lists
_ support list style attributes and start values in ol lists, also
  value attribute on li
_ support internal links http://www.mediawiki.org/wiki/Help:Links
_ support external links (partially implemented)
_ support images http://www.mediawiki.org/wiki/Help:Images
_ support tables http://www.mediawiki.org/wiki/Help:Tables
_ raw mediawiki:
  _ templates or anything in {{}} (can be postprocessed)
  _ category links
_ gallery tag?
-}
module Text.Pandoc.Readers.MediaWiki ( readMediaWiki ) where

import Text.Pandoc.Definition
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>))
import Text.Pandoc.Options
import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag,
                                  isBlockTag, isCommentTag )
import Text.Pandoc.XML ( fromEntities )
import Text.Pandoc.Parsing
import Text.Pandoc.Generic ( bottomUp )
import Text.Pandoc.Shared ( stripTrailingNewlines )
import Data.Monoid (mconcat, mempty)
import Control.Applicative ((<$>), (<*), (*>), (<$))
import Control.Monad
import Data.List (intersperse)
import Text.HTML.TagSoup
import Data.Sequence (viewl, ViewL(..), (<|))

-- | Parse a mediawiki-formatted string and return the resulting
-- 'Pandoc' document.
readMediaWiki :: ReaderOptions -- ^ Reader options
               -> String        -- ^ String to parse (assuming @'\n'@ line endings)
               -> Pandoc
readMediaWiki opts s = readWith parseMediaWiki initialState paddedInput
  where
    -- Default parser state, carrying the caller-supplied reader options.
    initialState = def{ stateOptions = opts }
    -- Two newlines are appended to the input before it is parsed.
    paddedInput  = s ++ "\n\n"

-- | Parser type used by every parser in this module: 'Parser' applied to
-- '[Char]' and 'ParserState'.
type MWParser = Parser [Char] ParserState

--
-- auxiliary functions
--

-- | Characters that 'str' refuses to consume and that 'special' accepts
-- one at a time.
specialChars :: [Char]
specialChars = ['\'', '[', ']', '<', '=', '&', '*']

-- | Whitespace characters that terminate a 'str' token: space, newline
-- and tab.
spaceChars :: [Char]
spaceChars = [' ', '\n', '\t']

-- | Match the literal string @s@ and discard it.  The match is wrapped in
-- 'try', so a partial match consumes no input.
sym :: String -> MWParser ()
sym s = try (string s) >> return ()

-- | Consume one tag accepted by 'isCommentTag' and discard it.
htmlComment :: MWParser ()
htmlComment = htmlTag isCommentTag >> return ()

-- | Parse inline content enclosed by an opening and closing @tag@,
-- returning the trimmed, concatenated inlines.  Backtracks as a whole
-- if the closing tag is never found.
inlinesInTags :: String -> MWParser Inlines
inlinesInTags tag = try $ do
  _ <- htmlTag (~== TagOpen tag [])
  contents <- manyTill inline closeTag
  return $ trimInlines $ mconcat contents
  where closeTag = htmlTag (~== TagClose tag)

-- | Parse block content enclosed by an opening and closing @tag@,
-- returning the concatenated blocks.  Backtracks as a whole on failure.
blocksInTags :: String -> MWParser Blocks
blocksInTags tag = try $ do
  _ <- htmlTag (~== TagOpen tag [])
  contents <- manyTill block closeTag
  return $ mconcat contents
  where closeTag = htmlTag (~== TagClose tag)

-- | Collect the raw characters between an opening and closing @tag@, then
-- run them through 'parseTags' and 'innerText' so only the text content
-- of the enclosed markup is returned.
charsInTags :: String -> MWParser [Char]
charsInTags tag = try $ do
  _ <- htmlTag (~== TagOpen tag [])
  raw <- manyTill anyChar closeTag
  return $ innerText (parseTags raw)
  where closeTag = htmlTag (~== TagClose tag)

--
-- main parser
--

-- | Top-level parser: zero or more blocks, optional trailing whitespace,
-- then end of input.
parseMediaWiki :: MWParser Pandoc
parseMediaWiki = do
  blocks <- many block
  spaces >> eof
  return (B.doc (mconcat blocks))

--
-- block parsers
--

-- | Parse a single block.  The alternatives are tried in the order listed;
-- 'para' is the final fallback.
block :: MWParser Blocks
block = msum
  [ header
  , hrule
  , bulletList
  , orderedList
  , definitionList
  , mempty <$ try (spaces *> htmlComment)  -- comments produce no output
  , preformatted
  , blockquote
  , codeblock
  , syntaxhighlight
  , haskell
  , mempty <$ skipMany1 blankline          -- blank lines produce no output
  , pTag
  , blockHtml
  , para
  ]

-- | A paragraph: one or more inlines, concatenated and trimmed.
para :: MWParser Blocks
para = do
  ils <- many1 inline
  return $ B.para $ trimInlines $ mconcat ils

-- Opening and closing @p@ tags are consumed and dropped: the text between
-- them is parsed as ordinary paragraphs anyway.
pTag :: MWParser Blocks
pTag = htmlTag isPTag >> return mempty
  where isPTag t = t ~== TagOpen "p" [] || t ~== TagClose "p"

-- | Pass a tag accepted by 'isBlockTag' through as a raw @html@ block,
-- using the second component of the 'htmlTag' result as its text.
blockHtml :: MWParser Blocks
blockHtml = do
  (_, raw) <- htmlTag isBlockTag
  return $ B.rawBlock "html" raw

-- | A horizontal rule: four or more hyphens immediately followed by a
-- newline.  Backtracks if the line does not match.
hrule :: MWParser Blocks
hrule = try $ do
  _ <- string "----"
  skipMany (char '-')
  _ <- newline
  return B.horizontalRule

-- | Preformatted text: a block introduced by a leading space.  Inside it,
-- whitespace becomes non-breaking spaces, a newline followed by a space
-- becomes a line break, and every 'Str' in the result is turned into 'Code'.
preformatted :: MWParser Blocks
preformatted = do
  _ <- char ' '
  pieces <- many1 preInline
  return $ B.para $ bottomUp strToCode $ mconcat pieces
  where
    -- Specialised inline parsers are tried before the general 'inline'.
    preInline  = preSpace <|> preEndline <|> preNowiki <|> inline
    -- Each whitespace character is replaced by a non-breaking space.
    preSpace   = B.str <$> many1 ('\160' <$ spaceChar)
    -- The block continues only when the next line also starts with a space.
    preEndline = B.linebreak <$ try (newline <* char ' ')
    -- @nowiki@ contents keep their line structure, with spaces made
    -- non-breaking and entities decoded.
    preNowiki  = do
      raw <- try (htmlTag (~== TagOpen "nowiki" []) *>
                  manyTill anyChar (htmlTag (~== TagClose "nowiki")))
      return $ mconcat $ intersperse B.linebreak $ map B.str
             $ lines $ fromEntities $ map toNbsp raw
    toNbsp ' ' = '\160'
    toNbsp ch  = ch
    strToCode (Str s) = Code ("",[],[]) s
    strToCode other   = other

-- | Blocks enclosed in @blockquote@ tags, wrapped as a block quote.
blockquote :: MWParser Blocks
blockquote = do
  quoted <- blocksInTags "blockquote"
  return $ B.blockQuote quoted

-- | The contents of a @pre@ element as a code block, after 'trimCode'.
codeblock :: MWParser Blocks
codeblock = do
  raw <- charsInTags "pre"
  return $ B.codeBlock (trimCode raw)

-- | Drop a single leading newline, if present, then apply
-- 'stripTrailingNewlines'.
trimCode :: String -> String
trimCode = stripTrailingNewlines . dropLeadingNewline
  where dropLeadingNewline ('\n':rest) = rest
        dropLeadingNewline other       = other

-- | A @syntaxhighlight@ element as a code block.  The @lang@ attribute
-- becomes a class, the presence of @line@ adds the @numberLines@ class,
-- and @start@ becomes the @startFrom@ key-value attribute.
syntaxhighlight :: MWParser Blocks
syntaxhighlight = try $ do
  -- Peek at the opening tag to read its attributes; 'charsInTags' below
  -- consumes the tag for real.
  (TagOpen _ attrs, _) <- lookAhead
                          $ htmlTag (~== TagOpen "syntaxhighlight" [])
  let langClass = [lang | Just lang <- [lookup "lang" attrs]]
      lineClass = ["numberLines" | Just _ <- [lookup "line" attrs]]
      startKvs  = [("startFrom", n) | Just n <- [lookup "start" attrs]]
  contents <- charsInTags "syntaxhighlight"
  return $ B.codeBlockWith ("", langClass ++ lineClass, startKvs)
         $ trimCode contents

-- | The contents of a @haskell@ element as a code block with class
-- @haskell@.
haskell :: MWParser Blocks
haskell = do
  src <- charsInTags "haskell"
  return $ B.codeBlockWith ("",["haskell"],[]) (trimCode src)

-- | Parse a section header: a run of one to six @=@ characters starting in
-- column 1, inline content, and a closing run of @=@ of the same length.
-- The header level is the length of the opening run.
header :: MWParser Blocks
header = try $ do
  col <- sourceColumn <$> getPosition
  guard $ col == 1  -- header must be at beginning of line
  eqs <- many1 (char '=')
  let lev = length eqs
  guard $ lev <= 6
  -- The closing delimiter is wrapped in 'try' so that a shorter run of '='
  -- inside the heading text (e.g. @== a = b ==@) is handed back to 'inline'
  -- instead of failing after consuming input, which would abort 'manyTill'
  -- and with it the whole header.
  let closer = try (count lev $ char '=')
  contents <- trimInlines . mconcat <$> manyTill inline closer
  return $ B.header lev contents

-- | A bullet list: one or more items introduced by @*@.
bulletList :: MWParser Blocks
bulletList = do
  items <- many1 (listItem '*')
  return $ B.bulletList items

-- | An ordered list: one or more items introduced by @#@.
orderedList :: MWParser Blocks
orderedList = do
  items <- many1 (listItem '#')
  return $ B.orderedList items

-- | A definition list: one or more 'defListItem's.
definitionList :: MWParser Blocks
definitionList = do
  items <- many1 defListItem
  return $ B.definitionList items

-- | One definition-list entry: zero or more @;@ term lines, joined by line
-- breaks, followed by at least one @:@ definition.
defListItem :: MWParser (Inlines, [Blocks])
defListItem = try $ do
  termLines <- many defListTerm
  defs      <- many1 (listItem ':')
  return (mconcat (intersperse B.linebreak termLines), defs)

-- | A definition-list term: @;@, optional spaces, then the rest of the
-- line, which is re-parsed as inline content.
defListTerm  :: MWParser Inlines
defListTerm = do
  _ <- char ';'
  skipMany spaceChar
  raw <- manyTill anyChar newline
  parseFromString (trimInlines . mconcat <$> many inline) raw

-- | Match the list marker @c@, provided it is not followed by another
-- list marker character.
listStart :: Char -> MWParser ()
listStart c = char c >> notFollowedBy listStartChar

-- | Any one of the four list marker characters.
listStartChar :: MWParser Char
listStartChar = oneOf ['*', '#', ';', ':']

-- | Match a single list marker character, trying @*@, @#@, @:@ and @;@
-- in that order.
anyListStart :: MWParser Char
anyListStart = msum (map char "*#:;")

-- | Parse a list item for marker @c@.  If the marker is repeated (a
-- skipped nesting level), the item and the following lines sharing the
-- same extra-marker prefix are re-parsed as a nested list of the same kind.
listItem :: Char -> MWParser Blocks
listItem c = try $ do
  -- Every marker that is itself followed by another list marker counts
  -- as an extra nesting level.
  extras <- many (try $ char c <* lookAhead listStartChar)
  case extras of
    [] -> listItem' c
    _  -> do
      skipMany spaceChar
      firstLine <- restOfLine
      moreLines <- many (try $ string extras *> restOfLine)
      items <- parseFromString (many1 $ listItem' c)
                 (unlines (firstLine : moreLines))
      wrap items
  where
    restOfLine = manyTill anyChar newline
    -- Wrap the nested items in a list of the kind that @c@ introduces;
    -- any other marker fails.
    wrap items
      | c == '*'  = return $ B.bulletList items
      | c == '#'  = return $ B.orderedList items
      | c == ':'  = return $ B.definitionList [(mempty, items)]
      | otherwise = mzero

-- | Parse a single list item at the current nesting level.  The first line
-- plus any following lines that begin with @c@ and another list marker
-- are re-parsed as blocks; a leading 'Para' in the result becomes 'Plain'.
listItem' :: Char -> MWParser Blocks
listItem' c = try $ do
  listStart c
  skipMany spaceChar
  firstLine <- restOfLine
  -- Continuation lines lose their leading @c@ but keep the marker that
  -- follows it, so they form a nested list when re-parsed.
  continuation <- many (try $ char c *> lookAhead listStartChar *> restOfLine)
  contents <- parseFromString (mconcat <$> many1 block)
                (unlines (firstLine : continuation))
  return $ case viewl (B.unMany contents) of
             Para xs :< ys -> B.Many (Plain xs <| ys)
             _             -> contents
  where restOfLine = manyTill anyChar newline

--
-- inline parsers
--

-- | Parse a single inline element.  The alternatives are tried in the
-- order listed; 'special' is the final fallback.
inline :: MWParser Inlines
inline = msum
  [ whitespace
  , url
  , str
  , strong
  , emph
  , nowiki
  , linebreak
  , externalLink
  , strikeout
  , subscript
  , superscript
  , math
  , code
  , hask
  , B.singleton <$> charRef
  , inlineHtml
  , special
  ]

-- | A run of one or more characters that are neither in 'specialChars'
-- nor in 'spaceChars'.
str :: MWParser Inlines
str = do
  chars <- many1 (noneOf (specialChars ++ spaceChars))
  return (B.str chars)

-- | A single character from 'specialChars' taken literally, unless a tag
-- accepted by 'isBlockTag' starts at this position.
special :: MWParser Inlines
special = do
  notFollowedBy' (htmlTag isBlockTag)
  c <- oneOf specialChars
  return $ B.str [c]

-- | Pass a tag accepted by 'isInlineTag' through as a raw @html@ inline,
-- using the second component of the 'htmlTag' result as its text.
inlineHtml :: MWParser Inlines
inlineHtml = do
  (_, raw) <- htmlTag isInlineTag
  return $ B.rawInline "html" raw

-- | A run of space characters, a soft line ending, or an HTML comment;
-- each yields a single space.
whitespace :: MWParser Inlines
whitespace = (skipMany1 spaceChar <|> endline <|> htmlComment) >> return B.space

-- | A newline that does not end the current paragraph: it must not be
-- followed by a blank line, a horizontal rule, or a list marker.
endline :: MWParser ()
endline = try $ do
  _ <- newline
  notFollowedBy blankline
  notFollowedBy' hrule
  notFollowedBy anyListStart

-- | A @br@ tag, with an optional closing @br@ tag and an optional blank
-- line after it, as a hard line break.
linebreak :: MWParser Inlines
linebreak = do
  _ <- htmlTag (~== TagOpen "br" [])
  optional (htmlTag (~== TagClose "br"))
  optional blankline
  return B.linebreak

-- | A bracketed external link: @[uri label]@ or the unlabelled form
-- @[uri]@.  An empty label is replaced by the placeholder text @1@.
externalLink :: MWParser Inlines
externalLink = try $ do
  _ <- char '['
  (_, src) <- uri
  -- A label must be separated from the URI by whitespace.  If the bracket
  -- closes immediately the link has no label.  Previously that form was
  -- rejected outright, so the placeholder below was reachable only via
  -- @[uri ]@.
  -- NOTE(review): the unlabelled form relies on 'uri' stopping before the
  -- closing bracket -- confirm against the 'uri' parser.
  lab <- try (skipMany1 spaceChar *> manyTill inline (char ']'))
         <|> ([] <$ char ']')
  let lab' = if null lab
                then [B.str "1"] -- TODO generate sequentially from state
                else lab
  return $ B.link src "" $ trimInlines $ mconcat lab'

-- | A bare URI as a link whose label is the first component of the 'uri'
-- result and whose target is the second.
url :: MWParser Inlines
url = uri >>= \(orig, src) -> return (B.link src "" (B.str orig))

-- | The contents of a @nowiki@ element taken as plain text: entities are
-- decoded with 'fromEntities' and the result is passed to 'B.text'.
nowiki :: MWParser Inlines
nowiki = do
  raw <- try (htmlTag (~== TagOpen "nowiki" []) *>
              manyTill anyChar (htmlTag (~== TagClose "nowiki")))
  return $ B.text (fromEntities raw)

-- | Inlines enclosed in @strike@ or @del@ tags, as strikeout.
strikeout :: MWParser Inlines
strikeout = do
  struck <- inlinesInTags "strike" <|> inlinesInTags "del"
  return $ B.strikeout struck

-- | Inlines enclosed in @sup@ tags, as superscript.
superscript :: MWParser Inlines
superscript = do
  raised <- inlinesInTags "sup"
  return $ B.superscript raised

-- | Inlines enclosed in @sub@ tags, as subscript.
subscript :: MWParser Inlines
subscript = do
  lowered <- inlinesInTags "sub"
  return $ B.subscript lowered

-- | The text content of a @math@ element, as inline math.
math :: MWParser Inlines
math = do
  formula <- charsInTags "math"
  return $ B.math formula

-- | The text content of a @code@ or @tt@ element, as inline code.
code :: MWParser Inlines
code = do
  src <- charsInTags "code" <|> charsInTags "tt"
  return $ B.code src

-- | The text content of a @hask@ element, as inline code with class
-- @haskell@.
hask :: MWParser Inlines
hask = do
  src <- charsInTags "hask"
  return $ B.codeWith ("",["haskell"],[]) src

-- | Parse one or more inlines between a @start@ and an @end@ delimiter,
-- returning them concatenated and trimmed.  Backtracks as a whole on
-- failure.
inlinesBetween :: (Show b) => MWParser a -> MWParser b -> MWParser Inlines
inlinesBetween start end = try $ do
  _ <- start
  ils <- many1Till inner end
  return $ trimInlines (mconcat ils)
    where inner          = innerSpace <|> nonSpaceInline
          -- Whitespace is accepted only when the end delimiter does not
          -- come directly after it.
          innerSpace     = try $ whitespace >>~ notFollowedBy' end
          -- Any other inline, provided it does not begin with whitespace.
          nonSpaceInline = notFollowedBy' (() <$ whitespace) >> inline

-- | Emphasis: inlines between @''@ delimiters.  The opening delimiter must
-- be followed by a non-space character, and the closing delimiter is not
-- accepted where 'strong' would succeed.
emph :: MWParser Inlines
emph = fmap B.emph $ nested $ inlinesBetween open close
    where open  = sym "''" >> lookAhead nonspaceChar
          close = try $ do
            notFollowedBy' (() <$ strong)
            sym "''"

-- | Strong emphasis: inlines between @'''@ delimiters.  The opening
-- delimiter must be followed by a non-space character.
strong :: MWParser Inlines
strong = fmap B.strong $ nested $ inlinesBetween open close
    where open  = sym "'''" >> lookAhead nonspaceChar
          close = try (sym "'''")
Added basic mediawiki reader. Text.Pandoc.Readers.MediaWiki module, tests/mediawiki-reader.{txt,native}. 2012-09-10 10:02:12 -07:00			`{-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7`
			`{-`
MediaWiki reader: preformatted blocks and tests. 2012-09-12 22:44:11 -07:00			`Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu>`
Added basic mediawiki reader. Text.Pandoc.Readers.MediaWiki module, tests/mediawiki-reader.{txt,native}. 2012-09-10 10:02:12 -07:00
			`This program is free software; you can redistribute it and/or modify`
			`it under the terms of the GNU General Public License as published by`
			`the Free Software Foundation; either version 2 of the License, or`
			`(at your option) any later version.`

			`This program is distributed in the hope that it will be useful,`
			`but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`GNU General Public License for more details.`

			`You should have received a copy of the GNU General Public License`
			`along with this program; if not, write to the Free Software`
			`Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA`
			`-}`

			`{- \|`
			`Module : Text.Pandoc.Readers.MediaWiki`
			`Copyright : Copyright (C) 2012 John MacFarlane`
			`License : GNU GPL, version 2 or above`

			`Maintainer : John MacFarlane <jgm@berkeley.edu>`
			`Stability : alpha`
			`Portability : portable`

			`Conversion of mediawiki text to 'Pandoc' document.`
			`-}`
			`{-`
			`TODO:`
			`_ support HTML lists`
			`_ support list style attributes and start values in ol lists, also`
			`value attribute on li`
			`_ support internal links http://www.mediawiki.org/wiki/Help:Links`
MediaWiki reader: Updated todo comments. 2012-09-12 19:09:45 -07:00			`_ support external links (partially implemented)`
Added basic mediawiki reader. Text.Pandoc.Readers.MediaWiki module, tests/mediawiki-reader.{txt,native}. 2012-09-10 10:02:12 -07:00			`_ support images http://www.mediawiki.org/wiki/Help:Images`
			`_ support tables http://www.mediawiki.org/wiki/Help:Tables`
MediaWiki reader: Updated todo comments. 2012-09-12 19:09:45 -07:00			`_ raw mediawiki:`
			`_ templates or anything in {{}} (can be postprocessed)`
			`_ category links`
			`_ gallery tag?`
Added basic mediawiki reader. Text.Pandoc.Readers.MediaWiki module, tests/mediawiki-reader.{txt,native}. 2012-09-10 10:02:12 -07:00			`-}`
			`module Text.Pandoc.Readers.MediaWiki ( readMediaWiki ) where`

			`import Text.Pandoc.Definition`
			`import qualified Text.Pandoc.Builder as B`
			`import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>))`
			`import Text.Pandoc.Options`
MediaWiki reader: preformatted blocks and tests. 2012-09-12 22:44:11 -07:00			`import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag,`
			`isBlockTag, isCommentTag )`
Added basic mediawiki reader. Text.Pandoc.Readers.MediaWiki module, tests/mediawiki-reader.{txt,native}. 2012-09-10 10:02:12 -07:00			`import Text.Pandoc.XML ( fromEntities )`
			`import Text.Pandoc.Parsing`
MediaWiki reader: preformatted blocks and tests. 2012-09-12 22:44:11 -07:00			`import Text.Pandoc.Generic ( bottomUp )`
Added basic mediawiki reader. Text.Pandoc.Readers.MediaWiki module, tests/mediawiki-reader.{txt,native}. 2012-09-10 10:02:12 -07:00			`import Text.Pandoc.Shared ( stripTrailingNewlines )`
			`import Data.Monoid (mconcat, mempty)`
			`import Control.Applicative ((<$>), (<*), (*>), (<$))`
			`import Control.Monad`
			`import Data.List (intersperse)`
			`import Text.HTML.TagSoup`
MediaWiki reader: Improvements to list parsing and HTML tag handling. 2012-09-12 17:15:21 -07:00			`import Data.Sequence (viewl, ViewL(..), (<\|))`
Added basic mediawiki reader. Text.Pandoc.Readers.MediaWiki module, tests/mediawiki-reader.{txt,native}. 2012-09-10 10:02:12 -07:00
			`-- \| Read mediawiki from an input string and return a Pandoc document.`
			`readMediaWiki :: ReaderOptions -- ^ Reader options`
			`-> String -- ^ String to parse (assuming @'\n'@ line endings)`
			`-> Pandoc`
			`readMediaWiki opts s =`
			`(readWith parseMediaWiki) def{ stateOptions = opts } (s ++ "\n\n")`

			`type MWParser = Parser [Char] ParserState`

			`--`
			`-- auxiliary functions`
			`--`

			`specialChars :: [Char]`
			`specialChars = "'[]<=&*"`

			`spaceChars :: [Char]`
			`spaceChars = " \n\t"`

			`sym :: String -> MWParser ()`
			`sym s = () <$ try (string s)`

			`htmlComment :: MWParser ()`
			`htmlComment = () <$ htmlTag isCommentTag`

			`inlinesInTags :: String -> MWParser Inlines`
			`inlinesInTags tag = trimInlines . mconcat <$> try`
			`(htmlTag (~== TagOpen tag []) *>`
			`manyTill inline (htmlTag (~== TagClose tag)))`

			`blocksInTags :: String -> MWParser Blocks`
			`blocksInTags tag = mconcat <$> try`
			`(htmlTag (~== TagOpen tag []) *>`
			`manyTill block (htmlTag (~== TagClose tag)))`

			`charsInTags :: String -> MWParser [Char]`
MediaWiki reader: Fixed charsInTags parser to use innerText. 2012-09-12 16:15:52 -07:00			`charsInTags tag = innerText . parseTags <$> try`
Added basic mediawiki reader. Text.Pandoc.Readers.MediaWiki module, tests/mediawiki-reader.{txt,native}. 2012-09-10 10:02:12 -07:00			`(htmlTag (~== TagOpen tag []) *>`
			`manyTill anyChar (htmlTag (~== TagClose tag)))`

			`--`
			`-- main parser`
			`--`

			`parseMediaWiki :: MWParser Pandoc`
			`parseMediaWiki = do`
			`bs <- mconcat <$> many block`
			`spaces`
			`eof`
			`return $ B.doc bs`

			`--`
			`-- block parsers`
			`--`

			`block :: MWParser Blocks`
			`block = header`
			`<\|> hrule`
			`<\|> bulletList`
			`<\|> orderedList`
			`<\|> definitionList`
MediaWiki reader: preformatted blocks and tests. 2012-09-12 22:44:11 -07:00			`<\|> mempty <$ try (spaces *> htmlComment)`
			`<\|> preformatted`
Added basic mediawiki reader. Text.Pandoc.Readers.MediaWiki module, tests/mediawiki-reader.{txt,native}. 2012-09-10 10:02:12 -07:00			`<\|> blockquote`
			`<\|> codeblock`
MediaWiki reader: Support `<syntaxhighlight>` tag. 2012-09-12 19:05:15 -07:00			`<\|> syntaxhighlight`
Added basic mediawiki reader. Text.Pandoc.Readers.MediaWiki module, tests/mediawiki-reader.{txt,native}. 2012-09-10 10:02:12 -07:00			`<\|> haskell`
			`<\|> mempty <$ skipMany1 blankline`
MediaWiki reader: Improvements to list parsing and HTML tag handling. 2012-09-12 17:15:21 -07:00			`<\|> pTag`
			`<\|> blockHtml`
Added basic mediawiki reader. Text.Pandoc.Readers.MediaWiki module, tests/mediawiki-reader.{txt,native}. 2012-09-10 10:02:12 -07:00			`<\|> para`

			`para :: MWParser Blocks`
			`para = B.para . trimInlines . mconcat <$> many1 inline`

MediaWiki reader: Improvements to list parsing and HTML tag handling. 2012-09-12 17:15:21 -07:00			`-- We can just skip pTags, as contents will be treated as paragraphs`
			`pTag :: MWParser Blocks`
			`pTag = mempty <$ (htmlTag (\t -> t ~== TagOpen "p" [] \|\| t ~== TagClose "p"))`

			`blockHtml :: MWParser Blocks`
			`blockHtml = (B.rawBlock "html" . snd <$> htmlTag isBlockTag)`

Added basic mediawiki reader. Text.Pandoc.Readers.MediaWiki module, tests/mediawiki-reader.{txt,native}. 2012-09-10 10:02:12 -07:00			`hrule :: MWParser Blocks`
			`hrule = B.horizontalRule <$ try (string "----" *> many (char '-') *> newline)`

MediaWiki reader: preformatted blocks and tests. 2012-09-12 22:44:11 -07:00			`preformatted :: MWParser Blocks`
			`preformatted = do`
			`char ' '`
			`let endline' = B.linebreak <$ (try $ newline <* char ' ')`
			`let whitespace' = B.str <$> many1 ('\160' <$ spaceChar)`
			`let spToNbsp ' ' = '\160'`
			`spToNbsp x = x`
			`let nowiki' = mconcat . intersperse B.linebreak . map B.str .`
			`lines . fromEntities . map spToNbsp <$> try`
			`(htmlTag (~== TagOpen "nowiki" []) *>`
			`manyTill anyChar (htmlTag (~== TagClose "nowiki")))`
			`let inline' = whitespace' <\|> endline' <\|> nowiki' <\|> inline`
			`let strToCode (Str s) = Code ("",[],[]) s`
			`strToCode x = x`
			`B.para . bottomUp strToCode . mconcat <$> many1 inline'`

Added basic mediawiki reader. Text.Pandoc.Readers.MediaWiki module, tests/mediawiki-reader.{txt,native}. 2012-09-10 10:02:12 -07:00			`blockquote :: MWParser Blocks`
			`blockquote = B.blockQuote <$> blocksInTags "blockquote"`

			`codeblock :: MWParser Blocks`
			`codeblock = B.codeBlock . trimCode <$> charsInTags "pre"`

			`trimCode :: String -> String`
			`trimCode ('\n':xs) = stripTrailingNewlines xs`
			`trimCode xs = stripTrailingNewlines xs`

MediaWiki reader: Support `<syntaxhighlight>` tag. 2012-09-12 19:05:15 -07:00			`syntaxhighlight :: MWParser Blocks`
			`syntaxhighlight = try $ do`
MediaWiki reader: preformatted blocks and tests. 2012-09-12 22:44:11 -07:00			`(TagOpen _ attrs, _) <- lookAhead`
			`$ htmlTag (~== TagOpen "syntaxhighlight" [])`
MediaWiki reader: Support `<syntaxhighlight>` tag. 2012-09-12 19:05:15 -07:00			`let mblang = lookup "lang" attrs`
			`let mbstart = lookup "start" attrs`
			`let mbline = lookup "line" attrs`
			`let classes = maybe [] (:[]) mblang ++ maybe [] (const ["numberLines"]) mbline`
			`let kvs = maybe [] (\x -> [("startFrom",x)]) mbstart`
			`contents <- charsInTags "syntaxhighlight"`
More mediawiki reader tests. 2012-09-12 19:35:06 -07:00			`return $ B.codeBlockWith ("",classes,kvs) $ trimCode contents`
MediaWiki reader: Support `<syntaxhighlight>` tag. 2012-09-12 19:05:15 -07:00
Added basic mediawiki reader. Text.Pandoc.Readers.MediaWiki module, tests/mediawiki-reader.{txt,native}. 2012-09-10 10:02:12 -07:00			`haskell :: MWParser Blocks`
			`haskell = B.codeBlockWith ("",["haskell"],[]) . trimCode <$>`
			`charsInTags "haskell"`

			`header :: MWParser Blocks`
			`header = try $ do`
			`col <- sourceColumn <$> getPosition`
			`guard $ col == 1 -- header must be at beginning of line`
			`eqs <- many1 (char '=')`
			`let lev = length eqs`
			`guard $ lev <= 6`
			`contents <- trimInlines . mconcat <$> manyTill inline (count lev $ char '=')`
			`return $ B.header lev contents`

			`bulletList :: MWParser Blocks`
			`bulletList = B.bulletList <$> many1 (listItem '*')`

			`orderedList :: MWParser Blocks`
			`orderedList = B.orderedList <$> many1 (listItem '#')`

			`definitionList :: MWParser Blocks`
			`definitionList = B.definitionList <$> many1 defListItem`

			`defListItem :: MWParser (Inlines, [Blocks])`
			`defListItem = try $ do`
MediaWiki reader: Handle def lists without terms (: lines). 2012-09-12 17:29:51 -07:00			`terms <- mconcat . intersperse B.linebreak <$> many defListTerm`
Added basic mediawiki reader. Text.Pandoc.Readers.MediaWiki module, tests/mediawiki-reader.{txt,native}. 2012-09-10 10:02:12 -07:00			`defs <- many1 $ listItem ':'`
			`return (terms, defs)`

			`defListTerm :: MWParser Inlines`
			`defListTerm = char ';' >> skipMany spaceChar >> manyTill anyChar newline >>=`
			`parseFromString (trimInlines . mconcat <$> many inline)`

			`listStart :: Char -> MWParser ()`
			`listStart c = char c *> notFollowedBy listStartChar`

			`listStartChar :: MWParser Char`
			`listStartChar = oneOf "*#;:"`

MediaWiki reader: Properly handle :, ::, :::... 2012-09-12 17:40:15 -07:00			`anyListStart :: MWParser Char`
			`anyListStart = char '*'`
			`<\|> char '#'`
			`<\|> char ':'`
			`<\|> char ';'`
Added basic mediawiki reader. Text.Pandoc.Readers.MediaWiki module, tests/mediawiki-reader.{txt,native}. 2012-09-10 10:02:12 -07:00
			`listItem :: Char -> MWParser Blocks`
			`listItem c = try $ do`
MediaWiki reader: Support skipped level in lists. For example: # one # two ### skip! ### skip 2012-09-12 16:47:30 -07:00			`extras <- many (try $ char c <* lookAhead listStartChar)`
			`if null extras`
			`then listItem' c`
			`else do`
MediaWiki reader: preformatted blocks and tests. 2012-09-12 22:44:11 -07:00			`skipMany spaceChar`
MediaWiki reader: Support skipped level in lists. For example: # one # two ### skip! ### skip 2012-09-12 16:47:30 -07:00			`first <- manyTill anyChar newline`
			`rest <- many (try $ string extras *> manyTill anyChar newline)`
			`contents <- parseFromString (many1 $ listItem' c)`
			`(unlines (first : rest))`
			`case c of`
			`'*' -> return $ B.bulletList contents`
			`'#' -> return $ B.orderedList contents`
MediaWiki reader: Properly handle :, ::, :::... 2012-09-12 17:40:15 -07:00			`':' -> return $ B.definitionList [(mempty, contents)]`
MediaWiki reader: Support skipped level in lists. For example: # one # two ### skip! ### skip 2012-09-12 16:47:30 -07:00			`_ -> mzero`

			`listItem' :: Char -> MWParser Blocks`
			`listItem' c = try $ do`
Added basic mediawiki reader. Text.Pandoc.Readers.MediaWiki module, tests/mediawiki-reader.{txt,native}. 2012-09-10 10:02:12 -07:00			`listStart c`
MediaWiki reader: preformatted blocks and tests. 2012-09-12 22:44:11 -07:00			`skipMany spaceChar`
Added basic mediawiki reader. Text.Pandoc.Readers.MediaWiki module, tests/mediawiki-reader.{txt,native}. 2012-09-10 10:02:12 -07:00			`first <- manyTill anyChar newline`
			`rest <- many (try $ char c *> lookAhead listStartChar *>`
			`manyTill anyChar newline)`
MediaWiki reader: Improvements to list parsing and HTML tag handling. 2012-09-12 17:15:21 -07:00			`contents <- parseFromString (mconcat <$> many1 block)`
			`$ unlines $ first : rest`
			`case viewl (B.unMany contents) of`
MediaWiki reader: preformatted blocks and tests. 2012-09-12 22:44:11 -07:00			`(Para xs) :< ys -> return $ B.Many $ (Plain xs) <\| ys`
			`_ -> return contents`
Added basic mediawiki reader. Text.Pandoc.Readers.MediaWiki module, tests/mediawiki-reader.{txt,native}. 2012-09-10 10:02:12 -07:00
			`--`
			`-- inline parsers`
			`--`

			`inline :: MWParser Inlines`
			`inline = whitespace`
			`<\|> url`
			`<\|> str`
			`<\|> strong`
			`<\|> emph`
			`<\|> nowiki`
			`<\|> linebreak`
			`<\|> externalLink`
			`<\|> strikeout`
			`<\|> subscript`
			`<\|> superscript`
MediaWiki reader: Support `<math>` tag. 2012-09-12 19:54:56 -07:00			`<\|> math`
Added basic mediawiki reader. Text.Pandoc.Readers.MediaWiki module, tests/mediawiki-reader.{txt,native}. 2012-09-10 10:02:12 -07:00			`<\|> code`
			`<\|> hask`
			`<\|> B.singleton <$> charRef`
			`<\|> inlineHtml`
			`<\|> special`

			`str :: MWParser Inlines`
			`str = B.str <$> many1 (noneOf $ specialChars ++ spaceChars)`

			`special :: MWParser Inlines`
			`special = B.str <$> count 1 (notFollowedBy' (htmlTag isBlockTag) *>`
			`oneOf specialChars)`

			`inlineHtml :: MWParser Inlines`
			`inlineHtml = B.rawInline "html" . snd <$> htmlTag isInlineTag`

			`whitespace :: MWParser Inlines`
			`whitespace = B.space <$ (skipMany1 spaceChar <\|> endline <\|> htmlComment)`

			`endline :: MWParser ()`
			`endline = () <$ try (newline <*`
			`notFollowedBy blankline <*`
			`notFollowedBy' hrule <*`
			`notFollowedBy anyListStart)`

			`linebreak :: MWParser Inlines`
			`linebreak = B.linebreak <$`
			`(htmlTag (~== TagOpen "br" []) *>`
			`optional (htmlTag (~== TagClose "br")) *>`
			`optional blankline)`

			`externalLink :: MWParser Inlines`
			`externalLink = try $ do`
			`char '['`
Minor fixes to MediaWiki reader. 2012-09-12 09:29:00 -07:00			`(_, src) <- uri`
Added basic mediawiki reader. Text.Pandoc.Readers.MediaWiki module, tests/mediawiki-reader.{txt,native}. 2012-09-10 10:02:12 -07:00			`skipMany1 spaceChar`
			`lab <- manyTill inline (char ']')`
			`let lab' = if null lab`
			`then [B.str "1"] -- TODO generate sequentially from state`
			`else lab`
			`return $ B.link src "" $ trimInlines $ mconcat lab'`

			`url :: MWParser Inlines`
			`url = do`
Minor fixes to MediaWiki reader. 2012-09-12 09:29:00 -07:00			`(orig, src) <- uri`
Added basic mediawiki reader. Text.Pandoc.Readers.MediaWiki module, tests/mediawiki-reader.{txt,native}. 2012-09-10 10:02:12 -07:00			`return $ B.link src "" (B.str orig)`

			`nowiki :: MWParser Inlines`
Added tests for mediawiki lists. 2012-09-12 19:48:11 -07:00			`nowiki = B.text . fromEntities <$> try`
			`(htmlTag (~== TagOpen "nowiki" []) *>`
			`manyTill anyChar (htmlTag (~== TagClose "nowiki")))`
Added basic mediawiki reader. Text.Pandoc.Readers.MediaWiki module, tests/mediawiki-reader.{txt,native}. 2012-09-10 10:02:12 -07:00
			`strikeout :: MWParser Inlines`
			`strikeout = B.strikeout <$> (inlinesInTags "strike" <\|> inlinesInTags "del")`

			`superscript :: MWParser Inlines`
			`superscript = B.superscript <$> inlinesInTags "sup"`

			`subscript :: MWParser Inlines`
			`subscript = B.subscript <$> inlinesInTags "sub"`

MediaWiki reader: Support `<math>` tag. 2012-09-12 19:54:56 -07:00			`math :: MWParser Inlines`
			`math = B.math <$> charsInTags "math"`

Added basic mediawiki reader. Text.Pandoc.Readers.MediaWiki module, tests/mediawiki-reader.{txt,native}. 2012-09-10 10:02:12 -07:00			`code :: MWParser Inlines`
			`code = B.code <$> (charsInTags "code" <\|> charsInTags "tt")`

			`hask :: MWParser Inlines`
			`hask = B.codeWith ("",["haskell"],[]) <$> charsInTags "hask"`

			`-- \| Parses a list of inlines between start and end delimiters.`
			`inlinesBetween :: (Show b) => MWParser a -> MWParser b -> MWParser Inlines`
			`inlinesBetween start end =`
			`(trimInlines . mconcat) <$> try (start >> many1Till inner end)`
			`where inner = innerSpace <\|> (notFollowedBy' (() <$ whitespace) >> inline)`
			`innerSpace = try $ whitespace >>~ notFollowedBy' end`

			`emph :: MWParser Inlines`
			`emph = B.emph <$> nested (inlinesBetween start end)`
			`where start = sym "''" >> lookAhead nonspaceChar`
			`end = try $ notFollowedBy' (() <$ strong) >> sym "''"`

			`strong :: MWParser Inlines`
			`strong = B.strong <$> nested (inlinesBetween start end)`
			`where start = sym "'''" >> lookAhead nonspaceChar`
			`end = try $ sym "'''"`