MediaWiki reader: Add automatic header identifiers.
@ -1,4 +1,5 @@
{-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7
{-# LANGUAGE RelaxedPolyRec, FlexibleInstances, TypeSynonymInstances #-}
-- RelaxedPolyRec needed for inlinesBetween on GHC < 7
Copyright (C) 2012 John MacFarlane <>
@ -51,6 +52,7 @@ import Data.List (intersperse, intercalate, isPrefixOf )
import Text.HTML.TagSoup
import Data.Sequence (viewl, ViewL(..), (<|))
import qualified Data.Foldable as F
import qualified Data.Map as M
import Data.Char (isDigit, isSpace)
-- | Read mediawiki from an input string and return a Pandoc document.
@ -62,6 +64,8 @@ readMediaWiki opts s =
, mwMaxNestingLevel = 4
, mwNextLinkNumber = 1
, mwCategoryLinks = []
, mwHeaderMap = M.empty
, mwIdentifierList = []
"source" (s ++ "\n") of
Left err' -> error $ "\nError:\n" ++ show err'
@ -71,10 +75,23 @@ data MWState = MWState { mwOptions :: ReaderOptions
, mwMaxNestingLevel :: Int
, mwNextLinkNumber :: Int
, mwCategoryLinks :: [Inlines]
, mwHeaderMap :: M.Map Inlines String
, mwIdentifierList :: [String]
-- | Parser type for this reader: 'Parser' applied to @[Char]@ and 'MWState'.
-- The instances below read and update the 'MWState' value through
-- @getState@ / @updateState@.
type MWParser = Parser [Char] MWState
-- | Reader options are taken from the parser state: the supplied selector is
-- applied to the 'mwOptions' field of the current 'MWState'.
instance HasReaderOptions MWParser where
  askReaderOption f = fmap (f . mwOptions) getState
-- | The header map is stored in the 'mwHeaderMap' field of the parser state.
instance HasHeaderMap MWParser where
  -- Read the map out of the current state.
  getHeaderMap = mwHeaderMap `fmap` getState
  -- Overwrite the stored map, leaving every other state field untouched.
  putHeaderMap newMap = updateState (\s -> s { mwHeaderMap = newMap })
-- | The identifier list is stored in the 'mwIdentifierList' field of the
-- parser state.
instance HasIdentifierList MWParser where
  -- Read the list out of the current state.
  getIdentifierList = mwIdentifierList `fmap` getState
  -- Overwrite the stored list, leaving every other state field untouched.
  putIdentifierList idents = updateState (\s -> s { mwIdentifierList = idents })
-- auxiliary functions
@ -351,7 +368,8 @@ header = try $ do
let lev = length eqs
guard $ lev <= 6
contents <- trimInlines . mconcat <$> manyTill inline (count lev $ char '=')
return $ B.header lev contents
attr <- registerHeader nullAttr contents
return $ B.headerWith attr lev contents
bulletList :: MWParser Blocks
bulletList = B.bulletList <$>
@ -1,39 +1,39 @@
Pandoc (Meta {unMeta = fromList []})
[Header 1 ("",[],[]) [Str "header"]
,Header 2 ("",[],[]) [Str "header",Space,Str "level",Space,Str "two"]
,Header 3 ("",[],[]) [Str "header",Space,Str "level",Space,Str "3"]
,Header 4 ("",[],[]) [Str "header",Space,Emph [Str "level"],Space,Str "four"]
,Header 5 ("",[],[]) [Str "header",Space,Str "level",Space,Str "5"]
,Header 6 ("",[],[]) [Str "header",Space,Str "level",Space,Str "6"]
[Header 1 ("header",[],[]) [Str "header"]
,Header 2 ("header-level-two",[],[]) [Str "header",Space,Str "level",Space,Str "two"]
,Header 3 ("header-level-3",[],[]) [Str "header",Space,Str "level",Space,Str "3"]
,Header 4 ("header-level-four",[],[]) [Str "header",Space,Emph [Str "level"],Space,Str "four"]
,Header 5 ("header-level-5",[],[]) [Str "header",Space,Str "level",Space,Str "5"]
,Header 6 ("header-level-6",[],[]) [Str "header",Space,Str "level",Space,Str "6"]
,Para [Str "=======",Space,Str "not",Space,Str "a",Space,Str "header",Space,Str "========"]
,Para [Code ("",[],[]) "==\160not\160a\160header\160=="]
,Header 2 ("",[],[]) [Str "emph",Space,Str "and",Space,Str "strong"]
,Header 2 ("emph-and-strong",[],[]) [Str "emph",Space,Str "and",Space,Str "strong"]
,Para [Emph [Str "emph"],Space,Strong [Str "strong"]]
,Para [Strong [Emph [Str "strong",Space,Str "and",Space,Str "emph"]]]
,Para [Strong [Emph [Str "emph",Space,Str "inside"],Space,Str "strong"]]
,Para [Strong [Str "strong",Space,Str "with",Space,Emph [Str "emph"]]]
,Para [Emph [Strong [Str "strong",Space,Str "inside"],Space,Str "emph"]]
,Header 2 ("",[],[]) [Str "horizontal",Space,Str "rule"]
,Header 2 ("horizontal-rule",[],[]) [Str "horizontal",Space,Str "rule"]
,Para [Str "top"]
,Para [Str "bottom"]
,Header 2 ("",[],[]) [Str "nowiki"]
,Header 2 ("nowiki",[],[]) [Str "nowiki"]
,Para [Str "''not",Space,Str "emph''"]
,Header 2 ("",[],[]) [Str "strikeout"]
,Header 2 ("strikeout",[],[]) [Str "strikeout"]
,Para [Strikeout [Str "This",Space,Str "is",Space,Emph [Str "struck",Space,Str "out"]]]
,Header 2 ("",[],[]) [Str "entities"]
,Header 2 ("entities",[],[]) [Str "entities"]
,Para [Str "hi",Space,Str "&",Space,Str "low"]
,Para [Str "hi",Space,Str "&",Space,Str "low"]
,Para [Str "G\246del"]
,Para [Str "\777\2730"]
,Header 2 ("",[],[]) [Str "comments"]
,Header 2 ("comments",[],[]) [Str "comments"]
,Para [Str "inline",Space,Str "comment"]
,Para [Str "between",Space,Str "blocks"]
,Header 2 ("",[],[]) [Str "linebreaks"]
,Header 2 ("linebreaks",[],[]) [Str "linebreaks"]
,Para [Str "hi",LineBreak,Str "there"]
,Para [Str "hi",LineBreak,Str "there"]
,Header 2 ("",[],[]) [Str ":",Space,Str "indents"]
,Header 2 ("indents",[],[]) [Str ":",Space,Str "indents"]
,Para [Str "hi"]
@ -46,36 +46,36 @@ Pandoc (Meta {unMeta = fromList []})
[[Plain [Str "there"]]])]]])]
,Para [Str "bud"]
,Header 2 ("",[],[]) [Str "p",Space,Str "tags"]
,Header 2 ("p-tags",[],[]) [Str "p",Space,Str "tags"]
,Para [Str "hi",Space,Str "there"]
,Para [Str "bud"]
,Para [Str "another"]
,Header 2 ("",[],[]) [Str "raw",Space,Str "html"]
,Header 2 ("raw-html",[],[]) [Str "raw",Space,Str "html"]
,Para [Str "hi",Space,RawInline (Format "html") "<span style=\"color:red\">",Emph [Str "there"],RawInline (Format "html") "</span>",Str "."]
,Para [RawInline (Format "html") "<ins>",Str "inserted",RawInline (Format "html") "</ins>"]
,RawBlock (Format "html") "<div class=\"special\">"
,Para [Str "hi",Space,Emph [Str "there"]]
,RawBlock (Format "html") "</div>"
,Header 2 ("",[],[]) [Str "sup,",Space,Str "sub,",Space,Str "del"]
,Header 2 ("sup-sub-del",[],[]) [Str "sup,",Space,Str "sub,",Space,Str "del"]
,Para [Str "H",Subscript [Str "2"],Str "O",Space,Str "base",Superscript [Emph [Str "exponent"]],Space,Strikeout [Str "hello"]]
,Header 2 ("",[],[]) [Str "inline",Space,Str "code"]
,Header 2 ("inline-code",[],[]) [Str "inline",Space,Str "code"]
,Para [Code ("",[],[]) "*\8594*",Space,Code ("",[],[]) "typed",Space,Code ("",["haskell"],[]) ">>="]
,Header 2 ("",[],[]) [Str "code",Space,Str "blocks"]
,Header 2 ("code-blocks",[],[]) [Str "code",Space,Str "blocks"]
,CodeBlock ("",[],[]) "case xs of\n (_:_) -> reverse xs\n [] -> ['*']"
,CodeBlock ("",["haskell"],[]) "case xs of\n (_:_) -> reverse xs\n [] -> ['*']"
,CodeBlock ("",["ruby","numberLines"],[("startFrom","100")]) "widgets.each do |w|\n print w.price\nend"
,Header 2 ("",[],[]) [Str "block",Space,Str "quotes"]
,Header 2 ("block-quotes",[],[]) [Str "block",Space,Str "quotes"]
,Para [Str "Regular",Space,Str "paragraph"]
[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "block",Space,Str "quote."]
,Para [Str "With",Space,Str "two",Space,Str "paragraphs."]]
,Para [Str "Nother",Space,Str "paragraph."]
,Header 2 ("",[],[]) [Str "external",Space,Str "links"]
,Header 2 ("external-links",[],[]) [Str "external",Space,Str "links"]
,Para [Link [Emph [Str "Google"],Space,Str "search",Space,Str "engine"] ("","")]
,Para [Link [Str ""] ("","")]
,Para [Link [Str "1"] ("",""),Space,Link [Str "2"] ("","")]
,Para [Link [Str "email",Space,Str "me"] ("","")]
,Header 2 ("",[],[]) [Str "internal",Space,Str "links"]
,Header 2 ("internal-links",[],[]) [Str "internal",Space,Str "links"]
,Para [Link [Str "Help"] ("Help","wikilink")]
,Para [Link [Str "the",Space,Str "help",Space,Str "page"] ("Help","wikilink")]
,Para [Link [Str "Helpers"] ("Help","wikilink")]
@ -83,12 +83,12 @@ Pandoc (Meta {unMeta = fromList []})
,Para [Link [Str "Contents"] ("Help:Contents","wikilink")]
,Para [Link [Str "#My",Space,Str "anchor"] ("#My_anchor","wikilink")]
,Para [Link [Str "and",Space,Str "text"] ("Page#with_anchor","wikilink")]
,Header 2 ("",[],[]) [Str "images"]
,Header 2 ("images",[],[]) [Str "images"]
,Para [Image [Str "caption"] ("example.jpg","fig:caption")]
,Para [Image [Str "the",Space,Emph [Str "caption"],Space,Str "with",Space,Link [Str "external",Space,Str "link"] ("","")] ("example.jpg","fig:the caption with external link")]
,Para [Image [Str "caption"] ("example.jpg","fig:caption")]
,Para [Image [Str "example.jpg"] ("example.jpg","fig:example.jpg")]
,Header 2 ("",[],[]) [Str "lists"]
,Header 2 ("lists",[],[]) [Str "lists"]
[[Plain [Str "Start",Space,Str "each",Space,Str "line"]]
,[Plain [Str "with",Space,Str "an",Space,Str "asterisk",Space,Str "(*)."]
@ -161,10 +161,10 @@ Pandoc (Meta {unMeta = fromList []})
[[Plain [Str "Amsterdam"]]
,[Plain [Str "Rotterdam"]]
,[Plain [Str "The",Space,Str "Hague"]]]
,Header 2 ("",[],[]) [Str "math"]
,Header 2 ("math",[],[]) [Str "math"]
,Para [Str "Here",Space,Str "is",Space,Str "some",Space,Math InlineMath "x=\\frac{y^\\pi}{z}",Str "."]
,Para [Str "With",Space,Str "spaces:",Space,Math InlineMath "x=\\frac{y^\\pi}{z}",Str "."]
,Header 2 ("",[],[]) [Str "preformatted",Space,Str "blocks"]
,Header 2 ("preformatted-blocks",[],[]) [Str "preformatted",Space,Str "blocks"]
,Para [Code ("",[],[]) "Start\160each\160line\160with\160a\160space.",LineBreak,Code ("",[],[]) "Text\160is\160",Strong [Code ("",[],[]) "preformatted"],Code ("",[],[]) "\160and",LineBreak,Emph [Code ("",[],[]) "markups"],Code ("",[],[]) "\160",Strong [Emph [Code ("",[],[]) "can"]],Code ("",[],[]) "\160be\160done."]
,Para [Code ("",[],[]) "\160hell\160\160\160\160\160\160yeah"]
,Para [Code ("",[],[]) "Start\160with\160a\160space\160in\160the\160first\160column,",LineBreak,Code ("",[],[]) "(before\160the\160<nowiki>).",LineBreak,Code ("",[],[]) "",LineBreak,Code ("",[],[]) "Then\160your\160block\160format\160will\160be",LineBreak,Code ("",[],[]) "\160\160\160\160maintained.",LineBreak,Code ("",[],[]) "",LineBreak,Code ("",[],[]) "This\160is\160good\160for\160copying\160in\160code\160blocks:",LineBreak,Code ("",[],[]) "",LineBreak,Code ("",[],[]) "def\160function():",LineBreak,Code ("",[],[]) "\160\160\160\160\"\"\"documentation\160string\"\"\"",LineBreak,Code ("",[],[]) "",LineBreak,Code ("",[],[]) "\160\160\160\160if\160True:",LineBreak,Code ("",[],[]) "\160\160\160\160\160\160\160\160print\160True",LineBreak,Code ("",[],[]) "\160\160\160\160else:",LineBreak,Code ("",[],[]) "\160\160\160\160\160\160\160\160print\160False"]
@ -174,12 +174,12 @@ Pandoc (Meta {unMeta = fromList []})
,Para [Str "Don't",Space,Str "need"]
,Para [Code ("",[],[]) "a\160blank\160line"]
,Para [Str "around",Space,Str "a",Space,Str "preformatted",Space,Str "block."]
,Header 2 ("",[],[]) [Str "templates"]
,Header 2 ("templates",[],[]) [Str "templates"]
,RawBlock (Format "mediawiki") "{{Welcome}}"
,RawBlock (Format "mediawiki") "{{Foo:Bar}}"
,RawBlock (Format "mediawiki") "{{Thankyou|all your effort|Me}}"
,Para [Str "Written",Space,RawInline (Format "mediawiki") "{{{date}}}",Space,Str "by",Space,RawInline (Format "mediawiki") "{{{name}}}",Str "."]
,Header 2 ("",[],[]) [Str "tables"]
,Header 2 ("tables",[],[]) [Str "tables"]
,Table [] [AlignDefault,AlignDefault] [0.0,0.0]
@ -245,6 +245,6 @@ Pandoc (Meta {unMeta = fromList []})
[[[Para [Str "Orange"]]]]
,Para [Str "Paragraph",Space,Str "after",Space,Str "the",Space,Str "table."]
,Header 2 ("",[],[]) [Str "notes"]
,Header 2 ("notes",[],[]) [Str "notes"]
,Para [Str "My",Space,Str "note!",Note [Plain [Str "This."]]]
,Para [Str "URL",Space,Str "note.",Note [Plain [Link [Str ""] ("","")]]]]
