MediaWiki reader: Add automatic header identifiers.
This commit is contained in:
parent
d5660275a3
commit
0fd2176e29
2 changed files with 49 additions and 31 deletions
|
@ -1,4 +1,5 @@
|
|||
{-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7
|
||||
{-# LANGUAGE RelaxedPolyRec, FlexibleInstances, TypeSynonymInstances #-}
|
||||
-- RelaxedPolyRec needed for inlinesBetween on GHC < 7
|
||||
{-
|
||||
Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu>
|
||||
|
||||
|
@ -51,6 +52,7 @@ import Data.List (intersperse, intercalate, isPrefixOf )
|
|||
import Text.HTML.TagSoup
|
||||
import Data.Sequence (viewl, ViewL(..), (<|))
|
||||
import qualified Data.Foldable as F
|
||||
import qualified Data.Map as M
|
||||
import Data.Char (isDigit, isSpace)
|
||||
|
||||
-- | Read mediawiki from an input string and return a Pandoc document.
|
||||
|
@ -62,6 +64,8 @@ readMediaWiki opts s =
|
|||
, mwMaxNestingLevel = 4
|
||||
, mwNextLinkNumber = 1
|
||||
, mwCategoryLinks = []
|
||||
, mwHeaderMap = M.empty
|
||||
, mwIdentifierList = []
|
||||
}
|
||||
"source" (s ++ "\n") of
|
||||
Left err' -> error $ "\nError:\n" ++ show err'
|
||||
|
@ -71,10 +75,23 @@ data MWState = MWState { mwOptions :: ReaderOptions
|
|||
, mwMaxNestingLevel :: Int
|
||||
, mwNextLinkNumber :: Int
|
||||
, mwCategoryLinks :: [Inlines]
|
||||
, mwHeaderMap :: M.Map Inlines String
|
||||
, mwIdentifierList :: [String]
|
||||
}
|
||||
|
||||
type MWParser = Parser [Char] MWState
|
||||
|
||||
instance HasReaderOptions MWParser where
|
||||
askReaderOption f = (f . mwOptions) `fmap` getState
|
||||
|
||||
instance HasHeaderMap MWParser where
|
||||
getHeaderMap = fmap mwHeaderMap getState
|
||||
putHeaderMap hm = updateState $ \st -> st{ mwHeaderMap = hm }
|
||||
|
||||
instance HasIdentifierList MWParser where
|
||||
getIdentifierList = fmap mwIdentifierList getState
|
||||
putIdentifierList l = updateState $ \st -> st{ mwIdentifierList = l }
|
||||
|
||||
--
|
||||
-- auxiliary functions
|
||||
--
|
||||
|
@ -351,7 +368,8 @@ header = try $ do
|
|||
let lev = length eqs
|
||||
guard $ lev <= 6
|
||||
contents <- trimInlines . mconcat <$> manyTill inline (count lev $ char '=')
|
||||
return $ B.header lev contents
|
||||
attr <- registerHeader nullAttr contents
|
||||
return $ B.headerWith attr lev contents
|
||||
|
||||
bulletList :: MWParser Blocks
|
||||
bulletList = B.bulletList <$>
|
||||
|
|
|
@ -1,39 +1,39 @@
|
|||
Pandoc (Meta {unMeta = fromList []})
|
||||
[Header 1 ("",[],[]) [Str "header"]
|
||||
,Header 2 ("",[],[]) [Str "header",Space,Str "level",Space,Str "two"]
|
||||
,Header 3 ("",[],[]) [Str "header",Space,Str "level",Space,Str "3"]
|
||||
,Header 4 ("",[],[]) [Str "header",Space,Emph [Str "level"],Space,Str "four"]
|
||||
,Header 5 ("",[],[]) [Str "header",Space,Str "level",Space,Str "5"]
|
||||
,Header 6 ("",[],[]) [Str "header",Space,Str "level",Space,Str "6"]
|
||||
[Header 1 ("header",[],[]) [Str "header"]
|
||||
,Header 2 ("header-level-two",[],[]) [Str "header",Space,Str "level",Space,Str "two"]
|
||||
,Header 3 ("header-level-3",[],[]) [Str "header",Space,Str "level",Space,Str "3"]
|
||||
,Header 4 ("header-level-four",[],[]) [Str "header",Space,Emph [Str "level"],Space,Str "four"]
|
||||
,Header 5 ("header-level-5",[],[]) [Str "header",Space,Str "level",Space,Str "5"]
|
||||
,Header 6 ("header-level-6",[],[]) [Str "header",Space,Str "level",Space,Str "6"]
|
||||
,Para [Str "=======",Space,Str "not",Space,Str "a",Space,Str "header",Space,Str "========"]
|
||||
,Para [Code ("",[],[]) "==\160not\160a\160header\160=="]
|
||||
,Header 2 ("",[],[]) [Str "emph",Space,Str "and",Space,Str "strong"]
|
||||
,Header 2 ("emph-and-strong",[],[]) [Str "emph",Space,Str "and",Space,Str "strong"]
|
||||
,Para [Emph [Str "emph"],Space,Strong [Str "strong"]]
|
||||
,Para [Strong [Emph [Str "strong",Space,Str "and",Space,Str "emph"]]]
|
||||
,Para [Strong [Emph [Str "emph",Space,Str "inside"],Space,Str "strong"]]
|
||||
,Para [Strong [Str "strong",Space,Str "with",Space,Emph [Str "emph"]]]
|
||||
,Para [Emph [Strong [Str "strong",Space,Str "inside"],Space,Str "emph"]]
|
||||
,Header 2 ("",[],[]) [Str "horizontal",Space,Str "rule"]
|
||||
,Header 2 ("horizontal-rule",[],[]) [Str "horizontal",Space,Str "rule"]
|
||||
,Para [Str "top"]
|
||||
,HorizontalRule
|
||||
,Para [Str "bottom"]
|
||||
,HorizontalRule
|
||||
,Header 2 ("",[],[]) [Str "nowiki"]
|
||||
,Header 2 ("nowiki",[],[]) [Str "nowiki"]
|
||||
,Para [Str "''not",Space,Str "emph''"]
|
||||
,Header 2 ("",[],[]) [Str "strikeout"]
|
||||
,Header 2 ("strikeout",[],[]) [Str "strikeout"]
|
||||
,Para [Strikeout [Str "This",Space,Str "is",Space,Emph [Str "struck",Space,Str "out"]]]
|
||||
,Header 2 ("",[],[]) [Str "entities"]
|
||||
,Header 2 ("entities",[],[]) [Str "entities"]
|
||||
,Para [Str "hi",Space,Str "&",Space,Str "low"]
|
||||
,Para [Str "hi",Space,Str "&",Space,Str "low"]
|
||||
,Para [Str "G\246del"]
|
||||
,Para [Str "\777\2730"]
|
||||
,Header 2 ("",[],[]) [Str "comments"]
|
||||
,Header 2 ("comments",[],[]) [Str "comments"]
|
||||
,Para [Str "inline",Space,Str "comment"]
|
||||
,Para [Str "between",Space,Str "blocks"]
|
||||
,Header 2 ("",[],[]) [Str "linebreaks"]
|
||||
,Header 2 ("linebreaks",[],[]) [Str "linebreaks"]
|
||||
,Para [Str "hi",LineBreak,Str "there"]
|
||||
,Para [Str "hi",LineBreak,Str "there"]
|
||||
,Header 2 ("",[],[]) [Str ":",Space,Str "indents"]
|
||||
,Header 2 ("indents",[],[]) [Str ":",Space,Str "indents"]
|
||||
,Para [Str "hi"]
|
||||
,DefinitionList
|
||||
[([],
|
||||
|
@ -46,36 +46,36 @@ Pandoc (Meta {unMeta = fromList []})
|
|||
[([],
|
||||
[[Plain [Str "there"]]])]]])]
|
||||
,Para [Str "bud"]
|
||||
,Header 2 ("",[],[]) [Str "p",Space,Str "tags"]
|
||||
,Header 2 ("p-tags",[],[]) [Str "p",Space,Str "tags"]
|
||||
,Para [Str "hi",Space,Str "there"]
|
||||
,Para [Str "bud"]
|
||||
,Para [Str "another"]
|
||||
,Header 2 ("",[],[]) [Str "raw",Space,Str "html"]
|
||||
,Header 2 ("raw-html",[],[]) [Str "raw",Space,Str "html"]
|
||||
,Para [Str "hi",Space,RawInline (Format "html") "<span style=\"color:red\">",Emph [Str "there"],RawInline (Format "html") "</span>",Str "."]
|
||||
,Para [RawInline (Format "html") "<ins>",Str "inserted",RawInline (Format "html") "</ins>"]
|
||||
,RawBlock (Format "html") "<div class=\"special\">"
|
||||
,Para [Str "hi",Space,Emph [Str "there"]]
|
||||
,RawBlock (Format "html") "</div>"
|
||||
,Header 2 ("",[],[]) [Str "sup,",Space,Str "sub,",Space,Str "del"]
|
||||
,Header 2 ("sup-sub-del",[],[]) [Str "sup,",Space,Str "sub,",Space,Str "del"]
|
||||
,Para [Str "H",Subscript [Str "2"],Str "O",Space,Str "base",Superscript [Emph [Str "exponent"]],Space,Strikeout [Str "hello"]]
|
||||
,Header 2 ("",[],[]) [Str "inline",Space,Str "code"]
|
||||
,Header 2 ("inline-code",[],[]) [Str "inline",Space,Str "code"]
|
||||
,Para [Code ("",[],[]) "*\8594*",Space,Code ("",[],[]) "typed",Space,Code ("",["haskell"],[]) ">>="]
|
||||
,Header 2 ("",[],[]) [Str "code",Space,Str "blocks"]
|
||||
,Header 2 ("code-blocks",[],[]) [Str "code",Space,Str "blocks"]
|
||||
,CodeBlock ("",[],[]) "case xs of\n (_:_) -> reverse xs\n [] -> ['*']"
|
||||
,CodeBlock ("",["haskell"],[]) "case xs of\n (_:_) -> reverse xs\n [] -> ['*']"
|
||||
,CodeBlock ("",["ruby","numberLines"],[("startFrom","100")]) "widgets.each do |w|\n print w.price\nend"
|
||||
,Header 2 ("",[],[]) [Str "block",Space,Str "quotes"]
|
||||
,Header 2 ("block-quotes",[],[]) [Str "block",Space,Str "quotes"]
|
||||
,Para [Str "Regular",Space,Str "paragraph"]
|
||||
,BlockQuote
|
||||
[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "block",Space,Str "quote."]
|
||||
,Para [Str "With",Space,Str "two",Space,Str "paragraphs."]]
|
||||
,Para [Str "Nother",Space,Str "paragraph."]
|
||||
,Header 2 ("",[],[]) [Str "external",Space,Str "links"]
|
||||
,Header 2 ("external-links",[],[]) [Str "external",Space,Str "links"]
|
||||
,Para [Link [Emph [Str "Google"],Space,Str "search",Space,Str "engine"] ("http://google.com","")]
|
||||
,Para [Link [Str "http://johnmacfarlane.net/pandoc/"] ("http://johnmacfarlane.net/pandoc/","")]
|
||||
,Para [Link [Str "1"] ("http://google.com",""),Space,Link [Str "2"] ("http://yahoo.com","")]
|
||||
,Para [Link [Str "email",Space,Str "me"] ("mailto:info@example.org","")]
|
||||
,Header 2 ("",[],[]) [Str "internal",Space,Str "links"]
|
||||
,Header 2 ("internal-links",[],[]) [Str "internal",Space,Str "links"]
|
||||
,Para [Link [Str "Help"] ("Help","wikilink")]
|
||||
,Para [Link [Str "the",Space,Str "help",Space,Str "page"] ("Help","wikilink")]
|
||||
,Para [Link [Str "Helpers"] ("Help","wikilink")]
|
||||
|
@ -83,12 +83,12 @@ Pandoc (Meta {unMeta = fromList []})
|
|||
,Para [Link [Str "Contents"] ("Help:Contents","wikilink")]
|
||||
,Para [Link [Str "#My",Space,Str "anchor"] ("#My_anchor","wikilink")]
|
||||
,Para [Link [Str "and",Space,Str "text"] ("Page#with_anchor","wikilink")]
|
||||
,Header 2 ("",[],[]) [Str "images"]
|
||||
,Header 2 ("images",[],[]) [Str "images"]
|
||||
,Para [Image [Str "caption"] ("example.jpg","fig:caption")]
|
||||
,Para [Image [Str "the",Space,Emph [Str "caption"],Space,Str "with",Space,Link [Str "external",Space,Str "link"] ("http://google.com","")] ("example.jpg","fig:the caption with external link")]
|
||||
,Para [Image [Str "caption"] ("example.jpg","fig:caption")]
|
||||
,Para [Image [Str "example.jpg"] ("example.jpg","fig:example.jpg")]
|
||||
,Header 2 ("",[],[]) [Str "lists"]
|
||||
,Header 2 ("lists",[],[]) [Str "lists"]
|
||||
,BulletList
|
||||
[[Plain [Str "Start",Space,Str "each",Space,Str "line"]]
|
||||
,[Plain [Str "with",Space,Str "an",Space,Str "asterisk",Space,Str "(*)."]
|
||||
|
@ -161,10 +161,10 @@ Pandoc (Meta {unMeta = fromList []})
|
|||
[[Plain [Str "Amsterdam"]]
|
||||
,[Plain [Str "Rotterdam"]]
|
||||
,[Plain [Str "The",Space,Str "Hague"]]]
|
||||
,Header 2 ("",[],[]) [Str "math"]
|
||||
,Header 2 ("math",[],[]) [Str "math"]
|
||||
,Para [Str "Here",Space,Str "is",Space,Str "some",Space,Math InlineMath "x=\\frac{y^\\pi}{z}",Str "."]
|
||||
,Para [Str "With",Space,Str "spaces:",Space,Math InlineMath "x=\\frac{y^\\pi}{z}",Str "."]
|
||||
,Header 2 ("",[],[]) [Str "preformatted",Space,Str "blocks"]
|
||||
,Header 2 ("preformatted-blocks",[],[]) [Str "preformatted",Space,Str "blocks"]
|
||||
,Para [Code ("",[],[]) "Start\160each\160line\160with\160a\160space.",LineBreak,Code ("",[],[]) "Text\160is\160",Strong [Code ("",[],[]) "preformatted"],Code ("",[],[]) "\160and",LineBreak,Emph [Code ("",[],[]) "markups"],Code ("",[],[]) "\160",Strong [Emph [Code ("",[],[]) "can"]],Code ("",[],[]) "\160be\160done."]
|
||||
,Para [Code ("",[],[]) "\160hell\160\160\160\160\160\160yeah"]
|
||||
,Para [Code ("",[],[]) "Start\160with\160a\160space\160in\160the\160first\160column,",LineBreak,Code ("",[],[]) "(before\160the\160<nowiki>).",LineBreak,Code ("",[],[]) "",LineBreak,Code ("",[],[]) "Then\160your\160block\160format\160will\160be",LineBreak,Code ("",[],[]) "\160\160\160\160maintained.",LineBreak,Code ("",[],[]) "",LineBreak,Code ("",[],[]) "This\160is\160good\160for\160copying\160in\160code\160blocks:",LineBreak,Code ("",[],[]) "",LineBreak,Code ("",[],[]) "def\160function():",LineBreak,Code ("",[],[]) "\160\160\160\160\"\"\"documentation\160string\"\"\"",LineBreak,Code ("",[],[]) "",LineBreak,Code ("",[],[]) "\160\160\160\160if\160True:",LineBreak,Code ("",[],[]) "\160\160\160\160\160\160\160\160print\160True",LineBreak,Code ("",[],[]) "\160\160\160\160else:",LineBreak,Code ("",[],[]) "\160\160\160\160\160\160\160\160print\160False"]
|
||||
|
@ -174,12 +174,12 @@ Pandoc (Meta {unMeta = fromList []})
|
|||
,Para [Str "Don't",Space,Str "need"]
|
||||
,Para [Code ("",[],[]) "a\160blank\160line"]
|
||||
,Para [Str "around",Space,Str "a",Space,Str "preformatted",Space,Str "block."]
|
||||
,Header 2 ("",[],[]) [Str "templates"]
|
||||
,Header 2 ("templates",[],[]) [Str "templates"]
|
||||
,RawBlock (Format "mediawiki") "{{Welcome}}"
|
||||
,RawBlock (Format "mediawiki") "{{Foo:Bar}}"
|
||||
,RawBlock (Format "mediawiki") "{{Thankyou|all your effort|Me}}"
|
||||
,Para [Str "Written",Space,RawInline (Format "mediawiki") "{{{date}}}",Space,Str "by",Space,RawInline (Format "mediawiki") "{{{name}}}",Str "."]
|
||||
,Header 2 ("",[],[]) [Str "tables"]
|
||||
,Header 2 ("tables",[],[]) [Str "tables"]
|
||||
,Table [] [AlignDefault,AlignDefault] [0.0,0.0]
|
||||
[[]
|
||||
,[]]
|
||||
|
@ -245,6 +245,6 @@ Pandoc (Meta {unMeta = fromList []})
|
|||
[[]]
|
||||
[[[Para [Str "Orange"]]]]
|
||||
,Para [Str "Paragraph",Space,Str "after",Space,Str "the",Space,Str "table."]
|
||||
,Header 2 ("",[],[]) [Str "notes"]
|
||||
,Header 2 ("notes",[],[]) [Str "notes"]
|
||||
,Para [Str "My",Space,Str "note!",Note [Plain [Str "This."]]]
|
||||
,Para [Str "URL",Space,Str "note.",Note [Plain [Link [Str "http://docs.python.org/library/functions.html#range"] ("http://docs.python.org/library/functions.html#range","")]]]]
|
||||
|
|
Loading…
Add table
Reference in a new issue