MediaWiki reader: Support HTML lists.

This commit is contained in:
John MacFarlane 2012-09-13 14:47:11 -07:00
parent 3fe6ea4c41
commit 81bec8558c
3 changed files with 66 additions and 12 deletions

View file

@ -30,9 +30,6 @@ Conversion of mediawiki text to 'Pandoc' document.
-}
{-
TODO:
_ support HTML lists
_ support list style attributes and start values in ol lists, also
value attribute on li
_ support internal links http://www.mediawiki.org/wiki/Help:Links
_ support external links (partially implemented)
_ support images http://www.mediawiki.org/wiki/Help:Images
@ -50,7 +47,7 @@ import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag,
import Text.Pandoc.XML ( fromEntities )
import Text.Pandoc.Parsing
import Text.Pandoc.Generic ( bottomUp )
import Text.Pandoc.Shared ( stripTrailingNewlines )
import Text.Pandoc.Shared ( stripTrailingNewlines, safeRead )
import Data.Monoid (mconcat, mempty)
import Control.Applicative ((<$>), (<*), (*>), (<$))
import Control.Monad
@ -121,8 +118,8 @@ block :: MWParser Blocks
block = mempty <$ skipMany1 blankline
<|> header
<|> hrule
<|> bulletList
<|> orderedList
<|> bulletList
<|> definitionList
<|> mempty <$ try (spaces *> htmlComment)
<|> preformatted
@ -151,7 +148,7 @@ blockTag = do
"pre" -> B.codeBlock . trimCode <$> charsInTags "pre"
"syntaxhighlight" -> syntaxhighlight attrs
"haskell" -> B.codeBlockWith ("",["haskell"],[]) . trimCode <$>
charsInTags "haskell"
charsInTags "haskell"
"p" -> return mempty
_ -> return $ B.rawBlock "html" raw
@ -207,10 +204,23 @@ header = try $ do
return $ B.header lev contents
bulletList :: MWParser Blocks
bulletList = B.bulletList <$> many1 (listItem '*')
bulletList = B.bulletList <$>
( many1 (listItem '*')
<|> (htmlTag (~== TagOpen "ul" []) *> spaces *> many (listItem '*' <|> li) <*
optional (htmlTag (~== TagClose "ul"))) )
orderedList :: MWParser Blocks
orderedList = B.orderedList <$> many1 (listItem '#')
orderedList =
(B.orderedList <$> many1 (listItem '#'))
<|> (B.orderedList <$> (htmlTag (~== TagOpen "ul" []) *> spaces *>
many (listItem '#' <|> li) <*
optional (htmlTag (~== TagClose "ul"))))
<|> do (tag,_) <- htmlTag (~== TagOpen "ol" [])
spaces
items <- many (listItem '#' <|> li)
optional (htmlTag (~== TagClose "ol"))
let start = maybe 1 id $ safeRead $ fromAttrib "start" tag
return $ B.orderedListWith (start, DefaultStyle, DefaultDelim) items
definitionList :: MWParser Blocks
definitionList = B.definitionList <$> many1 defListItem
@ -237,6 +247,10 @@ anyListStart = char '*'
<|> char ':'
<|> char ';'
li :: MWParser Blocks
li = htmlTag (~== TagOpen "li" []) *>
(firstParaToPlain <$> blocksInTags "li") <* spaces
listItem :: Char -> MWParser Blocks
listItem c = try $ do
extras <- many (try $ char c <* lookAhead listStartChar)
@ -261,11 +275,14 @@ listItem' c = try $ do
first <- manyTill anyChar newline
rest <- many (try $ char c *> lookAhead listStartChar *>
manyTill anyChar newline)
contents <- parseFromString (mconcat <$> many1 block)
$ unlines $ first : rest
parseFromString (firstParaToPlain . mconcat <$> many1 block)
$ unlines $ first : rest
firstParaToPlain :: Blocks -> Blocks
firstParaToPlain contents =
case viewl (B.unMany contents) of
(Para xs) :< ys -> return $ B.Many $ (Plain xs) <| ys
_ -> return contents
(Para xs) :< ys -> B.Many $ (Plain xs) <| ys
_ -> contents
--
-- inline parsers

View file

@ -130,6 +130,21 @@ Pandoc (Meta {docTitle = [], docAuthors = [], docDate = []})
,OrderedList (1,DefaultStyle,DefaultDelim)
[[Plain [Str "five",Space,Str "sub",Space,Str "1",Space,Str "sub",Space,Str "1"]]]]
,[Plain [Str "five",Space,Str "sub",Space,Str "2"]]]]]
,OrderedList (1,DefaultStyle,DefaultDelim)
[[Plain [Str "list",Space,Str "item",Space,Emph [Str "emph"]]
,OrderedList (1,DefaultStyle,DefaultDelim)
[[Plain [Str "list",Space,Str "item",Space,Str "B1"]]
,[Plain [Str "list",Space,Str "item",Space,Str "B2"]]]
,Para [Str "continuing",Space,Str "list",Space,Str "item",Space,Str "A1"]]
,[Plain [Str "list",Space,Str "item",Space,Str "A2"]]]
,OrderedList (1,DefaultStyle,DefaultDelim)
[[Plain [Str "abc"]]
,[Plain [Str "def"]]
,[Plain [Str "ghi"]]]
,OrderedList (9,DefaultStyle,DefaultDelim)
[[Plain [Str "Amsterdam"]]
,[Plain [Str "Rotterdam"]]
,[Plain [Str "The",Space,Str "Hague"]]]
,Header 2 [Str "math"]
,Para [Str "Here",Space,Str "is",Space,Str "some",Space,Math InlineMath "x=\\frac{y^\\pi}{z}",Str "."]
,Header 2 [Str "preformatted",Space,Str "blocks"]

View file

@ -189,6 +189,28 @@ ends the list.
### five sub 1 sub 1
## five sub 2
<ol>
<li>list item ''emph''
<ol>
<li>list item B1</li>
<li>list item B2</li>
</ol>continuing list item A1
</li>
<li>list item A2</li>
</ol>
<ul>
#abc
#def
#ghi
</ul>
<ol start="9">
<li>Amsterdam</li>
<li>Rotterdam</li>
<li>The Hague</li>
</ol>
== math ==
Here is some <math>x=\frac{y^\pi}{z}</math>.