MediaWiki reader: Support HTML lists.
This commit is contained in:
parent
3fe6ea4c41
commit
81bec8558c
3 changed files with 66 additions and 12 deletions
|
@ -30,9 +30,6 @@ Conversion of mediawiki text to 'Pandoc' document.
|
|||
-}
|
||||
{-
|
||||
TODO:
|
||||
_ support HTML lists
|
||||
_ support list style attributes and start values in ol lists, also
|
||||
value attribute on li
|
||||
_ support internal links http://www.mediawiki.org/wiki/Help:Links
|
||||
_ support external links (partially implemented)
|
||||
_ support images http://www.mediawiki.org/wiki/Help:Images
|
||||
|
@ -50,7 +47,7 @@ import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag,
|
|||
import Text.Pandoc.XML ( fromEntities )
|
||||
import Text.Pandoc.Parsing
|
||||
import Text.Pandoc.Generic ( bottomUp )
|
||||
import Text.Pandoc.Shared ( stripTrailingNewlines )
|
||||
import Text.Pandoc.Shared ( stripTrailingNewlines, safeRead )
|
||||
import Data.Monoid (mconcat, mempty)
|
||||
import Control.Applicative ((<$>), (<*), (*>), (<$))
|
||||
import Control.Monad
|
||||
|
@ -121,8 +118,8 @@ block :: MWParser Blocks
|
|||
block = mempty <$ skipMany1 blankline
|
||||
<|> header
|
||||
<|> hrule
|
||||
<|> bulletList
|
||||
<|> orderedList
|
||||
<|> bulletList
|
||||
<|> definitionList
|
||||
<|> mempty <$ try (spaces *> htmlComment)
|
||||
<|> preformatted
|
||||
|
@ -151,7 +148,7 @@ blockTag = do
|
|||
"pre" -> B.codeBlock . trimCode <$> charsInTags "pre"
|
||||
"syntaxhighlight" -> syntaxhighlight attrs
|
||||
"haskell" -> B.codeBlockWith ("",["haskell"],[]) . trimCode <$>
|
||||
charsInTags "haskell"
|
||||
charsInTags "haskell"
|
||||
"p" -> return mempty
|
||||
_ -> return $ B.rawBlock "html" raw
|
||||
|
||||
|
@ -207,10 +204,23 @@ header = try $ do
|
|||
return $ B.header lev contents
|
||||
|
||||
bulletList :: MWParser Blocks
|
||||
bulletList = B.bulletList <$> many1 (listItem '*')
|
||||
bulletList = B.bulletList <$>
|
||||
( many1 (listItem '*')
|
||||
<|> (htmlTag (~== TagOpen "ul" []) *> spaces *> many (listItem '*' <|> li) <*
|
||||
optional (htmlTag (~== TagClose "ul"))) )
|
||||
|
||||
orderedList :: MWParser Blocks
|
||||
orderedList = B.orderedList <$> many1 (listItem '#')
|
||||
orderedList =
|
||||
(B.orderedList <$> many1 (listItem '#'))
|
||||
<|> (B.orderedList <$> (htmlTag (~== TagOpen "ul" []) *> spaces *>
|
||||
many (listItem '#' <|> li) <*
|
||||
optional (htmlTag (~== TagClose "ul"))))
|
||||
<|> do (tag,_) <- htmlTag (~== TagOpen "ol" [])
|
||||
spaces
|
||||
items <- many (listItem '#' <|> li)
|
||||
optional (htmlTag (~== TagClose "ol"))
|
||||
let start = maybe 1 id $ safeRead $ fromAttrib "start" tag
|
||||
return $ B.orderedListWith (start, DefaultStyle, DefaultDelim) items
|
||||
|
||||
definitionList :: MWParser Blocks
|
||||
definitionList = B.definitionList <$> many1 defListItem
|
||||
|
@ -237,6 +247,10 @@ anyListStart = char '*'
|
|||
<|> char ':'
|
||||
<|> char ';'
|
||||
|
||||
li :: MWParser Blocks
|
||||
li = htmlTag (~== TagOpen "li" []) *>
|
||||
(firstParaToPlain <$> blocksInTags "li") <* spaces
|
||||
|
||||
listItem :: Char -> MWParser Blocks
|
||||
listItem c = try $ do
|
||||
extras <- many (try $ char c <* lookAhead listStartChar)
|
||||
|
@ -261,11 +275,14 @@ listItem' c = try $ do
|
|||
first <- manyTill anyChar newline
|
||||
rest <- many (try $ char c *> lookAhead listStartChar *>
|
||||
manyTill anyChar newline)
|
||||
contents <- parseFromString (mconcat <$> many1 block)
|
||||
$ unlines $ first : rest
|
||||
parseFromString (firstParaToPlain . mconcat <$> many1 block)
|
||||
$ unlines $ first : rest
|
||||
|
||||
firstParaToPlain :: Blocks -> Blocks
|
||||
firstParaToPlain contents =
|
||||
case viewl (B.unMany contents) of
|
||||
(Para xs) :< ys -> return $ B.Many $ (Plain xs) <| ys
|
||||
_ -> return contents
|
||||
(Para xs) :< ys -> B.Many $ (Plain xs) <| ys
|
||||
_ -> contents
|
||||
|
||||
--
|
||||
-- inline parsers
|
||||
|
|
|
@ -130,6 +130,21 @@ Pandoc (Meta {docTitle = [], docAuthors = [], docDate = []})
|
|||
,OrderedList (1,DefaultStyle,DefaultDelim)
|
||||
[[Plain [Str "five",Space,Str "sub",Space,Str "1",Space,Str "sub",Space,Str "1"]]]]
|
||||
,[Plain [Str "five",Space,Str "sub",Space,Str "2"]]]]]
|
||||
,OrderedList (1,DefaultStyle,DefaultDelim)
|
||||
[[Plain [Str "list",Space,Str "item",Space,Emph [Str "emph"]]
|
||||
,OrderedList (1,DefaultStyle,DefaultDelim)
|
||||
[[Plain [Str "list",Space,Str "item",Space,Str "B1"]]
|
||||
,[Plain [Str "list",Space,Str "item",Space,Str "B2"]]]
|
||||
,Para [Str "continuing",Space,Str "list",Space,Str "item",Space,Str "A1"]]
|
||||
,[Plain [Str "list",Space,Str "item",Space,Str "A2"]]]
|
||||
,OrderedList (1,DefaultStyle,DefaultDelim)
|
||||
[[Plain [Str "abc"]]
|
||||
,[Plain [Str "def"]]
|
||||
,[Plain [Str "ghi"]]]
|
||||
,OrderedList (9,DefaultStyle,DefaultDelim)
|
||||
[[Plain [Str "Amsterdam"]]
|
||||
,[Plain [Str "Rotterdam"]]
|
||||
,[Plain [Str "The",Space,Str "Hague"]]]
|
||||
,Header 2 [Str "math"]
|
||||
,Para [Str "Here",Space,Str "is",Space,Str "some",Space,Math InlineMath "x=\\frac{y^\\pi}{z}",Str "."]
|
||||
,Header 2 [Str "preformatted",Space,Str "blocks"]
|
||||
|
|
|
@ -189,6 +189,28 @@ ends the list.
|
|||
### five sub 1 sub 1
|
||||
## five sub 2
|
||||
|
||||
<ol>
|
||||
<li>list item ''emph''
|
||||
<ol>
|
||||
<li>list item B1</li>
|
||||
<li>list item B2</li>
|
||||
</ol>continuing list item A1
|
||||
</li>
|
||||
<li>list item A2</li>
|
||||
</ol>
|
||||
|
||||
<ul>
|
||||
#abc
|
||||
#def
|
||||
#ghi
|
||||
</ul>
|
||||
|
||||
<ol start="9">
|
||||
<li>Amsterdam</li>
|
||||
<li>Rotterdam</li>
|
||||
<li>The Hague</li>
|
||||
</ol>
|
||||
|
||||
== math ==
|
||||
|
||||
Here is some <math>x=\frac{y^\pi}{z}</math>.
|
||||
|
|
Loading…
Reference in a new issue