MediaWiki reader: Support HTML lists.
This commit is contained in:
parent
3fe6ea4c41
commit
81bec8558c
3 changed files with 66 additions and 12 deletions
|
@@ -30,9 +30,6 @@ Conversion of mediawiki text to 'Pandoc' document.
|
||||||
-}
|
-}
|
||||||
{-
|
{-
|
||||||
TODO:
|
TODO:
|
||||||
_ support HTML lists
|
|
||||||
_ support list style attributes and start values in ol lists, also
|
|
||||||
value attribute on li
|
|
||||||
_ support internal links http://www.mediawiki.org/wiki/Help:Links
|
_ support internal links http://www.mediawiki.org/wiki/Help:Links
|
||||||
_ support external links (partially implemented)
|
_ support external links (partially implemented)
|
||||||
_ support images http://www.mediawiki.org/wiki/Help:Images
|
_ support images http://www.mediawiki.org/wiki/Help:Images
|
||||||
|
@@ -50,7 +47,7 @@ import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag,
|
||||||
import Text.Pandoc.XML ( fromEntities )
|
import Text.Pandoc.XML ( fromEntities )
|
||||||
import Text.Pandoc.Parsing
|
import Text.Pandoc.Parsing
|
||||||
import Text.Pandoc.Generic ( bottomUp )
|
import Text.Pandoc.Generic ( bottomUp )
|
||||||
import Text.Pandoc.Shared ( stripTrailingNewlines )
|
import Text.Pandoc.Shared ( stripTrailingNewlines, safeRead )
|
||||||
import Data.Monoid (mconcat, mempty)
|
import Data.Monoid (mconcat, mempty)
|
||||||
import Control.Applicative ((<$>), (<*), (*>), (<$))
|
import Control.Applicative ((<$>), (<*), (*>), (<$))
|
||||||
import Control.Monad
|
import Control.Monad
|
||||||
|
@@ -121,8 +118,8 @@ block :: MWParser Blocks
|
||||||
block = mempty <$ skipMany1 blankline
|
block = mempty <$ skipMany1 blankline
|
||||||
<|> header
|
<|> header
|
||||||
<|> hrule
|
<|> hrule
|
||||||
<|> bulletList
|
|
||||||
<|> orderedList
|
<|> orderedList
|
||||||
|
<|> bulletList
|
||||||
<|> definitionList
|
<|> definitionList
|
||||||
<|> mempty <$ try (spaces *> htmlComment)
|
<|> mempty <$ try (spaces *> htmlComment)
|
||||||
<|> preformatted
|
<|> preformatted
|
||||||
|
@@ -151,7 +148,7 @@ blockTag = do
|
||||||
"pre" -> B.codeBlock . trimCode <$> charsInTags "pre"
|
"pre" -> B.codeBlock . trimCode <$> charsInTags "pre"
|
||||||
"syntaxhighlight" -> syntaxhighlight attrs
|
"syntaxhighlight" -> syntaxhighlight attrs
|
||||||
"haskell" -> B.codeBlockWith ("",["haskell"],[]) . trimCode <$>
|
"haskell" -> B.codeBlockWith ("",["haskell"],[]) . trimCode <$>
|
||||||
charsInTags "haskell"
|
charsInTags "haskell"
|
||||||
"p" -> return mempty
|
"p" -> return mempty
|
||||||
_ -> return $ B.rawBlock "html" raw
|
_ -> return $ B.rawBlock "html" raw
|
||||||
|
|
||||||
|
@@ -207,10 +204,23 @@ header = try $ do
|
||||||
return $ B.header lev contents
|
return $ B.header lev contents
|
||||||
|
|
||||||
bulletList :: MWParser Blocks
|
bulletList :: MWParser Blocks
|
||||||
bulletList = B.bulletList <$> many1 (listItem '*')
|
bulletList = B.bulletList <$>
|
||||||
|
( many1 (listItem '*')
|
||||||
|
<|> (htmlTag (~== TagOpen "ul" []) *> spaces *> many (listItem '*' <|> li) <*
|
||||||
|
optional (htmlTag (~== TagClose "ul"))) )
|
||||||
|
|
||||||
orderedList :: MWParser Blocks
|
orderedList :: MWParser Blocks
|
||||||
orderedList = B.orderedList <$> many1 (listItem '#')
|
orderedList =
|
||||||
|
(B.orderedList <$> many1 (listItem '#'))
|
||||||
|
<|> (B.orderedList <$> (htmlTag (~== TagOpen "ul" []) *> spaces *>
|
||||||
|
many (listItem '#' <|> li) <*
|
||||||
|
optional (htmlTag (~== TagClose "ul"))))
|
||||||
|
<|> do (tag,_) <- htmlTag (~== TagOpen "ol" [])
|
||||||
|
spaces
|
||||||
|
items <- many (listItem '#' <|> li)
|
||||||
|
optional (htmlTag (~== TagClose "ol"))
|
||||||
|
let start = maybe 1 id $ safeRead $ fromAttrib "start" tag
|
||||||
|
return $ B.orderedListWith (start, DefaultStyle, DefaultDelim) items
|
||||||
|
|
||||||
definitionList :: MWParser Blocks
|
definitionList :: MWParser Blocks
|
||||||
definitionList = B.definitionList <$> many1 defListItem
|
definitionList = B.definitionList <$> many1 defListItem
|
||||||
|
@@ -237,6 +247,10 @@ anyListStart = char '*'
|
||||||
<|> char ':'
|
<|> char ':'
|
||||||
<|> char ';'
|
<|> char ';'
|
||||||
|
|
||||||
|
li :: MWParser Blocks
|
||||||
|
li = htmlTag (~== TagOpen "li" []) *>
|
||||||
|
(firstParaToPlain <$> blocksInTags "li") <* spaces
|
||||||
|
|
||||||
listItem :: Char -> MWParser Blocks
|
listItem :: Char -> MWParser Blocks
|
||||||
listItem c = try $ do
|
listItem c = try $ do
|
||||||
extras <- many (try $ char c <* lookAhead listStartChar)
|
extras <- many (try $ char c <* lookAhead listStartChar)
|
||||||
|
@@ -261,11 +275,14 @@ listItem' c = try $ do
|
||||||
first <- manyTill anyChar newline
|
first <- manyTill anyChar newline
|
||||||
rest <- many (try $ char c *> lookAhead listStartChar *>
|
rest <- many (try $ char c *> lookAhead listStartChar *>
|
||||||
manyTill anyChar newline)
|
manyTill anyChar newline)
|
||||||
contents <- parseFromString (mconcat <$> many1 block)
|
parseFromString (firstParaToPlain . mconcat <$> many1 block)
|
||||||
$ unlines $ first : rest
|
$ unlines $ first : rest
|
||||||
|
|
||||||
|
firstParaToPlain :: Blocks -> Blocks
|
||||||
|
firstParaToPlain contents =
|
||||||
case viewl (B.unMany contents) of
|
case viewl (B.unMany contents) of
|
||||||
(Para xs) :< ys -> return $ B.Many $ (Plain xs) <| ys
|
(Para xs) :< ys -> B.Many $ (Plain xs) <| ys
|
||||||
_ -> return contents
|
_ -> contents
|
||||||
|
|
||||||
--
|
--
|
||||||
-- inline parsers
|
-- inline parsers
|
||||||
|
|
|
@@ -130,6 +130,21 @@ Pandoc (Meta {docTitle = [], docAuthors = [], docDate = []})
|
||||||
,OrderedList (1,DefaultStyle,DefaultDelim)
|
,OrderedList (1,DefaultStyle,DefaultDelim)
|
||||||
[[Plain [Str "five",Space,Str "sub",Space,Str "1",Space,Str "sub",Space,Str "1"]]]]
|
[[Plain [Str "five",Space,Str "sub",Space,Str "1",Space,Str "sub",Space,Str "1"]]]]
|
||||||
,[Plain [Str "five",Space,Str "sub",Space,Str "2"]]]]]
|
,[Plain [Str "five",Space,Str "sub",Space,Str "2"]]]]]
|
||||||
|
,OrderedList (1,DefaultStyle,DefaultDelim)
|
||||||
|
[[Plain [Str "list",Space,Str "item",Space,Emph [Str "emph"]]
|
||||||
|
,OrderedList (1,DefaultStyle,DefaultDelim)
|
||||||
|
[[Plain [Str "list",Space,Str "item",Space,Str "B1"]]
|
||||||
|
,[Plain [Str "list",Space,Str "item",Space,Str "B2"]]]
|
||||||
|
,Para [Str "continuing",Space,Str "list",Space,Str "item",Space,Str "A1"]]
|
||||||
|
,[Plain [Str "list",Space,Str "item",Space,Str "A2"]]]
|
||||||
|
,OrderedList (1,DefaultStyle,DefaultDelim)
|
||||||
|
[[Plain [Str "abc"]]
|
||||||
|
,[Plain [Str "def"]]
|
||||||
|
,[Plain [Str "ghi"]]]
|
||||||
|
,OrderedList (9,DefaultStyle,DefaultDelim)
|
||||||
|
[[Plain [Str "Amsterdam"]]
|
||||||
|
,[Plain [Str "Rotterdam"]]
|
||||||
|
,[Plain [Str "The",Space,Str "Hague"]]]
|
||||||
,Header 2 [Str "math"]
|
,Header 2 [Str "math"]
|
||||||
,Para [Str "Here",Space,Str "is",Space,Str "some",Space,Math InlineMath "x=\\frac{y^\\pi}{z}",Str "."]
|
,Para [Str "Here",Space,Str "is",Space,Str "some",Space,Math InlineMath "x=\\frac{y^\\pi}{z}",Str "."]
|
||||||
,Header 2 [Str "preformatted",Space,Str "blocks"]
|
,Header 2 [Str "preformatted",Space,Str "blocks"]
|
||||||
|
|
|
@@ -189,6 +189,28 @@ ends the list.
|
||||||
### five sub 1 sub 1
|
### five sub 1 sub 1
|
||||||
## five sub 2
|
## five sub 2
|
||||||
|
|
||||||
|
<ol>
|
||||||
|
<li>list item ''emph''
|
||||||
|
<ol>
|
||||||
|
<li>list item B1</li>
|
||||||
|
<li>list item B2</li>
|
||||||
|
</ol>continuing list item A1
|
||||||
|
</li>
|
||||||
|
<li>list item A2</li>
|
||||||
|
</ol>
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
#abc
|
||||||
|
#def
|
||||||
|
#ghi
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<ol start="9">
|
||||||
|
<li>Amsterdam</li>
|
||||||
|
<li>Rotterdam</li>
|
||||||
|
<li>The Hague</li>
|
||||||
|
</ol>
|
||||||
|
|
||||||
== math ==
|
== math ==
|
||||||
|
|
||||||
Here is some <math>x=\frac{y^\pi}{z}</math>.
|
Here is some <math>x=\frac{y^\pi}{z}</math>.
|
||||||
|
|
Loading…
Add table
Reference in a new issue