Add DokuWiki reader (#5108)

Closes #1792
This commit is contained in:
Alexander 2019-01-06 23:06:32 +00:00 committed by John MacFarlane
parent a5f5002eef
commit 40c30a9d88
6 changed files with 855 additions and 2 deletions

View file

@ -238,6 +238,7 @@ General options {.options}
- `creole` ([Creole 1.0])
- `docbook` ([DocBook])
- `docx` ([Word docx])
- `dokuwiki` ([DokuWiki markup])
- `epub` ([EPUB])
- `fb2` ([FictionBook2] e-book)
- `gfm` ([GitHub-Flavored Markdown]),

View file

@ -18,8 +18,9 @@ description: Pandoc is a Haskell library for converting from one markup
format to another, and a command-line tool that uses
this library. It can read several dialects of Markdown and
(subsets of) HTML, reStructuredText, LaTeX, DocBook, JATS,
MediaWiki markup, TWiki markup, TikiWiki markup, Creole 1.0,
Haddock markup, OPML, Emacs Org-Mode, Emacs Muse, txt2tags,
MediaWiki markup, DokuWiki markup, TWiki markup,
TikiWiki markup, Creole 1.0, Haddock markup, OPML,
Emacs Org-Mode, Emacs Muse, txt2tags,
Vimwiki, Word Docx, ODT, EPUB, FictionBook2, roff man,
and Textile, and it can write Markdown, reStructuredText,
XHTML, HTML 5, LaTeX, ConTeXt, DocBook, JATS, OPML, TEI,
@ -467,6 +468,7 @@ library
Text.Pandoc.Readers.Muse,
Text.Pandoc.Readers.Man,
Text.Pandoc.Readers.FB2,
Text.Pandoc.Readers.DokuWiki,
Text.Pandoc.Writers,
Text.Pandoc.Writers.Native,
Text.Pandoc.Writers.Docbook,
@ -696,6 +698,7 @@ test-suite test-pandoc
Tests.Readers.Creole
Tests.Readers.Man
Tests.Readers.FB2
Tests.Readers.DokuWiki
Tests.Writers.Native
Tests.Writers.ConTeXt
Tests.Writers.Docbook

View file

@ -47,6 +47,7 @@ module Text.Pandoc.Readers
, readMarkdown
, readCommonMark
, readCreole
, readDokuWiki
, readMediaWiki
, readVimwiki
, readRST
@ -86,6 +87,7 @@ import Text.Pandoc.Readers.CommonMark
import Text.Pandoc.Readers.Creole
import Text.Pandoc.Readers.DocBook
import Text.Pandoc.Readers.Docx
import Text.Pandoc.Readers.DokuWiki
import Text.Pandoc.Readers.EPUB
import Text.Pandoc.Readers.FB2
import Text.Pandoc.Readers.Haddock
@ -123,6 +125,7 @@ readers = [ ("native" , TextReader readNative)
,("markdown_mmd", TextReader readMarkdown)
,("commonmark" , TextReader readCommonMark)
,("creole" , TextReader readCreole)
,("dokuwiki" , TextReader readDokuWiki)
,("gfm" , TextReader readCommonMark)
,("rst" , TextReader readRST)
,("mediawiki" , TextReader readMediaWiki)

View file

@ -0,0 +1,529 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}
{-
Copyright (C) 2018-2019 Alexander Krotov <ilabdsf@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
Module : Text.Pandoc.Readers.DokuWiki
Copyright : Copyright (C) 2018 Alexander Krotov
License : GNU GPL, version 2 or above
Maintainer : Alexander Krotov <ilabdsf@gmail.com>
Stability : alpha
Portability : portable
Conversion of DokuWiki text to 'Pandoc' document.
-}
module Text.Pandoc.Readers.DokuWiki (readDokuWiki) where
import Prelude
import Control.Monad
import Control.Monad.Except (throwError)
import Data.Char (isAlphaNum, isDigit)
import qualified Data.Foldable as F
import Data.List (intercalate, transpose, isPrefixOf, isSuffixOf)
import Data.List.Split (splitOn)
import Data.Maybe (fromMaybe, catMaybes)
import Data.Text (Text)
import qualified Data.Text as T
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class (PandocMonad (..))
import Text.Pandoc.Definition
import Text.Pandoc.Error (PandocError (PandocParsecError))
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (enclosed, nested)
import Text.Pandoc.Shared (crFilter, trim, underlineSpan)
-- | Read DokuWiki from an input string and return a Pandoc document.
readDokuWiki :: PandocMonad m
=> ReaderOptions
-> Text
-> m Pandoc
readDokuWiki opts s = do
let input = crFilter s
res <- runParserT parseDokuWiki def {stateOptions = opts } "source" input
case res of
Left e -> throwError $ PandocParsecError (T.unpack input) e
Right d -> return d
type DWParser = ParserT Text ParserState
-- * Utility functions
-- | Parse end-of-line, which can be either a newline or end-of-file.
eol :: Stream s m Char => ParserT s st m ()
eol = void newline <|> eof
nested :: PandocMonad m => DWParser m a -> DWParser m a
nested p = do
nestlevel <- stateMaxNestingLevel <$> getState
guard $ nestlevel > 0
updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 }
res <- p
updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
return res
guardColumnOne :: PandocMonad m => DWParser m ()
guardColumnOne = getPosition >>= \pos -> guard (sourceColumn pos == 1)
-- | Parse DokuWiki document.
parseDokuWiki :: PandocMonad m => DWParser m Pandoc
parseDokuWiki =
B.doc . mconcat <$> many block <* spaces <* eof
-- | Parse <code> and <file> attributes
codeLanguage :: PandocMonad m => DWParser m (String, [String], [(String, String)])
codeLanguage = try $ do
rawLang <- option "-" (spaceChar *> manyTill anyChar (lookAhead (spaceChar <|> char '>')))
let attr = case rawLang of
"-" -> []
l -> [l]
return ("", attr, [])
-- | Generic parser for <code> and <file> tags
codeTag :: PandocMonad m
=> ((String, [String], [(String, String)]) -> String -> a)
-> String
-> DWParser m a
codeTag f tag = try $ f
<$ char '<'
<* string tag
<*> codeLanguage
<* manyTill anyChar (char '>')
<* optional (manyTill spaceChar eol)
<*> manyTill anyChar (try $ string "</" <* string tag <* char '>')
-- * Inline parsers
-- | Parse any inline element but softbreak.
inline' :: PandocMonad m => DWParser m B.Inlines
inline' = whitespace
<|> br
<|> bold
<|> italic
<|> underlined
<|> nowiki
<|> percent
<|> link
<|> image
<|> monospaced
<|> subscript
<|> superscript
<|> deleted
<|> footnote
<|> inlineCode
<|> inlineFile
<|> inlineHtml
<|> inlinePhp
<|> autoLink
<|> autoEmail
<|> notoc
<|> nocache
<|> str
<|> symbol
<?> "inline"
-- | Parse any inline element, including soft break.
inline :: PandocMonad m => DWParser m B.Inlines
inline = endline <|> inline'
endline :: PandocMonad m => DWParser m B.Inlines
endline = try $ B.softbreak <$ skipMany spaceChar <* linebreak
whitespace :: PandocMonad m => DWParser m B.Inlines
whitespace = try $ B.space <$ skipMany1 spaceChar
br :: PandocMonad m => DWParser m B.Inlines
br = try $ B.linebreak <$ string "\\\\" <* space
linebreak :: PandocMonad m => DWParser m B.Inlines
linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline)
where lastNewline = mempty <$ eof
innerNewline = pure B.space
between :: (Monoid c, PandocMonad m, Show b)
=> DWParser m a -> DWParser m b -> (DWParser m b -> DWParser m c)
-> DWParser m c
between start end p =
mconcat <$> try (start >> notFollowedBy whitespace >> many1Till (p end) end)
enclosed :: (Monoid b, PandocMonad m, Show a)
=> DWParser m a -> (DWParser m a -> DWParser m b) -> DWParser m b
enclosed sep p = between sep (try sep) p
nestedInlines :: (Show a, PandocMonad m)
=> DWParser m a -> DWParser m B.Inlines
nestedInlines end = innerSpace <|> nestedInline
where
innerSpace = try $ whitespace <* notFollowedBy end
nestedInline = notFollowedBy whitespace >> nested inline
bold :: PandocMonad m => DWParser m B.Inlines
bold = try $ B.strong <$> enclosed (string "**") nestedInlines
italic :: PandocMonad m => DWParser m B.Inlines
italic = try $ B.emph <$> enclosed (string "//") nestedInlines
underlined :: PandocMonad m => DWParser m B.Inlines
underlined = try $ underlineSpan <$> enclosed (string "__") nestedInlines
nowiki :: PandocMonad m => DWParser m B.Inlines
nowiki = try $ B.text <$ string "<nowiki>" <*> manyTill anyChar (try $ string "</nowiki>")
percent :: PandocMonad m => DWParser m B.Inlines
percent = try $ B.text <$> enclosed (string "%%") nestedString
nestedString :: (Show a, PandocMonad m)
=> DWParser m a -> DWParser m String
nestedString end = innerSpace <|> count 1 nonspaceChar
where
innerSpace = try $ many1 spaceChar <* notFollowedBy end
monospaced :: PandocMonad m => DWParser m B.Inlines
monospaced = try $ B.code <$> enclosed (string "''") nestedString
subscript :: PandocMonad m => DWParser m B.Inlines
subscript = try $ B.subscript <$> between (string "<sub>") (try $ string "</sub>") nestedInlines
superscript :: PandocMonad m => DWParser m B.Inlines
superscript = try $ B.superscript <$> between (string "<sup>") (try $ string "</sup>") nestedInlines
deleted :: PandocMonad m => DWParser m B.Inlines
deleted = try $ B.strikeout <$> between (string "<del>") (try $ string "</del>") nestedInlines
-- | Parse a footnote.
footnote :: PandocMonad m => DWParser m B.Inlines
footnote = try $ B.note . B.para <$> between (string "((") (try $ string "))") nestedInlines
inlineCode :: PandocMonad m => DWParser m B.Inlines
inlineCode = codeTag B.codeWith "code"
inlineFile :: PandocMonad m => DWParser m B.Inlines
inlineFile = codeTag B.codeWith "file"
inlineHtml :: PandocMonad m => DWParser m B.Inlines
inlineHtml = try $ B.rawInline "html" <$ string "<html>" <*> manyTill anyChar (try $ string "</html>")
inlinePhp :: PandocMonad m => DWParser m B.Inlines
inlinePhp = try $ B.codeWith ("", ["php"], []) <$ string "<php>" <*> manyTill anyChar (try $ string "</php>")
makeLink :: (String, String) -> B.Inlines
makeLink (text, url) = B.link url "" $ B.str text
autoEmail :: PandocMonad m => DWParser m B.Inlines
autoEmail = try $ do
state <- getState
guard $ stateAllowLinks state
makeLink <$ char '<' <*> emailAddress <* char '>'
autoLink :: PandocMonad m => DWParser m B.Inlines
autoLink = try $ do
state <- getState
guard $ stateAllowLinks state
(text, url) <- uri
guard $ checkLink (last url)
return $ makeLink (text, url)
where
checkLink c
| c == '/' = True
| otherwise = isAlphaNum c
notoc :: PandocMonad m => DWParser m B.Inlines
notoc = try $ mempty <$ string "~~NOTOC~~"
nocache :: PandocMonad m => DWParser m B.Inlines
nocache = try $ mempty <$ string "~~NOCACHE~~"
str :: PandocMonad m => DWParser m B.Inlines
str = B.str <$> (many1 alphaNum <|> count 1 characterReference)
symbol :: PandocMonad m => DWParser m B.Inlines
symbol = B.str <$> count 1 nonspaceChar
link :: PandocMonad m => DWParser m B.Inlines
link = try $ do
st <- getState
guard $ stateAllowLinks st
setState $ st{ stateAllowLinks = False }
l <- linkText
setState $ st{ stateAllowLinks = True }
return l
isExternalLink :: String -> Bool
isExternalLink s =
case dropWhile (\c -> isAlphaNum c || (c `elem` ['-', '.', '+'])) s of
(':':'/':'/':_) -> True
_ -> False
isAbsolutePath :: String -> Bool
isAbsolutePath ('.':_) = False
isAbsolutePath s = ':' `elem` s
normalizeDots :: String -> String
normalizeDots path@('.':_) =
case dropWhile (== '.') path of
':':_ -> path
_ -> takeWhile (== '.') path ++ ':':dropWhile (== '.') path
normalizeDots path = path
normalizeInternalPath :: String -> String
normalizeInternalPath path =
if isAbsolutePath path
then ensureAbsolute normalizedPath
else normalizedPath
where
normalizedPath = intercalate "/" $ dropWhile (== ".") $ splitOn ":" $ normalizeDots path
ensureAbsolute s@('/':_) = s
ensureAbsolute s = '/':s
normalizePath :: String -> String
normalizePath path =
if isExternalLink path
then path
else normalizeInternalPath path
urlToText :: String -> String
urlToText url =
if isExternalLink url
then url
else reverse $ takeWhile (/= ':') $ reverse url
-- Parse link or image
parseLink :: PandocMonad m
=> (String -> Maybe B.Inlines -> B.Inlines)
-> String
-> String
-> DWParser m B.Inlines
parseLink f l r = f
<$ string l
<*> many1Till anyChar (lookAhead (void (char '|') <|> try (void $ string r)))
<*> optionMaybe (B.trimInlines . mconcat <$> (char '|' *> manyTill inline (try $ lookAhead $ string r)))
<* string r
-- | Split Interwiki link into left and right part
-- | Return Nothing if it is not Interwiki link
splitInterwiki :: String -> Maybe (String, String)
splitInterwiki path =
case span (\c -> isAlphaNum c || c == '.') path of
(l, '>':r) -> Just (l, r)
_ -> Nothing
interwikiToUrl :: String -> String -> String
interwikiToUrl "callto" page = "callto://" ++ page
interwikiToUrl "doku" page = "https://www.dokuwiki.org/" ++ page
interwikiToUrl "phpfn" page = "https://secure.php.net/" ++ page
interwikiToUrl "tel" page = "tel:" ++ page
interwikiToUrl "wp" page = "https://en.wikipedia.org/wiki/" ++ page
interwikiToUrl "wpde" page = "https://de.wikipedia.org/wiki/" ++ page
interwikiToUrl "wpes" page = "https://es.wikipedia.org/wiki/" ++ page
interwikiToUrl "wpfr" page = "https://fr.wikipedia.org/wiki/" ++ page
interwikiToUrl "wpjp" page = "https://jp.wikipedia.org/wiki/" ++ page
interwikiToUrl "wppl" page = "https://pl.wikipedia.org/wiki/" ++ page
interwikiToUrl _ page = "https://www.google.com/search?q=" ++ page ++ "&btnI=lucky"
linkText :: PandocMonad m => DWParser m B.Inlines
linkText = parseLink fromRaw "[[" "]]"
where
fromRaw path description =
B.link normalizedPath "" (fromMaybe (B.str defaultDescription) description)
where
path' = trim path
interwiki = splitInterwiki path'
normalizedPath =
case interwiki of
Nothing -> normalizePath path'
Just (l, r) -> interwikiToUrl l r
defaultDescription =
case interwiki of
Nothing -> urlToText path'
Just (_, r) -> r
-- Matches strings like "100x100" (width x height) and "50" (width)
isWidthHeightParameter :: String -> Bool
isWidthHeightParameter s =
case s of
(x:xs) ->
isDigit x && case dropWhile isDigit xs of
('x':ys@(_:_)) -> all isDigit ys
"" -> True
_ -> False
_ -> False
parseWidthHeight :: String -> (Maybe String, Maybe String)
parseWidthHeight s = (width, height)
where
width = Just $ takeWhile isDigit s
height =
case dropWhile isDigit s of
('x':xs) -> Just xs
_ -> Nothing
image :: PandocMonad m => DWParser m B.Inlines
image = parseLink fromRaw "{{" "}}"
where
fromRaw path description =
if linkOnly
then B.link normalizedPath "" (fromMaybe defaultDescription description)
else B.imageWith ("", classes, attributes) normalizedPath "" (fromMaybe defaultDescription description)
where
(path', parameters) = span (/= '?') $ trim path
normalizedPath = normalizePath path'
leftPadding = " " `isPrefixOf` path
rightPadding = " " `isSuffixOf` path
classes =
case (leftPadding, rightPadding) of
(False, False) -> []
(False, True) -> ["align-left"]
(True, False) -> ["align-right"]
(True, True) -> ["align-center"]
parameterList = splitOn "&" $ drop 1 parameters
linkOnly = "linkonly" `elem` parameterList
(width, height) = maybe (Nothing, Nothing) parseWidthHeight (F.find isWidthHeightParameter parameterList)
attributes = catMaybes [fmap ("width",) width, fmap ("height",) height]
defaultDescription = B.str $ urlToText path'
-- * Block parsers
block :: PandocMonad m => DWParser m B.Blocks
block = do
res <- mempty <$ skipMany1 blankline
<|> blockElements
<|> para
skipMany blankline
trace (take 60 $ show $ B.toList res)
return res
blockElements :: PandocMonad m => DWParser m B.Blocks
blockElements = horizontalLine
<|> header
<|> list " "
<|> indentedCode
<|> quote
<|> blockCode
<|> blockFile
<|> blockHtml
<|> blockPhp
<|> table
horizontalLine :: PandocMonad m => DWParser m B.Blocks
horizontalLine = try $ B.horizontalRule <$ string "---" <* many1 (char '-') <* eol
header :: PandocMonad m => DWParser m B.Blocks
header = try $ do
guardColumnOne
eqs <- many1 (char '=')
let lev = length eqs
guard $ lev < 7
contents <- B.trimInlines . mconcat <$> manyTill inline (try $ char '=' *> many1 (char '='))
attr <- registerHeader nullAttr contents
return $ B.headerWith attr (7 - lev) contents
list :: PandocMonad m => String -> DWParser m B.Blocks
list prefix = bulletList prefix <|> orderedList prefix
bulletList :: PandocMonad m => String -> DWParser m B.Blocks
bulletList prefix = try $ B.bulletList <$> parseList prefix '*'
orderedList :: PandocMonad m => String -> DWParser m B.Blocks
orderedList prefix = try $ B.orderedList <$> parseList prefix '-'
parseList :: PandocMonad m
=> String
-> Char
-> DWParser m [B.Blocks]
parseList prefix marker =
many1 ((<>) <$> item <*> fmap mconcat (many continuation))
where
continuation = try $ list (" " ++ prefix)
item = try $ string prefix *> char marker *> char ' ' *> itemContents
itemContents = B.plain . mconcat <$> many1Till inline' eol
indentedCode :: PandocMonad m => DWParser m B.Blocks
indentedCode = try $ B.codeBlock . unlines <$> many1 indentedLine
where
indentedLine = try $ string " " *> manyTill anyChar eol
quote :: PandocMonad m => DWParser m B.Blocks
quote = try $ nestedQuote 0
where
prefix level = count level (char '>')
contents level = nestedQuote level <|> quoteLine
quoteLine = try $ B.plain . B.trimInlines . mconcat <$> many1Till inline' eol
quoteContents level = (<>) <$> contents level <*> quoteContinuation level
quoteContinuation level = mconcat <$> many (try $ prefix level *> contents level)
nestedQuote level = B.blockQuote <$ char '>' <*> quoteContents (level + 1 :: Int)
blockHtml :: PandocMonad m => DWParser m B.Blocks
blockHtml = try $ B.rawBlock "html"
<$ string "<HTML>"
<* optional (manyTill spaceChar eol)
<*> manyTill anyChar (try $ string "</HTML>")
blockPhp :: PandocMonad m => DWParser m B.Blocks
blockPhp = try $ B.codeBlockWith ("", ["php"], [])
<$ string "<PHP>"
<* optional (manyTill spaceChar eol)
<*> manyTill anyChar (try $ string "</PHP>")
table :: PandocMonad m => DWParser m B.Blocks
table = do
firstSeparator <- lookAhead tableCellSeparator
rows <- tableRows
let (headerRow, body) = if firstSeparator == '^'
then (head rows, tail rows)
else ([], rows)
let attrs = const (AlignDefault, 0.0) <$> transpose rows
pure $ B.table mempty attrs headerRow body
tableRows :: PandocMonad m => DWParser m [[B.Blocks]]
tableRows = many1 tableRow
tableRow :: PandocMonad m => DWParser m [B.Blocks]
tableRow = many1Till tableCell tableRowEnd
tableRowEnd :: PandocMonad m => DWParser m Char
tableRowEnd = try $ tableCellSeparator <* manyTill spaceChar eol
tableCellSeparator :: PandocMonad m => DWParser m Char
tableCellSeparator = char '|' <|> char '^'
tableCell :: PandocMonad m => DWParser m B.Blocks
tableCell = try $ B.plain . B.trimInlines . mconcat <$> (normalCell <|> headerCell)
where
normalCell = char '|' *> manyTill inline' (lookAhead tableCellSeparator)
headerCell = char '^' *> manyTill inline' (lookAhead tableCellSeparator)
blockCode :: PandocMonad m => DWParser m B.Blocks
blockCode = codeTag B.codeBlockWith "code"
blockFile :: PandocMonad m => DWParser m B.Blocks
blockFile = codeTag B.codeBlockWith "file"
para :: PandocMonad m => DWParser m B.Blocks
para = result . mconcat <$> many1Till inline endOfParaElement
where
endOfParaElement = lookAhead $ endOfInput <|> endOfPara <|> newBlockElement
endOfInput = try $ skipMany blankline >> skipSpaces >> eof
endOfPara = try $ blankline >> skipMany1 blankline
newBlockElement = try $ blankline >> void blockElements
result content = if F.all (==Space) content
then mempty
else B.para $ B.trimInlines content

View file

@ -0,0 +1,315 @@
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Tests.Readers.DokuWiki (tests) where
import Prelude
import Data.Text (Text)
import qualified Data.Text as T
import Test.Tasty
import Tests.Helpers
import Text.Pandoc
import Text.Pandoc.Arbitrary ()
import Text.Pandoc.Builder
import Text.Pandoc.Shared (underlineSpan)
dokuwiki :: Text -> Pandoc
dokuwiki = purely $ readDokuWiki def{ readerStandalone = True }
infix 4 =:
(=:) :: ToString c
=> String -> (Text, c) -> TestTree
(=:) = test dokuwiki
tests :: [TestTree]
tests = [ testGroup "inlines"
[ "Bold" =:
"**bold**" =?>
para (strong "bold")
, "Italic" =:
"//italic//" =?>
para (emph "italic")
, "Underlined" =:
"__underlined__" =?>
para (underlineSpan "underlined")
, "Monospaced" =:
"''monospaced''" =?>
para (code "monospaced")
, "Combined" =:
"**__//''combine''//__**" =?>
para (strong $ underlineSpan $ emph $ code "combine")
, "Nowiki" =:
T.unlines [ "<nowiki>"
, "This is some text which contains addresses like this: http://www.splitbrain.org and **formatting**, but nothing is done with it."
, "</nowiki>"
] =?>
para "This is some text which contains addresses like this: http://www.splitbrain.org and **formatting**, but nothing is done with it."
, "Percent" =:
"The same is true for %%//__this__ text// with a smiley ;-)%%." =?>
para "The same is true for //__this__ text// with a smiley ;-)."
, "Subscript" =:
"<sub>subscript</sub>" =?>
para (subscript "subscript")
, "Superscript" =:
"<sup>superscript</sup>" =?>
para (superscript "superscript")
, "Deleted" =:
"<del>deleted</del>" =?>
para (strikeout "deleted")
, "Inline code" =:
"foo <code java>public static void main</code> bar" =?>
para (text "foo " <> codeWith ("", ["java"], []) "public static void main" <> text " bar")
, "Inline file" =:
"foo <file></code></file> bar" =?>
para (text "foo " <> code "</code>" <> text " bar")
, "Inline HTML" =:
"<html>\nThis is some <span style=\"color:red;font-size:150%;\">inline HTML</span>\n</html>" =?>
para (rawInline "html" "\nThis is some <span style=\"color:red;font-size:150%;\">inline HTML</span>\n")
, "Inline PHP" =:
"<php>echo '<p>Hello World</p>';</php>" =?>
para (codeWith ("", ["php"], []) "echo '<p>Hello World</p>';")
, "Linebreak" =:
T.unlines [ "This is some text with some linebreaks\\\\ Note that the"
, "two backslashes are only recognized at the end of a line\\\\"
, "or followed by\\\\ a whitespace \\\\this happens without it."
] =?>
para ("This is some text with some linebreaks" <> linebreak <> "Note that the\n" <>
"two backslashes are only recognized at the end of a line" <> linebreak <>
"or followed by" <> linebreak <> "a whitespace \\\\this happens without it.")
, testGroup "External links"
[ "Autolink" =:
"http://www.google.com" =?>
para (link "http://www.google.com" "" (str "http://www.google.com"))
, "Link without description" =:
"[[https://example.com]]" =?>
para (link "https://example.com" "" (str "https://example.com"))
, "Link with description" =:
"[[http://www.google.com|This Link points to google]]" =?>
para (link "http://www.google.com" "" (text "This Link points to google"))
, "Trim whitespace around link and description" =:
"[[ http://www.google.com | This Link points to google ]]" =?>
para (link "http://www.google.com" "" (text "This Link points to google"))
, "Email address" =:
"<andi@splitbrain.org>" =?>
para (link "mailto:andi@splitbrain.org" "" (str "andi@splitbrain.org"))
]
, testGroup "Internal links"
[ "Current namespace" =:
"[[example]]" =?>
para (link "example" "" (str "example"))
, "Current namespace starting with dot" =:
"[[.example]]" =?>
para (link "example" "" (str ".example"))
, "Current namespace starting with dot and colon" =:
"[[.:example]]" =?>
para (link "example" "" (str "example"))
, "Root namespace" =:
"[[:example]]" =?>
para (link "/example" "" (str "example"))
, "Parent namespace" =:
"[[..example]]" =?>
para (link "../example" "" (str "..example"))
, "Parent namespace with colon" =:
"[[..:example]]" =?>
para (link "../example" "" (str "example"))
, "Beneath the root namespace" =:
"[[wiki:example]]" =?>
para (link "/wiki/example" "" (str "example"))
, "Explicitly beneath the root namespace" =:
"[[:wiki:example]]" =?>
para (link "/wiki/example" "" (str "example"))
]
, testGroup "Interwiki links"
[ "Interwiki without description" =:
"[[doku>DokuWiki]]" =?>
para (link "https://www.dokuwiki.org/DokuWiki" "" (str "DokuWiki"))
, "Interwiki link with description" =:
"[[doku>toolbar|quickbuttons]]" =?>
para (link "https://www.dokuwiki.org/toolbar" "" (str "quickbuttons"))
]
, "Footnote" =:
"((This is a footnote))" =?>
para (note (para "This is a footnote"))
, testGroup "Images"
[ "Image" =:
"{{image.jpg}}" =?>
para (image "image.jpg" "" (str "image.jpg"))
, "Image with caption" =:
"{{image.png|This is the caption}}" =?>
para (image "image.png" "" "This is the caption")
, "Image with } in caption" =:
"{{image.png|There is an } in the caption}}" =?>
para (image "image.png" "" "There is an } in the caption")
, "Wiki namespace starting with dot" =:
"{{.wiki:image.jpg}}" =?>
para (image "wiki/image.jpg" "" (str "image.jpg"))
, "Left aligned image" =:
"{{wiki:dokuwiki-128.png }}" =?>
para (imageWith ("", ["align-left"], []) "/wiki/dokuwiki-128.png" "" (str "dokuwiki-128.png"))
, "Right aligned image" =:
"{{ wiki:dokuwiki-128.png}}" =?>
para (imageWith ("", ["align-right"], []) "/wiki/dokuwiki-128.png" "" (str "dokuwiki-128.png"))
, "Centered image" =:
"{{ wiki:dokuwiki-128.png }}" =?>
para (imageWith ("", ["align-center"], []) "/wiki/dokuwiki-128.png" "" (str "dokuwiki-128.png"))
, "Image with width" =:
"{{wiki:dokuwiki-128.png?50}}" =?>
para (imageWith ("", [], [("width", "50")]) "/wiki/dokuwiki-128.png" "" (str "dokuwiki-128.png"))
, "Image with width and height" =:
"{{wiki:dokuwiki-128.png?nocache&50x100}}" =?>
para (imageWith ("", [], [("width", "50"), ("height", "100")]) "/wiki/dokuwiki-128.png" "" (str "dokuwiki-128.png"))
, "Linkonly" =:
"{{wiki:dokuwiki-128.png?linkonly}}" =?>
para (link "/wiki/dokuwiki-128.png" "" (str "dokuwiki-128.png"))
]
, "Ignore ~~NOTOC~~" =:
"Here is a ~~NOTOC~~ macro" =?>
para "Here is a macro"
, "Ignore ~~NOCACHE~~" =:
"Here is a ~~NOCACHE~~ macro" =?>
para "Here is a macro"
]
, testGroup "Sectioning"
[ "Headline level 1" =:
"====== Headline Level 1 ======" =?>
header 1 "Headline Level 1"
, "Headline level 2" =:
"===== Headline Level 2 =====" =?>
header 2 "Headline Level 2"
, "Headline level 3" =:
"==== Headline Level 3 ====" =?>
header 3 "Headline Level 3"
, "Headline level 4" =:
"=== Headline Level 4 ===" =?>
header 4 "Headline Level 4"
, "Headline level 5" =:
"== Headline Level 5 ==" =?>
header 5 "Headline Level 5"
, "Only two closing = are required" =:
"====== Headline Level 1 ==" =?>
header 1 "Headline Level 1"
, "One closing = is not enough" =:
"====== Headline Level 1 =" =?>
para "====== Headline Level 1 ="
, "One closing = is not enough" =:
"== Headline with = sign ==" =?>
header 5 "Headline with = sign"
]
, "Horizontal line" =:
"----" =?>
horizontalRule
, testGroup "Lists"
[ "Unordered list" =:
T.unlines [ " * This is a list"
, " * The second item"
, " * You may have different levels"
, " * Another item"
] =?>
bulletList [ plain "This is a list"
, plain "The second item" <>
bulletList [ plain "You may have different levels" ]
, plain "Another item"
]
, "Ordered list" =:
T.unlines [ " - The same list but ordered"
, " - Another item"
, " - Just use indention for deeper levels"
, " - That's it"
] =?>
orderedList [ plain "The same list but ordered"
, plain "Another item" <>
orderedList [ plain "Just use indention for deeper levels" ]
, plain "That's it"
]
, "Multiline list items" =: -- https://www.dokuwiki.org/faq:lists
T.unlines [ " - first item"
, " - second item with linebreak\\\\ second line"
, " - third item with code: <code>"
, "some code"
, "comes here"
, "</code>"
, " - fourth item"
] =?>
orderedList [ plain "first item"
, plain ("second item with linebreak" <> linebreak <> " second line")
, plain ("third item with code: " <> code "some code\ncomes here\n")
, plain "fourth item"
]
]
, "Block HTML" =:
T.unlines [ "<HTML>"
, "<p style=\"border:2px dashed red;\">And this is some block HTML</p>"
, "</HTML>"
] =?>
rawBlock "html" "<p style=\"border:2px dashed red;\">And this is some block HTML</p>\n"
, "Block PHP" =:
T.unlines [ "<PHP>"
, "echo '<p>Hello World</p>';"
, "</PHP>"
] =?>
codeBlockWith ("", ["php"], []) "echo '<p>Hello World</p>';\n"
, "Quote" =:
T.unlines [ "> foo"
, ">no space is required after >"
, "> bar"
, ">> baz"
, "> bat"
] =?>
blockQuote (plain "foo" <>
plain "no space is required after >" <>
plain "bar" <>
blockQuote (plain "baz") <>
plain "bat")
, "Code block" =:
T.unlines [ "<code>"
, "foo bar baz"
, "</code>"
] =?>
codeBlock "foo bar baz\n"
, "Java code block" =:
T.unlines [ "<code java>"
, "public static void main"
, "</code>"
] =?>
codeBlockWith ("", ["java"], []) "public static void main\n"
, "File with filename and no language" =:
T.unlines [ "<file - foo.bar>"
, "file contents"
, "</file>"
] =?>
codeBlock "file contents\n"
, "Table" =:
T.unlines [ "| foo | bar |"
, "| bat | baz |"
] =?>
table mempty [(AlignDefault, 0.0), (AlignDefault, 0.0)]
[]
[[plain "foo", plain "bar"]
,[plain "bat", plain "baz"]]
, "Table with header" =:
T.unlines [ "^ foo ^ bar ^"
, "| bat | baz |"
] =?>
table mempty [(AlignDefault, 0.0), (AlignDefault, 0.0)]
[plain "foo", plain "bar"]
[[plain "bat", plain "baz"]]
, "Table with colspan" =:
T.unlines [ "^ 0,0 ^ 0,1 ^ 0,2 ^"
, "| 1,0 | 1,1 ||"
, "| 2,0 | 2,1 | 2,2 |"
] =?>
table mempty [(AlignDefault, 0.0), (AlignDefault, 0.0), (AlignDefault, 0.0)]
[plain "0,0", plain "0,1", plain "0,2"]
[[plain "1,0", plain "1,1", mempty]
,[plain "2,0", plain "2,1", plain "2,2"]
]
, "Indented code block" =:
T.unlines [ "foo"
, " bar"
, " bat"
, "baz"
] =?>
para "foo" <>
codeBlock "bar\n bat\n" <>
para "baz"
]

View file

@ -11,6 +11,7 @@ import qualified Tests.Lua
import qualified Tests.Old
import qualified Tests.Readers.Creole
import qualified Tests.Readers.Docx
import qualified Tests.Readers.DokuWiki
import qualified Tests.Readers.EPUB
import qualified Tests.Readers.FB2
import qualified Tests.Readers.HTML
@ -80,6 +81,7 @@ tests = testGroup "pandoc tests" [ Tests.Command.tests
, testGroup "Creole" Tests.Readers.Creole.tests
, testGroup "Man" Tests.Readers.Man.tests
, testGroup "FB2" Tests.Readers.FB2.tests
, testGroup "DokuWiki" Tests.Readers.DokuWiki.tests
]
, testGroup "Lua filters" Tests.Lua.tests
]