Add DokuWiki reader (#5108)

Closes #1792
2019-01-06 23:06:32 +00:00 · 2019-01-06 23:06:32 +00:00 · 40c30a9d88
commit 40c30a9d88
parent a5f5002eef
6 changed files with 855 additions and 2 deletions
--- a/MANUAL.txt
+++ b/MANUAL.txt
@ -238,6 +238,7 @@ General options {.options}
    - `creole` ([Creole 1.0])
    - `docbook` ([DocBook])
    - `docx` ([Word docx])
+    - `dokuwiki` ([DokuWiki markup])
    - `epub` ([EPUB])
    - `fb2` ([FictionBook2] e-book)
    - `gfm` ([GitHub-Flavored Markdown]),
--- a/pandoc.cabal
+++ b/pandoc.cabal
@ -18,8 +18,9 @@ description:     Pandoc is a Haskell library for converting from one markup
                 format to another, and a command-line tool that uses
                 this library. It can read several dialects of Markdown and
                 (subsets of) HTML, reStructuredText, LaTeX, DocBook, JATS,
-                 MediaWiki markup, TWiki markup, TikiWiki markup, Creole 1.0,
-                 Haddock markup, OPML, Emacs Org-Mode, Emacs Muse, txt2tags,
+                 MediaWiki markup, DokuWiki markup, TWiki markup,
+                 TikiWiki markup, Creole 1.0, Haddock markup, OPML,
+                 Emacs Org-Mode, Emacs Muse, txt2tags,
                 Vimwiki, Word Docx, ODT, EPUB, FictionBook2, roff man,
                 and Textile, and it can write Markdown, reStructuredText,
                 XHTML, HTML 5, LaTeX, ConTeXt, DocBook, JATS, OPML, TEI,
@ -467,6 +468,7 @@ library
                   Text.Pandoc.Readers.Muse,
                   Text.Pandoc.Readers.Man,
                   Text.Pandoc.Readers.FB2,
+                   Text.Pandoc.Readers.DokuWiki,
                   Text.Pandoc.Writers,
                   Text.Pandoc.Writers.Native,
                   Text.Pandoc.Writers.Docbook,
@ -696,6 +698,7 @@ test-suite test-pandoc
                  Tests.Readers.Creole
                  Tests.Readers.Man
                  Tests.Readers.FB2
+                  Tests.Readers.DokuWiki
                  Tests.Writers.Native
                  Tests.Writers.ConTeXt
                  Tests.Writers.Docbook
--- a/src/Text/Pandoc/Readers.hs
+++ b/src/Text/Pandoc/Readers.hs
@ -47,6 +47,7 @@ module Text.Pandoc.Readers
  , readMarkdown
  , readCommonMark
  , readCreole
+  , readDokuWiki
  , readMediaWiki
  , readVimwiki
  , readRST
@ -86,6 +87,7 @@ import Text.Pandoc.Readers.CommonMark
 import Text.Pandoc.Readers.Creole
 import Text.Pandoc.Readers.DocBook
 import Text.Pandoc.Readers.Docx
+import Text.Pandoc.Readers.DokuWiki
 import Text.Pandoc.Readers.EPUB
 import Text.Pandoc.Readers.FB2
 import Text.Pandoc.Readers.Haddock
@ -123,6 +125,7 @@ readers = [ ("native"       , TextReader readNative)
           ,("markdown_mmd",  TextReader readMarkdown)
           ,("commonmark"   , TextReader readCommonMark)
           ,("creole"       , TextReader readCreole)
+           ,("dokuwiki"     , TextReader readDokuWiki)
           ,("gfm"          , TextReader readCommonMark)
           ,("rst"          , TextReader readRST)
           ,("mediawiki"    , TextReader readMediaWiki)
--- a/src/Text/Pandoc/Readers/DokuWiki.hs
+++ b/src/Text/Pandoc/Readers/DokuWiki.hs
@ -0,0 +1,529 @@
+{-# LANGUAGE FlexibleContexts  #-}
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE TupleSections     #-}
+
+{-
+  Copyright (C) 2018-2019 Alexander Krotov <ilabdsf@gmail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Readers.DokuWiki
+   Copyright   : Copyright (C) 2018 Alexander Krotov
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : Alexander Krotov <ilabdsf@gmail.com>
+   Stability   : alpha
+   Portability : portable
+
+Conversion of DokuWiki text to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.DokuWiki (readDokuWiki) where
+
+import Prelude
+import Control.Monad
+import Control.Monad.Except (throwError)
+import Data.Char (isAlphaNum, isDigit)
+import qualified Data.Foldable as F
+import Data.List (intercalate, transpose, isPrefixOf, isSuffixOf)
+import Data.List.Split (splitOn)
+import Data.Maybe (fromMaybe, catMaybes)
+import Data.Text (Text)
+import qualified Data.Text as T
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Class (PandocMonad (..))
+import Text.Pandoc.Definition
+import Text.Pandoc.Error (PandocError (PandocParsecError))
+import Text.Pandoc.Options
+import Text.Pandoc.Parsing hiding (enclosed, nested)
+import Text.Pandoc.Shared (crFilter, trim, underlineSpan)
+
+-- | Read DokuWiki from an input string and return a Pandoc document.
+readDokuWiki :: PandocMonad m
+             => ReaderOptions
+             -> Text
+             -> m Pandoc
+readDokuWiki opts s = do
+  let input = crFilter s
+  res <- runParserT parseDokuWiki def {stateOptions = opts } "source" input
+  case res of
+       Left e  -> throwError $ PandocParsecError (T.unpack input) e
+       Right d -> return d
+
+type DWParser = ParserT Text ParserState
+
+-- * Utility functions
+
+-- | Parse end-of-line, which can be either a newline or end-of-file.
+eol :: Stream s m Char => ParserT s st m ()
+eol = void newline <|> eof
+
+nested :: PandocMonad m => DWParser m a -> DWParser m a
+nested p = do
+  nestlevel <- stateMaxNestingLevel <$>  getState
+  guard $ nestlevel > 0
+  updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 }
+  res <- p
+  updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
+  return res
+
+guardColumnOne :: PandocMonad m => DWParser m ()
+guardColumnOne = getPosition >>= \pos -> guard (sourceColumn pos == 1)
+
+-- | Parse DokuWiki document.
+parseDokuWiki :: PandocMonad m => DWParser m Pandoc
+parseDokuWiki =
+  B.doc . mconcat <$> many block <* spaces <* eof
+
+-- | Parse <code> and <file> attributes
+codeLanguage :: PandocMonad m => DWParser m (String, [String], [(String, String)])
+codeLanguage = try $ do
+  rawLang <- option "-" (spaceChar *> manyTill anyChar (lookAhead (spaceChar <|> char '>')))
+  let attr = case rawLang of
+               "-" -> []
+               l -> [l]
+  return ("", attr, [])
+
+-- | Generic parser for <code> and <file> tags
+codeTag :: PandocMonad m
+        => ((String, [String], [(String, String)]) -> String -> a)
+        -> String
+        -> DWParser m a
+codeTag f tag = try $ f
+  <$  char '<'
+  <*  string tag
+  <*> codeLanguage
+  <*  manyTill anyChar (char '>')
+  <*  optional (manyTill spaceChar eol)
+  <*> manyTill anyChar (try $ string "</" <* string tag <* char '>')
+
+-- * Inline parsers
+
+-- | Parse any inline element but softbreak.
+inline' :: PandocMonad m => DWParser m B.Inlines
+inline' = whitespace
+      <|> br
+      <|> bold
+      <|> italic
+      <|> underlined
+      <|> nowiki
+      <|> percent
+      <|> link
+      <|> image
+      <|> monospaced
+      <|> subscript
+      <|> superscript
+      <|> deleted
+      <|> footnote
+      <|> inlineCode
+      <|> inlineFile
+      <|> inlineHtml
+      <|> inlinePhp
+      <|> autoLink
+      <|> autoEmail
+      <|> notoc
+      <|> nocache
+      <|> str
+      <|> symbol
+      <?> "inline"
+
+-- | Parse any inline element, including soft break.
+inline :: PandocMonad m => DWParser m B.Inlines
+inline = endline <|> inline'
+
+endline :: PandocMonad m => DWParser m B.Inlines
+endline = try $ B.softbreak <$ skipMany spaceChar <* linebreak
+
+whitespace :: PandocMonad m => DWParser m B.Inlines
+whitespace = try $ B.space <$ skipMany1 spaceChar
+
+br :: PandocMonad m => DWParser m B.Inlines
+br = try $ B.linebreak <$ string "\\\\" <* space
+
+linebreak :: PandocMonad m => DWParser m B.Inlines
+linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline)
+  where lastNewline  = mempty <$ eof
+        innerNewline = pure B.space
+
+between :: (Monoid c, PandocMonad m, Show b)
+        => DWParser m a -> DWParser m b -> (DWParser m b -> DWParser m c)
+        -> DWParser m c
+between start end p =
+  mconcat <$> try (start >> notFollowedBy whitespace >> many1Till (p end) end)
+
+enclosed :: (Monoid b, PandocMonad m, Show a)
+         => DWParser m a -> (DWParser m a -> DWParser m b) -> DWParser m b
+enclosed sep p = between sep (try sep) p
+
+nestedInlines :: (Show a, PandocMonad m)
+              => DWParser m a -> DWParser m B.Inlines
+nestedInlines end = innerSpace <|> nestedInline
+  where
+    innerSpace   = try $ whitespace <* notFollowedBy end
+    nestedInline = notFollowedBy whitespace >> nested inline
+
+bold :: PandocMonad m => DWParser m B.Inlines
+bold = try $ B.strong <$> enclosed (string "**") nestedInlines
+
+italic :: PandocMonad m => DWParser m B.Inlines
+italic = try $ B.emph <$> enclosed (string "//") nestedInlines
+
+underlined :: PandocMonad m => DWParser m B.Inlines
+underlined = try $ underlineSpan <$> enclosed (string "__") nestedInlines
+
+nowiki :: PandocMonad m => DWParser m B.Inlines
+nowiki = try $ B.text <$ string "<nowiki>" <*> manyTill anyChar (try $ string "</nowiki>")
+
+percent :: PandocMonad m => DWParser m B.Inlines
+percent = try $ B.text <$> enclosed (string "%%") nestedString
+
+nestedString :: (Show a, PandocMonad m)
+             => DWParser m a -> DWParser m String
+nestedString end = innerSpace <|> count 1 nonspaceChar
+  where
+    innerSpace = try $ many1 spaceChar <* notFollowedBy end
+
+monospaced :: PandocMonad m => DWParser m B.Inlines
+monospaced = try $ B.code <$> enclosed (string "''") nestedString
+
+subscript :: PandocMonad m => DWParser m B.Inlines
+subscript = try $ B.subscript <$> between (string "<sub>") (try $ string "</sub>") nestedInlines
+
+superscript :: PandocMonad m => DWParser m B.Inlines
+superscript = try $ B.superscript <$> between (string "<sup>") (try $ string "</sup>") nestedInlines
+
+deleted :: PandocMonad m => DWParser m B.Inlines
+deleted = try $ B.strikeout <$> between (string "<del>") (try $ string "</del>") nestedInlines
+
+-- | Parse a footnote.
+footnote :: PandocMonad m => DWParser m B.Inlines
+footnote = try $ B.note . B.para <$> between (string "((") (try $ string "))") nestedInlines
+
+inlineCode :: PandocMonad m => DWParser m B.Inlines
+inlineCode = codeTag B.codeWith "code"
+
+inlineFile :: PandocMonad m => DWParser m B.Inlines
+inlineFile = codeTag B.codeWith "file"
+
+inlineHtml :: PandocMonad m => DWParser m B.Inlines
+inlineHtml = try $ B.rawInline "html" <$ string "<html>" <*> manyTill anyChar (try $ string "</html>")
+
+inlinePhp :: PandocMonad m => DWParser m B.Inlines
+inlinePhp = try $ B.codeWith ("", ["php"], []) <$ string "<php>" <*> manyTill anyChar (try $ string "</php>")
+
+makeLink :: (String, String) -> B.Inlines
+makeLink (text, url) = B.link url "" $ B.str text
+
+autoEmail :: PandocMonad m => DWParser m B.Inlines
+autoEmail = try $ do
+  state <- getState
+  guard $ stateAllowLinks state
+  makeLink <$ char '<' <*> emailAddress <* char '>'
+
+autoLink :: PandocMonad m => DWParser m B.Inlines
+autoLink = try $ do
+  state <- getState
+  guard $ stateAllowLinks state
+  (text, url) <- uri
+  guard $ checkLink (last url)
+  return $ makeLink (text, url)
+  where
+    checkLink c
+      | c == '/' = True
+      | otherwise = isAlphaNum c
+
+notoc :: PandocMonad m => DWParser m B.Inlines
+notoc = try $ mempty <$ string "~~NOTOC~~"
+
+nocache :: PandocMonad m => DWParser m B.Inlines
+nocache = try $ mempty <$ string "~~NOCACHE~~"
+
+str :: PandocMonad m => DWParser m B.Inlines
+str = B.str <$> (many1 alphaNum <|> count 1 characterReference)
+
+symbol :: PandocMonad m => DWParser m B.Inlines
+symbol = B.str <$> count 1 nonspaceChar
+
+link :: PandocMonad m => DWParser m B.Inlines
+link = try $ do
+  st <- getState
+  guard $ stateAllowLinks st
+  setState $ st{ stateAllowLinks = False }
+  l <- linkText
+  setState $ st{ stateAllowLinks = True }
+  return l
+
+isExternalLink :: String -> Bool
+isExternalLink s =
+  case dropWhile (\c -> isAlphaNum c || (c `elem` ['-', '.', '+'])) s of
+    (':':'/':'/':_) -> True
+    _ -> False
+
+isAbsolutePath :: String -> Bool
+isAbsolutePath ('.':_) = False
+isAbsolutePath s = ':' `elem` s
+
+normalizeDots :: String -> String
+normalizeDots path@('.':_) =
+  case dropWhile (== '.') path of
+    ':':_ -> path
+    _ -> takeWhile (== '.') path ++ ':':dropWhile (== '.') path
+normalizeDots path = path
+
+normalizeInternalPath :: String -> String
+normalizeInternalPath path =
+  if isAbsolutePath path
+    then ensureAbsolute normalizedPath
+    else normalizedPath
+  where
+    normalizedPath = intercalate "/" $ dropWhile (== ".") $ splitOn ":" $ normalizeDots path
+    ensureAbsolute s@('/':_) = s
+    ensureAbsolute s = '/':s
+
+normalizePath :: String -> String
+normalizePath path =
+  if isExternalLink path
+    then path
+    else normalizeInternalPath path
+
+urlToText :: String -> String
+urlToText url =
+  if isExternalLink url
+    then url
+    else reverse $ takeWhile (/= ':') $ reverse url
+
+-- Parse link or image
+parseLink :: PandocMonad m
+          => (String -> Maybe B.Inlines -> B.Inlines)
+          -> String
+          -> String
+          -> DWParser m B.Inlines
+parseLink f l r = f
+  <$  string l
+  <*> many1Till anyChar (lookAhead (void (char '|') <|> try (void $ string r)))
+  <*> optionMaybe (B.trimInlines . mconcat <$> (char '|' *> manyTill inline (try $ lookAhead $ string r)))
+  <*  string r
+
+-- | Split Interwiki link into left and right part
+-- | Return Nothing if it is not Interwiki link
+splitInterwiki :: String -> Maybe (String, String)
+splitInterwiki path =
+  case span (\c -> isAlphaNum c || c == '.') path of
+    (l, '>':r) -> Just (l, r)
+    _ -> Nothing
+
+interwikiToUrl :: String -> String -> String
+interwikiToUrl "callto" page = "callto://" ++ page
+interwikiToUrl "doku" page = "https://www.dokuwiki.org/" ++ page
+interwikiToUrl "phpfn" page = "https://secure.php.net/" ++ page
+interwikiToUrl "tel" page = "tel:" ++ page
+interwikiToUrl "wp" page = "https://en.wikipedia.org/wiki/" ++ page
+interwikiToUrl "wpde" page = "https://de.wikipedia.org/wiki/" ++ page
+interwikiToUrl "wpes" page = "https://es.wikipedia.org/wiki/" ++ page
+interwikiToUrl "wpfr" page = "https://fr.wikipedia.org/wiki/" ++ page
+interwikiToUrl "wpjp" page = "https://jp.wikipedia.org/wiki/" ++ page
+interwikiToUrl "wppl" page = "https://pl.wikipedia.org/wiki/" ++ page
+interwikiToUrl _ page = "https://www.google.com/search?q=" ++ page ++ "&btnI=lucky"
+
+linkText :: PandocMonad m => DWParser m B.Inlines
+linkText = parseLink fromRaw "[[" "]]"
+  where
+    fromRaw path description =
+      B.link normalizedPath "" (fromMaybe (B.str defaultDescription) description)
+      where
+        path' = trim path
+        interwiki = splitInterwiki path'
+        normalizedPath =
+          case interwiki of
+            Nothing -> normalizePath path'
+            Just (l, r) -> interwikiToUrl l r
+        defaultDescription =
+          case interwiki of
+            Nothing -> urlToText path'
+            Just (_, r) -> r
+
+-- Matches strings like "100x100" (width x height) and "50" (width)
+isWidthHeightParameter :: String -> Bool
+isWidthHeightParameter s =
+  case s of
+    (x:xs) ->
+      isDigit x && case dropWhile isDigit xs of
+                     ('x':ys@(_:_)) -> all isDigit ys
+                     "" -> True
+                     _ -> False
+    _ -> False
+
+parseWidthHeight :: String -> (Maybe String, Maybe String)
+parseWidthHeight s = (width, height)
+  where
+    width = Just $ takeWhile isDigit s
+    height =
+      case dropWhile isDigit s of
+        ('x':xs) -> Just xs
+        _ -> Nothing
+
+image :: PandocMonad m => DWParser m B.Inlines
+image = parseLink fromRaw "{{" "}}"
+  where
+    fromRaw path description =
+      if linkOnly
+        then B.link normalizedPath "" (fromMaybe defaultDescription description)
+        else B.imageWith ("", classes, attributes) normalizedPath "" (fromMaybe defaultDescription description)
+      where
+        (path', parameters) = span (/= '?') $ trim path
+        normalizedPath = normalizePath path'
+        leftPadding = " " `isPrefixOf` path
+        rightPadding = " " `isSuffixOf` path
+        classes =
+          case (leftPadding, rightPadding) of
+            (False, False) -> []
+            (False, True) -> ["align-left"]
+            (True, False) -> ["align-right"]
+            (True, True) -> ["align-center"]
+        parameterList = splitOn "&" $ drop 1 parameters
+        linkOnly = "linkonly" `elem` parameterList
+        (width, height) = maybe (Nothing, Nothing) parseWidthHeight (F.find isWidthHeightParameter parameterList)
+        attributes = catMaybes [fmap ("width",) width, fmap ("height",) height]
+        defaultDescription = B.str $ urlToText path'
+
+-- * Block parsers
+
+block :: PandocMonad m => DWParser m B.Blocks
+block = do
+  res <- mempty <$ skipMany1 blankline
+         <|> blockElements
+         <|> para
+  skipMany blankline
+  trace (take 60 $ show $ B.toList res)
+  return res
+
+blockElements :: PandocMonad m => DWParser m B.Blocks
+blockElements = horizontalLine
+            <|> header
+            <|> list "  "
+            <|> indentedCode
+            <|> quote
+            <|> blockCode
+            <|> blockFile
+            <|> blockHtml
+            <|> blockPhp
+            <|> table
+
+horizontalLine :: PandocMonad m => DWParser m B.Blocks
+horizontalLine = try $ B.horizontalRule <$ string "---" <* many1 (char '-') <* eol
+
+header :: PandocMonad m => DWParser m B.Blocks
+header = try $ do
+  guardColumnOne
+  eqs <- many1 (char '=')
+  let lev = length eqs
+  guard $ lev < 7
+  contents <- B.trimInlines . mconcat <$> manyTill inline (try $ char '=' *> many1 (char '='))
+  attr <- registerHeader nullAttr contents
+  return $ B.headerWith attr (7 - lev) contents
+
+list :: PandocMonad m => String -> DWParser m B.Blocks
+list prefix = bulletList prefix <|> orderedList prefix
+
+bulletList :: PandocMonad m => String -> DWParser m B.Blocks
+bulletList prefix = try $ B.bulletList <$> parseList prefix '*'
+
+orderedList :: PandocMonad m => String -> DWParser m B.Blocks
+orderedList prefix = try $ B.orderedList <$> parseList prefix '-'
+
+parseList :: PandocMonad m
+          => String
+          -> Char
+          -> DWParser m [B.Blocks]
+parseList prefix marker =
+  many1 ((<>) <$> item <*> fmap mconcat (many continuation))
+  where
+    continuation = try $ list ("  " ++ prefix)
+    item = try $ string prefix *> char marker *> char ' ' *> itemContents
+    itemContents = B.plain . mconcat <$> many1Till inline' eol
+
+indentedCode :: PandocMonad m => DWParser m B.Blocks
+indentedCode = try $ B.codeBlock . unlines <$> many1 indentedLine
+ where
+   indentedLine = try $ string "  " *> manyTill anyChar eol
+
+quote :: PandocMonad m => DWParser m B.Blocks
+quote = try $ nestedQuote 0
+  where
+    prefix level = count level (char '>')
+    contents level = nestedQuote level <|> quoteLine
+    quoteLine = try $ B.plain . B.trimInlines . mconcat <$> many1Till inline' eol
+    quoteContents level = (<>) <$> contents level <*> quoteContinuation level
+    quoteContinuation level = mconcat <$> many (try $ prefix level *> contents level)
+    nestedQuote level = B.blockQuote <$ char '>' <*> quoteContents (level + 1 :: Int)
+
+blockHtml :: PandocMonad m => DWParser m B.Blocks
+blockHtml = try $ B.rawBlock "html"
+  <$  string "<HTML>"
+  <*  optional (manyTill spaceChar eol)
+  <*> manyTill anyChar (try $ string "</HTML>")
+
+blockPhp :: PandocMonad m => DWParser m B.Blocks
+blockPhp = try $ B.codeBlockWith ("", ["php"], [])
+  <$  string "<PHP>"
+  <*  optional (manyTill spaceChar eol)
+  <*> manyTill anyChar (try $ string "</PHP>")
+
+table :: PandocMonad m => DWParser m B.Blocks
+table = do
+  firstSeparator <- lookAhead tableCellSeparator
+  rows <- tableRows
+  let (headerRow, body) = if firstSeparator == '^'
+                            then (head rows, tail rows)
+                            else ([], rows)
+  let attrs = const (AlignDefault, 0.0) <$> transpose rows
+  pure $ B.table mempty attrs headerRow body
+
+tableRows :: PandocMonad m => DWParser m [[B.Blocks]]
+tableRows = many1 tableRow
+
+tableRow :: PandocMonad m => DWParser m [B.Blocks]
+tableRow = many1Till tableCell tableRowEnd
+
+tableRowEnd :: PandocMonad m => DWParser m Char
+tableRowEnd = try $ tableCellSeparator <* manyTill spaceChar eol
+
+tableCellSeparator :: PandocMonad m => DWParser m Char
+tableCellSeparator = char '|' <|> char '^'
+
+tableCell :: PandocMonad m => DWParser m B.Blocks
+tableCell = try $ B.plain . B.trimInlines . mconcat <$> (normalCell <|> headerCell)
+  where
+    normalCell = char '|' *> manyTill inline' (lookAhead tableCellSeparator)
+    headerCell = char '^' *> manyTill inline' (lookAhead tableCellSeparator)
+
+blockCode :: PandocMonad m => DWParser m B.Blocks
+blockCode = codeTag B.codeBlockWith "code"
+
+blockFile :: PandocMonad m => DWParser m B.Blocks
+blockFile = codeTag B.codeBlockWith "file"
+
+para :: PandocMonad m => DWParser m B.Blocks
+para = result . mconcat <$> many1Till inline endOfParaElement
+ where
+   endOfParaElement = lookAhead $ endOfInput <|> endOfPara <|> newBlockElement
+   endOfInput       = try $ skipMany blankline >> skipSpaces >> eof
+   endOfPara        = try $ blankline >> skipMany1 blankline
+   newBlockElement  = try $ blankline >> void blockElements
+   result content   = if F.all (==Space) content
+                      then mempty
+                      else B.para $ B.trimInlines content
--- a/test/Tests/Readers/DokuWiki.hs
+++ b/test/Tests/Readers/DokuWiki.hs
@ -0,0 +1,315 @@
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings   #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+module Tests.Readers.DokuWiki (tests) where
+
+import Prelude
+import Data.Text (Text)
+import qualified Data.Text as T
+import Test.Tasty
+import Tests.Helpers
+import Text.Pandoc
+import Text.Pandoc.Arbitrary ()
+import Text.Pandoc.Builder
+import Text.Pandoc.Shared (underlineSpan)
+
+dokuwiki :: Text -> Pandoc
+dokuwiki = purely $ readDokuWiki def{ readerStandalone = True }
+
+infix 4 =:
+(=:) :: ToString c
+     => String -> (Text, c) -> TestTree
+(=:) = test dokuwiki
+
+tests :: [TestTree]
+tests = [ testGroup "inlines"
+          [ "Bold" =:
+            "**bold**" =?>
+            para (strong "bold")
+          , "Italic" =:
+            "//italic//" =?>
+            para (emph "italic")
+          , "Underlined" =:
+            "__underlined__" =?>
+            para (underlineSpan "underlined")
+          , "Monospaced" =:
+            "''monospaced''" =?>
+            para (code "monospaced")
+          , "Combined" =:
+            "**__//''combine''//__**" =?>
+            para (strong $ underlineSpan $ emph $ code "combine")
+          , "Nowiki" =:
+            T.unlines [ "<nowiki>"
+                      , "This is some text which contains addresses like this: http://www.splitbrain.org and **formatting**, but nothing is done with it."
+                      , "</nowiki>"
+                      ] =?>
+            para "This is some text which contains addresses like this: http://www.splitbrain.org and **formatting**, but nothing is done with it."
+          , "Percent" =:
+            "The same is true for %%//__this__ text// with a smiley ;-)%%." =?>
+            para "The same is true for //__this__ text// with a smiley ;-)."
+          , "Subscript" =:
+            "<sub>subscript</sub>" =?>
+            para (subscript "subscript")
+          , "Superscript" =:
+            "<sup>superscript</sup>" =?>
+            para (superscript "superscript")
+          , "Deleted" =:
+            "<del>deleted</del>" =?>
+            para (strikeout "deleted")
+          , "Inline code" =:
+            "foo <code java>public static void main</code> bar" =?>
+            para (text "foo " <> codeWith ("", ["java"], []) "public static void main" <> text " bar")
+          , "Inline file" =:
+            "foo <file></code></file> bar" =?>
+            para (text "foo " <> code "</code>" <> text " bar")
+          , "Inline HTML" =:
+            "<html>\nThis is some <span style=\"color:red;font-size:150%;\">inline HTML</span>\n</html>" =?>
+            para (rawInline "html" "\nThis is some <span style=\"color:red;font-size:150%;\">inline HTML</span>\n")
+          , "Inline PHP" =:
+            "<php>echo '<p>Hello World</p>';</php>" =?>
+            para (codeWith ("", ["php"], []) "echo '<p>Hello World</p>';")
+          , "Linebreak" =:
+            T.unlines [ "This is some text with some linebreaks\\\\ Note that the"
+                      , "two backslashes are only recognized at the end of a line\\\\"
+                      , "or followed by\\\\ a whitespace \\\\this happens without it."
+                      ] =?>
+            para ("This is some text with some linebreaks" <> linebreak <> "Note that the\n" <>
+                  "two backslashes are only recognized at the end of a line" <> linebreak <>
+                  "or followed by" <> linebreak <> "a whitespace \\\\this happens without it.")
+          , testGroup "External links"
+            [ "Autolink" =:
+              "http://www.google.com" =?>
+              para (link "http://www.google.com" "" (str "http://www.google.com"))
+            , "Link without description" =:
+              "[[https://example.com]]" =?>
+              para (link "https://example.com" "" (str "https://example.com"))
+            , "Link with description" =:
+              "[[http://www.google.com|This Link points to google]]" =?>
+              para (link "http://www.google.com" "" (text "This Link points to google"))
+            , "Trim whitespace around link and description" =:
+              "[[   http://www.google.com    |   This Link points to google   ]]" =?>
+              para (link "http://www.google.com" "" (text "This Link points to google"))
+            , "Email address" =:
+              "<andi@splitbrain.org>" =?>
+              para (link "mailto:andi@splitbrain.org" "" (str "andi@splitbrain.org"))
+            ]
+          , testGroup "Internal links"
+            [ "Current namespace" =:
+              "[[example]]" =?>
+              para (link "example" "" (str "example"))
+            , "Current namespace starting with dot" =:
+              "[[.example]]" =?>
+              para (link "example" "" (str ".example"))
+            , "Current namespace starting with dot and colon" =:
+              "[[.:example]]" =?>
+              para (link "example" "" (str "example"))
+            , "Root namespace" =:
+              "[[:example]]" =?>
+              para (link "/example" "" (str "example"))
+            , "Parent namespace" =:
+              "[[..example]]" =?>
+              para (link "../example" "" (str "..example"))
+            , "Parent namespace with colon" =:
+              "[[..:example]]" =?>
+              para (link "../example" "" (str "example"))
+            , "Beneath the root namespace" =:
+              "[[wiki:example]]" =?>
+              para (link "/wiki/example" "" (str "example"))
+            , "Explicitly beneath the root namespace" =:
+              "[[:wiki:example]]" =?>
+              para (link "/wiki/example" "" (str "example"))
+            ]
+          , testGroup "Interwiki links"
+            [ "Interwiki without description" =:
+              "[[doku>DokuWiki]]" =?>
+              para (link "https://www.dokuwiki.org/DokuWiki" "" (str "DokuWiki"))
+            , "Interwiki link with description" =:
+              "[[doku>toolbar|quickbuttons]]" =?>
+              para (link "https://www.dokuwiki.org/toolbar" "" (str "quickbuttons"))
+            ]
+          , "Footnote" =:
+            "((This is a footnote))" =?>
+            para (note (para "This is a footnote"))
+          , testGroup "Images"
+            [ "Image" =:
+              "{{image.jpg}}" =?>
+              para (image "image.jpg" "" (str "image.jpg"))
+            , "Image with caption" =:
+              "{{image.png|This is the caption}}" =?>
+              para (image "image.png" "" "This is the caption")
+            , "Image with } in caption" =:
+              "{{image.png|There is an } in the caption}}" =?>
+              para (image "image.png" "" "There is an } in the caption")
+            , "Wiki namespace starting with dot" =:
+              "{{.wiki:image.jpg}}" =?>
+              para (image "wiki/image.jpg" "" (str "image.jpg"))
+            , "Left aligned image" =:
+              "{{wiki:dokuwiki-128.png }}" =?>
+              para (imageWith ("", ["align-left"], []) "/wiki/dokuwiki-128.png" "" (str "dokuwiki-128.png"))
+            , "Right aligned image" =:
+              "{{ wiki:dokuwiki-128.png}}" =?>
+              para (imageWith ("", ["align-right"], []) "/wiki/dokuwiki-128.png" "" (str "dokuwiki-128.png"))
+            , "Centered image" =:
+              "{{ wiki:dokuwiki-128.png }}" =?>
+              para (imageWith ("", ["align-center"], []) "/wiki/dokuwiki-128.png" "" (str "dokuwiki-128.png"))
+            , "Image with width" =:
+              "{{wiki:dokuwiki-128.png?50}}" =?>
+              para (imageWith ("", [], [("width", "50")]) "/wiki/dokuwiki-128.png" "" (str "dokuwiki-128.png"))
+            , "Image with width and height" =:
+              "{{wiki:dokuwiki-128.png?nocache&50x100}}" =?>
+              para (imageWith ("", [], [("width", "50"), ("height", "100")]) "/wiki/dokuwiki-128.png" "" (str "dokuwiki-128.png"))
+            , "Linkonly" =:
+              "{{wiki:dokuwiki-128.png?linkonly}}" =?>
+              para (link "/wiki/dokuwiki-128.png" "" (str "dokuwiki-128.png"))
+            ]
+          , "Ignore ~~NOTOC~~" =:
+            "Here is a ~~NOTOC~~ macro" =?>
+            para "Here is a macro"
+          , "Ignore ~~NOCACHE~~" =:
+            "Here is a ~~NOCACHE~~ macro" =?>
+            para "Here is a macro"
+          ]
+        , testGroup "Sectioning"
+          [ "Headline level 1" =:
+            "====== Headline Level 1 ======" =?>
+            header 1 "Headline Level 1"
+          , "Headline level 2" =:
+            "===== Headline Level 2 =====" =?>
+            header 2 "Headline Level 2"
+          , "Headline level 3" =:
+            "==== Headline Level 3 ====" =?>
+            header 3 "Headline Level 3"
+          , "Headline level 4" =:
+            "=== Headline Level 4 ===" =?>
+            header 4 "Headline Level 4"
+          , "Headline level 5" =:
+            "== Headline Level 5 ==" =?>
+            header 5 "Headline Level 5"
+          , "Only two closing = are required" =:
+            "====== Headline Level 1 ==" =?>
+            header 1 "Headline Level 1"
+          , "One closing = is not enough" =:
+            "====== Headline Level 1 =" =?>
+            para "====== Headline Level 1 ="
+          , "One closing = is not enough" =:
+            "== Headline with = sign ==" =?>
+            header 5 "Headline with = sign"
+          ]
+        , "Horizontal line" =:
+          "----" =?>
+          horizontalRule
+        , testGroup "Lists"
+          [ "Unordered list" =:
+            T.unlines [ "  * This is a list"
+                      , "  * The second item"
+                      , "    * You may have different levels"
+                      , "  * Another item"
+                      ] =?>
+            bulletList [ plain "This is a list"
+                       , plain "The second item" <>
+                         bulletList [ plain "You may have different levels" ]
+                       , plain "Another item"
+                       ]
+          , "Ordered list" =:
+            T.unlines [ "  - The same list but ordered"
+                      , "  - Another item"
+                      , "    - Just use indention for deeper levels"
+                      , "  - That's it"
+                      ] =?>
+            orderedList [ plain "The same list but ordered"
+                        , plain "Another item" <>
+                          orderedList [ plain "Just use indention for deeper levels" ]
+                        , plain "That's it"
+                        ]
+          , "Multiline list items" =: -- https://www.dokuwiki.org/faq:lists
+            T.unlines [ "  - first item"
+                      , "  - second item with linebreak\\\\ second line"
+                      , "  - third item with code: <code>"
+                      , "some code"
+                      , "comes here"
+                      , "</code>"
+                      , "  - fourth item"
+                      ] =?>
+            orderedList [ plain "first item"
+                        , plain ("second item with linebreak" <> linebreak <> " second line")
+                        , plain ("third item with code: " <> code "some code\ncomes here\n")
+                        , plain "fourth item"
+                        ]
+          ]
+        , "Block HTML" =:
+          T.unlines [ "<HTML>"
+                    , "<p style=\"border:2px dashed red;\">And this is some block HTML</p>"
+                    , "</HTML>"
+                    ] =?>
+          rawBlock "html" "<p style=\"border:2px dashed red;\">And this is some block HTML</p>\n"
+        , "Block PHP" =:
+          T.unlines [ "<PHP>"
+                    , "echo '<p>Hello World</p>';"
+                    , "</PHP>"
+                    ] =?>
+          codeBlockWith ("", ["php"], []) "echo '<p>Hello World</p>';\n"
+        , "Quote" =:
+          T.unlines [ "> foo"
+                    , ">no space is required after >"
+                    , "> bar"
+                    , ">> baz"
+                    , "> bat"
+                    ] =?>
+          blockQuote (plain "foo" <>
+                      plain "no space is required after >" <>
+                      plain "bar" <>
+                      blockQuote (plain "baz") <>
+                      plain "bat")
+        , "Code block" =:
+          T.unlines [ "<code>"
+                    , "foo bar baz"
+                    , "</code>"
+                    ] =?>
+          codeBlock "foo bar baz\n"
+        , "Java code block" =:
+          T.unlines [ "<code java>"
+                    , "public static void main"
+                    , "</code>"
+                    ] =?>
+          codeBlockWith ("", ["java"], []) "public static void main\n"
+        , "File with filename and no language" =:
+          T.unlines [ "<file - foo.bar>"
+                    , "file contents"
+                    , "</file>"
+                    ] =?>
+          codeBlock "file contents\n"
+        , "Table" =:
+          T.unlines [ "| foo | bar |"
+                    , "| bat | baz |"
+                    ] =?>
+          table mempty [(AlignDefault, 0.0), (AlignDefault, 0.0)]
+                       []
+                       [[plain "foo", plain "bar"]
+                       ,[plain "bat", plain "baz"]]
+        , "Table with header" =:
+          T.unlines [ "^ foo ^ bar ^"
+                    , "| bat | baz |"
+                    ] =?>
+          table mempty [(AlignDefault, 0.0), (AlignDefault, 0.0)]
+                       [plain "foo", plain "bar"]
+                       [[plain "bat", plain "baz"]]
+        , "Table with colspan" =:
+          T.unlines [ "^ 0,0 ^ 0,1 ^ 0,2 ^"
+                    , "| 1,0 | 1,1 ||"
+                    , "| 2,0 | 2,1 | 2,2 |"
+                    ] =?>
+          table mempty [(AlignDefault, 0.0), (AlignDefault, 0.0), (AlignDefault, 0.0)]
+                       [plain "0,0", plain "0,1", plain "0,2"]
+                       [[plain "1,0", plain "1,1", mempty]
+                       ,[plain "2,0", plain "2,1", plain "2,2"]
+                       ]
+        , "Indented code block" =:
+          T.unlines [ "foo"
+                    , "  bar"
+                    , "    bat"
+                    , "baz"
+                    ] =?>
+          para "foo" <>
+          codeBlock "bar\n  bat\n" <>
+          para "baz"
+        ]
--- a/test/test-pandoc.hs
+++ b/test/test-pandoc.hs
@ -11,6 +11,7 @@ import qualified Tests.Lua
 import qualified Tests.Old
 import qualified Tests.Readers.Creole
 import qualified Tests.Readers.Docx
+import qualified Tests.Readers.DokuWiki
 import qualified Tests.Readers.EPUB
 import qualified Tests.Readers.FB2
 import qualified Tests.Readers.HTML
@ -80,6 +81,7 @@ tests = testGroup "pandoc tests" [ Tests.Command.tests
          , testGroup "Creole" Tests.Readers.Creole.tests
          , testGroup "Man" Tests.Readers.Man.tests
          , testGroup "FB2" Tests.Readers.FB2.tests
+          , testGroup "DokuWiki" Tests.Readers.DokuWiki.tests
          ]
        , testGroup "Lua filters" Tests.Lua.tests
        ]