parent
a5f5002eef
commit
40c30a9d88
6 changed files with 855 additions and 2 deletions
|
@ -238,6 +238,7 @@ General options {.options}
|
|||
- `creole` ([Creole 1.0])
|
||||
- `docbook` ([DocBook])
|
||||
- `docx` ([Word docx])
|
||||
- `dokuwiki` ([DokuWiki markup])
|
||||
- `epub` ([EPUB])
|
||||
- `fb2` ([FictionBook2] e-book)
|
||||
- `gfm` ([GitHub-Flavored Markdown]),
|
||||
|
|
|
@ -18,8 +18,9 @@ description: Pandoc is a Haskell library for converting from one markup
|
|||
format to another, and a command-line tool that uses
|
||||
this library. It can read several dialects of Markdown and
|
||||
(subsets of) HTML, reStructuredText, LaTeX, DocBook, JATS,
|
||||
MediaWiki markup, TWiki markup, TikiWiki markup, Creole 1.0,
|
||||
Haddock markup, OPML, Emacs Org-Mode, Emacs Muse, txt2tags,
|
||||
MediaWiki markup, DokuWiki markup, TWiki markup,
|
||||
TikiWiki markup, Creole 1.0, Haddock markup, OPML,
|
||||
Emacs Org-Mode, Emacs Muse, txt2tags,
|
||||
Vimwiki, Word Docx, ODT, EPUB, FictionBook2, roff man,
|
||||
and Textile, and it can write Markdown, reStructuredText,
|
||||
XHTML, HTML 5, LaTeX, ConTeXt, DocBook, JATS, OPML, TEI,
|
||||
|
@ -467,6 +468,7 @@ library
|
|||
Text.Pandoc.Readers.Muse,
|
||||
Text.Pandoc.Readers.Man,
|
||||
Text.Pandoc.Readers.FB2,
|
||||
Text.Pandoc.Readers.DokuWiki,
|
||||
Text.Pandoc.Writers,
|
||||
Text.Pandoc.Writers.Native,
|
||||
Text.Pandoc.Writers.Docbook,
|
||||
|
@ -696,6 +698,7 @@ test-suite test-pandoc
|
|||
Tests.Readers.Creole
|
||||
Tests.Readers.Man
|
||||
Tests.Readers.FB2
|
||||
Tests.Readers.DokuWiki
|
||||
Tests.Writers.Native
|
||||
Tests.Writers.ConTeXt
|
||||
Tests.Writers.Docbook
|
||||
|
|
|
@ -47,6 +47,7 @@ module Text.Pandoc.Readers
|
|||
, readMarkdown
|
||||
, readCommonMark
|
||||
, readCreole
|
||||
, readDokuWiki
|
||||
, readMediaWiki
|
||||
, readVimwiki
|
||||
, readRST
|
||||
|
@ -86,6 +87,7 @@ import Text.Pandoc.Readers.CommonMark
|
|||
import Text.Pandoc.Readers.Creole
|
||||
import Text.Pandoc.Readers.DocBook
|
||||
import Text.Pandoc.Readers.Docx
|
||||
import Text.Pandoc.Readers.DokuWiki
|
||||
import Text.Pandoc.Readers.EPUB
|
||||
import Text.Pandoc.Readers.FB2
|
||||
import Text.Pandoc.Readers.Haddock
|
||||
|
@ -123,6 +125,7 @@ readers = [ ("native" , TextReader readNative)
|
|||
,("markdown_mmd", TextReader readMarkdown)
|
||||
,("commonmark" , TextReader readCommonMark)
|
||||
,("creole" , TextReader readCreole)
|
||||
,("dokuwiki" , TextReader readDokuWiki)
|
||||
,("gfm" , TextReader readCommonMark)
|
||||
,("rst" , TextReader readRST)
|
||||
,("mediawiki" , TextReader readMediaWiki)
|
||||
|
|
529
src/Text/Pandoc/Readers/DokuWiki.hs
Normal file
529
src/Text/Pandoc/Readers/DokuWiki.hs
Normal file
|
@ -0,0 +1,529 @@
|
|||
{-# LANGUAGE FlexibleContexts #-}
|
||||
{-# LANGUAGE NoImplicitPrelude #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE TupleSections #-}
|
||||
|
||||
{-
|
||||
Copyright (C) 2018-2019 Alexander Krotov <ilabdsf@gmail.com>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
-}
|
||||
|
||||
{- |
|
||||
Module : Text.Pandoc.Readers.DokuWiki
|
||||
Copyright : Copyright (C) 2018 Alexander Krotov
|
||||
License : GNU GPL, version 2 or above
|
||||
|
||||
Maintainer : Alexander Krotov <ilabdsf@gmail.com>
|
||||
Stability : alpha
|
||||
Portability : portable
|
||||
|
||||
Conversion of DokuWiki text to 'Pandoc' document.
|
||||
-}
|
||||
module Text.Pandoc.Readers.DokuWiki (readDokuWiki) where
|
||||
|
||||
import Prelude
|
||||
import Control.Monad
|
||||
import Control.Monad.Except (throwError)
|
||||
import Data.Char (isAlphaNum, isDigit)
|
||||
import qualified Data.Foldable as F
|
||||
import Data.List (intercalate, transpose, isPrefixOf, isSuffixOf)
|
||||
import Data.List.Split (splitOn)
|
||||
import Data.Maybe (fromMaybe, catMaybes)
|
||||
import Data.Text (Text)
|
||||
import qualified Data.Text as T
|
||||
import qualified Text.Pandoc.Builder as B
|
||||
import Text.Pandoc.Class (PandocMonad (..))
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Error (PandocError (PandocParsecError))
|
||||
import Text.Pandoc.Options
|
||||
import Text.Pandoc.Parsing hiding (enclosed, nested)
|
||||
import Text.Pandoc.Shared (crFilter, trim, underlineSpan)
|
||||
|
||||
-- | Read DokuWiki from an input string and return a Pandoc document.
|
||||
readDokuWiki :: PandocMonad m
|
||||
=> ReaderOptions
|
||||
-> Text
|
||||
-> m Pandoc
|
||||
readDokuWiki opts s = do
|
||||
let input = crFilter s
|
||||
res <- runParserT parseDokuWiki def {stateOptions = opts } "source" input
|
||||
case res of
|
||||
Left e -> throwError $ PandocParsecError (T.unpack input) e
|
||||
Right d -> return d
|
||||
|
||||
type DWParser = ParserT Text ParserState
|
||||
|
||||
-- * Utility functions
|
||||
|
||||
-- | Parse end-of-line, which can be either a newline or end-of-file.
|
||||
eol :: Stream s m Char => ParserT s st m ()
|
||||
eol = void newline <|> eof
|
||||
|
||||
nested :: PandocMonad m => DWParser m a -> DWParser m a
|
||||
nested p = do
|
||||
nestlevel <- stateMaxNestingLevel <$> getState
|
||||
guard $ nestlevel > 0
|
||||
updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 }
|
||||
res <- p
|
||||
updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
|
||||
return res
|
||||
|
||||
guardColumnOne :: PandocMonad m => DWParser m ()
|
||||
guardColumnOne = getPosition >>= \pos -> guard (sourceColumn pos == 1)
|
||||
|
||||
-- | Parse DokuWiki document.
|
||||
parseDokuWiki :: PandocMonad m => DWParser m Pandoc
|
||||
parseDokuWiki =
|
||||
B.doc . mconcat <$> many block <* spaces <* eof
|
||||
|
||||
-- | Parse <code> and <file> attributes
|
||||
codeLanguage :: PandocMonad m => DWParser m (String, [String], [(String, String)])
|
||||
codeLanguage = try $ do
|
||||
rawLang <- option "-" (spaceChar *> manyTill anyChar (lookAhead (spaceChar <|> char '>')))
|
||||
let attr = case rawLang of
|
||||
"-" -> []
|
||||
l -> [l]
|
||||
return ("", attr, [])
|
||||
|
||||
-- | Generic parser for <code> and <file> tags
|
||||
codeTag :: PandocMonad m
|
||||
=> ((String, [String], [(String, String)]) -> String -> a)
|
||||
-> String
|
||||
-> DWParser m a
|
||||
codeTag f tag = try $ f
|
||||
<$ char '<'
|
||||
<* string tag
|
||||
<*> codeLanguage
|
||||
<* manyTill anyChar (char '>')
|
||||
<* optional (manyTill spaceChar eol)
|
||||
<*> manyTill anyChar (try $ string "</" <* string tag <* char '>')
|
||||
|
||||
-- * Inline parsers
|
||||
|
||||
-- | Parse any inline element but softbreak.
|
||||
inline' :: PandocMonad m => DWParser m B.Inlines
|
||||
inline' = whitespace
|
||||
<|> br
|
||||
<|> bold
|
||||
<|> italic
|
||||
<|> underlined
|
||||
<|> nowiki
|
||||
<|> percent
|
||||
<|> link
|
||||
<|> image
|
||||
<|> monospaced
|
||||
<|> subscript
|
||||
<|> superscript
|
||||
<|> deleted
|
||||
<|> footnote
|
||||
<|> inlineCode
|
||||
<|> inlineFile
|
||||
<|> inlineHtml
|
||||
<|> inlinePhp
|
||||
<|> autoLink
|
||||
<|> autoEmail
|
||||
<|> notoc
|
||||
<|> nocache
|
||||
<|> str
|
||||
<|> symbol
|
||||
<?> "inline"
|
||||
|
||||
-- | Parse any inline element, including soft break.
|
||||
inline :: PandocMonad m => DWParser m B.Inlines
|
||||
inline = endline <|> inline'
|
||||
|
||||
endline :: PandocMonad m => DWParser m B.Inlines
|
||||
endline = try $ B.softbreak <$ skipMany spaceChar <* linebreak
|
||||
|
||||
whitespace :: PandocMonad m => DWParser m B.Inlines
|
||||
whitespace = try $ B.space <$ skipMany1 spaceChar
|
||||
|
||||
br :: PandocMonad m => DWParser m B.Inlines
|
||||
br = try $ B.linebreak <$ string "\\\\" <* space
|
||||
|
||||
linebreak :: PandocMonad m => DWParser m B.Inlines
|
||||
linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline)
|
||||
where lastNewline = mempty <$ eof
|
||||
innerNewline = pure B.space
|
||||
|
||||
between :: (Monoid c, PandocMonad m, Show b)
|
||||
=> DWParser m a -> DWParser m b -> (DWParser m b -> DWParser m c)
|
||||
-> DWParser m c
|
||||
between start end p =
|
||||
mconcat <$> try (start >> notFollowedBy whitespace >> many1Till (p end) end)
|
||||
|
||||
enclosed :: (Monoid b, PandocMonad m, Show a)
|
||||
=> DWParser m a -> (DWParser m a -> DWParser m b) -> DWParser m b
|
||||
enclosed sep p = between sep (try sep) p
|
||||
|
||||
nestedInlines :: (Show a, PandocMonad m)
|
||||
=> DWParser m a -> DWParser m B.Inlines
|
||||
nestedInlines end = innerSpace <|> nestedInline
|
||||
where
|
||||
innerSpace = try $ whitespace <* notFollowedBy end
|
||||
nestedInline = notFollowedBy whitespace >> nested inline
|
||||
|
||||
bold :: PandocMonad m => DWParser m B.Inlines
|
||||
bold = try $ B.strong <$> enclosed (string "**") nestedInlines
|
||||
|
||||
italic :: PandocMonad m => DWParser m B.Inlines
|
||||
italic = try $ B.emph <$> enclosed (string "//") nestedInlines
|
||||
|
||||
underlined :: PandocMonad m => DWParser m B.Inlines
|
||||
underlined = try $ underlineSpan <$> enclosed (string "__") nestedInlines
|
||||
|
||||
nowiki :: PandocMonad m => DWParser m B.Inlines
|
||||
nowiki = try $ B.text <$ string "<nowiki>" <*> manyTill anyChar (try $ string "</nowiki>")
|
||||
|
||||
percent :: PandocMonad m => DWParser m B.Inlines
|
||||
percent = try $ B.text <$> enclosed (string "%%") nestedString
|
||||
|
||||
nestedString :: (Show a, PandocMonad m)
|
||||
=> DWParser m a -> DWParser m String
|
||||
nestedString end = innerSpace <|> count 1 nonspaceChar
|
||||
where
|
||||
innerSpace = try $ many1 spaceChar <* notFollowedBy end
|
||||
|
||||
monospaced :: PandocMonad m => DWParser m B.Inlines
|
||||
monospaced = try $ B.code <$> enclosed (string "''") nestedString
|
||||
|
||||
subscript :: PandocMonad m => DWParser m B.Inlines
|
||||
subscript = try $ B.subscript <$> between (string "<sub>") (try $ string "</sub>") nestedInlines
|
||||
|
||||
superscript :: PandocMonad m => DWParser m B.Inlines
|
||||
superscript = try $ B.superscript <$> between (string "<sup>") (try $ string "</sup>") nestedInlines
|
||||
|
||||
deleted :: PandocMonad m => DWParser m B.Inlines
|
||||
deleted = try $ B.strikeout <$> between (string "<del>") (try $ string "</del>") nestedInlines
|
||||
|
||||
-- | Parse a footnote.
|
||||
footnote :: PandocMonad m => DWParser m B.Inlines
|
||||
footnote = try $ B.note . B.para <$> between (string "((") (try $ string "))") nestedInlines
|
||||
|
||||
inlineCode :: PandocMonad m => DWParser m B.Inlines
|
||||
inlineCode = codeTag B.codeWith "code"
|
||||
|
||||
inlineFile :: PandocMonad m => DWParser m B.Inlines
|
||||
inlineFile = codeTag B.codeWith "file"
|
||||
|
||||
inlineHtml :: PandocMonad m => DWParser m B.Inlines
|
||||
inlineHtml = try $ B.rawInline "html" <$ string "<html>" <*> manyTill anyChar (try $ string "</html>")
|
||||
|
||||
inlinePhp :: PandocMonad m => DWParser m B.Inlines
|
||||
inlinePhp = try $ B.codeWith ("", ["php"], []) <$ string "<php>" <*> manyTill anyChar (try $ string "</php>")
|
||||
|
||||
makeLink :: (String, String) -> B.Inlines
|
||||
makeLink (text, url) = B.link url "" $ B.str text
|
||||
|
||||
autoEmail :: PandocMonad m => DWParser m B.Inlines
|
||||
autoEmail = try $ do
|
||||
state <- getState
|
||||
guard $ stateAllowLinks state
|
||||
makeLink <$ char '<' <*> emailAddress <* char '>'
|
||||
|
||||
autoLink :: PandocMonad m => DWParser m B.Inlines
|
||||
autoLink = try $ do
|
||||
state <- getState
|
||||
guard $ stateAllowLinks state
|
||||
(text, url) <- uri
|
||||
guard $ checkLink (last url)
|
||||
return $ makeLink (text, url)
|
||||
where
|
||||
checkLink c
|
||||
| c == '/' = True
|
||||
| otherwise = isAlphaNum c
|
||||
|
||||
notoc :: PandocMonad m => DWParser m B.Inlines
|
||||
notoc = try $ mempty <$ string "~~NOTOC~~"
|
||||
|
||||
nocache :: PandocMonad m => DWParser m B.Inlines
|
||||
nocache = try $ mempty <$ string "~~NOCACHE~~"
|
||||
|
||||
str :: PandocMonad m => DWParser m B.Inlines
|
||||
str = B.str <$> (many1 alphaNum <|> count 1 characterReference)
|
||||
|
||||
symbol :: PandocMonad m => DWParser m B.Inlines
|
||||
symbol = B.str <$> count 1 nonspaceChar
|
||||
|
||||
link :: PandocMonad m => DWParser m B.Inlines
|
||||
link = try $ do
|
||||
st <- getState
|
||||
guard $ stateAllowLinks st
|
||||
setState $ st{ stateAllowLinks = False }
|
||||
l <- linkText
|
||||
setState $ st{ stateAllowLinks = True }
|
||||
return l
|
||||
|
||||
isExternalLink :: String -> Bool
|
||||
isExternalLink s =
|
||||
case dropWhile (\c -> isAlphaNum c || (c `elem` ['-', '.', '+'])) s of
|
||||
(':':'/':'/':_) -> True
|
||||
_ -> False
|
||||
|
||||
isAbsolutePath :: String -> Bool
|
||||
isAbsolutePath ('.':_) = False
|
||||
isAbsolutePath s = ':' `elem` s
|
||||
|
||||
normalizeDots :: String -> String
|
||||
normalizeDots path@('.':_) =
|
||||
case dropWhile (== '.') path of
|
||||
':':_ -> path
|
||||
_ -> takeWhile (== '.') path ++ ':':dropWhile (== '.') path
|
||||
normalizeDots path = path
|
||||
|
||||
normalizeInternalPath :: String -> String
|
||||
normalizeInternalPath path =
|
||||
if isAbsolutePath path
|
||||
then ensureAbsolute normalizedPath
|
||||
else normalizedPath
|
||||
where
|
||||
normalizedPath = intercalate "/" $ dropWhile (== ".") $ splitOn ":" $ normalizeDots path
|
||||
ensureAbsolute s@('/':_) = s
|
||||
ensureAbsolute s = '/':s
|
||||
|
||||
normalizePath :: String -> String
|
||||
normalizePath path =
|
||||
if isExternalLink path
|
||||
then path
|
||||
else normalizeInternalPath path
|
||||
|
||||
urlToText :: String -> String
|
||||
urlToText url =
|
||||
if isExternalLink url
|
||||
then url
|
||||
else reverse $ takeWhile (/= ':') $ reverse url
|
||||
|
||||
-- Parse link or image
|
||||
parseLink :: PandocMonad m
|
||||
=> (String -> Maybe B.Inlines -> B.Inlines)
|
||||
-> String
|
||||
-> String
|
||||
-> DWParser m B.Inlines
|
||||
parseLink f l r = f
|
||||
<$ string l
|
||||
<*> many1Till anyChar (lookAhead (void (char '|') <|> try (void $ string r)))
|
||||
<*> optionMaybe (B.trimInlines . mconcat <$> (char '|' *> manyTill inline (try $ lookAhead $ string r)))
|
||||
<* string r
|
||||
|
||||
-- | Split Interwiki link into left and right part
|
||||
-- | Return Nothing if it is not Interwiki link
|
||||
splitInterwiki :: String -> Maybe (String, String)
|
||||
splitInterwiki path =
|
||||
case span (\c -> isAlphaNum c || c == '.') path of
|
||||
(l, '>':r) -> Just (l, r)
|
||||
_ -> Nothing
|
||||
|
||||
interwikiToUrl :: String -> String -> String
|
||||
interwikiToUrl "callto" page = "callto://" ++ page
|
||||
interwikiToUrl "doku" page = "https://www.dokuwiki.org/" ++ page
|
||||
interwikiToUrl "phpfn" page = "https://secure.php.net/" ++ page
|
||||
interwikiToUrl "tel" page = "tel:" ++ page
|
||||
interwikiToUrl "wp" page = "https://en.wikipedia.org/wiki/" ++ page
|
||||
interwikiToUrl "wpde" page = "https://de.wikipedia.org/wiki/" ++ page
|
||||
interwikiToUrl "wpes" page = "https://es.wikipedia.org/wiki/" ++ page
|
||||
interwikiToUrl "wpfr" page = "https://fr.wikipedia.org/wiki/" ++ page
|
||||
interwikiToUrl "wpjp" page = "https://jp.wikipedia.org/wiki/" ++ page
|
||||
interwikiToUrl "wppl" page = "https://pl.wikipedia.org/wiki/" ++ page
|
||||
interwikiToUrl _ page = "https://www.google.com/search?q=" ++ page ++ "&btnI=lucky"
|
||||
|
||||
linkText :: PandocMonad m => DWParser m B.Inlines
|
||||
linkText = parseLink fromRaw "[[" "]]"
|
||||
where
|
||||
fromRaw path description =
|
||||
B.link normalizedPath "" (fromMaybe (B.str defaultDescription) description)
|
||||
where
|
||||
path' = trim path
|
||||
interwiki = splitInterwiki path'
|
||||
normalizedPath =
|
||||
case interwiki of
|
||||
Nothing -> normalizePath path'
|
||||
Just (l, r) -> interwikiToUrl l r
|
||||
defaultDescription =
|
||||
case interwiki of
|
||||
Nothing -> urlToText path'
|
||||
Just (_, r) -> r
|
||||
|
||||
-- Matches strings like "100x100" (width x height) and "50" (width)
|
||||
isWidthHeightParameter :: String -> Bool
|
||||
isWidthHeightParameter s =
|
||||
case s of
|
||||
(x:xs) ->
|
||||
isDigit x && case dropWhile isDigit xs of
|
||||
('x':ys@(_:_)) -> all isDigit ys
|
||||
"" -> True
|
||||
_ -> False
|
||||
_ -> False
|
||||
|
||||
parseWidthHeight :: String -> (Maybe String, Maybe String)
|
||||
parseWidthHeight s = (width, height)
|
||||
where
|
||||
width = Just $ takeWhile isDigit s
|
||||
height =
|
||||
case dropWhile isDigit s of
|
||||
('x':xs) -> Just xs
|
||||
_ -> Nothing
|
||||
|
||||
image :: PandocMonad m => DWParser m B.Inlines
|
||||
image = parseLink fromRaw "{{" "}}"
|
||||
where
|
||||
fromRaw path description =
|
||||
if linkOnly
|
||||
then B.link normalizedPath "" (fromMaybe defaultDescription description)
|
||||
else B.imageWith ("", classes, attributes) normalizedPath "" (fromMaybe defaultDescription description)
|
||||
where
|
||||
(path', parameters) = span (/= '?') $ trim path
|
||||
normalizedPath = normalizePath path'
|
||||
leftPadding = " " `isPrefixOf` path
|
||||
rightPadding = " " `isSuffixOf` path
|
||||
classes =
|
||||
case (leftPadding, rightPadding) of
|
||||
(False, False) -> []
|
||||
(False, True) -> ["align-left"]
|
||||
(True, False) -> ["align-right"]
|
||||
(True, True) -> ["align-center"]
|
||||
parameterList = splitOn "&" $ drop 1 parameters
|
||||
linkOnly = "linkonly" `elem` parameterList
|
||||
(width, height) = maybe (Nothing, Nothing) parseWidthHeight (F.find isWidthHeightParameter parameterList)
|
||||
attributes = catMaybes [fmap ("width",) width, fmap ("height",) height]
|
||||
defaultDescription = B.str $ urlToText path'
|
||||
|
||||
-- * Block parsers
|
||||
|
||||
block :: PandocMonad m => DWParser m B.Blocks
|
||||
block = do
|
||||
res <- mempty <$ skipMany1 blankline
|
||||
<|> blockElements
|
||||
<|> para
|
||||
skipMany blankline
|
||||
trace (take 60 $ show $ B.toList res)
|
||||
return res
|
||||
|
||||
blockElements :: PandocMonad m => DWParser m B.Blocks
|
||||
blockElements = horizontalLine
|
||||
<|> header
|
||||
<|> list " "
|
||||
<|> indentedCode
|
||||
<|> quote
|
||||
<|> blockCode
|
||||
<|> blockFile
|
||||
<|> blockHtml
|
||||
<|> blockPhp
|
||||
<|> table
|
||||
|
||||
horizontalLine :: PandocMonad m => DWParser m B.Blocks
|
||||
horizontalLine = try $ B.horizontalRule <$ string "---" <* many1 (char '-') <* eol
|
||||
|
||||
header :: PandocMonad m => DWParser m B.Blocks
|
||||
header = try $ do
|
||||
guardColumnOne
|
||||
eqs <- many1 (char '=')
|
||||
let lev = length eqs
|
||||
guard $ lev < 7
|
||||
contents <- B.trimInlines . mconcat <$> manyTill inline (try $ char '=' *> many1 (char '='))
|
||||
attr <- registerHeader nullAttr contents
|
||||
return $ B.headerWith attr (7 - lev) contents
|
||||
|
||||
list :: PandocMonad m => String -> DWParser m B.Blocks
|
||||
list prefix = bulletList prefix <|> orderedList prefix
|
||||
|
||||
bulletList :: PandocMonad m => String -> DWParser m B.Blocks
|
||||
bulletList prefix = try $ B.bulletList <$> parseList prefix '*'
|
||||
|
||||
orderedList :: PandocMonad m => String -> DWParser m B.Blocks
|
||||
orderedList prefix = try $ B.orderedList <$> parseList prefix '-'
|
||||
|
||||
parseList :: PandocMonad m
|
||||
=> String
|
||||
-> Char
|
||||
-> DWParser m [B.Blocks]
|
||||
parseList prefix marker =
|
||||
many1 ((<>) <$> item <*> fmap mconcat (many continuation))
|
||||
where
|
||||
continuation = try $ list (" " ++ prefix)
|
||||
item = try $ string prefix *> char marker *> char ' ' *> itemContents
|
||||
itemContents = B.plain . mconcat <$> many1Till inline' eol
|
||||
|
||||
indentedCode :: PandocMonad m => DWParser m B.Blocks
|
||||
indentedCode = try $ B.codeBlock . unlines <$> many1 indentedLine
|
||||
where
|
||||
indentedLine = try $ string " " *> manyTill anyChar eol
|
||||
|
||||
quote :: PandocMonad m => DWParser m B.Blocks
|
||||
quote = try $ nestedQuote 0
|
||||
where
|
||||
prefix level = count level (char '>')
|
||||
contents level = nestedQuote level <|> quoteLine
|
||||
quoteLine = try $ B.plain . B.trimInlines . mconcat <$> many1Till inline' eol
|
||||
quoteContents level = (<>) <$> contents level <*> quoteContinuation level
|
||||
quoteContinuation level = mconcat <$> many (try $ prefix level *> contents level)
|
||||
nestedQuote level = B.blockQuote <$ char '>' <*> quoteContents (level + 1 :: Int)
|
||||
|
||||
blockHtml :: PandocMonad m => DWParser m B.Blocks
|
||||
blockHtml = try $ B.rawBlock "html"
|
||||
<$ string "<HTML>"
|
||||
<* optional (manyTill spaceChar eol)
|
||||
<*> manyTill anyChar (try $ string "</HTML>")
|
||||
|
||||
blockPhp :: PandocMonad m => DWParser m B.Blocks
|
||||
blockPhp = try $ B.codeBlockWith ("", ["php"], [])
|
||||
<$ string "<PHP>"
|
||||
<* optional (manyTill spaceChar eol)
|
||||
<*> manyTill anyChar (try $ string "</PHP>")
|
||||
|
||||
table :: PandocMonad m => DWParser m B.Blocks
|
||||
table = do
|
||||
firstSeparator <- lookAhead tableCellSeparator
|
||||
rows <- tableRows
|
||||
let (headerRow, body) = if firstSeparator == '^'
|
||||
then (head rows, tail rows)
|
||||
else ([], rows)
|
||||
let attrs = const (AlignDefault, 0.0) <$> transpose rows
|
||||
pure $ B.table mempty attrs headerRow body
|
||||
|
||||
tableRows :: PandocMonad m => DWParser m [[B.Blocks]]
|
||||
tableRows = many1 tableRow
|
||||
|
||||
tableRow :: PandocMonad m => DWParser m [B.Blocks]
|
||||
tableRow = many1Till tableCell tableRowEnd
|
||||
|
||||
tableRowEnd :: PandocMonad m => DWParser m Char
|
||||
tableRowEnd = try $ tableCellSeparator <* manyTill spaceChar eol
|
||||
|
||||
tableCellSeparator :: PandocMonad m => DWParser m Char
|
||||
tableCellSeparator = char '|' <|> char '^'
|
||||
|
||||
tableCell :: PandocMonad m => DWParser m B.Blocks
|
||||
tableCell = try $ B.plain . B.trimInlines . mconcat <$> (normalCell <|> headerCell)
|
||||
where
|
||||
normalCell = char '|' *> manyTill inline' (lookAhead tableCellSeparator)
|
||||
headerCell = char '^' *> manyTill inline' (lookAhead tableCellSeparator)
|
||||
|
||||
blockCode :: PandocMonad m => DWParser m B.Blocks
|
||||
blockCode = codeTag B.codeBlockWith "code"
|
||||
|
||||
blockFile :: PandocMonad m => DWParser m B.Blocks
|
||||
blockFile = codeTag B.codeBlockWith "file"
|
||||
|
||||
para :: PandocMonad m => DWParser m B.Blocks
|
||||
para = result . mconcat <$> many1Till inline endOfParaElement
|
||||
where
|
||||
endOfParaElement = lookAhead $ endOfInput <|> endOfPara <|> newBlockElement
|
||||
endOfInput = try $ skipMany blankline >> skipSpaces >> eof
|
||||
endOfPara = try $ blankline >> skipMany1 blankline
|
||||
newBlockElement = try $ blankline >> void blockElements
|
||||
result content = if F.all (==Space) content
|
||||
then mempty
|
||||
else B.para $ B.trimInlines content
|
315
test/Tests/Readers/DokuWiki.hs
Normal file
315
test/Tests/Readers/DokuWiki.hs
Normal file
|
@ -0,0 +1,315 @@
|
|||
{-# LANGUAGE NoImplicitPrelude #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE ScopedTypeVariables #-}
|
||||
module Tests.Readers.DokuWiki (tests) where
|
||||
|
||||
import Prelude
|
||||
import Data.Text (Text)
|
||||
import qualified Data.Text as T
|
||||
import Test.Tasty
|
||||
import Tests.Helpers
|
||||
import Text.Pandoc
|
||||
import Text.Pandoc.Arbitrary ()
|
||||
import Text.Pandoc.Builder
|
||||
import Text.Pandoc.Shared (underlineSpan)
|
||||
|
||||
dokuwiki :: Text -> Pandoc
|
||||
dokuwiki = purely $ readDokuWiki def{ readerStandalone = True }
|
||||
|
||||
infix 4 =:
|
||||
(=:) :: ToString c
|
||||
=> String -> (Text, c) -> TestTree
|
||||
(=:) = test dokuwiki
|
||||
|
||||
tests :: [TestTree]
|
||||
tests = [ testGroup "inlines"
|
||||
[ "Bold" =:
|
||||
"**bold**" =?>
|
||||
para (strong "bold")
|
||||
, "Italic" =:
|
||||
"//italic//" =?>
|
||||
para (emph "italic")
|
||||
, "Underlined" =:
|
||||
"__underlined__" =?>
|
||||
para (underlineSpan "underlined")
|
||||
, "Monospaced" =:
|
||||
"''monospaced''" =?>
|
||||
para (code "monospaced")
|
||||
, "Combined" =:
|
||||
"**__//''combine''//__**" =?>
|
||||
para (strong $ underlineSpan $ emph $ code "combine")
|
||||
, "Nowiki" =:
|
||||
T.unlines [ "<nowiki>"
|
||||
, "This is some text which contains addresses like this: http://www.splitbrain.org and **formatting**, but nothing is done with it."
|
||||
, "</nowiki>"
|
||||
] =?>
|
||||
para "This is some text which contains addresses like this: http://www.splitbrain.org and **formatting**, but nothing is done with it."
|
||||
, "Percent" =:
|
||||
"The same is true for %%//__this__ text// with a smiley ;-)%%." =?>
|
||||
para "The same is true for //__this__ text// with a smiley ;-)."
|
||||
, "Subscript" =:
|
||||
"<sub>subscript</sub>" =?>
|
||||
para (subscript "subscript")
|
||||
, "Superscript" =:
|
||||
"<sup>superscript</sup>" =?>
|
||||
para (superscript "superscript")
|
||||
, "Deleted" =:
|
||||
"<del>deleted</del>" =?>
|
||||
para (strikeout "deleted")
|
||||
, "Inline code" =:
|
||||
"foo <code java>public static void main</code> bar" =?>
|
||||
para (text "foo " <> codeWith ("", ["java"], []) "public static void main" <> text " bar")
|
||||
, "Inline file" =:
|
||||
"foo <file></code></file> bar" =?>
|
||||
para (text "foo " <> code "</code>" <> text " bar")
|
||||
, "Inline HTML" =:
|
||||
"<html>\nThis is some <span style=\"color:red;font-size:150%;\">inline HTML</span>\n</html>" =?>
|
||||
para (rawInline "html" "\nThis is some <span style=\"color:red;font-size:150%;\">inline HTML</span>\n")
|
||||
, "Inline PHP" =:
|
||||
"<php>echo '<p>Hello World</p>';</php>" =?>
|
||||
para (codeWith ("", ["php"], []) "echo '<p>Hello World</p>';")
|
||||
, "Linebreak" =:
|
||||
T.unlines [ "This is some text with some linebreaks\\\\ Note that the"
|
||||
, "two backslashes are only recognized at the end of a line\\\\"
|
||||
, "or followed by\\\\ a whitespace \\\\this happens without it."
|
||||
] =?>
|
||||
para ("This is some text with some linebreaks" <> linebreak <> "Note that the\n" <>
|
||||
"two backslashes are only recognized at the end of a line" <> linebreak <>
|
||||
"or followed by" <> linebreak <> "a whitespace \\\\this happens without it.")
|
||||
, testGroup "External links"
|
||||
[ "Autolink" =:
|
||||
"http://www.google.com" =?>
|
||||
para (link "http://www.google.com" "" (str "http://www.google.com"))
|
||||
, "Link without description" =:
|
||||
"[[https://example.com]]" =?>
|
||||
para (link "https://example.com" "" (str "https://example.com"))
|
||||
, "Link with description" =:
|
||||
"[[http://www.google.com|This Link points to google]]" =?>
|
||||
para (link "http://www.google.com" "" (text "This Link points to google"))
|
||||
, "Trim whitespace around link and description" =:
|
||||
"[[ http://www.google.com | This Link points to google ]]" =?>
|
||||
para (link "http://www.google.com" "" (text "This Link points to google"))
|
||||
, "Email address" =:
|
||||
"<andi@splitbrain.org>" =?>
|
||||
para (link "mailto:andi@splitbrain.org" "" (str "andi@splitbrain.org"))
|
||||
]
|
||||
, testGroup "Internal links"
|
||||
[ "Current namespace" =:
|
||||
"[[example]]" =?>
|
||||
para (link "example" "" (str "example"))
|
||||
, "Current namespace starting with dot" =:
|
||||
"[[.example]]" =?>
|
||||
para (link "example" "" (str ".example"))
|
||||
, "Current namespace starting with dot and colon" =:
|
||||
"[[.:example]]" =?>
|
||||
para (link "example" "" (str "example"))
|
||||
, "Root namespace" =:
|
||||
"[[:example]]" =?>
|
||||
para (link "/example" "" (str "example"))
|
||||
, "Parent namespace" =:
|
||||
"[[..example]]" =?>
|
||||
para (link "../example" "" (str "..example"))
|
||||
, "Parent namespace with colon" =:
|
||||
"[[..:example]]" =?>
|
||||
para (link "../example" "" (str "example"))
|
||||
, "Beneath the root namespace" =:
|
||||
"[[wiki:example]]" =?>
|
||||
para (link "/wiki/example" "" (str "example"))
|
||||
, "Explicitly beneath the root namespace" =:
|
||||
"[[:wiki:example]]" =?>
|
||||
para (link "/wiki/example" "" (str "example"))
|
||||
]
|
||||
, testGroup "Interwiki links"
|
||||
[ "Interwiki without description" =:
|
||||
"[[doku>DokuWiki]]" =?>
|
||||
para (link "https://www.dokuwiki.org/DokuWiki" "" (str "DokuWiki"))
|
||||
, "Interwiki link with description" =:
|
||||
"[[doku>toolbar|quickbuttons]]" =?>
|
||||
para (link "https://www.dokuwiki.org/toolbar" "" (str "quickbuttons"))
|
||||
]
|
||||
, "Footnote" =:
|
||||
"((This is a footnote))" =?>
|
||||
para (note (para "This is a footnote"))
|
||||
, testGroup "Images"
|
||||
[ "Image" =:
|
||||
"{{image.jpg}}" =?>
|
||||
para (image "image.jpg" "" (str "image.jpg"))
|
||||
, "Image with caption" =:
|
||||
"{{image.png|This is the caption}}" =?>
|
||||
para (image "image.png" "" "This is the caption")
|
||||
, "Image with } in caption" =:
|
||||
"{{image.png|There is an } in the caption}}" =?>
|
||||
para (image "image.png" "" "There is an } in the caption")
|
||||
, "Wiki namespace starting with dot" =:
|
||||
"{{.wiki:image.jpg}}" =?>
|
||||
para (image "wiki/image.jpg" "" (str "image.jpg"))
|
||||
, "Left aligned image" =:
|
||||
"{{wiki:dokuwiki-128.png }}" =?>
|
||||
para (imageWith ("", ["align-left"], []) "/wiki/dokuwiki-128.png" "" (str "dokuwiki-128.png"))
|
||||
, "Right aligned image" =:
|
||||
"{{ wiki:dokuwiki-128.png}}" =?>
|
||||
para (imageWith ("", ["align-right"], []) "/wiki/dokuwiki-128.png" "" (str "dokuwiki-128.png"))
|
||||
, "Centered image" =:
|
||||
"{{ wiki:dokuwiki-128.png }}" =?>
|
||||
para (imageWith ("", ["align-center"], []) "/wiki/dokuwiki-128.png" "" (str "dokuwiki-128.png"))
|
||||
, "Image with width" =:
|
||||
"{{wiki:dokuwiki-128.png?50}}" =?>
|
||||
para (imageWith ("", [], [("width", "50")]) "/wiki/dokuwiki-128.png" "" (str "dokuwiki-128.png"))
|
||||
, "Image with width and height" =:
|
||||
"{{wiki:dokuwiki-128.png?nocache&50x100}}" =?>
|
||||
para (imageWith ("", [], [("width", "50"), ("height", "100")]) "/wiki/dokuwiki-128.png" "" (str "dokuwiki-128.png"))
|
||||
, "Linkonly" =:
|
||||
"{{wiki:dokuwiki-128.png?linkonly}}" =?>
|
||||
para (link "/wiki/dokuwiki-128.png" "" (str "dokuwiki-128.png"))
|
||||
]
|
||||
, "Ignore ~~NOTOC~~" =:
|
||||
"Here is a ~~NOTOC~~ macro" =?>
|
||||
para "Here is a macro"
|
||||
, "Ignore ~~NOCACHE~~" =:
|
||||
"Here is a ~~NOCACHE~~ macro" =?>
|
||||
para "Here is a macro"
|
||||
]
|
||||
, testGroup "Sectioning"
|
||||
[ "Headline level 1" =:
|
||||
"====== Headline Level 1 ======" =?>
|
||||
header 1 "Headline Level 1"
|
||||
, "Headline level 2" =:
|
||||
"===== Headline Level 2 =====" =?>
|
||||
header 2 "Headline Level 2"
|
||||
, "Headline level 3" =:
|
||||
"==== Headline Level 3 ====" =?>
|
||||
header 3 "Headline Level 3"
|
||||
, "Headline level 4" =:
|
||||
"=== Headline Level 4 ===" =?>
|
||||
header 4 "Headline Level 4"
|
||||
, "Headline level 5" =:
|
||||
"== Headline Level 5 ==" =?>
|
||||
header 5 "Headline Level 5"
|
||||
, "Only two closing = are required" =:
|
||||
"====== Headline Level 1 ==" =?>
|
||||
header 1 "Headline Level 1"
|
||||
, "One closing = is not enough" =:
|
||||
"====== Headline Level 1 =" =?>
|
||||
para "====== Headline Level 1 ="
|
||||
, "One closing = is not enough" =:
|
||||
"== Headline with = sign ==" =?>
|
||||
header 5 "Headline with = sign"
|
||||
]
|
||||
, "Horizontal line" =:
|
||||
"----" =?>
|
||||
horizontalRule
|
||||
, testGroup "Lists"
|
||||
[ "Unordered list" =:
|
||||
T.unlines [ " * This is a list"
|
||||
, " * The second item"
|
||||
, " * You may have different levels"
|
||||
, " * Another item"
|
||||
] =?>
|
||||
bulletList [ plain "This is a list"
|
||||
, plain "The second item" <>
|
||||
bulletList [ plain "You may have different levels" ]
|
||||
, plain "Another item"
|
||||
]
|
||||
, "Ordered list" =:
|
||||
T.unlines [ " - The same list but ordered"
|
||||
, " - Another item"
|
||||
, " - Just use indention for deeper levels"
|
||||
, " - That's it"
|
||||
] =?>
|
||||
orderedList [ plain "The same list but ordered"
|
||||
, plain "Another item" <>
|
||||
orderedList [ plain "Just use indention for deeper levels" ]
|
||||
, plain "That's it"
|
||||
]
|
||||
, "Multiline list items" =: -- https://www.dokuwiki.org/faq:lists
|
||||
T.unlines [ " - first item"
|
||||
, " - second item with linebreak\\\\ second line"
|
||||
, " - third item with code: <code>"
|
||||
, "some code"
|
||||
, "comes here"
|
||||
, "</code>"
|
||||
, " - fourth item"
|
||||
] =?>
|
||||
orderedList [ plain "first item"
|
||||
, plain ("second item with linebreak" <> linebreak <> " second line")
|
||||
, plain ("third item with code: " <> code "some code\ncomes here\n")
|
||||
, plain "fourth item"
|
||||
]
|
||||
]
|
||||
, "Block HTML" =:
|
||||
T.unlines [ "<HTML>"
|
||||
, "<p style=\"border:2px dashed red;\">And this is some block HTML</p>"
|
||||
, "</HTML>"
|
||||
] =?>
|
||||
rawBlock "html" "<p style=\"border:2px dashed red;\">And this is some block HTML</p>\n"
|
||||
, "Block PHP" =:
|
||||
T.unlines [ "<PHP>"
|
||||
, "echo '<p>Hello World</p>';"
|
||||
, "</PHP>"
|
||||
] =?>
|
||||
codeBlockWith ("", ["php"], []) "echo '<p>Hello World</p>';\n"
|
||||
, "Quote" =:
|
||||
T.unlines [ "> foo"
|
||||
, ">no space is required after >"
|
||||
, "> bar"
|
||||
, ">> baz"
|
||||
, "> bat"
|
||||
] =?>
|
||||
blockQuote (plain "foo" <>
|
||||
plain "no space is required after >" <>
|
||||
plain "bar" <>
|
||||
blockQuote (plain "baz") <>
|
||||
plain "bat")
|
||||
, "Code block" =:
|
||||
T.unlines [ "<code>"
|
||||
, "foo bar baz"
|
||||
, "</code>"
|
||||
] =?>
|
||||
codeBlock "foo bar baz\n"
|
||||
, "Java code block" =:
|
||||
T.unlines [ "<code java>"
|
||||
, "public static void main"
|
||||
, "</code>"
|
||||
] =?>
|
||||
codeBlockWith ("", ["java"], []) "public static void main\n"
|
||||
, "File with filename and no language" =:
|
||||
T.unlines [ "<file - foo.bar>"
|
||||
, "file contents"
|
||||
, "</file>"
|
||||
] =?>
|
||||
codeBlock "file contents\n"
|
||||
, "Table" =:
|
||||
T.unlines [ "| foo | bar |"
|
||||
, "| bat | baz |"
|
||||
] =?>
|
||||
table mempty [(AlignDefault, 0.0), (AlignDefault, 0.0)]
|
||||
[]
|
||||
[[plain "foo", plain "bar"]
|
||||
,[plain "bat", plain "baz"]]
|
||||
, "Table with header" =:
|
||||
T.unlines [ "^ foo ^ bar ^"
|
||||
, "| bat | baz |"
|
||||
] =?>
|
||||
table mempty [(AlignDefault, 0.0), (AlignDefault, 0.0)]
|
||||
[plain "foo", plain "bar"]
|
||||
[[plain "bat", plain "baz"]]
|
||||
, "Table with colspan" =:
|
||||
T.unlines [ "^ 0,0 ^ 0,1 ^ 0,2 ^"
|
||||
, "| 1,0 | 1,1 ||"
|
||||
, "| 2,0 | 2,1 | 2,2 |"
|
||||
] =?>
|
||||
table mempty [(AlignDefault, 0.0), (AlignDefault, 0.0), (AlignDefault, 0.0)]
|
||||
[plain "0,0", plain "0,1", plain "0,2"]
|
||||
[[plain "1,0", plain "1,1", mempty]
|
||||
,[plain "2,0", plain "2,1", plain "2,2"]
|
||||
]
|
||||
, "Indented code block" =:
|
||||
T.unlines [ "foo"
|
||||
, " bar"
|
||||
, " bat"
|
||||
, "baz"
|
||||
] =?>
|
||||
para "foo" <>
|
||||
codeBlock "bar\n bat\n" <>
|
||||
para "baz"
|
||||
]
|
|
@ -11,6 +11,7 @@ import qualified Tests.Lua
|
|||
import qualified Tests.Old
|
||||
import qualified Tests.Readers.Creole
|
||||
import qualified Tests.Readers.Docx
|
||||
import qualified Tests.Readers.DokuWiki
|
||||
import qualified Tests.Readers.EPUB
|
||||
import qualified Tests.Readers.FB2
|
||||
import qualified Tests.Readers.HTML
|
||||
|
@ -80,6 +81,7 @@ tests = testGroup "pandoc tests" [ Tests.Command.tests
|
|||
, testGroup "Creole" Tests.Readers.Creole.tests
|
||||
, testGroup "Man" Tests.Readers.Man.tests
|
||||
, testGroup "FB2" Tests.Readers.FB2.tests
|
||||
, testGroup "DokuWiki" Tests.Readers.DokuWiki.tests
|
||||
]
|
||||
, testGroup "Lua filters" Tests.Lua.tests
|
||||
]
|
||||
|
|
Loading…
Reference in a new issue