From c3780992ab8e13944b69e5eab0e64884363ef1ad Mon Sep 17 00:00:00 2001 From: Alexander Sulfrian <alexander.sulfrian@fu-berlin.de> Date: Mon, 7 Jul 2014 20:21:38 +0200 Subject: [PATCH 1/3] TWiki Reader: add new new twiki reader --- pandoc.cabal | 1 + src/Text/Pandoc.hs | 3 + src/Text/Pandoc/Readers/TWiki.hs | 526 +++++++++++++++++++++++++++++++ 3 files changed, 530 insertions(+) create mode 100644 src/Text/Pandoc/Readers/TWiki.hs diff --git a/pandoc.cabal b/pandoc.cabal index b9b996c1f..e214e9edd 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -289,6 +289,7 @@ Library Text.Pandoc.Readers.Textile, Text.Pandoc.Readers.Native, Text.Pandoc.Readers.Haddock, + Text.Pandoc.Readers.TWiki, Text.Pandoc.Readers.Docx, Text.Pandoc.Readers.EPUB, Text.Pandoc.Writers.Native, diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index fd849316b..1f22122ac 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -77,6 +77,7 @@ module Text.Pandoc , readHaddock , readNative , readJSON + , readTWiki , readTxt2Tags , readTxt2TagsNoMacros , readEPUB @@ -133,6 +134,7 @@ import Text.Pandoc.Readers.HTML import Text.Pandoc.Readers.Textile import Text.Pandoc.Readers.Native import Text.Pandoc.Readers.Haddock +import Text.Pandoc.Readers.TWiki import Text.Pandoc.Readers.Docx import Text.Pandoc.Readers.Txt2Tags import Text.Pandoc.Readers.EPUB @@ -233,6 +235,7 @@ readers = [ ("native" , StringReader $ \_ s -> return $ readNative s) ,("html" , mkStringReader readHtml) ,("latex" , mkStringReader readLaTeX) ,("haddock" , mkStringReader readHaddock) + ,("twiki" , mkStringReader readTWiki) ,("docx" , mkBSReader readDocx) ,("t2t" , mkStringReader readTxt2TagsNoMacros) ,("epub" , mkBSReader readEPUB) diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs new file mode 100644 index 000000000..c2325c0ea --- /dev/null +++ b/src/Text/Pandoc/Readers/TWiki.hs @@ -0,0 +1,526 @@ +{-# LANGUAGE RelaxedPolyRec, FlexibleInstances, TypeSynonymInstances #-} +-- RelaxedPolyRec needed for inlinesBetween on GHC < 7 +{- + Copyright (C) 2014 Alexander Sulfrian <alexander.sulfrian@fu-berlin.de> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.TWiki + Copyright : Copyright (C) 2014 Alexander Sulfrian + License : GNU GPL, version 2 or above + + Maintainer : Alexander Sulfrian <alexander.sulfrian@fu-berlin.de> + Stability : alpha + Portability : portable + +Conversion of twiki text to 'Pandoc' document. +-} +module Text.Pandoc.Readers.TWiki ( readTWiki + , readTWikiWithWarnings + ) where + +import Text.Pandoc.Definition +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Options +import Text.Pandoc.Parsing hiding (enclosed, macro, nested) +import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag) +import Data.Monoid (Monoid, mconcat, mempty) +import Control.Applicative ((<$>), (<*), (*>), (<$)) +import Control.Monad +import Text.Printf (printf) +import Debug.Trace (trace) +import Text.Pandoc.XML (fromEntities) +import Data.Maybe (fromMaybe) +import Text.HTML.TagSoup +import Data.Char (isAlphaNum) +import qualified Data.Foldable as F + +-- | Read twiki from an input string and return a Pandoc document. +readTWiki :: ReaderOptions -- ^ Reader options + -> String -- ^ String to parse (assuming @'\n'@ line endings) + -> Pandoc +readTWiki opts s = + (readWith parseTWiki) def{ stateOptions = opts } (s ++ "\n\n") + +readTWikiWithWarnings :: ReaderOptions -- ^ Reader options + -> String -- ^ String to parse (assuming @'\n'@ line endings) + -> (Pandoc, [String]) +readTWikiWithWarnings opts s = + (readWith parseTWikiWithWarnings) def{ stateOptions = opts } (s ++ "\n\n") + where parseTWikiWithWarnings = do + doc <- parseTWiki + warnings <- stateWarnings <$> getState + return (doc, warnings) + +type TWParser = Parser [Char] ParserState + +-- +-- utility functions +-- + +tryMsg :: String -> TWParser a -> TWParser a +tryMsg msg p = try p <?> msg + +skip :: TWParser a -> TWParser () +skip parser = parser >> return () + +nested :: TWParser a -> TWParser a +nested p = do + nestlevel <- stateMaxNestingLevel <$> getState + guard $ nestlevel > 0 + updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 } + res <- p + updateState $ \st -> st{ stateMaxNestingLevel = nestlevel } + return res + +htmlElement :: String -> TWParser (Attr, String) +htmlElement tag = tryMsg tag $ do + (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag []) + content <- manyTill anyChar (endtag <|> endofinput) + return (htmlAttrToPandoc attr, trim content) + where + endtag = skip $ htmlTag (~== TagClose tag) + endofinput = lookAhead $ try $ skipMany blankline >> skipSpaces >> eof + trim = dropWhile (=='\n') . reverse . dropWhile (=='\n') . reverse + +htmlAttrToPandoc :: [Attribute String] -> Attr +htmlAttrToPandoc attrs = (ident, classes, keyvals) + where + ident = fromMaybe "" $ lookup "id" attrs + classes = maybe [] words $ lookup "class" attrs + keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] + +parseHtmlContentWithAttrs :: String -> TWParser a -> TWParser (Attr, [a]) +parseHtmlContentWithAttrs tag parser = do + (attr, content) <- htmlElement tag + parsedContent <- try $ parseContent content + return (attr, parsedContent) + where + parseContent = parseFromString $ nested $ manyTill parser endOfContent + endOfContent = try $ skipMany blankline >> skipSpaces >> eof + +parseHtmlContent :: String -> TWParser a -> TWParser [a] +parseHtmlContent tag p = parseHtmlContentWithAttrs tag p >>= return . snd + +-- +-- main parser +-- + +parseTWiki :: TWParser Pandoc +parseTWiki = do + bs <- mconcat <$> many block + spaces + eof + return $ B.doc bs + + +-- +-- block parsers +-- + +block :: TWParser B.Blocks +block = do + tr <- getOption readerTrace + pos <- getPosition + res <- mempty <$ skipMany1 blankline + <|> blockElements + <|> para + skipMany blankline + when tr $ + trace (printf "line %d: %s" (sourceLine pos) + (take 60 $ show $ B.toList res)) (return ()) + return res + +blockElements :: TWParser B.Blocks +blockElements = choice [ separator + , header + , verbatim + , literal + , list "" + , table + , blockQuote + , noautolink + ] + +separator :: TWParser B.Blocks +separator = tryMsg "separator" $ string "---" >> newline >> return B.horizontalRule + +header :: TWParser B.Blocks +header = tryMsg "header" $ do + string "---" + level <- many1 (char '+') >>= return . length + guard $ level <= 6 + classes <- option [] $ string "!!" >> return ["unnumbered"] + skipSpaces + content <- B.trimInlines . mconcat <$> manyTill inline newline + attr <- registerHeader ("", classes, []) content + return $ B.headerWith attr level $ content + +verbatim :: TWParser B.Blocks +verbatim = (htmlElement "verbatim" <|> htmlElement "pre") + >>= return . (uncurry B.codeBlockWith) + +literal :: TWParser B.Blocks +literal = htmlElement "literal" >>= return . rawBlock + where + format (_, _, kvs) = fromMaybe "html" $ lookup "format" kvs + rawBlock (attrs, content) = B.rawBlock (format attrs) content + +list :: String -> TWParser B.Blocks +list prefix = choice [ bulletList prefix + , orderedList prefix + , definitionList prefix] + +definitionList :: String -> TWParser B.Blocks +definitionList prefix = tryMsg "definitionList" $ do + indent <- lookAhead $ string prefix *> (many1 $ string " ") <* string "$ " + elements <- many $ parseDefinitionListItem (prefix ++ concat indent) + return $ B.definitionList elements + where + parseDefinitionListItem :: String -> TWParser (B.Inlines, [B.Blocks]) + parseDefinitionListItem indent = do + string (indent ++ "$ ") >> skipSpaces + term <- many1Till inline $ string ": " + line <- listItemLine indent $ string "$ " + return $ (mconcat term, [line]) + +bulletList :: String -> TWParser B.Blocks +bulletList prefix = tryMsg "bulletList" $ + parseList prefix (char '*') (char ' ') + +orderedList :: String -> TWParser B.Blocks +orderedList prefix = tryMsg "orderedList" $ + parseList prefix (oneOf "1iIaA") (string ". ") + +parseList :: Show a => String -> TWParser Char -> TWParser a -> TWParser B.Blocks +parseList prefix marker delim = do + (indent, style) <- lookAhead $ string prefix *> listStyle <* delim + blocks <- many $ parseListItem (prefix ++ indent) (char style <* delim) + return $ case style of + '1' -> B.orderedListWith (1, DefaultStyle, DefaultDelim) blocks + 'i' -> B.orderedListWith (1, LowerRoman, DefaultDelim) blocks + 'I' -> B.orderedListWith (1, UpperRoman, DefaultDelim) blocks + 'a' -> B.orderedListWith (1, LowerAlpha, DefaultDelim) blocks + 'A' -> B.orderedListWith (1, UpperAlpha, DefaultDelim) blocks + _ -> B.bulletList blocks + where + listStyle = do + indent <- many1 $ string " " + style <- marker + return (concat indent, style) + +parseListItem :: Show a => String -> TWParser a -> TWParser B.Blocks +parseListItem prefix marker = string prefix >> marker >> listItemLine prefix marker + +listItemLine :: Show a => String -> TWParser a -> TWParser B.Blocks +listItemLine prefix marker = lineContent >>= parseContent >>= return . mconcat + where + lineContent = do + content <- anyLine + continuation <- optionMaybe listContinuation + return $ filterSpaces content ++ "\n" ++ (maybe "" (" " ++) continuation) + filterSpaces = reverse . dropWhile (== ' ') . reverse + listContinuation = notFollowedBy (string prefix >> marker) >> + string " " >> lineContent + parseContent = parseFromString $ many1 $ nestedList <|> parseInline + parseInline = many1Till inline (lastNewline <|> newlineBeforeNestedList) >>= + return . B.plain . mconcat + nestedList = list prefix + lastNewline = try $ char '\n' <* eof + newlineBeforeNestedList = try $ char '\n' <* lookAhead nestedList + +table :: TWParser B.Blocks +table = try $ do + tableHead <- optionMaybe $ many1Till tableParseHeader newline >>= return . unzip + rows <- many1 tableParseRow + return $ buildTable mempty rows $ fromMaybe (align rows, columns rows) tableHead + where + buildTable caption rows (aligns, heads) + = B.table caption aligns heads rows + align rows = replicate (columCount rows) (AlignDefault, 0) + columns rows = replicate (columCount rows) mempty + columCount rows = length $ head rows + +tableParseHeader :: TWParser ((Alignment, Double), B.Blocks) +tableParseHeader = try $ do + char '|' + leftSpaces <- many spaceChar >>= return . length + char '*' + content <- tableColumnContent (char '*' >> skipSpaces >> char '|') + char '*' + rightSpaces <- many spaceChar >>= return . length + optional tableEndOfRow + return (tableAlign leftSpaces rightSpaces, content) + where + tableAlign left right + | left >= 2 && left == right = (AlignCenter, 0) + | left > right = (AlignRight, 0) + | otherwise = (AlignLeft, 0) + +tableParseRow :: TWParser [B.Blocks] +tableParseRow = many1Till tableParseColumn newline + +tableParseColumn :: TWParser B.Blocks +tableParseColumn = char '|' *> skipSpaces *> + tableColumnContent (skipSpaces >> char '|') + <* skipSpaces <* optional tableEndOfRow + +tableEndOfRow :: TWParser Char +tableEndOfRow = lookAhead (try $ char '|' >> char '\n') >> char '|' + +tableColumnContent :: Show a => TWParser a -> TWParser B.Blocks +tableColumnContent end = manyTill content (lookAhead $ try end) >>= return . B.plain . mconcat + where + content = continuation <|> inline + continuation = try $ char '\\' >> newline >> return mempty + +blockQuote :: TWParser B.Blocks +blockQuote = parseHtmlContent "blockquote" block >>= return . B.blockQuote . mconcat + +noautolink :: TWParser B.Blocks +noautolink = do + (_, content) <- htmlElement "noautolink" + st <- getState + setState $ st{ stateAllowLinks = False } + blocks <- try $ parseContent content + setState $ st{ stateAllowLinks = True } + return $ mconcat blocks + where + parseContent = parseFromString $ many $ block + +para :: TWParser B.Blocks +para = many1Till inline endOfParaElement >>= return . result . mconcat + where + endOfParaElement = lookAhead $ endOfInput <|> endOfPara <|> newBlockElement + endOfInput = try $ skipMany blankline >> skipSpaces >> eof + endOfPara = try $ blankline >> skipMany1 blankline + newBlockElement = try $ blankline >> skip blockElements + result content = if F.all (==Space) content + then mempty + else B.para $ B.trimInlines content + + +-- +-- inline parsers +-- + +inline :: TWParser B.Inlines +inline = choice [ whitespace + , br + , macro + , strong + , strongHtml + , strongAndEmph + , emph + , emphHtml + , boldCode + , smart + , link + , htmlComment + , code + , codeHtml + , nop + , autoLink + , str + , symbol + ] <?> "inline" + +whitespace :: TWParser B.Inlines +whitespace = (lb <|> regsp) >>= return + where lb = try $ skipMany spaceChar >> linebreak >> return B.space + regsp = try $ skipMany1 spaceChar >> return B.space + +br :: TWParser B.Inlines +br = try $ string "%BR%" >> return B.linebreak + +linebreak :: TWParser B.Inlines +linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline) + where lastNewline = eof >> return mempty + innerNewline = return B.space + +between :: (Show b, Monoid c) => TWParser a -> TWParser b -> (TWParser b -> TWParser c) -> TWParser c +between start end p = + mconcat <$> try (start >> notFollowedBy whitespace >> many1Till (p end) end) + +enclosed :: (Show a, Monoid b) => TWParser a -> (TWParser a -> TWParser b) -> TWParser b +enclosed sep p = between sep (try $ sep <* endMarker) p + where + endMarker = lookAhead $ skip endSpace <|> skip (oneOf ".,!?:)|") <|> eof + endSpace = (spaceChar <|> newline) >> return B.space + +macro :: TWParser B.Inlines +macro = macroWithParameters <|> withoutParameters + where + withoutParameters = enclosed (char '%') (\_ -> macroName) >>= return . emptySpan + emptySpan name = buildSpan name [] mempty + +macroWithParameters :: TWParser B.Inlines +macroWithParameters = try $ do + char '%' + name <- macroName + (content, kvs) <- attributes + char '%' + return $ buildSpan name kvs $ B.str content + +buildSpan :: String -> [(String, String)] -> B.Inlines -> B.Inlines +buildSpan className kvs = B.spanWith attrs + where + attrs = ("", ["twiki-macro", className] ++ additionalClasses, kvsWithoutClasses) + additionalClasses = maybe [] words $ lookup "class" kvs + kvsWithoutClasses = [(k,v) | (k,v) <- kvs, k /= "class"] + +macroName :: TWParser String +macroName = do + first <- letter + rest <- many $ alphaNum <|> char '_' + return (first:rest) + +attributes :: TWParser (String, [(String, String)]) +attributes = char '{' *> spnl *> many (attribute <* spnl) <* char '}' >>= + return . foldr (either mkContent mkKvs) ([], []) + where + spnl = skipMany (spaceChar <|> newline) + mkContent c ([], kvs) = (c, kvs) + mkContent c (rest, kvs) = (c ++ " " ++ rest, kvs) + mkKvs kv (cont, rest) = (cont, (kv : rest)) + +attribute :: TWParser (Either String (String, String)) +attribute = withKey <|> withoutKey + where + withKey = try $ do + key <- macroName + char '=' + parseValue False >>= return . (curry Right key) + withoutKey = try $ parseValue True >>= return . Left + parseValue allowSpaces = (withQuotes <|> withoutQuotes allowSpaces) >>= return . fromEntities + withQuotes = between (char '"') (char '"') (\_ -> count 1 $ noneOf ['"']) + withoutQuotes allowSpaces + | allowSpaces == True = many1 $ noneOf "}" + | otherwise = many1 $ noneOf " }" + +nestedInlines :: Show a => TWParser a -> TWParser B.Inlines +nestedInlines end = innerSpace <|> nestedInline + where + innerSpace = try $ whitespace <* (notFollowedBy end) + nestedInline = notFollowedBy whitespace >> nested inline + +strong :: TWParser B.Inlines +strong = try $ enclosed (char '*') nestedInlines >>= return . B.strong + +strongHtml :: TWParser B.Inlines +strongHtml = (parseHtmlContent "strong" inline <|> parseHtmlContent "b" inline) + >>= return . B.strong . mconcat + +strongAndEmph :: TWParser B.Inlines +strongAndEmph = try $ enclosed (string "__") nestedInlines >>= return . B.emph . B.strong + +emph :: TWParser B.Inlines +emph = try $ enclosed (char '_') nestedInlines >>= return . B.emph + +emphHtml :: TWParser B.Inlines +emphHtml = (parseHtmlContent "em" inline <|> parseHtmlContent "i" inline) + >>= return . B.emph . mconcat + +nestedString :: Show a => TWParser a -> TWParser String +nestedString end = innerSpace <|> (count 1 nonspaceChar) + where + innerSpace = try $ many1 spaceChar <* notFollowedBy end + +boldCode :: TWParser B.Inlines +boldCode = try $ enclosed (string "==") nestedString >>= return . B.strong . B.code . fromEntities + +htmlComment :: TWParser B.Inlines +htmlComment = htmlTag isCommentTag >> return mempty + +code :: TWParser B.Inlines +code = try $ enclosed (char '=') nestedString >>= return . B.code . fromEntities + +codeHtml :: TWParser B.Inlines +codeHtml = do + (attrs, content) <- parseHtmlContentWithAttrs "code" anyChar + return $ B.codeWith attrs $ fromEntities content + +autoLink :: TWParser B.Inlines +autoLink = try $ do + state <- getState + guard $ stateAllowLinks state + (text, url) <- parseLink + guard $ checkLink (head $ reverse url) + return $ makeLink (text, url) + where + parseLink = notFollowedBy nop >> (uri <|> emailAddress) + makeLink (text, url) = B.link url "" $ B.str text + checkLink c + | c == '/' = True + | otherwise = isAlphaNum c + +str :: TWParser B.Inlines +str = (many1 alphaNum <|> count 1 characterReference) >>= return . B.str + +nop :: TWParser B.Inlines +nop = try $ (skip exclamation <|> skip nopTag) >> followContent + where + exclamation = char '!' + nopTag = stringAnyCase "<nop>" + followContent = many1 nonspaceChar >>= return . B.str . fromEntities + +symbol :: TWParser B.Inlines +symbol = count 1 nonspaceChar >>= return . B.str + +smart :: TWParser B.Inlines +smart = do + getOption readerSmart >>= guard + doubleQuoted <|> singleQuoted <|> + choice [ apostrophe + , dash + , ellipses + ] + +singleQuoted :: TWParser B.Inlines +singleQuoted = try $ do + singleQuoteStart + withQuoteContext InSingleQuote $ + many1Till inline singleQuoteEnd >>= + (return . B.singleQuoted . B.trimInlines . mconcat) + +doubleQuoted :: TWParser B.Inlines +doubleQuoted = try $ do + doubleQuoteStart + contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) + (withQuoteContext InDoubleQuote $ doubleQuoteEnd >> + return (B.doubleQuoted $ B.trimInlines contents)) + <|> (return $ (B.str "\8220") B.<> contents) + +link :: TWParser B.Inlines +link = try $ do + st <- getState + guard $ stateAllowLinks st + setState $ st{ stateAllowLinks = False } + (url, title, content) <- linkText + setState $ st{ stateAllowLinks = True } + return $ B.link url title content + +linkText :: TWParser (String, String, B.Inlines) +linkText = do + string "[[" + url <- many1Till anyChar (char ']') + content <- option [B.str url] linkContent + char ']' + return (url, "", mconcat content) + where + linkContent = (char '[') >> many1Till anyChar (char ']') >>= parseLinkContent + parseLinkContent = parseFromString $ many1 inline From 79f25fb9ce49659427c2fe690115e56c255e29e5 Mon Sep 17 00:00:00 2001 From: Alexander Sulfrian <alexander.sulfrian@fu-berlin.de> Date: Thu, 30 Oct 2014 15:31:38 +0100 Subject: [PATCH 2/3] TWiki Reader: add basic syntax test --- pandoc.cabal | 1 + tests/Tests/Old.hs | 3 + tests/twiki-reader.native | 174 ++++++++++++++++++++++++++++++ tests/twiki-reader.twiki | 221 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 399 insertions(+) create mode 100644 tests/twiki-reader.native create mode 100644 tests/twiki-reader.twiki diff --git a/pandoc.cabal b/pandoc.cabal index e214e9edd..b9dc30488 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -182,6 +182,7 @@ Extra-Source-Files: tests/epub/*.epub tests/epub/*.native tests/txt2tags.t2t + tests/twiki-reader.twiki Source-repository head type: git diff --git a/tests/Tests/Old.hs b/tests/Tests/Old.hs index 8a256b761..0f7b33dd1 100644 --- a/tests/Tests/Old.hs +++ b/tests/Tests/Old.hs @@ -153,6 +153,9 @@ tests = [ testGroup "markdown" , test "formatting" ["-r", "epub", "-w", "native"] "epub/formatting.epub" "epub/formatting.native" ] + , testGroup "twiki" + [ test "reader" ["-r", "twiki", "-w", "native", "-s"] + "twiki-reader.twiki" "twiki-reader.native" ] , testGroup "other writers" $ map (\f -> testGroup f $ writerTests f) [ "opendocument" , "context" , "texinfo", "icml" , "man" , "plain" , "rtf", "org", "asciidoc" diff --git a/tests/twiki-reader.native b/tests/twiki-reader.native new file mode 100644 index 000000000..bde55a378 --- /dev/null +++ b/tests/twiki-reader.native @@ -0,0 +1,174 @@ +Pandoc (Meta {unMeta = fromList []}) +[Header 1 ("header",[],[]) [Str "header"] +,Header 2 ("header-level-two",[],[]) [Str "header",Space,Str "level",Space,Str "two"] +,Header 3 ("header-level-3",[],[]) [Str "header",Space,Str "level",Space,Str "3"] +,Header 4 ("header-level-four",[],[]) [Str "header",Space,Emph [Str "level"],Space,Str "four"] +,Header 5 ("header-level-5",[],[]) [Str "header",Space,Str "level",Space,Str "5"] +,Header 6 ("header-level-6",[],[]) [Str "header",Space,Str "level",Space,Str "6"] +,Para [Str "---+++++++",Space,Str "not",Space,Str "a",Space,Str "header"] +,Para [Str "--++",Space,Str "not",Space,Str "a",Space,Str "header"] +,Header 1 ("emph-and-strong",[],[]) [Str "emph",Space,Str "and",Space,Str "strong"] +,Para [Emph [Str "emph"],Space,Strong [Str "strong"]] +,Para [Emph [Strong [Str "strong",Space,Str "and",Space,Str "emph"]]] +,Para [Strong [Emph [Str "emph",Space,Str "inside"],Space,Str "strong"]] +,Para [Strong [Str "strong",Space,Str "with",Space,Emph [Str "emph"]]] +,Para [Emph [Strong [Str "strong",Space,Str "inside"],Space,Str "emph"]] +,Header 1 ("horizontal-rule",[],[]) [Str "horizontal",Space,Str "rule"] +,Para [Str "top"] +,HorizontalRule +,Para [Str "bottom"] +,HorizontalRule +,Header 1 ("nop",[],[]) [Str "nop"] +,Para [Str "_not",Space,Str "emph_"] +,Header 1 ("entities",[],[]) [Str "entities"] +,Para [Str "hi",Space,Str "&",Space,Str "low"] +,Para [Str "hi",Space,Str "&",Space,Str "low"] +,Para [Str "G\246del"] +,Para [Str "\777\2730"] +,Header 1 ("comments",[],[]) [Str "comments"] +,Para [Str "inline",Space,Str "comment"] +,Para [Str "between",Space,Str "blocks"] +,Header 1 ("linebreaks",[],[]) [Str "linebreaks"] +,Para [Str "hi",LineBreak,Str "there"] +,Para [Str "hi",LineBreak,Space,Str "there"] +,Header 1 ("inline-code",[],[]) [Str "inline",Space,Str "code"] +,Para [Code ("",[],[]) "*\8594*",Space,Code ("",[],[]) "typed",Space,Code ("",["haskell"],[]) ">>="] +,Header 1 ("code-blocks",[],[]) [Str "code",Space,Str "blocks"] +,CodeBlock ("",[],[]) "case xs of\n (_:_) -> reverse xs\n [] -> ['*']" +,CodeBlock ("",["haskell"],[]) "case xs of\n (_:_) -> reverse xs\n [] -> ['*']" +,Header 1 ("block-quotes",[],[]) [Str "block",Space,Str "quotes"] +,Para [Str "Regular",Space,Str "paragraph"] +,BlockQuote + [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "block",Space,Str "quote."] + ,Para [Str "With",Space,Str "two",Space,Str "paragraphs."]] +,Para [Str "Nother",Space,Str "paragraph."] +,Header 1 ("external-links",[],[]) [Str "external",Space,Str "links"] +,Para [Link [Emph [Str "Google"],Space,Str "search",Space,Str "engine"] ("http://google.com","")] +,Para [Link [Str "http://johnmacfarlane.net/pandoc/"] ("http://johnmacfarlane.net/pandoc/","")] +,Para [Link [Str "http://google.com"] ("http://google.com",""),Space,Link [Str "http://yahoo.com"] ("http://yahoo.com","")] +,Para [Link [Str "email",Space,Str "me"] ("mailto:info@example.org","")] +,Para [Str "http://google.com"] +,Para [Str "http://google.com"] +,Para [Str "http://google.com"] +,Para [Str "info@example.org"] +,Para [Str "info@example.org"] +,Para [Str "info@example.org"] +,Header 1 ("lists",[],[]) [Str "lists"] +,BulletList + [[Plain [Str "Start",Space,Str "each",Space,Str "line"]] + ,[Plain [Str "with",Space,Str "an",Space,Str "asterisk",Space,Str "(*)."] + ,BulletList + [[Plain [Str "More",Space,Str "asterisks",Space,Str "gives",Space,Str "deeper"] + ,BulletList + [[Plain [Str "and",Space,Str "deeper",Space,Str "levels."]]]]]] + ,[Plain [Str "Line",Space,Str "breaks",LineBreak,Str "don't",Space,Str "break",Space,Str "levels."]] + ,[Plain [Str "Continuations",Space,Str "are",Space,Str "also",Space,Str "possible"] + ,BulletList + [[Plain [Str "and",Space,Str "do",Space,Str "not",Space,Str "break",Space,Str "the",Space,Str "list",Space,Str "flow"]]]] + ,[Plain [Str "Level",Space,Str "one"]]] +,Para [Str "Any",Space,Str "other",Space,Str "start",Space,Str "ends",Space,Str "the",Space,Str "list."] +,OrderedList (1,DefaultStyle,DefaultDelim) + [[Plain [Str "Start",Space,Str "each",Space,Str "line"]] + ,[Plain [Str "with",Space,Str "a",Space,Str "number",Space,Str "(1.)."] + ,OrderedList (1,DefaultStyle,DefaultDelim) + [[Plain [Str "More",Space,Str "number",Space,Str "signs",Space,Str "gives",Space,Str "deeper"] + ,OrderedList (1,DefaultStyle,DefaultDelim) + [[Plain [Str "and",Space,Str "deeper"]] + ,[Plain [Str "levels."]]]]]] + ,[Plain [Str "Line",Space,Str "breaks",LineBreak,Str "don't",Space,Str "break",Space,Str "levels."]] + ,[Plain [Str "Blank",Space,Str "lines"]]] +,OrderedList (1,DefaultStyle,DefaultDelim) + [[Plain [Str "end",Space,Str "the",Space,Str "list",Space,Str "and",Space,Str "start",Space,Str "another."]]] +,Para [Str "Any",Space,Str "other",Space,Str "start",Space,Str "also",Space,Str "ends",Space,Str "the",Space,Str "list."] +,DefinitionList + [([Str "item",Space,Str "1"], + [[Plain [Str "definition",Space,Str "1"]]]) + ,([Str "item",Space,Str "2"], + [[Plain [Str "definition",Space,Str "2-1",Space,Str "definition",Space,Str "2-2"]]]) + ,([Str "item",Space,Emph [Str "3"]], + [[Plain [Str "definition",Space,Emph [Str "3"]]]])] +,OrderedList (1,DefaultStyle,DefaultDelim) + [[Plain [Str "one"]] + ,[Plain [Str "two"] + ,BulletList + [[Plain [Str "two",Space,Str "point",Space,Str "one"]] + ,[Plain [Str "two",Space,Str "point",Space,Str "two"]]]] + ,[Plain [Str "three"] + ,DefinitionList + [([Str "three",Space,Str "item",Space,Str "one"], + [[Plain [Str "three",Space,Str "def",Space,Str "one"]]])]] + ,[Plain [Str "four"] + ,DefinitionList + [([Str "four",Space,Str "def",Space,Str "one"], + [[Plain [Str "this",Space,Str "is",Space,Str "a",Space,Str "continuation"]]])]] + ,[Plain [Str "five"] + ,OrderedList (1,DefaultStyle,DefaultDelim) + [[Plain [Str "five",Space,Str "sub",Space,Str "1"] + ,OrderedList (1,DefaultStyle,DefaultDelim) + [[Plain [Str "five",Space,Str "sub",Space,Str "1",Space,Str "sub",Space,Str "1"]]]] + ,[Plain [Str "five",Space,Str "sub",Space,Str "2"]]]]] +,OrderedList (1,DefaultStyle,DefaultDelim) + [[Plain [Str "other"] + ,OrderedList (1,UpperRoman,DefaultDelim) + [[Plain [Str "list"]] + ,[Plain [Str "styles"]]]] + ,[Plain [Str "are"] + ,OrderedList (1,LowerRoman,DefaultDelim) + [[Plain [Str "also"]] + ,[Plain [Str "possible"]]]] + ,[Plain [Str "all"] + ,OrderedList (1,LowerAlpha,DefaultDelim) + [[Plain [Str "the"]] + ,[Plain [Str "different"]] + ,[Plain [Str "styles"]]]] + ,[Plain [Str "are"] + ,OrderedList (1,UpperAlpha,DefaultDelim) + [[Plain [Str "implemented"]] + ,[Plain [Str "and"]] + ,[Plain [Str "supported"]]]]] +,Header 1 ("tables",[],[]) [Str "tables"] +,Table [] [AlignDefault,AlignDefault] [0.0,0.0] + [[] + ,[]] + [[[Plain [Str "Orange"]] + ,[Plain [Str "Apple"]]] + ,[[Plain [Str "Bread"]] + ,[Plain [Str "Pie"]]] + ,[[Plain [Str "Butter"]] + ,[Plain [Str "Ice",Space,Str "cream"]]]] +,Table [] [AlignLeft,AlignLeft] [0.0,0.0] + [[Plain [Str "Orange"]] + ,[Plain [Str "Apple"]]] + [[[Plain [Str "Bread"]] + ,[Plain [Str "Pie"]]] + ,[[Plain [Strong [Str "Butter"]]] + ,[Plain [Str "Ice",Space,Str "cream"]]]] +,Table [] [AlignLeft,AlignLeft] [0.0,0.0] + [[Plain [Str "Orange"]] + ,[Plain [Str "Apple"]]] + [[[Plain [Str "Bread",LineBreak,LineBreak,Str "and",Space,Str "cheese"]] + ,[Plain [Str "Pie",LineBreak,LineBreak,Strong [Str "apple"],Space,Str "and",Space,Emph [Str "carrot"]]]]] +,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0] + [[] + ,[] + ,[]] + [[[Plain [Str "Orange"]] + ,[Plain [Str "Apple"]] + ,[Plain [Str "more"]]] + ,[[Plain [Str "Bread"]] + ,[Plain [Str "Pie"]] + ,[Plain [Str "more"]]] + ,[[Plain [Str "Butter"]] + ,[Plain [Str "Ice",Space,Str "cream"]] + ,[Plain [Str "and",Space,Str "more"]]]] +,Header 1 ("macros",[],[]) [Str "macros"] +,Para [Span ("",["twiki-macro","TEST"],[]) []] +,Para [Span ("",["twiki-macro","TEST"],[]) [Str ""]] +,Para [Span ("",["twiki-macro","TEST"],[]) [Str "content with spaces"]] +,Para [Span ("",["twiki-macro","TEST"],[]) [Str "content with spaces"]] +,Para [Span ("",["twiki-macro","TEST"],[("ARG1","test")]) [Str "content with spaces"]] +,Para [Span ("",["twiki-macro","TEST"],[]) [Str "content with spaces ARG1=test"]] +,Para [Span ("",["twiki-macro","TEST"],[("ARG1","test")]) [Str "content with spaces"]] +,Para [Span ("",["twiki-macro","TEST"],[("ARG1","test"),("ARG2","test2")]) [Str ""]] +,Para [Span ("",["twiki-macro","TEST"],[("ARG1","test"),("ARG2","test2")]) [Str ""]] +,Para [Span ("",["twiki-macro","TEST"],[("ARG1","test"),("ARG2","test2")]) [Str "multiline\ndoes also work"]]] diff --git a/tests/twiki-reader.twiki b/tests/twiki-reader.twiki new file mode 100644 index 000000000..51828ef80 --- /dev/null +++ b/tests/twiki-reader.twiki @@ -0,0 +1,221 @@ +---+ header + +---++ header level two + +---+++ header level 3 + +---++++ header _level_ four + +---+++++ header level 5 + +---++++++ header level 6 + +---+++++++ not a header + + --++ not a header + +---+ emph and strong + +_emph_ *strong* + +__strong and emph__ + +*<i>emph inside</i> strong* + +*strong with <i>emph</i>* + +_<b>strong inside</b> emph_ + +---+ horizontal rule + +top +--- +bottom + +--- + +---+ nop + +<nop>_not emph_ + +---+ entities + +hi & low + +hi & low + +Gödel + +̉પ + +---+ comments + +inline <!-- secret --> comment + +<!-- secret --> + +between blocks + + <!-- secret --> + +---+ linebreaks + +hi%BR%there + +hi%BR% +there + +---+ inline code + +<code>*→*</code> =typed= <code class="haskell">>>=</code> + +---+ code blocks + +<verbatim> +case xs of + (_:_) -> reverse xs + [] -> ['*'] +</verbatim> + +<verbatim class="haskell"> +case xs of + (_:_) -> reverse xs + [] -> ['*'] +</verbatim> + +---+ block quotes + +Regular paragraph +<blockquote> +This is a block quote. + +With two paragraphs. +</blockquote> +Nother paragraph. + +---+ external links + +[[http://google.com][<i>Google</i> search engine]] + +http://johnmacfarlane.net/pandoc/ + +[[http://google.com]] [[http://yahoo.com]] + +[[mailto:info@example.org][email me]] + +!http://google.com + +<nop>http://google.com + +<noautolink> +http://google.com +</noautolink> + +!info@example.org + +<nop>info@example.org + +<noautolink> +info@example.org +</noautolink> + +---+ lists + + * Start each line + * with an asterisk (*). + * More asterisks gives deeper + * and deeper levels. + * Line breaks%BR%don't break levels. + * Continuations + are also possible + * and do not break the list flow + * Level one +Any other start ends the list. + + 1. Start each line + 1. with a number (1.). + 1. More number signs gives deeper + 1. and deeper + 1. levels. + 1. Line breaks%BR%don't break levels. + 1. Blank lines + + 1. end the list and start another. +Any other start also +ends the list. + + $ item 1: definition 1 + $ item 2: definition 2-1 + definition 2-2 + $ item _3_: definition _3_ + + 1. one + 1. two + * two point one + * two point two + 1. three + $ three item one: three def one + 1. four + $ four def one: this + is a continuation + 1. five + 1. five sub 1 + 1. five sub 1 sub 1 + 1. five sub 2 + + 1. other + I. list + I. styles + 1. are + i. also + i. possible + 1. all + a. the + a. different + a. styles + 1. are + A. implemented + A. and + A. supported + +---+ tables + +|Orange|Apple| +|Bread|Pie| +|Butter|Ice cream| + +|*Orange*|*Apple*| +|Bread|Pie| +|*Butter*|Ice cream| + +|*Orange*|*Apple*| +|Bread%BR%%BR%and cheese|Pie%BR%%BR%*apple* and <i>carrot</i>| + +| Orange | Apple | more | +| Bread | Pie | more | +| Butter | Ice cream | and more | + +---+ macros + +%TEST% + +%TEST{}% + +%TEST{content with spaces}% + +%TEST{"content with spaces"}% + +%TEST{"content with spaces" ARG1="test"}% + +%TEST{content with spaces ARG1=test}% + +%TEST{ARG1=test content with spaces}% + +%TEST{ARG1=test ARG2=test2}% + +%TEST{ARG1="test" ARG2="test2"}% + +%TEST{ARG1="test" +ARG2="test2" +multiline +does also work}% From 2cf09aad5590de4acb2e7d75d975f337efd437d6 Mon Sep 17 00:00:00 2001 From: Alexander Sulfrian <alexander.sulfrian@fu-berlin.de> Date: Thu, 30 Oct 2014 20:01:38 +0100 Subject: [PATCH 3/3] TWiki Reader: update documentation --- README | 18 ++++++++++-------- pandoc.cabal | 4 ++-- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/README b/README index 556c35bc8..d2eaceb98 100644 --- a/README +++ b/README @@ -18,10 +18,11 @@ Org-mode], [DocBook], [txt2tags], [EPUB] and [Word docx]; and it can write plain [markdown], [reStructuredText], [XHTML], [HTML 5], [LaTeX] (including [beamer] slide shows), [ConTeXt], [RTF], [OPML], [DocBook], [OpenDocument], [ODT], [Word docx], [GNU Texinfo], [MediaWiki markup], -[DokuWiki markup], [Haddock markup], [EPUB] (v2 or v3), [FictionBook2], -[Textile], [groff man] pages, [Emacs Org-Mode], [AsciiDoc], [InDesign ICML], -and [Slidy], [Slideous], [DZSlides], [reveal.js] or [S5] HTML slide shows. -It can also produce [PDF] output on systems where LaTeX is installed. +[DokuWiki markup], [TWiki markup], [Haddock markup], [EPUB] (v2 or v3), +[FictionBook2], [Textile], [groff man] pages, [Emacs Org-Mode], [AsciiDoc], +[InDesign ICML], and [Slidy], [Slideous], [DZSlides], [reveal.js] or +[S5] HTML slide shows. It can also produce [PDF] output on systems where +LaTeX is installed. Pandoc's enhanced version of markdown includes syntax for footnotes, tables, flexible ordered lists, definition lists, fenced code blocks, @@ -147,8 +148,8 @@ General options `textile` (Textile), `rst` (reStructuredText), `html` (HTML), `docbook` (DocBook), `t2t` (txt2tags), `docx` (docx), `epub` (EPUB), `opml` (OPML), `org` (Emacs Org-mode), `mediawiki` (MediaWiki markup), - `haddock` (Haddock markup), or `latex` (LaTeX). If `+lhs` is appended - to `markdown`, `rst`, + `twiki` (TWiki markpu), `haddock` (Haddock markup), or `latex` (LaTeX). + If `+lhs` is appended to `markdown`, `rst`, `latex`, or `html`, the input will be treated as literate Haskell source: see [Literate Haskell support](#literate-haskell-support), below. Markdown syntax extensions can be individually enabled or @@ -239,8 +240,8 @@ Reader options to curly quotes, `---` to em-dashes, `--` to en-dashes, and `...` to ellipses. Nonbreaking spaces are inserted after certain abbreviations, such as "Mr." (Note: This option is significant only when - the input format is `markdown`, `markdown_strict`, or `textile`. It - is selected automatically when the input format is `textile` or the + the input format is `markdown`, `markdown_strict`, `textile` or `twiki`. + It is selected automatically when the input format is `textile` or the output format is `latex` or `context`, unless `--no-tex-ligatures` is used.) @@ -3168,6 +3169,7 @@ Rosenthal. [Textile]: http://redcloth.org/textile [MediaWiki markup]: http://www.mediawiki.org/wiki/Help:Formatting [DokuWiki markup]: https://www.dokuwiki.org/dokuwiki +[TWiki markup]: http://twiki.org/cgi-bin/view/TWiki/TextFormattingRules [Haddock markup]: http://www.haskell.org/haddock/doc/html/ch03s08.html [groff man]: http://developer.apple.com/DOCUMENTATION/Darwin/Reference/ManPages/man7/groff_man.7.html [Haskell]: http://www.haskell.org/ diff --git a/pandoc.cabal b/pandoc.cabal index b9dc30488..585b388cd 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -20,8 +20,8 @@ Description: Pandoc is a Haskell library for converting from one markup markup, OPML, Emacs Org-Mode, txt2tags and Textile, and it can write markdown, reStructuredText, HTML, LaTeX, ConTeXt, Docbook, OPML, OpenDocument, ODT, Word docx, RTF, MediaWiki, DokuWiki, - Textile, groff man pages, plain text, Emacs Org-Mode, AsciiDoc, - Haddock markup, EPUB (v2 and v3), FictionBook2, + TWiki, Textile, groff man pages, plain text, Emacs Org-Mode, + AsciiDoc, Haddock markup, EPUB (v2 and v3), FictionBook2, InDesign ICML, and several kinds of HTML/javascript slide shows (S5, Slidy, Slideous, DZSlides, reveal.js). .