pandoc/src/Text/Pandoc/Readers/MediaWiki.hs

678 lines
24 KiB
Haskell
Raw Normal View History

{-# LANGUAGE RelaxedPolyRec, FlexibleInstances, TypeSynonymInstances #-}
-- RelaxedPolyRec needed for inlinesBetween on GHC < 7
{-
Copyright (C) 2012-2015 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
Module : Text.Pandoc.Readers.MediaWiki
Copyright : Copyright (C) 2012-2015 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Portability : portable
Conversion of mediawiki text to 'Pandoc' document.
-}
{-
TODO:
_ correctly handle tables within tables
_ parse templates?
-}
module Text.Pandoc.Readers.MediaWiki ( readMediaWiki ) where
import Text.Pandoc.Definition
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Builder (Inlines, Blocks, trimInlines)
import Data.Monoid ((<>))
import Text.Pandoc.Options
import Text.Pandoc.Logging
import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag, isCommentTag )
import Text.Pandoc.XML ( fromEntities )
import Text.Pandoc.Parsing hiding ( nested )
import Text.Pandoc.Walk ( walk )
import Text.Pandoc.Shared ( stripTrailingNewlines, safeRead, stringify, trim )
import Control.Monad
import Data.List (intersperse, intercalate, isPrefixOf )
import Text.HTML.TagSoup
import Data.Sequence (viewl, ViewL(..), (<|))
import qualified Data.Foldable as F
import qualified Data.Map as M
import qualified Data.Set as Set
import Data.Char (isDigit, isSpace)
import Data.Maybe (fromMaybe)
2016-11-28 17:13:46 -05:00
import Control.Monad.Except (throwError)
import Text.Pandoc.Class (PandocMonad, report)
2015-02-18 13:04:24 +00:00
-- | Read mediawiki from an input string and return a Pandoc document.
2016-11-28 17:13:46 -05:00
readMediaWiki :: PandocMonad m
=> ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
2016-11-28 17:13:46 -05:00
-> m Pandoc
readMediaWiki opts s = do
parsed <- readWithM parseMediaWiki MWState{ mwOptions = opts
, mwMaxNestingLevel = 4
, mwNextLinkNumber = 1
, mwCategoryLinks = []
, mwHeaderMap = M.empty
, mwIdentifierList = Set.empty
}
(s ++ "\n")
case parsed of
Right result -> return result
Left e -> throwError e
data MWState = MWState { mwOptions :: ReaderOptions
, mwMaxNestingLevel :: Int
, mwNextLinkNumber :: Int
, mwCategoryLinks :: [Inlines]
, mwHeaderMap :: M.Map Inlines String
, mwIdentifierList :: Set.Set String
}
2016-11-28 17:13:46 -05:00
type MWParser m = ParserT [Char] MWState m
instance HasReaderOptions MWState where
extractReaderOptions = mwOptions
instance HasHeaderMap MWState where
extractHeaderMap = mwHeaderMap
updateHeaderMap f st = st{ mwHeaderMap = f $ mwHeaderMap st }
instance HasIdentifierList MWState where
extractIdentifierList = mwIdentifierList
updateIdentifierList f st = st{ mwIdentifierList = f $ mwIdentifierList st }
--
-- auxiliary functions
--
-- This is used to prevent exponential blowups for things like:
-- ''a'''a''a'''a''a'''a''a'''a
2016-11-28 17:13:46 -05:00
nested :: PandocMonad m => MWParser m a -> MWParser m a
nested p = do
nestlevel <- mwMaxNestingLevel `fmap` getState
guard $ nestlevel > 0
updateState $ \st -> st{ mwMaxNestingLevel = mwMaxNestingLevel st - 1 }
res <- p
updateState $ \st -> st{ mwMaxNestingLevel = nestlevel }
return res
specialChars :: [Char]
specialChars = "'[]<=&*{}|\":\\"
spaceChars :: [Char]
spaceChars = " \n\t"
2016-11-28 17:13:46 -05:00
sym :: PandocMonad m => String -> MWParser m ()
sym s = () <$ try (string s)
newBlockTags :: [String]
newBlockTags = ["haskell","syntaxhighlight","source","gallery","references"]
isBlockTag' :: Tag String -> Bool
isBlockTag' tag@(TagOpen t _) = (isBlockTag tag || t `elem` newBlockTags) &&
t `notElem` eitherBlockOrInline
isBlockTag' tag@(TagClose t) = (isBlockTag tag || t `elem` newBlockTags) &&
t `notElem` eitherBlockOrInline
isBlockTag' tag = isBlockTag tag
isInlineTag' :: Tag String -> Bool
isInlineTag' (TagComment _) = True
isInlineTag' t = not (isBlockTag' t)
eitherBlockOrInline :: [String]
eitherBlockOrInline = ["applet", "button", "del", "iframe", "ins",
"map", "area", "object"]
2016-11-28 17:13:46 -05:00
htmlComment :: PandocMonad m => MWParser m ()
htmlComment = () <$ htmlTag isCommentTag
2016-11-28 17:13:46 -05:00
inlinesInTags :: PandocMonad m => String -> MWParser m Inlines
inlinesInTags tag = try $ do
(_,raw) <- htmlTag (~== TagOpen tag [])
if '/' `elem` raw -- self-closing tag
then return mempty
else trimInlines . mconcat <$>
manyTill inline (htmlTag (~== TagClose tag))
2016-11-28 17:13:46 -05:00
blocksInTags :: PandocMonad m => String -> MWParser m Blocks
blocksInTags tag = try $ do
(_,raw) <- htmlTag (~== TagOpen tag [])
let closer = if tag == "li"
then htmlTag (~== TagClose "li")
<|> lookAhead (
htmlTag (~== TagOpen "li" [])
<|> htmlTag (~== TagClose "ol")
<|> htmlTag (~== TagClose "ul"))
else htmlTag (~== TagClose tag)
if '/' `elem` raw -- self-closing tag
then return mempty
else mconcat <$> manyTill block closer
2016-11-28 17:13:46 -05:00
charsInTags :: PandocMonad m => String -> MWParser m [Char]
charsInTags tag = try $ do
(_,raw) <- htmlTag (~== TagOpen tag [])
if '/' `elem` raw -- self-closing tag
then return ""
else manyTill anyChar (htmlTag (~== TagClose tag))
--
-- main parser
--
2016-11-28 17:13:46 -05:00
parseMediaWiki :: PandocMonad m => MWParser m Pandoc
parseMediaWiki = do
bs <- mconcat <$> many block
spaces
eof
categoryLinks <- reverse . mwCategoryLinks <$> getState
let categories = if null categoryLinks
then mempty
else B.para $ mconcat $ intersperse B.space categoryLinks
return $ B.doc $ bs <> categories
--
-- block parsers
--
2016-11-28 17:13:46 -05:00
block :: PandocMonad m => MWParser m Blocks
2014-06-20 11:39:24 -07:00
block = do
pos <- getPosition
res <- mempty <$ skipMany1 blankline
<|> table
<|> header
<|> hrule
<|> orderedList
2012-09-13 14:47:11 -07:00
<|> bulletList
<|> definitionList
<|> mempty <$ try (spaces *> htmlComment)
<|> preformatted
<|> blockTag
<|> (B.rawBlock "mediawiki" <$> template)
<|> para
report $ ParsingTrace (take 60 $ show $ B.toList res) pos
2014-06-20 11:39:24 -07:00
return res
2016-11-28 17:13:46 -05:00
para :: PandocMonad m => MWParser m Blocks
para = do
contents <- trimInlines . mconcat <$> many1 inline
if F.all (==Space) contents
then return mempty
else return $ B.para contents
2016-11-28 17:13:46 -05:00
table :: PandocMonad m => MWParser m Blocks
table = do
tableStart
styles <- option [] parseAttrs <* blanklines
let tableWidth = case lookup "width" styles of
Just w -> fromMaybe 1.0 $ parseWidth w
Nothing -> 1.0
caption <- option mempty tableCaption
optional rowsep
hasheader <- option False $ True <$ (lookAhead (skipSpaces *> char '!'))
(cellspecs',hdr) <- unzip <$> tableRow
let widths = map ((tableWidth *) . snd) cellspecs'
let restwidth = tableWidth - sum widths
let zerocols = length $ filter (==0.0) widths
let defaultwidth = if zerocols == 0 || zerocols == length widths
then 0.0
else restwidth / fromIntegral zerocols
let widths' = map (\w -> if w == 0 then defaultwidth else w) widths
let cellspecs = zip (map fst cellspecs') widths'
rows' <- many $ try $ rowsep *> (map snd <$> tableRow)
optional blanklines
tableEnd
let cols = length hdr
let (headers,rows) = if hasheader
then (hdr, rows')
else (replicate cols mempty, hdr:rows')
return $ B.table caption cellspecs headers rows
2016-11-28 17:13:46 -05:00
parseAttrs :: PandocMonad m => MWParser m [(String,String)]
parseAttrs = many1 parseAttr
2016-11-28 17:13:46 -05:00
parseAttr :: PandocMonad m => MWParser m (String, String)
parseAttr = try $ do
skipMany spaceChar
k <- many1 letter
char '='
v <- (char '"' >> many1Till (satisfy (/='\n')) (char '"'))
<|> many1 (satisfy $ \c -> not (isSpace c) && c /= '|')
return (k,v)
2016-11-28 17:13:46 -05:00
tableStart :: PandocMonad m => MWParser m ()
tableStart = try $ guardColumnOne *> skipSpaces *> sym "{|"
2016-11-28 17:13:46 -05:00
tableEnd :: PandocMonad m => MWParser m ()
tableEnd = try $ guardColumnOne *> skipSpaces *> sym "|}"
2016-11-28 17:13:46 -05:00
rowsep :: PandocMonad m => MWParser m ()
rowsep = try $ guardColumnOne *> skipSpaces *> sym "|-" <*
many (char '-') <* optional parseAttr <* blanklines
2016-11-28 17:13:46 -05:00
cellsep :: PandocMonad m => MWParser m ()
cellsep = try $
(guardColumnOne *> skipSpaces <*
( (char '|' <* notFollowedBy (oneOf "-}+"))
<|> (char '!')
)
)
<|> (() <$ try (string "||"))
<|> (() <$ try (string "!!"))
2016-11-28 17:13:46 -05:00
tableCaption :: PandocMonad m => MWParser m Inlines
tableCaption = try $ do
guardColumnOne
skipSpaces
sym "|+"
optional (try $ parseAttr *> skipSpaces *> char '|' *> skipSpaces)
(trimInlines . mconcat) <$> many (notFollowedBy (cellsep <|> rowsep) *> inline)
2016-11-28 17:13:46 -05:00
tableRow :: PandocMonad m => MWParser m [((Alignment, Double), Blocks)]
tableRow = try $ skipMany htmlComment *> many tableCell
2016-11-28 17:13:46 -05:00
tableCell :: PandocMonad m => MWParser m ((Alignment, Double), Blocks)
tableCell = try $ do
cellsep
skipMany spaceChar
attrs <- option [] $ try $ parseAttrs <* skipSpaces <* char '|' <*
notFollowedBy (char '|')
skipMany spaceChar
ls <- concat <$> many (notFollowedBy (cellsep <|> rowsep <|> tableEnd) *>
((snd <$> withRaw table) <|> count 1 anyChar))
bs <- parseFromString (mconcat <$> many block) ls
let align = case lookup "align" attrs of
Just "left" -> AlignLeft
Just "right" -> AlignRight
Just "center" -> AlignCenter
_ -> AlignDefault
let width = case lookup "width" attrs of
Just xs -> fromMaybe 0.0 $ parseWidth xs
Nothing -> 0.0
return ((align, width), bs)
parseWidth :: String -> Maybe Double
parseWidth s =
case reverse s of
('%':ds) | all isDigit ds -> safeRead ('0':'.':reverse ds)
_ -> Nothing
2016-11-28 17:13:46 -05:00
template :: PandocMonad m => MWParser m String
template = try $ do
string "{{"
notFollowedBy (char '{')
lookAhead $ letter <|> digit <|> char ':'
let chunk = template <|> variable <|> many1 (noneOf "{}") <|> count 1 anyChar
contents <- manyTill chunk (try $ string "}}")
return $ "{{" ++ concat contents ++ "}}"
2016-11-28 17:13:46 -05:00
blockTag :: PandocMonad m => MWParser m Blocks
blockTag = do
(tag, _) <- lookAhead $ htmlTag isBlockTag'
case tag of
TagOpen "blockquote" _ -> B.blockQuote <$> blocksInTags "blockquote"
TagOpen "pre" _ -> B.codeBlock . trimCode <$> charsInTags "pre"
TagOpen "syntaxhighlight" attrs -> syntaxhighlight "syntaxhighlight" attrs
TagOpen "source" attrs -> syntaxhighlight "source" attrs
TagOpen "haskell" _ -> B.codeBlockWith ("",["haskell"],[]) . trimCode <$>
charsInTags "haskell"
TagOpen "gallery" _ -> blocksInTags "gallery"
TagOpen "p" _ -> mempty <$ htmlTag (~== tag)
TagClose "p" -> mempty <$ htmlTag (~== tag)
_ -> B.rawBlock "html" . snd <$> htmlTag (~== tag)
trimCode :: String -> String
trimCode ('\n':xs) = stripTrailingNewlines xs
trimCode xs = stripTrailingNewlines xs
2016-11-28 17:13:46 -05:00
syntaxhighlight :: PandocMonad m => String -> [Attribute String] -> MWParser m Blocks
syntaxhighlight tag attrs = try $ do
let mblang = lookup "lang" attrs
let mbstart = lookup "start" attrs
let mbline = lookup "line" attrs
let classes = maybe [] (:[]) mblang ++ maybe [] (const ["numberLines"]) mbline
let kvs = maybe [] (\x -> [("startFrom",x)]) mbstart
contents <- charsInTags tag
return $ B.codeBlockWith ("",classes,kvs) $ trimCode contents
2016-11-28 17:13:46 -05:00
hrule :: PandocMonad m => MWParser m Blocks
hrule = B.horizontalRule <$ try (string "----" *> many (char '-') *> newline)
2016-11-28 17:13:46 -05:00
guardColumnOne :: PandocMonad m => MWParser m ()
guardColumnOne = getPosition >>= \pos -> guard (sourceColumn pos == 1)
2016-11-28 17:13:46 -05:00
preformatted :: PandocMonad m => MWParser m Blocks
preformatted = try $ do
guardColumnOne
char ' '
let endline' = B.linebreak <$ (try $ newline <* char ' ')
let whitespace' = B.str <$> many1 ('\160' <$ spaceChar)
let spToNbsp ' ' = '\160'
spToNbsp x = x
let nowiki' = mconcat . intersperse B.linebreak . map B.str .
lines . fromEntities . map spToNbsp <$> try
(htmlTag (~== TagOpen "nowiki" []) *>
manyTill anyChar (htmlTag (~== TagClose "nowiki")))
let inline' = whitespace' <|> endline' <|> nowiki'
2013-03-28 10:51:14 -07:00
<|> (try $ notFollowedBy newline *> inline)
contents <- mconcat <$> many1 inline'
let spacesStr (Str xs) = all isSpace xs
spacesStr _ = False
if F.all spacesStr contents
then return mempty
else return $ B.para $ encode contents
encode :: Inlines -> Inlines
encode = B.fromList . normalizeCode . B.toList . walk strToCode
where strToCode (Str s) = Code ("",[],[]) s
strToCode Space = Code ("",[],[]) " "
strToCode x = x
normalizeCode [] = []
normalizeCode (Code a1 x : Code a2 y : zs) | a1 == a2 =
normalizeCode $ (Code a1 (x ++ y)) : zs
normalizeCode (x:xs) = x : normalizeCode xs
2016-11-28 17:13:46 -05:00
header :: PandocMonad m => MWParser m Blocks
header = try $ do
guardColumnOne
eqs <- many1 (char '=')
let lev = length eqs
guard $ lev <= 6
contents <- trimInlines . mconcat <$> manyTill inline (count lev $ char '=')
attr <- registerHeader nullAttr contents
return $ B.headerWith attr lev contents
2016-11-28 17:13:46 -05:00
bulletList :: PandocMonad m => MWParser m Blocks
2012-09-13 14:47:11 -07:00
bulletList = B.bulletList <$>
( many1 (listItem '*')
<|> (htmlTag (~== TagOpen "ul" []) *> spaces *> many (listItem '*' <|> li) <*
optional (htmlTag (~== TagClose "ul"))) )
2016-11-28 17:13:46 -05:00
orderedList :: PandocMonad m => MWParser m Blocks
2012-09-13 14:47:11 -07:00
orderedList =
(B.orderedList <$> many1 (listItem '#'))
<|> try
(do (tag,_) <- htmlTag (~== TagOpen "ol" [])
spaces
items <- many (listItem '#' <|> li)
optional (htmlTag (~== TagClose "ol"))
let start = fromMaybe 1 $ safeRead $ fromAttrib "start" tag
return $ B.orderedListWith (start, DefaultStyle, DefaultDelim) items)
2016-11-28 17:13:46 -05:00
definitionList :: PandocMonad m => MWParser m Blocks
definitionList = B.definitionList <$> many1 defListItem
2016-11-28 17:13:46 -05:00
defListItem :: PandocMonad m => MWParser m (Inlines, [Blocks])
defListItem = try $ do
terms <- mconcat . intersperse B.linebreak <$> many defListTerm
-- we allow dd with no dt, or dt with no dd
defs <- if B.isNull terms
then notFollowedBy
(try $ skipMany1 (char ':') >> string "<math>") *>
many1 (listItem ':')
else many (listItem ':')
return (terms, defs)
2016-11-28 17:13:46 -05:00
defListTerm :: PandocMonad m => MWParser m Inlines
defListTerm = char ';' >> skipMany spaceChar >> anyLine >>=
parseFromString (trimInlines . mconcat <$> many inline)
2016-11-28 17:13:46 -05:00
listStart :: PandocMonad m => Char -> MWParser m ()
listStart c = char c *> notFollowedBy listStartChar
2016-11-28 17:13:46 -05:00
listStartChar :: PandocMonad m => MWParser m Char
listStartChar = oneOf "*#;:"
2016-11-28 17:13:46 -05:00
anyListStart :: PandocMonad m => MWParser m Char
anyListStart = char '*'
<|> char '#'
<|> char ':'
<|> char ';'
2016-11-28 17:13:46 -05:00
li :: PandocMonad m => MWParser m Blocks
li = lookAhead (htmlTag (~== TagOpen "li" [])) *>
2012-09-13 14:47:11 -07:00
(firstParaToPlain <$> blocksInTags "li") <* spaces
2016-11-28 17:13:46 -05:00
listItem :: PandocMonad m => Char -> MWParser m Blocks
listItem c = try $ do
extras <- many (try $ char c <* lookAhead listStartChar)
if null extras
then listItem' c
else do
skipMany spaceChar
first <- concat <$> manyTill listChunk newline
rest <- many
(try $ string extras *> lookAhead listStartChar *>
(concat <$> manyTill listChunk newline))
contents <- parseFromString (many1 $ listItem' c)
(unlines (first : rest))
case c of
'*' -> return $ B.bulletList contents
'#' -> return $ B.orderedList contents
':' -> return $ B.definitionList [(mempty, contents)]
_ -> mzero
-- The point of this is to handle stuff like
-- * {{cite book
-- | blah
-- | blah
-- }}
-- * next list item
-- which seems to be valid mediawiki.
2016-11-28 17:13:46 -05:00
listChunk :: PandocMonad m => MWParser m String
listChunk = template <|> count 1 anyChar
2016-11-28 17:13:46 -05:00
listItem' :: PandocMonad m => Char -> MWParser m Blocks
listItem' c = try $ do
listStart c
skipMany spaceChar
first <- concat <$> manyTill listChunk newline
rest <- many (try $ char c *> lookAhead listStartChar *>
(concat <$> manyTill listChunk newline))
2012-09-13 14:47:11 -07:00
parseFromString (firstParaToPlain . mconcat <$> many1 block)
$ unlines $ first : rest
firstParaToPlain :: Blocks -> Blocks
firstParaToPlain contents =
case viewl (B.unMany contents) of
2012-09-13 14:47:11 -07:00
(Para xs) :< ys -> B.Many $ (Plain xs) <| ys
_ -> contents
--
-- inline parsers
--
2016-11-28 17:13:46 -05:00
inline :: PandocMonad m => MWParser m Inlines
inline = whitespace
<|> url
<|> str
<|> doubleQuotes
<|> strong
<|> emph
2012-09-13 18:16:25 -07:00
<|> image
<|> internalLink
<|> externalLink
<|> math
<|> inlineTag
<|> B.singleton <$> charRef
<|> inlineHtml
<|> (B.rawInline "mediawiki" <$> variable)
<|> (B.rawInline "mediawiki" <$> template)
<|> special
2016-11-28 17:13:46 -05:00
str :: PandocMonad m => MWParser m Inlines
str = B.str <$> many1 (noneOf $ specialChars ++ spaceChars)
2016-11-28 17:13:46 -05:00
math :: PandocMonad m => MWParser m Inlines
math = (B.displayMath . trim <$> try (many1 (char ':') >> charsInTags "math"))
<|> (B.math . trim <$> charsInTags "math")
<|> (B.displayMath . trim <$> try (dmStart *> manyTill anyChar dmEnd))
<|> (B.math . trim <$> try (mStart *> manyTill (satisfy (/='\n')) mEnd))
where dmStart = string "\\["
dmEnd = try (string "\\]")
mStart = string "\\("
mEnd = try (string "\\)")
2016-11-28 17:13:46 -05:00
variable :: PandocMonad m => MWParser m String
variable = try $ do
string "{{{"
contents <- manyTill anyChar (try $ string "}}}")
return $ "{{{" ++ contents ++ "}}}"
2016-11-28 17:13:46 -05:00
inlineTag :: PandocMonad m => MWParser m Inlines
inlineTag = do
(tag, _) <- lookAhead $ htmlTag isInlineTag'
case tag of
TagOpen "ref" _ -> B.note . B.plain <$> inlinesInTags "ref"
TagOpen "nowiki" _ -> try $ do
(_,raw) <- htmlTag (~== tag)
if '/' `elem` raw
then return mempty
else B.text . fromEntities <$>
manyTill anyChar (htmlTag (~== TagClose "nowiki"))
TagOpen "br" _ -> B.linebreak <$ (htmlTag (~== TagOpen "br" []) -- will get /> too
*> optional blankline)
TagOpen "strike" _ -> B.strikeout <$> inlinesInTags "strike"
TagOpen "del" _ -> B.strikeout <$> inlinesInTags "del"
TagOpen "sub" _ -> B.subscript <$> inlinesInTags "sub"
TagOpen "sup" _ -> B.superscript <$> inlinesInTags "sup"
TagOpen "code" _ -> encode <$> inlinesInTags "code"
TagOpen "tt" _ -> encode <$> inlinesInTags "tt"
TagOpen "hask" _ -> B.codeWith ("",["haskell"],[]) <$> charsInTags "hask"
_ -> B.rawInline "html" . snd <$> htmlTag (~== tag)
2016-11-28 17:13:46 -05:00
special :: PandocMonad m => MWParser m Inlines
special = B.str <$> count 1 (notFollowedBy' (htmlTag isBlockTag') *>
oneOf specialChars)
2016-11-28 17:13:46 -05:00
inlineHtml :: PandocMonad m => MWParser m Inlines
inlineHtml = B.rawInline "html" . snd <$> htmlTag isInlineTag'
2016-11-28 17:13:46 -05:00
whitespace :: PandocMonad m => MWParser m Inlines
whitespace = B.space <$ (skipMany1 spaceChar <|> htmlComment)
<|> B.softbreak <$ endline
2016-11-28 17:13:46 -05:00
endline :: PandocMonad m => MWParser m ()
endline = () <$ try (newline <*
notFollowedBy spaceChar <*
notFollowedBy newline <*
notFollowedBy' hrule <*
notFollowedBy tableStart <*
notFollowedBy' header <*
notFollowedBy anyListStart)
2016-11-28 17:13:46 -05:00
imageIdentifiers :: PandocMonad m => [MWParser m ()]
imageIdentifiers = [sym (identifier ++ ":") | identifier <- identifiers]
where identifiers = ["File", "Image", "Archivo", "Datei", "Fichier",
"Bild"]
2016-11-28 17:13:46 -05:00
image :: PandocMonad m => MWParser m Inlines
2012-09-13 18:16:25 -07:00
image = try $ do
sym "[["
choice imageIdentifiers
fname <- addUnderscores <$> many1 (noneOf "|]")
_ <- many imageOption
dims <- try (char '|' *> (sepBy (many digit) (char 'x')) <* string "px")
<|> return []
_ <- many imageOption
let kvs = case dims of
w:[] -> [("width", w)]
w:(h:[]) -> [("width", w), ("height", h)]
_ -> []
let attr = ("", [], kvs)
2012-09-13 18:16:25 -07:00
caption <- (B.str fname <$ sym "]]")
<|> try (char '|' *> (mconcat <$> manyTill inline (sym "]]")))
return $ B.imageWith attr fname ("fig:" ++ stringify caption) caption
2012-09-13 18:16:25 -07:00
2016-11-28 17:13:46 -05:00
imageOption :: PandocMonad m => MWParser m String
imageOption = try $ char '|' *> opt
where
opt = try (oneOfStrings [ "border", "thumbnail", "frameless"
, "thumb", "upright", "left", "right"
, "center", "none", "baseline", "sub"
, "super", "top", "text-top", "middle"
, "bottom", "text-bottom" ])
<|> try (string "frame")
<|> try (oneOfStrings ["link=","alt=","page=","class="] <* many (noneOf "|]"))
2012-09-13 18:16:25 -07:00
collapseUnderscores :: String -> String
collapseUnderscores [] = []
collapseUnderscores ('_':'_':xs) = collapseUnderscores ('_':xs)
collapseUnderscores (x:xs) = x : collapseUnderscores xs
addUnderscores :: String -> String
addUnderscores = collapseUnderscores . intercalate "_" . words
2016-11-28 17:13:46 -05:00
internalLink :: PandocMonad m => MWParser m Inlines
internalLink = try $ do
2012-09-13 18:16:25 -07:00
sym "[["
pagename <- unwords . words <$> many (noneOf "|]")
label <- option (B.text pagename) $ char '|' *>
( (mconcat <$> many1 (notFollowedBy (char ']') *> inline))
-- the "pipe trick"
-- [[Help:Contents|] -> "Contents"
<|> (return $ B.text $ drop 1 $ dropWhile (/=':') pagename) )
2012-09-13 18:16:25 -07:00
sym "]]"
linktrail <- B.text <$> many letter
let link = B.link (addUnderscores pagename) "wikilink" (label <> linktrail)
if "Category:" `isPrefixOf` pagename
then do
updateState $ \st -> st{ mwCategoryLinks = link : mwCategoryLinks st }
return mempty
else return link
2016-11-28 17:13:46 -05:00
externalLink :: PandocMonad m => MWParser m Inlines
externalLink = try $ do
char '['
2012-09-12 09:29:00 -07:00
(_, src) <- uri
lab <- try (trimInlines . mconcat <$>
(skipMany1 spaceChar *> manyTill inline (char ']')))
<|> do char ']'
num <- mwNextLinkNumber <$> getState
updateState $ \st -> st{ mwNextLinkNumber = num + 1 }
return $ B.str $ show num
return $ B.link src "" lab
2016-11-28 17:13:46 -05:00
url :: PandocMonad m => MWParser m Inlines
url = do
2012-09-12 09:29:00 -07:00
(orig, src) <- uri
return $ B.link src "" (B.str orig)
-- | Parses a list of inlines between start and end delimiters.
2016-11-28 17:13:46 -05:00
inlinesBetween :: (PandocMonad m, Show b) => MWParser m a -> MWParser m b -> MWParser m Inlines
inlinesBetween start end =
(trimInlines . mconcat) <$> try (start >> many1Till inner end)
where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline)
innerSpace = try $ whitespace <* notFollowedBy' end
2016-11-28 17:13:46 -05:00
emph :: PandocMonad m => MWParser m Inlines
emph = B.emph <$> nested (inlinesBetween start end)
where start = sym "''" >> lookAhead nonspaceChar
end = try $ notFollowedBy' (() <$ strong) >> sym "''"
2016-11-28 17:13:46 -05:00
strong :: PandocMonad m => MWParser m Inlines
strong = B.strong <$> nested (inlinesBetween start end)
where start = sym "'''" >> lookAhead nonspaceChar
end = try $ sym "'''"
2016-11-28 17:13:46 -05:00
doubleQuotes :: PandocMonad m => MWParser m Inlines
doubleQuotes = B.doubleQuoted <$> nested (inlinesBetween openDoubleQuote closeDoubleQuote)
where openDoubleQuote = sym "\"" >> lookAhead nonspaceChar
closeDoubleQuote = try $ sym "\""