pandoc/src/Text/Pandoc/Readers/Markdown.hs
John MacFarlane f4ac0edf2a Markdown reader: allow latex macro definitions indented 1-3 spaces.
Previously they only worked if nonindented.
2017-03-22 21:19:50 +01:00

2127 lines
77 KiB
Haskell

{-# LANGUAGE RelaxedPolyRec #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-
Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
Module : Text.Pandoc.Readers.Markdown
Copyright : Copyright (C) 2006-2015 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Portability : portable
Conversion of markdown-formatted plain text to 'Pandoc' document.
-}
module Text.Pandoc.Readers.Markdown ( readMarkdown ) where
import Control.Monad
import Control.Monad.Except (throwError)
import Data.Char (isAlphaNum, isPunctuation, isSpace, toLower)
import qualified Data.HashMap.Strict as H
import Data.List (findIndex, intercalate, sortBy, transpose)
import qualified Data.Map as M
import Data.Maybe
import Data.Monoid ((<>))
import Data.Ord (comparing)
import Data.Scientific (base10Exponent, coefficient)
import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Vector as V
import Data.Yaml (ParseException (..), YamlException (..), YamlMark (..))
import qualified Data.Yaml as Yaml
import System.FilePath (addExtension, takeExtension)
import Text.HTML.TagSoup
import Text.Pandoc.Builder (Blocks, Inlines, trimInlines)
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class (PandocMonad, report)
import Text.Pandoc.Definition
import Text.Pandoc.Emoji (emojis)
import Text.Pandoc.Generic (bottomUp)
import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (tableWith)
import Text.Pandoc.Pretty (charWidth)
import Text.Pandoc.Readers.HTML (htmlInBalanced, htmlTag, isBlockTag,
isCommentTag, isInlineTag, isTextTag)
import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline)
import Text.Pandoc.Shared
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.XML (fromEntities)
type MarkdownParser m = ParserT [Char] ParserState m
-- | Read markdown from an input string and return a Pandoc document.
readMarkdown :: PandocMonad m
=> ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
-> m Pandoc
readMarkdown opts s = do
parsed <- (readWithM parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n")
case parsed of
Right result -> return result
Left e -> throwError e
trimInlinesF :: F Inlines -> F Inlines
trimInlinesF = liftM trimInlines
--
-- Constants and data structure definitions
--
isBulletListMarker :: Char -> Bool
isBulletListMarker '*' = True
isBulletListMarker '+' = True
isBulletListMarker '-' = True
isBulletListMarker _ = False
isHruleChar :: Char -> Bool
isHruleChar '*' = True
isHruleChar '-' = True
isHruleChar '_' = True
isHruleChar _ = False
setextHChars :: String
setextHChars = "=-"
isBlank :: Char -> Bool
isBlank ' ' = True
isBlank '\t' = True
isBlank '\n' = True
isBlank _ = False
--
-- auxiliary functions
--
-- | Succeeds when we're in list context.
inList :: PandocMonad m => MarkdownParser m ()
inList = do
ctx <- stateParserContext <$> getState
guard (ctx == ListItemState)
spnl :: PandocMonad m => ParserT [Char] st m ()
spnl = try $ do
skipSpaces
optional newline
skipSpaces
notFollowedBy (char '\n')
indentSpaces :: PandocMonad m => MarkdownParser m String
indentSpaces = try $ do
tabStop <- getOption readerTabStop
count tabStop (char ' ') <|>
string "\t" <?> "indentation"
nonindentSpaces :: PandocMonad m => MarkdownParser m String
nonindentSpaces = do
n <- skipNonindentSpaces
return $ replicate n ' '
-- returns number of spaces parsed
skipNonindentSpaces :: PandocMonad m => MarkdownParser m Int
skipNonindentSpaces = do
tabStop <- getOption readerTabStop
atMostSpaces (tabStop - 1) <* notFollowedBy spaceChar
atMostSpaces :: PandocMonad m => Int -> MarkdownParser m Int
atMostSpaces n
| n > 0 = (char ' ' >> (+1) <$> atMostSpaces (n-1)) <|> return 0
| otherwise = return 0
litChar :: PandocMonad m => MarkdownParser m Char
litChar = escapedChar'
<|> characterReference
<|> noneOf "\n"
<|> try (newline >> notFollowedBy blankline >> return ' ')
-- | Parse a sequence of inline elements between square brackets,
-- including inlines between balanced pairs of square brackets.
inlinesInBalancedBrackets :: PandocMonad m => MarkdownParser m (F Inlines)
inlinesInBalancedBrackets = do
char '['
(_, raw) <- withRaw $ charsInBalancedBrackets 1
guard $ not $ null raw
parseFromString (trimInlinesF . mconcat <$> many inline) (init raw)
charsInBalancedBrackets :: PandocMonad m => Int -> MarkdownParser m ()
charsInBalancedBrackets 0 = return ()
charsInBalancedBrackets openBrackets =
(char '[' >> charsInBalancedBrackets (openBrackets + 1))
<|> (char ']' >> charsInBalancedBrackets (openBrackets - 1))
<|> (( (() <$ code)
<|> (() <$ (escapedChar'))
<|> (newline >> notFollowedBy blankline)
<|> skipMany1 (noneOf "[]`\n\\")
<|> (() <$ count 1 (oneOf "`\\"))
) >> charsInBalancedBrackets openBrackets)
--
-- document structure
--
rawTitleBlockLine :: PandocMonad m => MarkdownParser m String
rawTitleBlockLine = do
char '%'
skipSpaces
first <- anyLine
rest <- many $ try $ do spaceChar
notFollowedBy blankline
skipSpaces
anyLine
return $ trim $ unlines (first:rest)
titleLine :: PandocMonad m => MarkdownParser m (F Inlines)
titleLine = try $ do
raw <- rawTitleBlockLine
res <- parseFromString (many inline) raw
return $ trimInlinesF $ mconcat res
authorsLine :: PandocMonad m => MarkdownParser m (F [Inlines])
authorsLine = try $ do
raw <- rawTitleBlockLine
let sep = (char ';' <* spaces) <|> newline
let pAuthors = sepEndBy
(trimInlinesF . mconcat <$> many
(try $ notFollowedBy sep >> inline))
sep
sequence <$> parseFromString pAuthors raw
dateLine :: PandocMonad m => MarkdownParser m (F Inlines)
dateLine = try $ do
raw <- rawTitleBlockLine
res <- parseFromString (many inline) raw
return $ trimInlinesF $ mconcat res
titleBlock :: PandocMonad m => MarkdownParser m ()
titleBlock = pandocTitleBlock <|> mmdTitleBlock
pandocTitleBlock :: PandocMonad m => MarkdownParser m ()
pandocTitleBlock = try $ do
guardEnabled Ext_pandoc_title_block
lookAhead (char '%')
title <- option mempty titleLine
author <- option (return []) authorsLine
date <- option mempty dateLine
optional blanklines
let meta' = do title' <- title
author' <- author
date' <- date
return $
(if B.isNull title' then id else B.setMeta "title" title')
. (if null author' then id else B.setMeta "author" author')
. (if B.isNull date' then id else B.setMeta "date" date')
$ nullMeta
updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
yamlMetaBlock :: PandocMonad m => MarkdownParser m (F Blocks)
yamlMetaBlock = try $ do
guardEnabled Ext_yaml_metadata_block
pos <- getPosition
string "---"
blankline
notFollowedBy blankline -- if --- is followed by a blank it's an HRULE
rawYamlLines <- manyTill anyLine stopLine
-- by including --- and ..., we allow yaml blocks with just comments:
let rawYaml = unlines ("---" : (rawYamlLines ++ ["..."]))
optional blanklines
case Yaml.decodeEither' $ UTF8.fromString rawYaml of
Right (Yaml.Object hashmap) -> do
let alist = H.toList hashmap
mapM_ (\(k, v) -> do
if ignorable k
then return ()
else do
v' <- yamlToMeta v
let k' = T.unpack k
updateState $ \st -> st{ stateMeta' =
(do m <- stateMeta' st
-- if there's already a value, leave it unchanged
case lookupMeta k' m of
Just _ -> return m
Nothing -> do
v'' <- v'
return $ B.setMeta (T.unpack k) v'' m)}
) alist
Right Yaml.Null -> return ()
Right _ -> do
logMessage $
CouldNotParseYamlMetadata "not an object"
pos
return ()
Left err' -> do
case err' of
InvalidYaml (Just YamlParseException{
yamlProblem = problem
, yamlContext = _ctxt
, yamlProblemMark = Yaml.YamlMark {
yamlLine = yline
, yamlColumn = ycol
}}) ->
logMessage $ CouldNotParseYamlMetadata
problem (setSourceLine
(setSourceColumn pos
(sourceColumn pos + ycol))
(sourceLine pos + 1 + yline))
_ -> logMessage $ CouldNotParseYamlMetadata
(show err') pos
return ()
return mempty
-- ignore fields ending with _
ignorable :: Text -> Bool
ignorable t = (T.pack "_") `T.isSuffixOf` t
toMetaValue :: PandocMonad m
=> Text -> MarkdownParser m (F MetaValue)
toMetaValue x = toMeta <$> parseFromString parseBlocks (T.unpack x)
where
toMeta p = do
p' <- p
return $
case B.toList p' of
[Plain xs] -> MetaInlines xs
[Para xs]
| endsWithNewline x -> MetaBlocks [Para xs]
| otherwise -> MetaInlines xs
bs -> MetaBlocks bs
endsWithNewline t = T.pack "\n" `T.isSuffixOf` t
yamlToMeta :: PandocMonad m
=> Yaml.Value -> MarkdownParser m (F MetaValue)
yamlToMeta (Yaml.String t) = toMetaValue t
yamlToMeta (Yaml.Number n)
-- avoid decimal points for numbers that don't need them:
| base10Exponent n >= 0 = return $ return $ MetaString $ show
$ coefficient n * (10 ^ base10Exponent n)
| otherwise = return $ return $ MetaString $ show n
yamlToMeta (Yaml.Bool b) = return $ return $ MetaBool b
yamlToMeta (Yaml.Array xs) = do
xs' <- mapM yamlToMeta (V.toList xs)
return $ do
xs'' <- sequence xs'
return $ B.toMetaValue xs''
yamlToMeta (Yaml.Object o) = do
let alist = H.toList o
foldM (\m (k,v) -> do
if ignorable k
then return m
else do
v' <- yamlToMeta v
return $ do
MetaMap m' <- m
v'' <- v'
return (MetaMap $ M.insert (T.unpack k) v'' m'))
(return $ MetaMap M.empty)
alist
yamlToMeta _ = return $ return $ MetaString ""
stopLine :: PandocMonad m => MarkdownParser m ()
stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
mmdTitleBlock :: PandocMonad m => MarkdownParser m ()
mmdTitleBlock = try $ do
guardEnabled Ext_mmd_title_block
firstPair <- kvPair False
restPairs <- many (kvPair True)
let kvPairs = firstPair : restPairs
blanklines
updateState $ \st -> st{ stateMeta' = stateMeta' st <>
return (Meta $ M.fromList kvPairs) }
kvPair :: PandocMonad m => Bool -> MarkdownParser m (String, MetaValue)
kvPair allowEmpty = try $ do
key <- many1Till (alphaNum <|> oneOf "_- ") (char ':')
val <- trim <$> manyTill anyChar
(try $ newline >> lookAhead (blankline <|> nonspaceChar))
guard $ allowEmpty || not (null val)
let key' = concat $ words $ map toLower key
let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ val
return (key',val')
parseMarkdown :: PandocMonad m => MarkdownParser m Pandoc
parseMarkdown = do
optional titleBlock
blocks <- parseBlocks
st <- getState
let doc = runF (do Pandoc _ bs <- B.doc <$> blocks
meta <- stateMeta' st
return $ Pandoc meta bs) st
reportLogMessages
(do guardEnabled Ext_east_asian_line_breaks
return $ bottomUp softBreakFilter doc) <|> return doc
softBreakFilter :: [Inline] -> [Inline]
softBreakFilter (x:SoftBreak:y:zs) =
case (stringify x, stringify y) of
(xs@(_:_), (c:_))
| charWidth (last xs) == 2 && charWidth c == 2 -> x:y:zs
_ -> x:SoftBreak:y:zs
softBreakFilter xs = xs
referenceKey :: PandocMonad m => MarkdownParser m (F Blocks)
referenceKey = try $ do
pos <- getPosition
skipNonindentSpaces
(_,raw) <- reference
char ':'
skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[')
let sourceURL = liftM unwords $ many $ try $ do
skipMany spaceChar
notFollowedBy' referenceTitle
notFollowedBy' $ guardEnabled Ext_link_attributes >> attributes
notFollowedBy' (() <$ reference)
many1 $ notFollowedBy space >> litChar
let betweenAngles = try $ char '<' >> manyTill litChar (char '>')
src <- try betweenAngles <|> sourceURL
tit <- option "" referenceTitle
attr <- option nullAttr $ try $
guardEnabled Ext_link_attributes >> skipSpaces >> attributes
addKvs <- option [] $ guardEnabled Ext_mmd_link_attributes
>> many (try $ spnl >> keyValAttr)
blanklines
let attr' = extractIdClass $ foldl (\x f -> f x) attr addKvs
target = (escapeURI $ trimr src, tit)
st <- getState
let oldkeys = stateKeys st
let key = toKey raw
case M.lookup key oldkeys of
Just _ -> logMessage $ DuplicateLinkReference raw pos
Nothing -> return ()
updateState $ \s -> s { stateKeys = M.insert key (target, attr') oldkeys }
return $ return mempty
referenceTitle :: PandocMonad m => MarkdownParser m String
referenceTitle = try $ do
skipSpaces >> optional newline >> skipSpaces
quotedTitle '"' <|> quotedTitle '\'' <|> charsInBalanced '(' ')' litChar
-- A link title in quotes
quotedTitle :: PandocMonad m => Char -> MarkdownParser m String
quotedTitle c = try $ do
char c
notFollowedBy spaces
let pEnder = try $ char c >> notFollowedBy (satisfy isAlphaNum)
let regChunk = many1 (noneOf ['\\','\n','&',c]) <|> count 1 litChar
let nestedChunk = (\x -> [c] ++ x ++ [c]) <$> quotedTitle c
unwords . words . concat <$> manyTill (nestedChunk <|> regChunk) pEnder
-- | PHP Markdown Extra style abbreviation key. Currently
-- we just skip them, since Pandoc doesn't have an element for
-- an abbreviation.
abbrevKey :: PandocMonad m => MarkdownParser m (F Blocks)
abbrevKey = do
guardEnabled Ext_abbreviations
try $ do
char '*'
reference
char ':'
skipMany (satisfy (/= '\n'))
blanklines
return $ return mempty
noteMarker :: PandocMonad m => MarkdownParser m String
noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']')
rawLine :: PandocMonad m => MarkdownParser m String
rawLine = try $ do
notFollowedBy blankline
notFollowedBy' $ try $ skipNonindentSpaces >> noteMarker
optional indentSpaces
anyLine
rawLines :: PandocMonad m => MarkdownParser m String
rawLines = do
first <- anyLine
rest <- many rawLine
return $ unlines (first:rest)
noteBlock :: PandocMonad m => MarkdownParser m (F Blocks)
noteBlock = try $ do
pos <- getPosition
skipNonindentSpaces
ref <- noteMarker
char ':'
optional blankline
optional indentSpaces
first <- rawLines
rest <- many $ try $ blanklines >> indentSpaces >> rawLines
let raw = unlines (first:rest) ++ "\n"
optional blanklines
parsed <- parseFromString parseBlocks raw
let newnote = (ref, parsed)
oldnotes <- stateNotes' <$> getState
case lookup ref oldnotes of
Just _ -> logMessage $ DuplicateNoteReference ref pos
Nothing -> return ()
updateState $ \s -> s { stateNotes' = newnote : oldnotes }
return mempty
--
-- parsing blocks
--
parseBlocks :: PandocMonad m => MarkdownParser m (F Blocks)
parseBlocks = mconcat <$> manyTill block eof
block :: PandocMonad m => MarkdownParser m (F Blocks)
block = do
pos <- getPosition
res <- choice [ mempty <$ blanklines
, codeBlockFenced
, yamlMetaBlock
-- note: bulletList needs to be before header because of
-- the possibility of empty list items: -
, bulletList
, header
, lhsCodeBlock
, divHtml
, htmlBlock
, table
, codeBlockIndented
, latexMacro
, rawTeXBlock
, lineBlock
, blockQuote
, hrule
, orderedList
, definitionList
, noteBlock
, referenceKey
, abbrevKey
, para
, plain
] <?> "block"
report $ ParsingTrace
(take 60 $ show $ B.toList $ runF res defaultParserState) pos
return res
--
-- header blocks
--
header :: PandocMonad m => MarkdownParser m (F Blocks)
header = setextHeader <|> atxHeader <?> "header"
atxChar :: PandocMonad m => MarkdownParser m Char
atxChar = do
exts <- getOption readerExtensions
return $ if extensionEnabled Ext_literate_haskell exts
then '='
else '#'
atxHeader :: PandocMonad m => MarkdownParser m (F Blocks)
atxHeader = try $ do
level <- atxChar >>= many1 . char >>= return . length
notFollowedBy $ guardEnabled Ext_fancy_lists >>
(char '.' <|> char ')') -- this would be a list
guardDisabled Ext_space_in_atx_header <|> notFollowedBy nonspaceChar
skipSpaces
(text, raw) <- withRaw $
trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline)
attr <- atxClosing
attr' <- registerHeader attr (runF text defaultParserState)
guardDisabled Ext_implicit_header_references
<|> registerImplicitHeader raw attr'
return $ B.headerWith attr' level <$> text
atxClosing :: PandocMonad m => MarkdownParser m Attr
atxClosing = try $ do
attr' <- option nullAttr
(guardEnabled Ext_mmd_header_identifiers >> mmdHeaderIdentifier)
skipMany . char =<< atxChar
skipSpaces
attr <- option attr'
(guardEnabled Ext_header_attributes >> attributes)
blanklines
return attr
setextHeaderEnd :: PandocMonad m => MarkdownParser m Attr
setextHeaderEnd = try $ do
attr <- option nullAttr
$ (guardEnabled Ext_mmd_header_identifiers >> mmdHeaderIdentifier)
<|> (guardEnabled Ext_header_attributes >> attributes)
blanklines
return attr
mmdHeaderIdentifier :: PandocMonad m => MarkdownParser m Attr
mmdHeaderIdentifier = do
(_, raw) <- reference
let raw' = trim $ stripFirstAndLast raw
let ident = concat $ words $ map toLower raw'
let attr = (ident, [], [])
guardDisabled Ext_implicit_header_references
<|> registerImplicitHeader raw' attr
skipSpaces
return attr
setextHeader :: PandocMonad m => MarkdownParser m (F Blocks)
setextHeader = try $ do
-- This lookahead prevents us from wasting time parsing Inlines
-- unless necessary -- it gives a significant performance boost.
lookAhead $ anyLine >> many1 (oneOf setextHChars) >> blankline
skipSpaces
(text, raw) <- withRaw $
trimInlinesF . mconcat <$> many1 (notFollowedBy setextHeaderEnd >> inline)
attr <- setextHeaderEnd
underlineChar <- oneOf setextHChars
many (char underlineChar)
blanklines
let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1
attr' <- registerHeader attr (runF text defaultParserState)
guardDisabled Ext_implicit_header_references
<|> registerImplicitHeader raw attr'
return $ B.headerWith attr' level <$> text
registerImplicitHeader :: PandocMonad m => String -> Attr -> MarkdownParser m ()
registerImplicitHeader raw attr@(ident, _, _) = do
let key = toKey $ "[" ++ raw ++ "]"
updateState (\s -> s { stateHeaderKeys =
M.insert key (('#':ident,""), attr) (stateHeaderKeys s) })
--
-- hrule block
--
hrule :: PandocMonad m => ParserT [Char] st m (F Blocks)
hrule = try $ do
skipSpaces
start <- satisfy isHruleChar
count 2 (skipSpaces >> char start)
skipMany (spaceChar <|> char start)
newline
optional blanklines
return $ return B.horizontalRule
--
-- code blocks
--
indentedLine :: PandocMonad m => MarkdownParser m String
indentedLine = indentSpaces >> anyLine >>= return . (++ "\n")
blockDelimiter :: PandocMonad m
=> (Char -> Bool)
-> Maybe Int
-> ParserT [Char] st m Int
blockDelimiter f len = try $ do
c <- lookAhead (satisfy f)
case len of
Just l -> count l (char c) >> many (char c) >> return l
Nothing -> count 3 (char c) >> many (char c) >>=
return . (+ 3) . length
attributes :: PandocMonad m => MarkdownParser m Attr
attributes = try $ do
char '{'
spnl
attrs <- many (attribute <* spnl)
char '}'
return $ foldl (\x f -> f x) nullAttr attrs
attribute :: PandocMonad m => MarkdownParser m (Attr -> Attr)
attribute = identifierAttr <|> classAttr <|> keyValAttr <|> specialAttr
identifier :: PandocMonad m => MarkdownParser m String
identifier = do
first <- letter
rest <- many $ alphaNum <|> oneOf "-_:."
return (first:rest)
identifierAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr)
identifierAttr = try $ do
char '#'
result <- identifier
return $ \(_,cs,kvs) -> (result,cs,kvs)
classAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr)
classAttr = try $ do
char '.'
result <- identifier
return $ \(id',cs,kvs) -> (id',cs ++ [result],kvs)
keyValAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr)
keyValAttr = try $ do
key <- identifier
char '='
val <- enclosed (char '"') (char '"') litChar
<|> enclosed (char '\'') (char '\'') litChar
<|> many (escapedChar' <|> noneOf " \t\n\r}")
return $ \(id',cs,kvs) ->
case key of
"id" -> (val,cs,kvs)
"class" -> (id',cs ++ words val,kvs)
_ -> (id',cs,kvs ++ [(key,val)])
specialAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr)
specialAttr = do
char '-'
return $ \(id',cs,kvs) -> (id',cs ++ ["unnumbered"],kvs)
codeBlockFenced :: PandocMonad m => MarkdownParser m (F Blocks)
codeBlockFenced = try $ do
c <- try (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~'))
<|> (guardEnabled Ext_backtick_code_blocks >> lookAhead (char '`'))
size <- blockDelimiter (== c) Nothing
skipMany spaceChar
attr <- option ([],[],[]) $
try (guardEnabled Ext_fenced_code_attributes >> attributes)
<|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar)
blankline
contents <- manyTill anyLine (blockDelimiter (== c) (Just size))
blanklines
return $ return $ B.codeBlockWith attr $ intercalate "\n" contents
-- correctly handle github language identifiers
toLanguageId :: String -> String
toLanguageId = map toLower . go
where go "c++" = "cpp"
go "objective-c" = "objectivec"
go x = x
codeBlockIndented :: PandocMonad m => MarkdownParser m (F Blocks)
codeBlockIndented = do
contents <- many1 (indentedLine <|>
try (do b <- blanklines
l <- indentedLine
return $ b ++ l))
optional blanklines
classes <- getOption readerIndentedCodeClasses
return $ return $ B.codeBlockWith ("", classes, []) $
stripTrailingNewlines $ concat contents
lhsCodeBlock :: PandocMonad m => MarkdownParser m (F Blocks)
lhsCodeBlock = do
guardEnabled Ext_literate_haskell
(return . B.codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$>
(lhsCodeBlockBird <|> lhsCodeBlockLaTeX))
<|> (return . B.codeBlockWith ("",["sourceCode","haskell"],[]) <$>
lhsCodeBlockInverseBird)
lhsCodeBlockLaTeX :: PandocMonad m => MarkdownParser m String
lhsCodeBlockLaTeX = try $ do
string "\\begin{code}"
manyTill spaceChar newline
contents <- many1Till anyChar (try $ string "\\end{code}")
blanklines
return $ stripTrailingNewlines contents
lhsCodeBlockBird :: PandocMonad m => MarkdownParser m String
lhsCodeBlockBird = lhsCodeBlockBirdWith '>'
lhsCodeBlockInverseBird :: PandocMonad m => MarkdownParser m String
lhsCodeBlockInverseBird = lhsCodeBlockBirdWith '<'
lhsCodeBlockBirdWith :: PandocMonad m => Char -> MarkdownParser m String
lhsCodeBlockBirdWith c = try $ do
pos <- getPosition
when (sourceColumn pos /= 1) $ fail "Not in first column"
lns <- many1 $ birdTrackLine c
-- if (as is normal) there is always a space after >, drop it
let lns' = if all (\ln -> null ln || take 1 ln == " ") lns
then map (drop 1) lns
else lns
blanklines
return $ intercalate "\n" lns'
birdTrackLine :: PandocMonad m => Char -> ParserT [Char] st m String
birdTrackLine c = try $ do
char c
-- allow html tags on left margin:
when (c == '<') $ notFollowedBy letter
anyLine
--
-- block quotes
--
emailBlockQuoteStart :: PandocMonad m => MarkdownParser m Char
emailBlockQuoteStart = try $ skipNonindentSpaces >> char '>' <* optional (char ' ')
emailBlockQuote :: PandocMonad m => MarkdownParser m [String]
emailBlockQuote = try $ do
emailBlockQuoteStart
let emailLine = many $ nonEndline <|> try
(endline >> notFollowedBy emailBlockQuoteStart >>
return '\n')
let emailSep = try (newline >> emailBlockQuoteStart)
first <- emailLine
rest <- many $ try $ emailSep >> emailLine
let raw = first:rest
newline <|> (eof >> return '\n')
optional blanklines
return raw
blockQuote :: PandocMonad m => MarkdownParser m (F Blocks)
blockQuote = do
raw <- emailBlockQuote
-- parse the extracted block, which may contain various block elements:
contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n"
return $ B.blockQuote <$> contents
--
-- list blocks
--
bulletListStart :: PandocMonad m => MarkdownParser m ()
bulletListStart = try $ do
optional newline -- if preceded by a Plain block in a list context
startpos <- sourceColumn <$> getPosition
skipNonindentSpaces
notFollowedBy' (() <$ hrule) -- because hrules start out just like lists
satisfy isBulletListMarker
endpos <- sourceColumn <$> getPosition
tabStop <- getOption readerTabStop
lookAhead (newline <|> spaceChar)
() <$ atMostSpaces (tabStop - (endpos - startpos))
anyOrderedListStart :: PandocMonad m => MarkdownParser m (Int, ListNumberStyle, ListNumberDelim)
anyOrderedListStart = try $ do
optional newline -- if preceded by a Plain block in a list context
startpos <- sourceColumn <$> getPosition
skipNonindentSpaces
notFollowedBy $ string "p." >> spaceChar >> digit -- page number
res <- do guardDisabled Ext_fancy_lists
start <- many1 digit >>= safeRead
char '.'
return (start, DefaultStyle, DefaultDelim)
<|> do (num, style, delim) <- anyOrderedListMarker
-- if it could be an abbreviated first name,
-- insist on more than one space
when (delim == Period && (style == UpperAlpha ||
(style == UpperRoman &&
num `elem` [1, 5, 10, 50, 100, 500, 1000]))) $
() <$ spaceChar
return (num, style, delim)
endpos <- sourceColumn <$> getPosition
tabStop <- getOption readerTabStop
lookAhead (newline <|> spaceChar)
atMostSpaces (tabStop - (endpos - startpos))
return res
listStart :: PandocMonad m => MarkdownParser m ()
listStart = bulletListStart <|> (anyOrderedListStart >> return ())
listLine :: PandocMonad m => MarkdownParser m String
listLine = try $ do
notFollowedBy' (do indentSpaces
many spaceChar
listStart)
notFollowedByHtmlCloser
optional (() <$ indentSpaces)
listLineCommon
listLineCommon :: PandocMonad m => MarkdownParser m String
listLineCommon = concat <$> manyTill
( many1 (satisfy $ \c -> c /= '\n' && c /= '<')
<|> liftM snd (htmlTag isCommentTag)
<|> count 1 anyChar
) newline
-- parse raw text for one list item, excluding start marker and continuations
rawListItem :: PandocMonad m
=> MarkdownParser m a
-> MarkdownParser m String
rawListItem start = try $ do
start
first <- listLineCommon
rest <- many (notFollowedBy listStart >> notFollowedBy blankline >> listLine)
blanks <- many blankline
return $ unlines (first:rest) ++ blanks
-- continuation of a list item - indented and separated by blankline
-- or (in compact lists) endline.
-- note: nested lists are parsed as continuations
listContinuation :: PandocMonad m => MarkdownParser m String
listContinuation = try $ do
lookAhead indentSpaces
result <- many1 listContinuationLine
blanks <- many blankline
return $ concat result ++ blanks
notFollowedByHtmlCloser :: PandocMonad m => MarkdownParser m ()
notFollowedByHtmlCloser = do
inHtmlBlock <- stateInHtmlBlock <$> getState
case inHtmlBlock of
Just t -> notFollowedBy' $ htmlTag (~== TagClose t)
Nothing -> return ()
listContinuationLine :: PandocMonad m => MarkdownParser m String
listContinuationLine = try $ do
notFollowedBy blankline
notFollowedBy' listStart
notFollowedByHtmlCloser
optional indentSpaces
result <- anyLine
return $ result ++ "\n"
listItem :: PandocMonad m
=> MarkdownParser m a
-> MarkdownParser m (F Blocks)
listItem start = try $ do
first <- rawListItem start
continuations <- many listContinuation
-- parsing with ListItemState forces markers at beginning of lines to
-- count as list item markers, even if not separated by blank space.
-- see definition of "endline"
state <- getState
let oldContext = stateParserContext state
setState $ state {stateParserContext = ListItemState}
-- parse the extracted block, which may contain various block elements:
let raw = concat (first:continuations)
contents <- parseFromString parseBlocks raw
updateState (\st -> st {stateParserContext = oldContext})
return contents
orderedList :: PandocMonad m => MarkdownParser m (F Blocks)
orderedList = try $ do
(start, style, delim) <- lookAhead anyOrderedListStart
unless (style `elem` [DefaultStyle, Decimal, Example] &&
delim `elem` [DefaultDelim, Period]) $
guardEnabled Ext_fancy_lists
when (style == Example) $ guardEnabled Ext_example_lists
items <- fmap sequence $ many1 $ listItem
( try $ do
optional newline -- if preceded by Plain block in a list
startpos <- sourceColumn <$> getPosition
skipNonindentSpaces
res <- orderedListMarker style delim
endpos <- sourceColumn <$> getPosition
tabStop <- getOption readerTabStop
lookAhead (newline <|> spaceChar)
atMostSpaces (tabStop - (endpos - startpos))
return res )
start' <- option 1 $ guardEnabled Ext_startnum >> return start
return $ B.orderedListWith (start', style, delim) <$> fmap compactify items
bulletList :: PandocMonad m => MarkdownParser m (F Blocks)
bulletList = do
items <- fmap sequence $ many1 $ listItem bulletListStart
return $ B.bulletList <$> fmap compactify items
-- definition lists
defListMarker :: PandocMonad m => MarkdownParser m ()
defListMarker = do
sps <- nonindentSpaces
char ':' <|> char '~'
tabStop <- getOption readerTabStop
let remaining = tabStop - (length sps + 1)
if remaining > 0
then try (count remaining (char ' ')) <|> string "\t" <|> many1 spaceChar
else mzero
return ()
definitionListItem :: PandocMonad m => Bool -> MarkdownParser m (F (Inlines, [Blocks]))
definitionListItem compact = try $ do
rawLine' <- anyLine
raw <- many1 $ defRawBlock compact
term <- parseFromString (trimInlinesF . mconcat <$> many inline) rawLine'
contents <- mapM (parseFromString parseBlocks . (++"\n")) raw
optional blanklines
return $ liftM2 (,) term (sequence contents)
defRawBlock :: PandocMonad m => Bool -> MarkdownParser m String
defRawBlock compact = try $ do
hasBlank <- option False $ blankline >> return True
defListMarker
firstline <- anyLine
let dline = try
( do notFollowedBy blankline
notFollowedByHtmlCloser
if compact -- laziness not compatible with compact
then () <$ indentSpaces
else (() <$ indentSpaces)
<|> notFollowedBy defListMarker
anyLine )
rawlines <- many dline
cont <- liftM concat $ many $ try $ do
trailing <- option "" blanklines
ln <- indentSpaces >> notFollowedBy blankline >> anyLine
lns <- many dline
return $ trailing ++ unlines (ln:lns)
return $ trimr (firstline ++ "\n" ++ unlines rawlines ++ cont) ++
if hasBlank || not (null cont) then "\n\n" else ""
definitionList :: PandocMonad m => MarkdownParser m (F Blocks)
definitionList = try $ do
lookAhead (anyLine >>
optional (blankline >> notFollowedBy (table >> return ())) >>
-- don't capture table caption as def list!
defListMarker)
compactDefinitionList <|> normalDefinitionList
compactDefinitionList :: PandocMonad m => MarkdownParser m (F Blocks)
compactDefinitionList = do
guardEnabled Ext_compact_definition_lists
items <- fmap sequence $ many1 $ definitionListItem True
return $ B.definitionList <$> fmap compactifyDL items
normalDefinitionList :: PandocMonad m => MarkdownParser m (F Blocks)
normalDefinitionList = do
guardEnabled Ext_definition_lists
items <- fmap sequence $ many1 $ definitionListItem False
return $ B.definitionList <$> items
--
-- paragraph block
--
para :: PandocMonad m => MarkdownParser m (F Blocks)
para = try $ do
exts <- getOption readerExtensions
result <- trimInlinesF . mconcat <$> many1 inline
option (B.plain <$> result)
$ try $ do
newline
(blanklines >> return mempty)
<|> (guardDisabled Ext_blank_before_blockquote >> () <$ lookAhead blockQuote)
<|> (guardEnabled Ext_backtick_code_blocks >> () <$ lookAhead codeBlockFenced)
<|> (guardDisabled Ext_blank_before_header >> () <$ lookAhead header)
<|> (guardEnabled Ext_lists_without_preceding_blankline >>
-- Avoid creating a paragraph in a nested list.
notFollowedBy' inList >>
() <$ lookAhead listStart)
<|> do guardEnabled Ext_native_divs
inHtmlBlock <- stateInHtmlBlock <$> getState
case inHtmlBlock of
Just "div" -> () <$
lookAhead (htmlTag (~== TagClose "div"))
_ -> mzero
return $ do
result' <- result
case B.toList result' of
[Image attr alt (src,tit)]
| Ext_implicit_figures `extensionEnabled` exts ->
-- the fig: at beginning of title indicates a figure
return $ B.para $ B.singleton
$ Image attr alt (src,'f':'i':'g':':':tit)
_ -> return $ B.para result'
plain :: PandocMonad m => MarkdownParser m (F Blocks)
plain = fmap B.plain . trimInlinesF . mconcat <$> many1 inline
--
-- raw html
--
htmlElement :: PandocMonad m => MarkdownParser m String
htmlElement = rawVerbatimBlock
<|> strictHtmlBlock
<|> liftM snd (htmlTag isBlockTag)
htmlBlock :: PandocMonad m => MarkdownParser m (F Blocks)
htmlBlock = do
guardEnabled Ext_raw_html
try (do
(TagOpen t attrs) <- lookAhead $ fst <$> htmlTag isBlockTag
(guard (t `elem` ["pre","style","script"]) >>
(return . B.rawBlock "html") <$> rawVerbatimBlock)
<|> (do guardEnabled Ext_markdown_attribute
oldMarkdownAttribute <- stateMarkdownAttribute <$> getState
markdownAttribute <-
case lookup "markdown" attrs of
Just "0" -> False <$ updateState (\st -> st{
stateMarkdownAttribute = False })
Just _ -> True <$ updateState (\st -> st{
stateMarkdownAttribute = True })
Nothing -> return oldMarkdownAttribute
res <- if markdownAttribute
then rawHtmlBlocks
else htmlBlock'
updateState $ \st -> st{ stateMarkdownAttribute =
oldMarkdownAttribute }
return res)
<|> (guardEnabled Ext_markdown_in_html_blocks >> rawHtmlBlocks))
<|> htmlBlock'
htmlBlock' :: PandocMonad m => MarkdownParser m (F Blocks)
htmlBlock' = try $ do
first <- htmlElement
skipMany spaceChar
optional blanklines
return $ return $ B.rawBlock "html" first
strictHtmlBlock :: PandocMonad m => MarkdownParser m String
strictHtmlBlock = htmlInBalanced (not . isInlineTag)
rawVerbatimBlock :: PandocMonad m => MarkdownParser m String
rawVerbatimBlock = htmlInBalanced isVerbTag
where isVerbTag (TagOpen "pre" _) = True
isVerbTag (TagOpen "style" _) = True
isVerbTag (TagOpen "script" _) = True
isVerbTag _ = False
latexMacro :: PandocMonad m => MarkdownParser m (F Blocks)
latexMacro = try $ do
guardEnabled Ext_latex_macros
skipNonindentSpaces
res <- macro
return $ return res
rawTeXBlock :: PandocMonad m => MarkdownParser m (F Blocks)
rawTeXBlock = do
guardEnabled Ext_raw_tex
result <- (B.rawBlock "latex" . concat <$>
rawLaTeXBlock `sepEndBy1` blankline)
<|> (B.rawBlock "context" . concat <$>
rawConTeXtEnvironment `sepEndBy1` blankline)
spaces
return $ return result
rawHtmlBlocks :: PandocMonad m => MarkdownParser m (F Blocks)
rawHtmlBlocks = do
(TagOpen tagtype _, raw) <- htmlTag isBlockTag
-- try to find closing tag
-- we set stateInHtmlBlock so that closing tags that can be either block or
-- inline will not be parsed as inline tags
oldInHtmlBlock <- stateInHtmlBlock <$> getState
updateState $ \st -> st{ stateInHtmlBlock = Just tagtype }
let closer = htmlTag (\x -> x ~== TagClose tagtype)
contents <- mconcat <$> many (notFollowedBy' closer >> block)
result <-
(closer >>= \(_, rawcloser) -> return (
return (B.rawBlock "html" $ stripMarkdownAttribute raw) <>
contents <>
return (B.rawBlock "html" rawcloser)))
<|> return (return (B.rawBlock "html" raw) <> contents)
updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock }
return result
-- remove markdown="1" attribute
stripMarkdownAttribute :: String -> String
stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s
where filterAttrib (TagOpen t as) = TagOpen t
[(k,v) | (k,v) <- as, k /= "markdown"]
filterAttrib x = x
--
-- line block
--
lineBlock :: PandocMonad m => MarkdownParser m (F Blocks)
lineBlock = try $ do
guardEnabled Ext_line_blocks
lines' <- lineBlockLines >>=
mapM (parseFromString (trimInlinesF . mconcat <$> many inline))
return $ B.lineBlock <$> sequence lines'
--
-- Tables
--
-- Parse a dashed line with optional trailing spaces; return its length
-- and the length including trailing space.
dashedLine :: PandocMonad m
=> Char
-> ParserT [Char] st m (Int, Int)
dashedLine ch = do
dashes <- many1 (char ch)
sp <- many spaceChar
let lengthDashes = length dashes
lengthSp = length sp
return (lengthDashes, lengthDashes + lengthSp)
-- Parse a table header with dashed lines of '-' preceded by
-- one (or zero) line of text.
simpleTableHeader :: PandocMonad m
=> Bool -- ^ Headerless table
-> MarkdownParser m (F [Blocks], [Alignment], [Int])
simpleTableHeader headless = try $ do
rawContent <- if headless
then return ""
else anyLine
initSp <- nonindentSpaces
dashes <- many1 (dashedLine '-')
newline
let (lengths, lines') = unzip dashes
let indices = scanl (+) (length initSp) lines'
-- If no header, calculate alignment on basis of first row of text
rawHeads <- liftM (tail . splitStringByIndices (init indices)) $
if headless
then lookAhead anyLine
else return rawContent
let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths
let rawHeads' = if headless
then replicate (length dashes) ""
else rawHeads
heads <- fmap sequence
$ mapM (parseFromString (mconcat <$> many plain))
$ map trim rawHeads'
return (heads, aligns, indices)
-- Returns an alignment type for a table, based on a list of strings
-- (the rows of the column header) and a number (the length of the
-- dashed line under the rows.
alignType :: [String]
-> Int
-> Alignment
alignType [] _ = AlignDefault
alignType strLst len =
let nonempties = filter (not . null) $ map trimr strLst
(leftSpace, rightSpace) =
case sortBy (comparing length) nonempties of
(x:_) -> (head x `elem` " \t", length x < len)
[] -> (False, False)
in case (leftSpace, rightSpace) of
(True, False) -> AlignRight
(False, True) -> AlignLeft
(True, True) -> AlignCenter
(False, False) -> AlignDefault
-- Parse a table footer - dashed lines followed by blank line.
tableFooter :: PandocMonad m => MarkdownParser m String
tableFooter = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> blanklines
-- Parse a table separator - dashed line.
tableSep :: PandocMonad m => MarkdownParser m Char
tableSep = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> char '\n'
-- Parse a raw line and split it into chunks by indices.
rawTableLine :: PandocMonad m
=> [Int]
-> MarkdownParser m [String]
rawTableLine indices = do
notFollowedBy' (blanklines <|> tableFooter)
line <- many1Till anyChar newline
return $ map trim $ tail $
splitStringByIndices (init indices) line
-- Parse a table line and return a list of lists of blocks (columns).
tableLine :: PandocMonad m
=> [Int]
-> MarkdownParser m (F [Blocks])
tableLine indices = rawTableLine indices >>=
fmap sequence . mapM (parseFromString (mconcat <$> many plain))
-- Parse a multiline table row and return a list of blocks (columns).
multilineRow :: PandocMonad m
=> [Int]
-> MarkdownParser m (F [Blocks])
multilineRow indices = do
colLines <- many1 (rawTableLine indices)
let cols = map unlines $ transpose colLines
fmap sequence $ mapM (parseFromString (mconcat <$> many plain)) cols
-- Parses a table caption: inlines beginning with 'Table:'
-- and followed by blank lines.
tableCaption :: PandocMonad m => MarkdownParser m (F Inlines)
tableCaption = try $ do
guardEnabled Ext_table_captions
skipNonindentSpaces
string ":" <|> string "Table:"
trimInlinesF . mconcat <$> many1 inline <* blanklines
-- Parse a simple table with '---' header and one line per row.
simpleTable :: PandocMonad m
=> Bool -- ^ Headerless table
-> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]])
simpleTable headless = do
(aligns, _widths, heads', lines') <-
tableWith (simpleTableHeader headless) tableLine
(return ())
(if headless then tableFooter else tableFooter <|> blanklines)
-- Simple tables get 0s for relative column widths (i.e., use default)
return (aligns, replicate (length aligns) 0, heads', lines')
-- Parse a multiline table: starts with row of '-' on top, then header
-- (which may be multiline), then the rows,
-- which may be multiline, separated by blank lines, and
-- ending with a footer (dashed line followed by blank line).
multilineTable :: PandocMonad m
=> Bool -- ^ Headerless table
-> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]])
multilineTable headless =
tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter
multilineTableHeader :: PandocMonad m
=> Bool -- ^ Headerless table
-> MarkdownParser m (F [Blocks], [Alignment], [Int])
multilineTableHeader headless = try $ do
unless headless $
tableSep >> notFollowedBy blankline
rawContent <- if headless
then return $ repeat ""
else many1 $ notFollowedBy tableSep >> anyLine
initSp <- nonindentSpaces
dashes <- many1 (dashedLine '-')
newline
let (lengths, lines') = unzip dashes
let indices = scanl (+) (length initSp) lines'
rawHeadsList <- if headless
then liftM (map (:[]) . tail .
splitStringByIndices (init indices)) $ lookAhead anyLine
else return $ transpose $ map
(tail . splitStringByIndices (init indices))
rawContent
let aligns = zipWith alignType rawHeadsList lengths
let rawHeads = if headless
then replicate (length dashes) ""
else map (unlines . map trim) rawHeadsList
heads <- fmap sequence $
mapM (parseFromString (mconcat <$> many plain)) $
map trim rawHeads
return (heads, aligns, indices)
-- Parse a grid table: starts with row of '-' on top, then header
-- (which may be grid), then the rows,
-- which may be grid, separated by blank lines, and
-- ending with a footer (dashed line followed by blank line).
gridTable :: PandocMonad m => Bool -- ^ Headerless table
-> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]])
gridTable headless =
tableWith (gridTableHeader headless) gridTableRow
(gridTableSep '-') gridTableFooter
gridTableSplitLine :: [Int] -> String -> [String]
gridTableSplitLine indices line = map removeFinalBar $ tail $
splitStringByIndices (init indices) $ trimr line
gridPart :: PandocMonad m => Char -> ParserT [Char] st m ((Int, Int), Alignment)
gridPart ch = do
leftColon <- option False (True <$ char ':')
dashes <- many1 (char ch)
rightColon <- option False (True <$ char ':')
char '+'
let lengthDashes = length dashes + (if leftColon then 1 else 0) +
(if rightColon then 1 else 0)
let alignment = case (leftColon, rightColon) of
(True, True) -> AlignCenter
(True, False) -> AlignLeft
(False, True) -> AlignRight
(False, False) -> AlignDefault
return ((lengthDashes, lengthDashes + 1), alignment)
gridDashedLines :: PandocMonad m => Char -> ParserT [Char] st m [((Int, Int), Alignment)]
gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline
removeFinalBar :: String -> String
removeFinalBar =
reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse
-- | Separator between rows of grid table.
gridTableSep :: PandocMonad m => Char -> MarkdownParser m Char
gridTableSep ch = try $ gridDashedLines ch >> return '\n'
-- | Parse header for a grid table.
gridTableHeader :: PandocMonad m => Bool -- ^ Headerless table
-> MarkdownParser m (F [Blocks], [Alignment], [Int])
gridTableHeader headless = try $ do
optional blanklines
dashes <- gridDashedLines '-'
rawContent <- if headless
then return []
else many1 (try (char '|' >> anyLine))
underDashes <- if headless
then return dashes
else gridDashedLines '='
guard $ length dashes == length underDashes
let lines' = map (snd . fst) underDashes
let indices = scanl (+) 0 lines'
let aligns = map snd underDashes
let rawHeads = if headless
then replicate (length underDashes) ""
else map (unlines . map trim) $ transpose
$ map (gridTableSplitLine indices) rawContent
heads <- fmap sequence $ mapM (parseFromString parseBlocks . trim) rawHeads
return (heads, aligns, indices)
gridTableRawLine :: PandocMonad m => [Int] -> MarkdownParser m [String]
gridTableRawLine indices = do
char '|'
line <- anyLine
return (gridTableSplitLine indices line)
-- | Parse row of grid table.
gridTableRow :: PandocMonad m => [Int]
-> MarkdownParser m (F [Blocks])
gridTableRow indices = do
colLines <- many1 (gridTableRawLine indices)
let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $
transpose colLines
fmap compactify <$> fmap sequence (mapM (parseFromString parseBlocks) cols)
removeOneLeadingSpace :: [String] -> [String]
removeOneLeadingSpace xs =
if all startsWithSpace xs
then map (drop 1) xs
else xs
where startsWithSpace "" = True
startsWithSpace (y:_) = y == ' '
-- | Parse footer for a grid table.
gridTableFooter :: PandocMonad m => MarkdownParser m [Char]
gridTableFooter = blanklines
pipeBreak :: PandocMonad m => MarkdownParser m ([Alignment], [Int])
pipeBreak = try $ do
nonindentSpaces
openPipe <- (True <$ char '|') <|> return False
first <- pipeTableHeaderPart
rest <- many $ sepPipe *> pipeTableHeaderPart
-- surrounding pipes needed for a one-column table:
guard $ not (null rest && not openPipe)
optional (char '|')
blankline
return $ unzip (first:rest)
pipeTable :: PandocMonad m => MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]])
pipeTable = try $ do
nonindentSpaces
lookAhead nonspaceChar
(heads,(aligns, seplengths)) <- (,) <$> pipeTableRow <*> pipeBreak
let heads' = take (length aligns) <$> heads
lines' <- many pipeTableRow
let lines'' = map (take (length aligns) <$>) lines'
let maxlength = maximum $
map (\x -> length . stringify $ runF x def) (heads' : lines'')
numColumns <- getOption readerColumns
let widths = if maxlength > numColumns
then map (\len ->
fromIntegral (len + 1) / fromIntegral numColumns)
seplengths
else replicate (length aligns) 0.0
return $ (aligns, widths, heads', sequence lines'')
sepPipe :: PandocMonad m => MarkdownParser m ()
sepPipe = try $ do
char '|' <|> char '+'
notFollowedBy blankline
-- parse a row, also returning probable alignments for org-table cells
pipeTableRow :: PandocMonad m => MarkdownParser m (F [Blocks])
pipeTableRow = try $ do
scanForPipe
skipMany spaceChar
openPipe <- (True <$ char '|') <|> return False
-- split into cells
let chunk = void (code <|> math <|> rawHtmlInline <|> escapedChar <|> rawLaTeXInline')
<|> void (noneOf "|\n\r")
let cellContents = ((trim . snd) <$> withRaw (many chunk)) >>=
parseFromString pipeTableCell
cells <- cellContents `sepEndBy1` (char '|')
-- surrounding pipes needed for a one-column table:
guard $ not (length cells == 1 && not openPipe)
blankline
return $ sequence cells
pipeTableCell :: PandocMonad m => MarkdownParser m (F Blocks)
pipeTableCell = do
result <- many inline
if null result
then return mempty
else return $ B.plain . mconcat <$> sequence result
pipeTableHeaderPart :: PandocMonad m => ParserT [Char] st m (Alignment, Int)
pipeTableHeaderPart = try $ do
skipMany spaceChar
left <- optionMaybe (char ':')
pipe <- many1 (char '-')
right <- optionMaybe (char ':')
skipMany spaceChar
let len = length pipe + maybe 0 (const 1) left + maybe 0 (const 1) right
return $
((case (left,right) of
(Nothing,Nothing) -> AlignDefault
(Just _,Nothing) -> AlignLeft
(Nothing,Just _) -> AlignRight
(Just _,Just _) -> AlignCenter), len)
-- Succeed only if current line contains a pipe.
scanForPipe :: PandocMonad m => ParserT [Char] st m ()
scanForPipe = do
inp <- getInput
case break (\c -> c == '\n' || c == '|') inp of
(_,'|':_) -> return ()
_ -> mzero
-- | Parse a table using 'headerParser', 'rowParser',
-- 'lineParser', and 'footerParser'. Variant of the version in
-- Text.Pandoc.Parsing.
tableWith :: PandocMonad m
=> MarkdownParser m (F [Blocks], [Alignment], [Int])
-> ([Int] -> MarkdownParser m (F [Blocks]))
-> MarkdownParser m sep
-> MarkdownParser m end
-> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]])
tableWith headerParser rowParser lineParser footerParser = try $ do
(heads, aligns, indices) <- headerParser
lines' <- fmap sequence $ rowParser indices `sepEndBy1` lineParser
footerParser
numColumns <- getOption readerColumns
let widths = if (indices == [])
then replicate (length aligns) 0.0
else widthsFromIndices numColumns indices
return $ (aligns, widths, heads, lines')
table :: PandocMonad m => MarkdownParser m (F Blocks)
table = try $ do
frontCaption <- option Nothing (Just <$> tableCaption)
(aligns, widths, heads, lns) <-
try (guardEnabled Ext_pipe_tables >> scanForPipe >> pipeTable) <|>
try (guardEnabled Ext_multiline_tables >>
multilineTable False) <|>
try (guardEnabled Ext_simple_tables >>
(simpleTable True <|> simpleTable False)) <|>
try (guardEnabled Ext_multiline_tables >>
multilineTable True) <|>
try (guardEnabled Ext_grid_tables >>
(gridTable False <|> gridTable True)) <?> "table"
optional blanklines
caption <- case frontCaption of
Nothing -> option (return mempty) tableCaption
Just c -> return c
-- renormalize widths if greater than 100%:
let totalWidth = sum widths
let widths' = if totalWidth < 1
then widths
else map (/ totalWidth) widths
return $ do
caption' <- caption
heads' <- heads
lns' <- lns
return $ B.table caption' (zip aligns widths') heads' lns'
--
-- inline
--
inline :: PandocMonad m => MarkdownParser m (F Inlines)
inline = choice [ whitespace
, bareURL
, str
, endline
, code
, strongOrEmph
, note
, cite
, bracketedSpan
, link
, image
, math
, strikeout
, subscript
, superscript
, inlineNote -- after superscript because of ^[link](/foo)^
, autoLink
, spanHtml
, rawHtmlInline
, escapedChar
, rawLaTeXInline'
, exampleRef
, smart
, return . B.singleton <$> charRef
, emoji
, symbol
, ltSign
] <?> "inline"
escapedChar' :: PandocMonad m => MarkdownParser m Char
escapedChar' = try $ do
char '\\'
(guardEnabled Ext_all_symbols_escapable >> satisfy (not . isAlphaNum))
<|> (guardEnabled Ext_angle_brackets_escapable >>
oneOf "\\`*_{}[]()>#+-.!~\"<>")
<|> (guardEnabled Ext_escaped_line_breaks >> char '\n')
<|> oneOf "\\`*_{}[]()>#+-.!~\""
escapedChar :: PandocMonad m => MarkdownParser m (F Inlines)
escapedChar = do
result <- escapedChar'
case result of
' ' -> return $ return $ B.str "\160" -- "\ " is a nonbreaking space
'\n' -> guardEnabled Ext_escaped_line_breaks >>
return (return B.linebreak) -- "\[newline]" is a linebreak
_ -> return $ return $ B.str [result]
ltSign :: PandocMonad m => MarkdownParser m (F Inlines)
ltSign = do
guardDisabled Ext_raw_html
<|> (notFollowedByHtmlCloser >> notFollowedBy' (htmlTag isBlockTag))
char '<'
return $ return $ B.str "<"
exampleRef :: PandocMonad m => MarkdownParser m (F Inlines)
exampleRef = try $ do
guardEnabled Ext_example_lists
char '@'
lab <- many1 (alphaNum <|> oneOf "-_")
return $ do
st <- askF
return $ case M.lookup lab (stateExamples st) of
Just n -> B.str (show n)
Nothing -> B.str ('@':lab)
symbol :: PandocMonad m => MarkdownParser m (F Inlines)
symbol = do
result <- noneOf "<\\\n\t "
<|> try (do lookAhead $ char '\\'
notFollowedBy' (() <$ rawTeXBlock)
char '\\')
return $ return $ B.str [result]
-- parses inline code, between n `s and n `s
code :: PandocMonad m => MarkdownParser m (F Inlines)
code = try $ do
starts <- many1 (char '`')
skipSpaces
result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|>
(char '\n' >> notFollowedBy' blankline >> return " "))
(try (skipSpaces >> count (length starts) (char '`') >>
notFollowedBy (char '`')))
attr <- option ([],[],[]) (try $ guardEnabled Ext_inline_code_attributes
>> attributes)
return $ return $ B.codeWith attr $ trim $ concat result
math :: PandocMonad m => MarkdownParser m (F Inlines)
math = (return . B.displayMath <$> (mathDisplay >>= applyMacros'))
<|> (return . B.math <$> (mathInline >>= applyMacros')) <+?>
(guardEnabled Ext_smart *> (return <$> apostrophe)
<* notFollowedBy (space <|> satisfy isPunctuation))
-- Parses material enclosed in *s, **s, _s, or __s.
-- Designed to avoid backtracking.
enclosure :: PandocMonad m
=> Char
-> MarkdownParser m (F Inlines)
enclosure c = do
-- we can't start an enclosure with _ if after a string and
-- the intraword_underscores extension is enabled:
guardDisabled Ext_intraword_underscores
<|> guard (c == '*')
<|> (guard =<< notAfterString)
cs <- many1 (char c)
(return (B.str cs) <>) <$> whitespace
<|> do
case length cs of
3 -> three c
2 -> two c mempty
1 -> one c mempty
_ -> return (return $ B.str cs)
ender :: PandocMonad m => Char -> Int -> MarkdownParser m ()
ender c n = try $ do
count n (char c)
guard (c == '*')
<|> guardDisabled Ext_intraword_underscores
<|> notFollowedBy alphaNum
-- Parse inlines til you hit one c or a sequence of two cs.
-- If one c, emit emph and then parse two.
-- If two cs, emit strong and then parse one.
-- Otherwise, emit ccc then the results.
three :: PandocMonad m => Char -> MarkdownParser m (F Inlines)
three c = do
contents <- mconcat <$> many (notFollowedBy (ender c 1) >> inline)
(ender c 3 >> return ((B.strong . B.emph) <$> contents))
<|> (ender c 2 >> one c (B.strong <$> contents))
<|> (ender c 1 >> two c (B.emph <$> contents))
<|> return (return (B.str [c,c,c]) <> contents)
-- Parse inlines til you hit two c's, and emit strong.
-- If you never do hit two cs, emit ** plus inlines parsed.
two :: PandocMonad m => Char -> F Inlines -> MarkdownParser m (F Inlines)
two c prefix' = do
contents <- mconcat <$> many (try $ notFollowedBy (ender c 2) >> inline)
(ender c 2 >> return (B.strong <$> (prefix' <> contents)))
<|> return (return (B.str [c,c]) <> (prefix' <> contents))
-- Parse inlines til you hit a c, and emit emph.
-- If you never hit a c, emit * plus inlines parsed.
one :: PandocMonad m => Char -> F Inlines -> MarkdownParser m (F Inlines)
one c prefix' = do
contents <- mconcat <$> many ( (notFollowedBy (ender c 1) >> inline)
<|> try (string [c,c] >>
notFollowedBy (ender c 1) >>
two c mempty) )
(ender c 1 >> return (B.emph <$> (prefix' <> contents)))
<|> return (return (B.str [c]) <> (prefix' <> contents))
strongOrEmph :: PandocMonad m => MarkdownParser m (F Inlines)
strongOrEmph = enclosure '*' <|> enclosure '_'
-- | Parses a list of inlines between start and end delimiters.
inlinesBetween :: PandocMonad m
=> (Show b)
=> MarkdownParser m a
-> MarkdownParser m b
-> MarkdownParser m (F Inlines)
inlinesBetween start end =
(trimInlinesF . mconcat) <$> try (start >> many1Till inner end)
where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline)
innerSpace = try $ whitespace <* notFollowedBy' end
strikeout :: PandocMonad m => MarkdownParser m (F Inlines)
strikeout = fmap B.strikeout <$>
(guardEnabled Ext_strikeout >> inlinesBetween strikeStart strikeEnd)
where strikeStart = string "~~" >> lookAhead nonspaceChar
>> notFollowedBy (char '~')
strikeEnd = try $ string "~~"
superscript :: PandocMonad m => MarkdownParser m (F Inlines)
superscript = fmap B.superscript <$> try (do
guardEnabled Ext_superscript
char '^'
mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '^'))
subscript :: PandocMonad m => MarkdownParser m (F Inlines)
subscript = fmap B.subscript <$> try (do
guardEnabled Ext_subscript
char '~'
mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '~'))
whitespace :: PandocMonad m => MarkdownParser m (F Inlines)
whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace"
where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak)
regsp = skipMany spaceChar >> return B.space
nonEndline :: PandocMonad m => ParserT [Char] st m Char
nonEndline = satisfy (/='\n')
str :: PandocMonad m => MarkdownParser m (F Inlines)
str = do
result <- many1 (alphaNum <|> try (char '.' <* notFollowedBy (char '.')))
updateLastStrPos
(do guardEnabled Ext_smart
abbrevs <- getOption readerAbbreviations
if not (null result) && last result == '.' && result `Set.member` abbrevs
then try (do ils <- whitespace <|> endline
lookAhead alphaNum
return $ do
ils' <- ils
if ils' == B.space
then return (B.str result <> B.str "\160")
else -- linebreak or softbreak
return (ils' <> B.str result <> B.str "\160"))
<|> return (return (B.str result))
else return (return (B.str result)))
<|> return (return (B.str result))
-- an endline character that can be treated as a space, not a structural break
endline :: PandocMonad m => MarkdownParser m (F Inlines)
endline = try $ do
newline
notFollowedBy blankline
-- parse potential list-starts differently if in a list:
notFollowedBy (inList >> listStart)
guardDisabled Ext_lists_without_preceding_blankline <|> notFollowedBy listStart
guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart
guardEnabled Ext_blank_before_header <|> (notFollowedBy . char =<< atxChar) -- atx header
guardDisabled Ext_backtick_code_blocks <|>
notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced))
notFollowedByHtmlCloser
(eof >> return mempty)
<|> (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak))
<|> (guardEnabled Ext_ignore_line_breaks >> return mempty)
<|> (skipMany spaceChar >> return (return B.softbreak))
--
-- links
--
-- a reference label for a link
reference :: PandocMonad m => MarkdownParser m (F Inlines, String)
reference = do notFollowedBy' (string "[^") -- footnote reference
withRaw $ trimInlinesF <$> inlinesInBalancedBrackets
parenthesizedChars :: PandocMonad m => MarkdownParser m [Char]
parenthesizedChars = do
result <- charsInBalanced '(' ')' litChar
return $ '(' : result ++ ")"
-- source for a link, with optional title
source :: PandocMonad m => MarkdownParser m (String, String)
source = do
char '('
skipSpaces
let urlChunk =
try parenthesizedChars
<|> (notFollowedBy (oneOf " )") >> (count 1 litChar))
<|> try (many1 spaceChar <* notFollowedBy (oneOf "\"')"))
let sourceURL = (unwords . words . concat) <$> many urlChunk
let betweenAngles = try $
char '<' >> manyTill litChar (char '>')
src <- try betweenAngles <|> sourceURL
tit <- option "" $ try $ spnl >> linkTitle
skipSpaces
char ')'
return (escapeURI $ trimr src, tit)
linkTitle :: PandocMonad m => MarkdownParser m String
linkTitle = quotedTitle '"' <|> quotedTitle '\''
link :: PandocMonad m => MarkdownParser m (F Inlines)
link = try $ do
st <- getState
guard $ stateAllowLinks st
setState $ st{ stateAllowLinks = False }
(lab,raw) <- reference
setState $ st{ stateAllowLinks = True }
regLink B.linkWith lab <|> referenceLink B.linkWith (lab,raw)
bracketedSpan :: PandocMonad m => MarkdownParser m (F Inlines)
bracketedSpan = try $ do
guardEnabled Ext_bracketed_spans
(lab,_) <- reference
attr <- attributes
return $ if isSmallCaps attr
then B.smallcaps <$> lab
else B.spanWith attr <$> lab
-- | We treat a span as SmallCaps if class is "smallcaps" (and
-- no other attributes are set or if style is "font-variant:small-caps"
-- (and no other attributes are set).
isSmallCaps :: Attr -> Bool
isSmallCaps ("",["smallcaps"],[]) = True
isSmallCaps ("",[],kvs) =
case lookup "style" kvs of
Just s -> map toLower (filter (`notElem` " \t;") s) ==
"font-variant:small-caps"
Nothing -> False
isSmallCaps _ = False
regLink :: PandocMonad m
=> (Attr -> String -> String -> Inlines -> Inlines)
-> F Inlines
-> MarkdownParser m (F Inlines)
regLink constructor lab = try $ do
(src, tit) <- source
attr <- option nullAttr $
guardEnabled Ext_link_attributes >> attributes
return $ constructor attr src tit <$> lab
-- a link like [this][ref] or [this][] or [this]
referenceLink :: PandocMonad m
=> (Attr -> String -> String -> Inlines -> Inlines)
-> (F Inlines, String)
-> MarkdownParser m (F Inlines)
referenceLink constructor (lab, raw) = do
sp <- (True <$ lookAhead (char ' ')) <|> return False
(_,raw') <- option (mempty, "") $
lookAhead (try (guardEnabled Ext_citations >>
spnl >> normalCite >> return (mempty, "")))
<|>
try (spnl >> reference)
when (raw' == "") $ guardEnabled Ext_shortcut_reference_links
let labIsRef = raw' == "" || raw' == "[]"
let key = toKey $ if labIsRef then raw else raw'
parsedRaw <- parseFromString (mconcat <$> many inline) raw'
fallback <- parseFromString (mconcat <$> many inline) $ dropBrackets raw
implicitHeaderRefs <- option False $
True <$ guardEnabled Ext_implicit_header_references
let makeFallback = do
parsedRaw' <- parsedRaw
fallback' <- fallback
return $ B.str "[" <> fallback' <> B.str "]" <>
(if sp && not (null raw) then B.space else mempty) <>
parsedRaw'
return $ do
keys <- asksF stateKeys
case M.lookup key keys of
Nothing ->
if implicitHeaderRefs
then do
headerKeys <- asksF stateHeaderKeys
case M.lookup key headerKeys of
Just ((src, tit), _) -> constructor nullAttr src tit <$> lab
Nothing -> makeFallback
else makeFallback
Just ((src,tit), attr) -> constructor attr src tit <$> lab
dropBrackets :: String -> String
dropBrackets = reverse . dropRB . reverse . dropLB
where dropRB (']':xs) = xs
dropRB xs = xs
dropLB ('[':xs) = xs
dropLB xs = xs
bareURL :: PandocMonad m => MarkdownParser m (F Inlines)
bareURL = try $ do
guardEnabled Ext_autolink_bare_uris
getState >>= guard . stateAllowLinks
(orig, src) <- uri <|> emailAddress
notFollowedBy $ try $ spaces >> htmlTag (~== TagClose "a")
return $ return $ B.link src "" (B.str orig)
autoLink :: PandocMonad m => MarkdownParser m (F Inlines)
autoLink = try $ do
getState >>= guard . stateAllowLinks
char '<'
(orig, src) <- uri <|> emailAddress
-- in rare cases, something may remain after the uri parser
-- is finished, because the uri parser tries to avoid parsing
-- final punctuation. for example: in `<http://hi---there>`,
-- the URI parser will stop before the dashes.
extra <- fromEntities <$> manyTill nonspaceChar (char '>')
attr <- option nullAttr $ try $
guardEnabled Ext_link_attributes >> attributes
return $ return $ B.linkWith attr (src ++ escapeURI extra) "" (B.str $ orig ++ extra)
image :: PandocMonad m => MarkdownParser m (F Inlines)
image = try $ do
char '!'
(lab,raw) <- reference
defaultExt <- getOption readerDefaultImageExtension
let constructor attr' src = case takeExtension src of
"" -> B.imageWith attr' (addExtension src defaultExt)
_ -> B.imageWith attr' src
regLink constructor lab <|> referenceLink constructor (lab,raw)
note :: PandocMonad m => MarkdownParser m (F Inlines)
note = try $ do
guardEnabled Ext_footnotes
ref <- noteMarker
return $ do
notes <- asksF stateNotes'
case lookup ref notes of
Nothing -> return $ B.str $ "[^" ++ ref ++ "]"
Just contents -> do
st <- askF
-- process the note in a context that doesn't resolve
-- notes, to avoid infinite looping with notes inside
-- notes:
let contents' = runF contents st{ stateNotes' = [] }
return $ B.note contents'
inlineNote :: PandocMonad m => MarkdownParser m (F Inlines)
inlineNote = try $ do
guardEnabled Ext_inline_notes
char '^'
contents <- inlinesInBalancedBrackets
return $ B.note . B.para <$> contents
rawLaTeXInline' :: PandocMonad m => MarkdownParser m (F Inlines)
rawLaTeXInline' = try $ do
guardEnabled Ext_raw_tex
lookAhead $ char '\\' >> notFollowedBy' (string "start") -- context env
RawInline _ s <- rawLaTeXInline
return $ return $ B.rawInline "tex" s
-- "tex" because it might be context or latex
rawConTeXtEnvironment :: PandocMonad m => ParserT [Char] st m String
rawConTeXtEnvironment = try $ do
string "\\start"
completion <- inBrackets (letter <|> digit <|> spaceChar)
<|> (many1 letter)
contents <- manyTill (rawConTeXtEnvironment <|> (count 1 anyChar))
(try $ string "\\stop" >> string completion)
return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion
inBrackets :: PandocMonad m => (ParserT [Char] st m Char) -> ParserT [Char] st m String
inBrackets parser = do
char '['
contents <- many parser
char ']'
return $ "[" ++ contents ++ "]"
spanHtml :: PandocMonad m => MarkdownParser m (F Inlines)
spanHtml = try $ do
guardEnabled Ext_native_spans
(TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" [])
contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose "span"))
let ident = fromMaybe "" $ lookup "id" attrs
let classes = maybe [] words $ lookup "class" attrs
let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
return $ if isSmallCaps (ident, classes, keyvals)
then B.smallcaps <$> contents
else B.spanWith (ident, classes, keyvals) <$> contents
divHtml :: PandocMonad m => MarkdownParser m (F Blocks)
divHtml = try $ do
guardEnabled Ext_native_divs
(TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen "div" [])
-- we set stateInHtmlBlock so that closing tags that can be either block or
-- inline will not be parsed as inline tags
oldInHtmlBlock <- stateInHtmlBlock <$> getState
updateState $ \st -> st{ stateInHtmlBlock = Just "div" }
bls <- option "" (blankline >> option "" blanklines)
contents <- mconcat <$>
many (notFollowedBy' (htmlTag (~== TagClose "div")) >> block)
closed <- option False (True <$ htmlTag (~== TagClose "div"))
if closed
then do
updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock }
let ident = fromMaybe "" $ lookup "id" attrs
let classes = maybe [] words $ lookup "class" attrs
let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
return $ B.divWith (ident, classes, keyvals) <$> contents
else -- avoid backtracing
return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents
rawHtmlInline :: PandocMonad m => MarkdownParser m (F Inlines)
rawHtmlInline = do
guardEnabled Ext_raw_html
inHtmlBlock <- stateInHtmlBlock <$> getState
let isCloseBlockTag t = case inHtmlBlock of
Just t' -> t ~== TagClose t'
Nothing -> False
mdInHtml <- option False $
( guardEnabled Ext_markdown_in_html_blocks
<|> guardEnabled Ext_markdown_attribute
) >> return True
(_,result) <- htmlTag $ if mdInHtml
then (\x -> isInlineTag x &&
not (isCloseBlockTag x))
else not . isTextTag
return $ return $ B.rawInline "html" result
-- Emoji
emojiChars :: [Char]
emojiChars = ['a'..'z'] ++ ['0'..'9'] ++ ['_','+','-']
emoji :: PandocMonad m => MarkdownParser m (F Inlines)
emoji = try $ do
guardEnabled Ext_emoji
char ':'
emojikey <- many1 (oneOf emojiChars)
char ':'
case M.lookup emojikey emojis of
Just s -> return (return (B.str s))
Nothing -> mzero
-- Citations
cite :: PandocMonad m => MarkdownParser m (F Inlines)
cite = do
guardEnabled Ext_citations
citations <- textualCite
<|> do (cs, raw) <- withRaw normalCite
return $ (flip B.cite (B.text raw)) <$> cs
return citations
textualCite :: PandocMonad m => MarkdownParser m (F Inlines)
textualCite = try $ do
(_, key) <- citeKey
let first = Citation{ citationId = key
, citationPrefix = []
, citationSuffix = []
, citationMode = AuthorInText
, citationNoteNum = 0
, citationHash = 0
}
mbrest <- option Nothing $ try $ spnl >> Just <$> withRaw normalCite
case mbrest of
Just (rest, raw) ->
return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw) . (first:))
<$> rest
Nothing ->
(do
(cs, raw) <- withRaw $ bareloc first
let (spaces',raw') = span isSpace raw
spc | null spaces' = mempty
| otherwise = B.space
lab <- parseFromString (mconcat <$> many inline) $ dropBrackets raw'
fallback <- referenceLink B.linkWith (lab,raw')
return $ do
fallback' <- fallback
cs' <- cs
return $
case B.toList fallback' of
Link{}:_ -> B.cite [first] (B.str $ '@':key) <> spc <> fallback'
_ -> B.cite cs' (B.text $ '@':key ++ " " ++ raw))
<|> return (do st <- askF
return $ case M.lookup key (stateExamples st) of
Just n -> B.str (show n)
_ -> B.cite [first] $ B.str $ '@':key)
bareloc :: PandocMonad m => Citation -> MarkdownParser m (F [Citation])
bareloc c = try $ do
spnl
char '['
notFollowedBy $ char '^'
suff <- suffix
rest <- option (return []) $ try $ char ';' >> citeList
spnl
char ']'
notFollowedBy $ oneOf "[("
return $ do
suff' <- suff
rest' <- rest
return $ c{ citationSuffix = B.toList suff' } : rest'
normalCite :: PandocMonad m => MarkdownParser m (F [Citation])
normalCite = try $ do
char '['
spnl
citations <- citeList
spnl
char ']'
return citations
suffix :: PandocMonad m => MarkdownParser m (F Inlines)
suffix = try $ do
hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
spnl
rest <- trimInlinesF . mconcat <$> many (notFollowedBy (oneOf ";]") >> inline)
return $ if hasSpace
then (B.space <>) <$> rest
else rest
prefix :: PandocMonad m => MarkdownParser m (F Inlines)
prefix = trimInlinesF . mconcat <$>
manyTill inline (char ']' <|> liftM (const ']') (lookAhead citeKey))
citeList :: PandocMonad m => MarkdownParser m (F [Citation])
citeList = fmap sequence $ sepBy1 citation (try $ char ';' >> spnl)
citation :: PandocMonad m => MarkdownParser m (F Citation)
citation = try $ do
pref <- prefix
(suppress_author, key) <- citeKey
suff <- suffix
return $ do
x <- pref
y <- suff
return $ Citation{ citationId = key
, citationPrefix = B.toList x
, citationSuffix = B.toList y
, citationMode = if suppress_author
then SuppressAuthor
else NormalCitation
, citationNoteNum = 0
, citationHash = 0
}
smart :: PandocMonad m => MarkdownParser m (F Inlines)
smart = do
guardEnabled Ext_smart
doubleQuoted <|> singleQuoted <|>
choice (map (return <$>) [apostrophe, dash, ellipses])
singleQuoted :: PandocMonad m => MarkdownParser m (F Inlines)
singleQuoted = try $ do
singleQuoteStart
withQuoteContext InSingleQuote $
fmap B.singleQuoted . trimInlinesF . mconcat <$>
many1Till inline singleQuoteEnd
-- doubleQuoted will handle regular double-quoted sections, as well
-- as dialogues with an open double-quote without a close double-quote
-- in the same paragraph.
doubleQuoted :: PandocMonad m => MarkdownParser m (F Inlines)
doubleQuoted = try $ do
doubleQuoteStart
contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
(withQuoteContext InDoubleQuote $ doubleQuoteEnd >> return
(fmap B.doubleQuoted . trimInlinesF $ contents))
<|> (return $ return (B.str "\8220") <> contents)