pandoc/src/Text/Pandoc/Readers/Markdown.hs
fiddlosopher 655363da51 Moved Text.ParserCombinators.Pandoc ->
Text.Pandoc.ParserCombinators.  This way, all the
Pandoc modules are in one place.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@663 788f1e2b-df1e-0410-8736-df70ead52e1b
2007-07-09 03:39:25 +00:00

1031 lines
29 KiB
Haskell

{-
Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
Module : Text.Pandoc.Readers.Markdown
Copyright : Copyright (C) 2006-7 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Portability : portable
Conversion of markdown-formatted plain text to 'Pandoc' document.
-}
module Text.Pandoc.Readers.Markdown (
readMarkdown
) where
import Data.List ( findIndex, sortBy, transpose, isSuffixOf, intersect, lookup )
import Data.Char ( isAlphaNum )
import Text.Pandoc.ParserCombinators
import Text.Pandoc.Definition
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment )
import Text.Pandoc.Shared
import Text.Pandoc.Readers.HTML ( rawHtmlBlock,
anyHtmlBlockTag, anyHtmlInlineTag,
anyHtmlTag, anyHtmlEndTag,
htmlEndTag, extractTagType,
htmlBlockElement )
import Text.Pandoc.Entities ( characterEntity, decodeEntities )
import Text.ParserCombinators.Parsec
-- | Read markdown from an input string and return a Pandoc document.
readMarkdown :: ParserState -> String -> Pandoc
readMarkdown state str = (readWith parseMarkdown) state (str ++ "\n\n")
-- | Parse markdown string with default options and print result (for testing).
testString :: String -> IO ()
testString = testStringWith parseMarkdown
--
-- Constants and data structure definitions
--
spaceChars = " \t"
endLineChars = "\n"
labelStart = '['
labelEnd = ']'
labelSep = ':'
srcStart = '('
srcEnd = ')'
imageStart = '!'
noteStart = '^'
codeStart = '`'
codeEnd = '`'
emphStart = '*'
emphEnd = '*'
emphStartAlt = '_'
emphEndAlt = '_'
autoLinkStart = '<'
autoLinkEnd = '>'
mathStart = '$'
mathEnd = '$'
bulletListMarkers = "*+-"
escapeChar = '\\'
hruleChars = "*-_"
quoteChars = "'\""
atxHChar = '#'
titleOpeners = "\"'("
setextHChars = ['=','-']
blockQuoteChar = '>'
hyphenChar = '-'
ellipsesChar = '.'
listColSepChar = '|'
entityStart = '&'
-- treat these as potentially non-text when parsing inline:
specialChars = [escapeChar, labelStart, labelEnd, emphStart, emphEnd,
emphStartAlt, emphEndAlt, codeStart, codeEnd, autoLinkEnd,
autoLinkStart, mathStart, mathEnd, imageStart, noteStart,
hyphenChar, ellipsesChar, entityStart] ++ quoteChars
--
-- auxiliary functions
--
-- | Skip a single endline if there is one.
skipEndline = option Space endline
indentSpaces = do
state <- getState
let tabStop = stateTabStop state
try (count tabStop (char ' ')) <|>
(do{many (char ' '); string "\t"}) <?> "indentation"
nonindentSpaces = do
state <- getState
let tabStop = stateTabStop state
choice (map (\n -> (try (count n (char ' ')))) (reverse [0..(tabStop - 1)]))
-- | Fail if reader is in strict markdown syntax mode.
failIfStrict = do
state <- getState
if stateStrict state then fail "Strict markdown mode" else return ()
-- | Fail unless we're at beginning of a line.
failUnlessBeginningOfLine = do
pos <- getPosition
if sourceColumn pos == 1 then return () else fail "not beginning of line"
-- | Fail unless we're in "smart typography" mode.
failUnlessSmart = do
state <- getState
if stateSmart state then return () else fail "Smart typography feature"
--
-- document structure
--
titleLine = try (do
char '%'
skipSpaces
line <- manyTill inline newline
return line)
authorsLine = try (do
char '%'
skipSpaces
authors <- sepEndBy (many1 (noneOf ",;\n")) (oneOf ",;")
newline
return (map (decodeEntities . removeLeadingTrailingSpace) authors))
dateLine = try (do
char '%'
skipSpaces
date <- many (noneOf "\n")
newline
return (decodeEntities $ removeTrailingSpace date))
titleBlock = try (do
failIfStrict
title <- option [] titleLine
author <- option [] authorsLine
date <- option "" dateLine
option "" blanklines
return (title, author, date))
parseMarkdown = do
updateState (\state -> state { stateParseRaw = True }) -- markdown allows raw HTML
(title, author, date) <- option ([],[],"") titleBlock
-- go through once just to get list of reference keys
refs <- manyTill (referenceKey <|> (do l <- lineClump
return (LineClump l))) eof
let keys = map (\(KeyBlock label target) -> (label, target)) $
filter isKeyBlock refs
let rawlines = map (\(LineClump ln) -> ln) $ filter isLineClump refs
setInput $ concat rawlines -- with keys stripped out
updateState (\state -> state { stateKeys = keys })
-- now go through for notes
refs <- manyTill (noteBlock <|> (do l <- lineClump
return (LineClump l))) eof
let notes = map (\(NoteBlock label blocks) -> (label, blocks)) $
filter isNoteBlock refs
let rawlines = map (\(LineClump ln) -> ln) $ filter isLineClump refs
setInput $ concat rawlines -- with note blocks and keys stripped out
updateState (\state -> state { stateNotes = notes })
blocks <- parseBlocks -- go through again, for real
let blocks' = filter (/= Null) blocks
return (Pandoc (Meta title author date) blocks')
--
-- initial pass for references
--
referenceKey = try $ do
nonindentSpaces
label <- reference
char labelSep
skipSpaces
option ' ' (char autoLinkStart)
src <- many (noneOf [autoLinkEnd, '\n', '\t', ' '])
option ' ' (char autoLinkEnd)
tit <- option "" title
blanklines
return $ KeyBlock label (removeTrailingSpace src, tit)
noteMarker = try (do
char labelStart
char noteStart
manyTill (noneOf " \t\n") (char labelEnd))
rawLine = try (do
notFollowedBy' blankline
notFollowedBy' noteMarker
contents <- many1 nonEndline
end <- option "" (do
newline
option "" (try indentSpaces)
return "\n")
return (contents ++ end))
rawLines = do
lines <- many1 rawLine
return (concat lines)
noteBlock = try $ do
failIfStrict
ref <- noteMarker
char ':'
option ' ' (try blankline)
option "" (try indentSpaces)
raw <- sepBy rawLines (try (do {blankline; indentSpaces}))
option "" blanklines
-- parse the extracted text, which may contain various block elements:
rest <- getInput
setInput $ (joinWithSep "\n" raw) ++ "\n\n"
contents <- parseBlocks
setInput rest
return (NoteBlock ref contents)
--
-- parsing blocks
--
parseBlocks = manyTill block eof
block = choice [ header
, table
, codeBlock
, hrule
, list
, blockQuote
, htmlBlock
, rawLaTeXEnvironment'
, para
, plain
, nullBlock ] <?> "block"
--
-- header blocks
--
header = choice [ setextHeader, atxHeader ] <?> "header"
atxHeader = try (do
lead <- many1 (char atxHChar)
skipSpaces
txt <- manyTill inline atxClosing
return (Header (length lead) (normalizeSpaces txt)))
atxClosing = try (do
skipMany (char atxHChar)
skipSpaces
newline
option "" blanklines)
setextHeader = choice $
map (\x -> setextH x) (enumFromTo 1 (length setextHChars))
setextH n = try (do
txt <- many1Till inline newline
many1 (char (setextHChars !! (n-1)))
skipSpaces
newline
option "" blanklines
return (Header n (normalizeSpaces txt)))
--
-- hrule block
--
hruleWith chr = try (do
skipSpaces
char chr
skipSpaces
char chr
skipSpaces
char chr
skipMany (oneOf (chr:spaceChars))
newline
option "" blanklines
return HorizontalRule)
hrule = choice (map hruleWith hruleChars) <?> "hrule"
--
-- code blocks
--
indentedLine = try (do
indentSpaces
result <- manyTill anyChar newline
return (result ++ "\n"))
-- two or more indented lines, possibly separated by blank lines
indentedBlock = try (do
res1 <- indentedLine
blanks <- many blankline
res2 <- choice [indentedBlock, indentedLine]
return (res1 ++ blanks ++ res2))
codeBlock = do
result <- choice [indentedBlock, indentedLine]
option "" blanklines
return (CodeBlock (stripTrailingNewlines result))
--
-- block quotes
--
emacsBoxQuote = try (do
failIfStrict
string ",----"
manyTill anyChar newline
raw <- manyTill (try (do
char '|'
option ' ' (char ' ')
result <- manyTill anyChar newline
return result))
(string "`----")
manyTill anyChar newline
option "" blanklines
return raw)
emailBlockQuoteStart = try (do
nonindentSpaces
char blockQuoteChar
option ' ' (char ' ')
return "> ")
emailBlockQuote = try (do
emailBlockQuoteStart
raw <- sepBy (many (choice [nonEndline,
(try (do
endline
notFollowedBy' emailBlockQuoteStart
return '\n'))]))
(try (do {newline; emailBlockQuoteStart}))
newline <|> (do{ eof; return '\n' })
option "" blanklines
return raw)
blockQuote = do
raw <- choice [ emailBlockQuote, emacsBoxQuote ]
-- parse the extracted block, which may contain various block elements:
rest <- getInput
setInput $ (joinWithSep "\n" raw) ++ "\n\n"
contents <- parseBlocks
setInput rest
return (BlockQuote contents)
--
-- list blocks
--
list = choice [ bulletList, orderedList, definitionList ] <?> "list"
bulletListStart = try (do
option ' ' newline -- if preceded by a Plain block in a list context
nonindentSpaces
notFollowedBy' hrule -- because hrules start out just like lists
oneOf bulletListMarkers
spaceChar
skipSpaces)
standardOrderedListStart = try (do
many1 digit
char '.')
extendedOrderedListStart = try (do
failIfStrict
oneOf ['a'..'n']
oneOf ".)")
orderedListStart = try $ do
option ' ' newline -- if preceded by a Plain block in a list context
nonindentSpaces
standardOrderedListStart <|> extendedOrderedListStart
oneOf spaceChars
skipSpaces
-- parse a line of a list item (start = parser for beginning of list item)
listLine start = try (do
notFollowedBy' start
notFollowedBy blankline
notFollowedBy' (do
indentSpaces
many (spaceChar)
choice [bulletListStart, orderedListStart])
line <- manyTill anyChar newline
return (line ++ "\n"))
-- parse raw text for one list item, excluding start marker and continuations
rawListItem start = try (do
start
result <- many1 (listLine start)
blanks <- many blankline
return ((concat result) ++ blanks))
-- continuation of a list item - indented and separated by blankline
-- or (in compact lists) endline.
-- note: nested lists are parsed as continuations
listContinuation start = try (do
lookAhead indentSpaces
result <- many1 (listContinuationLine start)
blanks <- many blankline
return ((concat result) ++ blanks))
listContinuationLine start = try (do
notFollowedBy' blankline
notFollowedBy' start
option "" (try indentSpaces)
result <- manyTill anyChar newline
return (result ++ "\n"))
listItem start = try (do
first <- rawListItem start
continuations <- many (listContinuation start)
-- parsing with ListItemState forces markers at beginning of lines to
-- count as list item markers, even if not separated by blank space.
-- see definition of "endline"
state <- getState
let oldContext = stateParserContext state
setState $ state {stateParserContext = ListItemState}
-- parse the extracted block, which may contain various block elements:
rest <- getInput
let raw = concat (first:continuations)
setInput raw
contents <- parseBlocks
setInput rest
updateState (\st -> st {stateParserContext = oldContext})
return contents)
orderedList = try (do
items <- many1 (listItem orderedListStart)
let items' = compactify items
return (OrderedList items'))
bulletList = try (do
items <- many1 (listItem bulletListStart)
let items' = compactify items
return (BulletList items'))
-- definition lists
definitionListItem = try $ do
notFollowedBy blankline
notFollowedBy' indentSpaces
term <- manyTill inline newline
raw <- many1 defRawBlock
state <- getState
let oldContext = stateParserContext state
-- parse the extracted block, which may contain various block elements:
rest <- getInput
setInput (concat raw)
contents <- parseBlocks
setInput rest
updateState (\st -> st {stateParserContext = oldContext})
return ((normalizeSpaces term), contents)
defRawBlock = try $ do
char ':'
state <- getState
let tabStop = stateTabStop state
try (count (tabStop - 1) (char ' ')) <|> (do{many (char ' '); string "\t"})
firstline <- anyLine
rawlines <- many (do {notFollowedBy' blankline; indentSpaces; anyLine})
trailing <- option "" blanklines
return $ firstline ++ "\n" ++ unlines rawlines ++ trailing
definitionList = do
failIfStrict
items <- many1 definitionListItem
let (terms, defs) = unzip items
let defs' = compactify defs
let items' = zip terms defs'
return $ DefinitionList items'
--
-- paragraph block
--
para = try (do
result <- many1 inline
newline
st <- getState
if stateStrict st
then choice [lookAhead blockQuote, lookAhead header,
(do{blanklines; return Null})]
else choice [(do{lookAhead emacsBoxQuote; return Null}),
(do{blanklines; return Null})]
let result' = normalizeSpaces result
return (Para result'))
plain = do
result <- many1 inline
let result' = normalizeSpaces result
return (Plain result')
--
-- raw html
--
htmlElement = choice [strictHtmlBlock,
htmlBlockElement] <?> "html element"
htmlBlock = do
st <- getState
if stateStrict st
then do
failUnlessBeginningOfLine
first <- htmlElement
finalSpace <- many (oneOf spaceChars)
finalNewlines <- many newline
return (RawHtml (first ++ finalSpace ++ finalNewlines))
else rawHtmlBlocks
-- True if tag is self-closing
isSelfClosing tag =
isSuffixOf "/>" $ filter (\c -> (not (c `elem` " \n\t"))) tag
strictHtmlBlock = try (do
tag <- anyHtmlBlockTag
let tag' = extractTagType tag
if isSelfClosing tag || tag' == "hr"
then return tag
else do
contents <- many (do{notFollowedBy' (htmlEndTag tag');
htmlElement <|> (count 1 anyChar)})
end <- htmlEndTag tag'
return $ tag ++ (concat contents) ++ end)
rawHtmlBlocks = try (do
htmlBlocks <- many1 rawHtmlBlock
let combined = concatMap (\(RawHtml str) -> str) htmlBlocks
let combined' = if (last combined == '\n')
then init combined -- strip extra newline
else combined
return (RawHtml combined'))
--
-- LaTeX
--
rawLaTeXEnvironment' = do
failIfStrict
rawLaTeXEnvironment
--
-- Tables
--
-- Parse a dashed line with optional trailing spaces; return its length
-- and the length including trailing space.
dashedLine ch = do
dashes <- many1 (char ch)
sp <- many spaceChar
return $ (length dashes, length $ dashes ++ sp)
-- Parse a table header with dashed lines of '-' preceded by
-- one line of text.
simpleTableHeader = do
rawContent <- anyLine
initSp <- nonindentSpaces
dashes <- many1 (dashedLine '-')
newline
let (lengths, lines) = unzip dashes
let indices = scanl (+) (length initSp) lines
let rawHeads = tail $ splitByIndices (init indices) rawContent
let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths
return $ (rawHeads, aligns, indices)
-- Parse a table footer - dashed lines followed by blank line.
tableFooter = try $ do
nonindentSpaces
many1 (dashedLine '-')
blanklines
-- Parse a table separator - dashed line.
tableSep = try $ do
nonindentSpaces
many1 (dashedLine '-')
string "\n"
-- Parse a raw line and split it into chunks by indices.
rawTableLine indices = do
notFollowedBy' (blanklines <|> tableFooter)
line <- many1Till anyChar newline
return $ map removeLeadingTrailingSpace $ tail $
splitByIndices (init indices) line
-- Parse a table line and return a list of lists of blocks (columns).
tableLine indices = try $ do
rawline <- rawTableLine indices
mapM (parseFromStr (many plain)) rawline
-- Parse a multiline table row and return a list of blocks (columns).
multilineRow indices = try $ do
colLines <- many1 (rawTableLine indices)
option "" blanklines
let cols = map unlines $ transpose colLines
mapM (parseFromStr (many plain)) cols
-- Calculate relative widths of table columns, based on indices
widthsFromIndices :: Int -- Number of columns on terminal
-> [Int] -- Indices
-> [Float] -- Fractional relative sizes of columns
widthsFromIndices _ [] = []
widthsFromIndices numColumns indices =
let lengths = zipWith (-) indices (0:indices)
totLength = sum lengths
quotient = if totLength > numColumns
then fromIntegral totLength
else fromIntegral numColumns
fracs = map (\l -> (fromIntegral l) / quotient) lengths in
tail fracs
-- Parses a table caption: inlines beginning with 'Table:'
-- and followed by blank lines.
tableCaption = try $ do
nonindentSpaces
string "Table:"
result <- many1 inline
blanklines
return $ normalizeSpaces result
-- Parse a table using 'headerParser', 'lineParser', and 'footerParser'.
tableWith headerParser lineParser footerParser = try $ do
(rawHeads, aligns, indices) <- headerParser
lines <- many1Till (lineParser indices) footerParser
caption <- option [] tableCaption
heads <- mapM (parseFromStr (many plain)) rawHeads
state <- getState
let numColumns = stateColumns state
let widths = widthsFromIndices numColumns indices
return $ Table caption aligns widths heads lines
-- Parse a simple table with '---' header and one line per row.
simpleTable = tableWith simpleTableHeader tableLine blanklines
-- Parse a multiline table: starts with row of '-' on top, then header
-- (which may be multiline), then the rows,
-- which may be multiline, separated by blank lines, and
-- ending with a footer (dashed line followed by blank line).
multilineTable = tableWith multilineTableHeader multilineRow tableFooter
multilineTableHeader = try $ do
tableSep
rawContent <- many1 (do{notFollowedBy' tableSep;
many1Till anyChar newline})
initSp <- nonindentSpaces
dashes <- many1 (dashedLine '-')
newline
let (lengths, lines) = unzip dashes
let indices = scanl (+) (length initSp) lines
let rawHeadsList = transpose $ map
(\ln -> tail $ splitByIndices (init indices) ln)
rawContent
let rawHeads = map (joinWithSep " ") rawHeadsList
let aligns = zipWith alignType rawHeadsList lengths
return $ ((map removeLeadingTrailingSpace rawHeads),
aligns, indices)
-- Returns the longest of a list of strings.
longest :: [String] -> String
longest [] = ""
longest [x] = x
longest (x:xs) =
if (length x) >= (maximum $ map length xs)
then x
else longest xs
-- Returns an alignment type for a table, based on a list of strings
-- (the rows of the column header) and a number (the length of the
-- dashed line under the rows.
alignType :: [String] -> Int -> Alignment
alignType [] len = AlignDefault
alignType strLst len =
let str = longest $ map removeTrailingSpace strLst
leftSpace = if null str then False else ((str !! 0) `elem` " \t")
rightSpace = (length str < len || (str !! (len - 1)) `elem` " \t") in
case (leftSpace, rightSpace) of
(True, False) -> AlignRight
(False, True) -> AlignLeft
(True, True) -> AlignCenter
(False, False) -> AlignDefault
table = do
failIfStrict
result <- simpleTable <|> multilineTable <?> "table"
return result
--
-- inline
--
inline = choice [ rawLaTeXInline'
, escapedChar
, entity
, note
, inlineNote
, link
, referenceLink
, rawHtmlInline'
, autoLink
, image
, escapedChar
, math
, strong
, emph
, smartPunctuation
, code
, ltSign
, symbol
, str
, linebreak
, tabchar
, whitespace
, endline ] <?> "inline"
escapedChar = try $ do
char '\\'
state <- getState
result <- if stateStrict state
then oneOf "\\`*_{}[]()>#+-.!"
else satisfy (not . isAlphaNum)
return (Str [result])
ltSign = try (do
notFollowedBy (noneOf "<") -- continue only if it's a <
notFollowedBy' rawHtmlBlocks -- don't return < if it starts html
char '<'
return (Str ['<']))
specialCharsMinusLt = filter (/= '<') specialChars
symbol = do
result <- oneOf specialCharsMinusLt
return (Str [result])
-- parses inline code, between n codeStarts and n codeEnds
code = try (do
starts <- many1 (char codeStart)
let num = length starts
result <- many1Till anyChar (try (count num (char codeEnd)))
-- get rid of any internal newlines
let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result
return (Code result'))
mathWord = many1 (choice [ (noneOf (" \t\n\\" ++ [mathEnd])),
(try (do
c <- char '\\'
notFollowedBy (char mathEnd)
return c))])
math = try (do
failIfStrict
char mathStart
notFollowedBy space
words <- sepBy1 mathWord (many1 space)
char mathEnd
return (TeX ("$" ++ (joinWithSep " " words) ++ "$")))
emph = do
result <- choice [ (enclosed (char emphStart) (char emphEnd) inline),
(enclosed (char emphStartAlt) (char emphEndAlt) inline) ]
return (Emph (normalizeSpaces result))
strong = do
result <- (enclosed strongStart strongEnd inline) <|>
(enclosed strongStartAlt strongEndAlt inline)
return (Strong (normalizeSpaces result))
where strongStart = count 2 (char emphStart)
strongEnd = try strongStart
strongStartAlt = count 2 (char emphStartAlt)
strongEndAlt = try strongStartAlt
smartPunctuation = do
failUnlessSmart
choice [ quoted, apostrophe, dash, ellipses ]
apostrophe = do
char '\'' <|> char '\8217'
return Apostrophe
quoted = do
doubleQuoted <|> singleQuoted
withQuoteContext context parser = do
oldState <- getState
let oldQuoteContext = stateQuoteContext oldState
setState oldState { stateQuoteContext = context }
result <- parser
newState <- getState
setState newState { stateQuoteContext = oldQuoteContext }
return result
singleQuoted = try $ do
singleQuoteStart
withQuoteContext InSingleQuote $ do
result <- many1Till inline singleQuoteEnd
return $ Quoted SingleQuote $ normalizeSpaces result
doubleQuoted = try $ do
doubleQuoteStart
withQuoteContext InDoubleQuote $ do
result <- many1Till inline doubleQuoteEnd
return $ Quoted DoubleQuote $ normalizeSpaces result
failIfInQuoteContext context = do
st <- getState
if (stateQuoteContext st == context)
then fail "already inside quotes"
else return ()
singleQuoteStart = try $ do
failIfInQuoteContext InSingleQuote
char '\8216' <|> do
char '\''
notFollowedBy (oneOf ")!],.;:-? \t\n")
notFollowedBy (try (do -- possessive or contraction
oneOfStrings ["s","t","m","ve","ll","re"]
satisfy (not . isAlphaNum)))
return '\''
singleQuoteEnd = try $ do
char '\'' <|> char '\8217'
notFollowedBy alphaNum
doubleQuoteStart = try $ do
failIfInQuoteContext InDoubleQuote
char '"' <|> char '\8220'
notFollowedBy (oneOf " \t\n")
doubleQuoteEnd = char '"' <|> char '\8221'
ellipses = try (do
oneOfStrings ["...", " . . . ", ". . .", " . . ."]
return Ellipses)
dash = enDash <|> emDash
enDash = try (do
char '-'
notFollowedBy (noneOf "0123456789")
return EnDash)
emDash = try (do
skipSpaces
oneOfStrings ["---", "--"]
skipSpaces
return EmDash)
whitespace = do
many1 (oneOf spaceChars) <?> "whitespace"
return Space
tabchar = do
tab
return (Str "\t")
-- hard line break
linebreak = try (do
oneOf spaceChars
many1 (oneOf spaceChars)
endline
return LineBreak )
nonEndline = noneOf endLineChars
entity = do
ent <- characterEntity
return $ Str [ent]
strChar = noneOf (specialChars ++ spaceChars ++ endLineChars)
str = do
result <- many1 strChar
return (Str result)
-- an endline character that can be treated as a space, not a structural break
endline = try (do
newline
notFollowedBy blankline
st <- getState
if stateStrict st
then do
notFollowedBy' emailBlockQuoteStart
notFollowedBy (char atxHChar) -- atx header
notFollowedBy (try (do{manyTill anyChar newline;
oneOf setextHChars})) -- setext header
else return ()
-- parse potential list-starts differently if in a list:
if (stateParserContext st) == ListItemState
then notFollowedBy' (orderedListStart <|> bulletListStart)
else return ()
return Space)
--
-- links
--
rawLabel = try $ do
char labelStart
-- allow for embedded brackets:
raw <- manyTill (do{res <- rawLabel; return ("[" ++ res ++ "]")} <|>
count 1 anyChar) (char labelEnd)
return $ concat raw
-- a reference label for a link
reference = try $ do
raw <- rawLabel
oldInput <- getInput
setInput raw
label <- many inline
setInput oldInput
return (normalizeSpaces label)
-- source for a link, with optional title
source = try $ do
char srcStart
option ' ' (char autoLinkStart)
src <- many (noneOf [srcEnd, autoLinkEnd, ' ', '\t', '\n'])
option ' ' (char autoLinkEnd)
tit <- option "" title
skipSpaces
char srcEnd
return (removeTrailingSpace src, tit)
titleWith startChar endChar = try (do
leadingSpace <- many1 (oneOf " \t\n")
if length (filter (=='\n') leadingSpace) > 1
then fail "title must be separated by space and on same or next line"
else return ()
char startChar
tit <- manyTill anyChar (try (do
char endChar
skipSpaces
notFollowedBy (noneOf ")\n")))
return $ decodeEntities tit)
title = choice [ titleWith '(' ')',
titleWith '"' '"',
titleWith '\'' '\''] <?> "title"
link = choice [explicitLink, referenceLink] <?> "link"
explicitLink = try (do
label <- reference
src <- source
return (Link label src))
-- a link like [this][ref] or [this][] or [this]
referenceLink = try $ do
label <- reference
ref <- option [] (try (do skipSpaces
option ' ' newline
skipSpaces
reference))
let ref' = if null ref then label else ref
state <- getState
case lookupKeySrc (stateKeys state) ref' of
Nothing -> fail "no corresponding key"
Just target -> return (Link label target)
autoLink = autoLinkEmail <|> autoLinkRegular
-- a link <like@this.com>
autoLinkEmail = try $ do
char autoLinkStart
name <- many1Till (noneOf "/:<> \t\n") (char '@')
domain <- sepBy1 (many1 (noneOf "/:.@<> \t\n")) (char '.')
let src = name ++ "@" ++ (joinWithSep "." domain)
char autoLinkEnd
return $ Link [Str src] (("mailto:" ++ src), "")
-- a link <http://like.this.com>
autoLinkRegular = try $ do
char autoLinkStart
prot <- oneOfStrings ["http:", "ftp:", "mailto:"]
rest <- many1Till (noneOf " \t\n<>") (char autoLinkEnd)
let src = prot ++ rest
return $ Link [Str src] (src, "")
image = try (do
char imageStart
(Link label src) <- link
return (Image label src))
note = try $ do
failIfStrict
ref <- noteMarker
state <- getState
let notes = stateNotes state
case lookup ref notes of
Nothing -> fail "note not found"
Just contents -> return (Note contents)
inlineNote = try $ do
failIfStrict
char noteStart
char labelStart
contents <- manyTill inline (char labelEnd)
return (Note [Para contents])
rawLaTeXInline' = do
failIfStrict
rawLaTeXInline
rawHtmlInline' = do
st <- getState
result <- if stateStrict st
then choice [htmlBlockElement, anyHtmlTag, anyHtmlEndTag]
else choice [htmlBlockElement, anyHtmlInlineTag]
return (HtmlInline result)