7701a87a1a
git-svn-id: https://pandoc.googlecode.com/svn/trunk@1324 788f1e2b-df1e-0410-8736-df70ead52e1b
686 lines
22 KiB
Haskell
686 lines
22 KiB
Haskell
{-
|
|
Copyright (C) 2006-8 John MacFarlane <jgm@berkeley.edu>
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
-}
|
|
|
|
{- |
|
|
Module : Text.Pandoc.Readers.RST
|
|
Copyright : Copyright (C) 2006-8 John MacFarlane
|
|
License : GNU GPL, version 2 or above
|
|
|
|
Maintainer : John MacFarlane <jgm@berkeley.edu>
|
|
Stability : alpha
|
|
Portability : portable
|
|
|
|
Conversion from reStructuredText to 'Pandoc' document.
|
|
-}
|
|
module Text.Pandoc.Readers.RST (
|
|
readRST
|
|
) where
|
|
import Text.Pandoc.Definition
|
|
import Text.Pandoc.Shared
|
|
import Text.ParserCombinators.Parsec
|
|
import Data.List ( findIndex, delete )
|
|
|
|
-- | Parse reStructuredText string and return Pandoc document.
|
|
readRST :: ParserState -> String -> Pandoc
|
|
readRST state s = (readWith parseRST) state (s ++ "\n\n")
|
|
|
|
--
|
|
-- Constants and data structure definitions
|
|
---
|
|
|
|
bulletListMarkers :: [Char]
|
|
bulletListMarkers = "*+-"
|
|
|
|
underlineChars :: [Char]
|
|
underlineChars = "!\"#$&'()*+,-./:;<=>?@[\\]^_`{|}~"
|
|
|
|
-- treat these as potentially non-text when parsing inline:
|
|
specialChars :: [Char]
|
|
specialChars = "\\`|*_<>$:[-"
|
|
|
|
--
|
|
-- parsing documents
|
|
--
|
|
|
|
isHeader :: Int -> Block -> Bool
|
|
isHeader n (Header x _) = x == n
|
|
isHeader _ _ = False
|
|
|
|
-- | Promote all headers in a list of blocks. (Part of
|
|
-- title transformation for RST.)
|
|
promoteHeaders :: Int -> [Block] -> [Block]
|
|
promoteHeaders num ((Header level text):rest) =
|
|
(Header (level - num) text):(promoteHeaders num rest)
|
|
promoteHeaders num (other:rest) = other:(promoteHeaders num rest)
|
|
promoteHeaders _ [] = []
|
|
|
|
-- | If list of blocks starts with a header (or a header and subheader)
|
|
-- of level that are not found elsewhere, return it as a title and
|
|
-- promote all the other headers.
|
|
titleTransform :: [Block] -- ^ list of blocks
|
|
-> ([Block], [Inline]) -- ^ modified list of blocks, title
|
|
titleTransform ((Header 1 head1):(Header 2 head2):rest) = -- title subtitle
|
|
if (any (isHeader 1) rest) || (any (isHeader 2) rest)
|
|
then ((Header 1 head1):(Header 2 head2):rest, [])
|
|
else ((promoteHeaders 2 rest), head1 ++ [Str ":", Space] ++ head2)
|
|
titleTransform ((Header 1 head1):rest) = -- title, no subtitle
|
|
if (any (isHeader 1) rest)
|
|
then ((Header 1 head1):rest, [])
|
|
else ((promoteHeaders 1 rest), head1)
|
|
titleTransform blocks = (blocks, [])
|
|
|
|
parseRST :: GenParser Char ParserState Pandoc
|
|
parseRST = do
|
|
startPos <- getPosition
|
|
-- go through once just to get list of reference keys
|
|
-- docMinusKeys is the raw document with blanks where the keys were...
|
|
docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= return . concat
|
|
setInput docMinusKeys
|
|
setPosition startPos
|
|
st <- getState
|
|
let reversedKeys = stateKeys st
|
|
updateState $ \s -> s { stateKeys = reverse reversedKeys }
|
|
-- now parse it for real...
|
|
blocks <- parseBlocks
|
|
let blocks' = filter (/= Null) blocks
|
|
state <- getState
|
|
let (blocks'', title) = if stateStandalone state
|
|
then titleTransform blocks'
|
|
else (blocks', [])
|
|
let authors = stateAuthors state
|
|
let date = stateDate state
|
|
let title' = if (null title) then (stateTitle state) else title
|
|
return $ Pandoc (Meta title' authors date) blocks''
|
|
|
|
--
|
|
-- parsing blocks
|
|
--
|
|
|
|
parseBlocks :: GenParser Char ParserState [Block]
|
|
parseBlocks = manyTill block eof
|
|
|
|
block :: GenParser Char ParserState Block
|
|
block = choice [ codeBlock
|
|
, rawHtmlBlock
|
|
, rawLaTeXBlock
|
|
, fieldList
|
|
, blockQuote
|
|
, imageBlock
|
|
, unknownDirective
|
|
, header
|
|
, hrule
|
|
, list
|
|
, lineBlock
|
|
, para
|
|
, plain
|
|
, nullBlock ] <?> "block"
|
|
|
|
--
|
|
-- field list
|
|
--
|
|
|
|
fieldListItem :: String -> GenParser Char st ([Char], [Char])
|
|
fieldListItem indent = try $ do
|
|
string indent
|
|
char ':'
|
|
name <- many1 alphaNum
|
|
string ": "
|
|
skipSpaces
|
|
first <- manyTill anyChar newline
|
|
rest <- option "" $ try $ lookAhead (string indent >> oneOf " \t") >>
|
|
indentedBlock
|
|
return (name, joinWithSep " " (first:(lines rest)))
|
|
|
|
fieldList :: GenParser Char ParserState Block
|
|
fieldList = try $ do
|
|
indent <- lookAhead $ many (oneOf " \t")
|
|
items <- many1 $ fieldListItem indent
|
|
blanklines
|
|
let authors = case lookup "Authors" items of
|
|
Just auth -> [auth]
|
|
Nothing -> map snd (filter (\(x,_) -> x == "Author") items)
|
|
if null authors
|
|
then return ()
|
|
else updateState $ \st -> st {stateAuthors = authors}
|
|
case (lookup "Date" items) of
|
|
Just dat -> updateState $ \st -> st {stateDate = dat}
|
|
Nothing -> return ()
|
|
case (lookup "Title" items) of
|
|
Just tit -> parseFromString (many inline) tit >>=
|
|
\t -> updateState $ \st -> st {stateTitle = t}
|
|
Nothing -> return ()
|
|
let remaining = filter (\(x,_) -> (x /= "Authors") && (x /= "Author") &&
|
|
(x /= "Date") && (x /= "Title")) items
|
|
if null remaining
|
|
then return Null
|
|
else do terms <- mapM (return . (:[]) . Str . fst) remaining
|
|
defs <- mapM (parseFromString (many block) . snd)
|
|
remaining
|
|
return $ DefinitionList $ zip terms defs
|
|
|
|
--
|
|
-- line block
|
|
--
|
|
|
|
lineBlockLine :: GenParser Char ParserState [Inline]
|
|
lineBlockLine = try $ do
|
|
string "| "
|
|
white <- many (oneOf " \t")
|
|
line <- manyTill inline newline
|
|
return $ (if null white then [] else [Str white]) ++ line ++ [LineBreak]
|
|
|
|
lineBlock :: GenParser Char ParserState Block
|
|
lineBlock = try $ do
|
|
lines' <- many1 lineBlockLine
|
|
blanklines
|
|
return $ Para (concat lines')
|
|
|
|
--
|
|
-- paragraph block
|
|
--
|
|
|
|
para :: GenParser Char ParserState Block
|
|
para = paraBeforeCodeBlock <|> paraNormal <?> "paragraph"
|
|
|
|
codeBlockStart :: GenParser Char st Char
|
|
codeBlockStart = string "::" >> blankline >> blankline
|
|
|
|
-- paragraph that ends in a :: starting a code block
|
|
paraBeforeCodeBlock :: GenParser Char ParserState Block
|
|
paraBeforeCodeBlock = try $ do
|
|
result <- many1 (notFollowedBy' codeBlockStart >> inline)
|
|
lookAhead (string "::")
|
|
return $ Para $ if last result == Space
|
|
then normalizeSpaces result
|
|
else (normalizeSpaces result) ++ [Str ":"]
|
|
|
|
-- regular paragraph
|
|
paraNormal :: GenParser Char ParserState Block
|
|
paraNormal = try $ do
|
|
result <- many1 inline
|
|
newline
|
|
blanklines
|
|
return $ Para $ normalizeSpaces result
|
|
|
|
plain :: GenParser Char ParserState Block
|
|
plain = many1 inline >>= return . Plain . normalizeSpaces
|
|
|
|
--
|
|
-- image block
|
|
--
|
|
|
|
imageBlock :: GenParser Char st Block
|
|
imageBlock = try $ do
|
|
string ".. image:: "
|
|
src <- manyTill anyChar newline
|
|
fields <- option [] $ do indent <- lookAhead $ many (oneOf " /t")
|
|
many1 $ fieldListItem indent
|
|
optional blanklines
|
|
case lookup "alt" fields of
|
|
Just alt -> return $ Plain [Image [Str alt] (src, alt)]
|
|
Nothing -> return $ Plain [Image [Str "image"] (src, "")]
|
|
--
|
|
-- header blocks
|
|
--
|
|
|
|
header :: GenParser Char ParserState Block
|
|
header = doubleHeader <|> singleHeader <?> "header"
|
|
|
|
-- a header with lines on top and bottom
|
|
doubleHeader :: GenParser Char ParserState Block
|
|
doubleHeader = try $ do
|
|
c <- oneOf underlineChars
|
|
rest <- many (char c) -- the top line
|
|
let lenTop = length (c:rest)
|
|
skipSpaces
|
|
newline
|
|
txt <- many1 (notFollowedBy blankline >> inline)
|
|
pos <- getPosition
|
|
let len = (sourceColumn pos) - 1
|
|
if (len > lenTop) then fail "title longer than border" else return ()
|
|
blankline -- spaces and newline
|
|
count lenTop (char c) -- the bottom line
|
|
blanklines
|
|
-- check to see if we've had this kind of header before.
|
|
-- if so, get appropriate level. if not, add to list.
|
|
state <- getState
|
|
let headerTable = stateHeaderTable state
|
|
let (headerTable',level) = case findIndex (== DoubleHeader c) headerTable of
|
|
Just ind -> (headerTable, ind + 1)
|
|
Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1)
|
|
setState (state { stateHeaderTable = headerTable' })
|
|
return $ Header level (normalizeSpaces txt)
|
|
|
|
-- a header with line on the bottom only
|
|
singleHeader :: GenParser Char ParserState Block
|
|
singleHeader = try $ do
|
|
notFollowedBy' whitespace
|
|
txt <- many1 (do {notFollowedBy blankline; inline})
|
|
pos <- getPosition
|
|
let len = (sourceColumn pos) - 1
|
|
blankline
|
|
c <- oneOf underlineChars
|
|
count (len - 1) (char c)
|
|
many (char c)
|
|
blanklines
|
|
state <- getState
|
|
let headerTable = stateHeaderTable state
|
|
let (headerTable',level) = case findIndex (== SingleHeader c) headerTable of
|
|
Just ind -> (headerTable, ind + 1)
|
|
Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1)
|
|
setState (state { stateHeaderTable = headerTable' })
|
|
return $ Header level (normalizeSpaces txt)
|
|
|
|
--
|
|
-- hrule block
|
|
--
|
|
|
|
hrule :: GenParser Char st Block
|
|
hrule = try $ do
|
|
chr <- oneOf underlineChars
|
|
count 3 (char chr)
|
|
skipMany (char chr)
|
|
blankline
|
|
blanklines
|
|
return HorizontalRule
|
|
|
|
--
|
|
-- code blocks
|
|
--
|
|
|
|
-- read a line indented by a given string
|
|
indentedLine :: String -> GenParser Char st [Char]
|
|
indentedLine indents = try $ do
|
|
string indents
|
|
result <- manyTill anyChar newline
|
|
return $ result ++ "\n"
|
|
|
|
-- two or more indented lines, possibly separated by blank lines.
|
|
-- any amount of indentation will work.
|
|
indentedBlock :: GenParser Char st [Char]
|
|
indentedBlock = do
|
|
indents <- lookAhead $ many1 (oneOf " \t")
|
|
lns <- many $ choice $ [ indentedLine indents,
|
|
try $ do b <- blanklines
|
|
l <- indentedLine indents
|
|
return (b ++ l) ]
|
|
optional blanklines
|
|
return $ concat lns
|
|
|
|
codeBlock :: GenParser Char st Block
|
|
codeBlock = try $ do
|
|
codeBlockStart
|
|
result <- indentedBlock
|
|
return $ CodeBlock ("",[],[]) $ stripTrailingNewlines result
|
|
|
|
--
|
|
-- raw html
|
|
--
|
|
|
|
rawHtmlBlock :: GenParser Char st Block
|
|
rawHtmlBlock = try $ string ".. raw:: html" >> blanklines >>
|
|
indentedBlock >>= return . RawHtml
|
|
|
|
--
|
|
-- raw latex
|
|
--
|
|
|
|
rawLaTeXBlock :: GenParser Char st Block
|
|
rawLaTeXBlock = try $ do
|
|
string ".. raw:: latex"
|
|
blanklines
|
|
result <- indentedBlock
|
|
return $ Para [(TeX result)]
|
|
|
|
--
|
|
-- block quotes
|
|
--
|
|
|
|
blockQuote :: GenParser Char ParserState Block
|
|
blockQuote = do
|
|
raw <- indentedBlock
|
|
-- parse the extracted block, which may contain various block elements:
|
|
contents <- parseFromString parseBlocks $ raw ++ "\n\n"
|
|
return $ BlockQuote contents
|
|
|
|
--
|
|
-- list blocks
|
|
--
|
|
|
|
list :: GenParser Char ParserState Block
|
|
list = choice [ bulletList, orderedList, definitionList ] <?> "list"
|
|
|
|
definitionListItem :: GenParser Char ParserState ([Inline], [Block])
|
|
definitionListItem = try $ do
|
|
term <- many1Till inline endline
|
|
raw <- indentedBlock
|
|
-- parse the extracted block, which may contain various block elements:
|
|
contents <- parseFromString parseBlocks $ raw ++ "\n\n"
|
|
return (normalizeSpaces term, contents)
|
|
|
|
definitionList :: GenParser Char ParserState Block
|
|
definitionList = many1 definitionListItem >>= return . DefinitionList
|
|
|
|
-- parses bullet list start and returns its length (inc. following whitespace)
|
|
bulletListStart :: GenParser Char st Int
|
|
bulletListStart = try $ do
|
|
notFollowedBy' hrule -- because hrules start out just like lists
|
|
marker <- oneOf bulletListMarkers
|
|
white <- many1 spaceChar
|
|
return $ length (marker:white)
|
|
|
|
-- parses ordered list start and returns its length (inc following whitespace)
|
|
orderedListStart :: ListNumberStyle
|
|
-> ListNumberDelim
|
|
-> GenParser Char st Int
|
|
orderedListStart style delim = try $ do
|
|
(_, markerLen) <- withHorizDisplacement (orderedListMarker style delim)
|
|
white <- many1 spaceChar
|
|
return $ markerLen + length white
|
|
|
|
-- parse a line of a list item
|
|
listLine :: Int -> GenParser Char ParserState [Char]
|
|
listLine markerLength = try $ do
|
|
notFollowedBy blankline
|
|
indentWith markerLength
|
|
line <- manyTill anyChar newline
|
|
return $ line ++ "\n"
|
|
|
|
-- indent by specified number of spaces (or equiv. tabs)
|
|
indentWith :: Int -> GenParser Char ParserState [Char]
|
|
indentWith num = do
|
|
state <- getState
|
|
let tabStop = stateTabStop state
|
|
if (num < tabStop)
|
|
then count num (char ' ')
|
|
else choice [ try (count num (char ' ')),
|
|
(try (char '\t' >> count (num - tabStop) (char ' '))) ]
|
|
|
|
-- parse raw text for one list item, excluding start marker and continuations
|
|
rawListItem :: GenParser Char ParserState Int
|
|
-> GenParser Char ParserState (Int, [Char])
|
|
rawListItem start = try $ do
|
|
markerLength <- start
|
|
firstLine <- manyTill anyChar newline
|
|
restLines <- many (listLine markerLength)
|
|
return (markerLength, (firstLine ++ "\n" ++ (concat restLines)))
|
|
|
|
-- continuation of a list item - indented and separated by blankline or
|
|
-- (in compact lists) endline.
|
|
-- Note: nested lists are parsed as continuations.
|
|
listContinuation :: Int -> GenParser Char ParserState [Char]
|
|
listContinuation markerLength = try $ do
|
|
blanks <- many1 blankline
|
|
result <- many1 (listLine markerLength)
|
|
return $ blanks ++ concat result
|
|
|
|
listItem :: GenParser Char ParserState Int
|
|
-> GenParser Char ParserState [Block]
|
|
listItem start = try $ do
|
|
(markerLength, first) <- rawListItem start
|
|
rest <- many (listContinuation markerLength)
|
|
blanks <- choice [ try (many blankline >>~ lookAhead start),
|
|
many1 blankline ] -- whole list must end with blank.
|
|
-- parsing with ListItemState forces markers at beginning of lines to
|
|
-- count as list item markers, even if not separated by blank space.
|
|
-- see definition of "endline"
|
|
state <- getState
|
|
let oldContext = stateParserContext state
|
|
setState $ state {stateParserContext = ListItemState}
|
|
-- parse the extracted block, which may itself contain block elements
|
|
parsed <- parseFromString parseBlocks $ concat (first:rest) ++ blanks
|
|
updateState (\st -> st {stateParserContext = oldContext})
|
|
return parsed
|
|
|
|
orderedList :: GenParser Char ParserState Block
|
|
orderedList = try $ do
|
|
(start, style, delim) <- lookAhead (anyOrderedListMarker >>~ spaceChar)
|
|
items <- many1 (listItem (orderedListStart style delim))
|
|
let items' = compactify items
|
|
return $ OrderedList (start, style, delim) items'
|
|
|
|
bulletList :: GenParser Char ParserState Block
|
|
bulletList = many1 (listItem bulletListStart) >>=
|
|
return . BulletList . compactify
|
|
|
|
--
|
|
-- unknown directive (e.g. comment)
|
|
--
|
|
|
|
unknownDirective :: GenParser Char st Block
|
|
unknownDirective = try $ do
|
|
string ".. "
|
|
manyTill anyChar newline
|
|
many (string " :" >> many1 (noneOf "\n:") >> char ':' >>
|
|
many1 (noneOf "\n") >> newline)
|
|
optional blanklines
|
|
return Null
|
|
|
|
--
|
|
-- reference key
|
|
--
|
|
|
|
quotedReferenceName :: GenParser Char ParserState [Inline]
|
|
quotedReferenceName = try $ do
|
|
char '`' >> notFollowedBy (char '`') -- `` means inline code!
|
|
label' <- many1Till inline (char '`')
|
|
return label'
|
|
|
|
unquotedReferenceName :: GenParser Char ParserState [Inline]
|
|
unquotedReferenceName = try $ do
|
|
label' <- many1Till inline (lookAhead $ char ':')
|
|
return label'
|
|
|
|
isolated :: Char -> GenParser Char st Char
|
|
isolated ch = try $ char ch >>~ notFollowedBy (char ch)
|
|
|
|
simpleReferenceName :: GenParser Char st [Inline]
|
|
simpleReferenceName = do
|
|
raw <- many1 (alphaNum <|> isolated '-' <|> isolated '.' <|>
|
|
(try $ char '_' >>~ lookAhead alphaNum))
|
|
return [Str raw]
|
|
|
|
referenceName :: GenParser Char ParserState [Inline]
|
|
referenceName = quotedReferenceName <|>
|
|
(try $ simpleReferenceName >>~ lookAhead (char ':')) <|>
|
|
unquotedReferenceName
|
|
|
|
referenceKey :: GenParser Char ParserState [Char]
|
|
referenceKey = do
|
|
startPos <- getPosition
|
|
key <- choice [imageKey, anonymousKey, regularKey]
|
|
st <- getState
|
|
let oldkeys = stateKeys st
|
|
updateState $ \s -> s { stateKeys = key : oldkeys }
|
|
optional blanklines
|
|
endPos <- getPosition
|
|
-- return enough blanks to replace key
|
|
return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
|
|
|
|
targetURI :: GenParser Char st [Char]
|
|
targetURI = do
|
|
skipSpaces
|
|
optional newline
|
|
contents <- many1 (try (many spaceChar >> newline >>
|
|
many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n")
|
|
blanklines
|
|
return contents
|
|
|
|
imageKey :: GenParser Char ParserState ([Inline], (String, [Char]))
|
|
imageKey = try $ do
|
|
string ".. |"
|
|
ref <- manyTill inline (char '|')
|
|
skipSpaces
|
|
string "image::"
|
|
src <- targetURI
|
|
return (normalizeSpaces ref, (removeLeadingTrailingSpace src, ""))
|
|
|
|
anonymousKey :: GenParser Char st ([Inline], (String, [Char]))
|
|
anonymousKey = try $ do
|
|
oneOfStrings [".. __:", "__"]
|
|
src <- targetURI
|
|
return ([Str "_"], (removeLeadingTrailingSpace src, ""))
|
|
|
|
regularKey :: GenParser Char ParserState ([Inline], (String, [Char]))
|
|
regularKey = try $ do
|
|
string ".. _"
|
|
ref <- referenceName
|
|
char ':'
|
|
src <- targetURI
|
|
return (normalizeSpaces ref, (removeLeadingTrailingSpace src, ""))
|
|
|
|
--
|
|
-- inline
|
|
--
|
|
|
|
inline :: GenParser Char ParserState Inline
|
|
inline = choice [ link
|
|
, str
|
|
, whitespace
|
|
, endline
|
|
, strong
|
|
, emph
|
|
, code
|
|
, image
|
|
, hyphens
|
|
, superscript
|
|
, subscript
|
|
, escapedChar
|
|
, symbol ] <?> "inline"
|
|
|
|
hyphens :: GenParser Char ParserState Inline
|
|
hyphens = do
|
|
result <- many1 (char '-')
|
|
option Space endline
|
|
-- don't want to treat endline after hyphen or dash as a space
|
|
return $ Str result
|
|
|
|
escapedChar :: GenParser Char st Inline
|
|
escapedChar = escaped anyChar
|
|
|
|
symbol :: GenParser Char ParserState Inline
|
|
symbol = do
|
|
result <- oneOf specialChars
|
|
return $ Str [result]
|
|
|
|
-- parses inline code, between codeStart and codeEnd
|
|
code :: GenParser Char ParserState Inline
|
|
code = try $ do
|
|
string "``"
|
|
result <- manyTill anyChar (try (string "``"))
|
|
return $ Code $ removeLeadingTrailingSpace $ joinWithSep " " $ lines result
|
|
|
|
emph :: GenParser Char ParserState Inline
|
|
emph = enclosed (char '*') (char '*') inline >>=
|
|
return . Emph . normalizeSpaces
|
|
|
|
strong :: GenParser Char ParserState Inline
|
|
strong = enclosed (string "**") (try $ string "**") inline >>=
|
|
return . Strong . normalizeSpaces
|
|
|
|
interpreted :: [Char] -> GenParser Char st [Inline]
|
|
interpreted role = try $ do
|
|
optional $ try $ string "\\ "
|
|
result <- enclosed (string $ ":" ++ role ++ ":`") (char '`') anyChar
|
|
try (string "\\ ") <|> lookAhead (count 1 $ oneOf " \t\n") <|> (eof >> return "")
|
|
return [Str result]
|
|
|
|
superscript :: GenParser Char ParserState Inline
|
|
superscript = interpreted "sup" >>= (return . Superscript)
|
|
|
|
subscript :: GenParser Char ParserState Inline
|
|
subscript = interpreted "sub" >>= (return . Subscript)
|
|
|
|
whitespace :: GenParser Char ParserState Inline
|
|
whitespace = many1 spaceChar >> return Space <?> "whitespace"
|
|
|
|
str :: GenParser Char ParserState Inline
|
|
str = many1 (noneOf (specialChars ++ "\t\n ")) >>= return . Str
|
|
|
|
-- an endline character that can be treated as a space, not a structural break
|
|
endline :: GenParser Char ParserState Inline
|
|
endline = try $ do
|
|
newline
|
|
notFollowedBy blankline
|
|
-- parse potential list-starts at beginning of line differently in a list:
|
|
st <- getState
|
|
if (stateParserContext st) == ListItemState
|
|
then notFollowedBy (anyOrderedListMarker >> spaceChar) >>
|
|
notFollowedBy' bulletListStart
|
|
else return ()
|
|
return Space
|
|
|
|
--
|
|
-- links
|
|
--
|
|
|
|
link :: GenParser Char ParserState Inline
|
|
link = choice [explicitLink, referenceLink, autoLink] <?> "link"
|
|
|
|
explicitLink :: GenParser Char ParserState Inline
|
|
explicitLink = try $ do
|
|
char '`'
|
|
notFollowedBy (char '`') -- `` marks start of inline code
|
|
label' <- manyTill (notFollowedBy (char '`') >> inline)
|
|
(try (spaces >> char '<'))
|
|
src <- manyTill (noneOf ">\n ") (char '>')
|
|
skipSpaces
|
|
string "`_"
|
|
return $ Link (normalizeSpaces label') (removeLeadingTrailingSpace src, "")
|
|
|
|
referenceLink :: GenParser Char ParserState Inline
|
|
referenceLink = try $ do
|
|
label' <- (quotedReferenceName <|> simpleReferenceName) >>~ char '_'
|
|
key <- option label' (do{char '_'; return [Str "_"]}) -- anonymous link
|
|
state <- getState
|
|
let keyTable = stateKeys state
|
|
src <- case lookupKeySrc keyTable key of
|
|
Nothing -> fail "no corresponding key"
|
|
Just target -> return target
|
|
-- if anonymous link, remove first anon key so it won't be used again
|
|
let keyTable' = if (key == [Str "_"]) -- anonymous link?
|
|
then delete ([Str "_"], src) keyTable -- remove first anon key
|
|
else keyTable
|
|
setState $ state { stateKeys = keyTable' }
|
|
return $ Link (normalizeSpaces label') src
|
|
|
|
autoURI :: GenParser Char ParserState Inline
|
|
autoURI = do
|
|
src <- uri
|
|
return $ Link [Str src] (src, "")
|
|
|
|
autoEmail :: GenParser Char ParserState Inline
|
|
autoEmail = do
|
|
src <- emailAddress
|
|
return $ Link [Str src] ("mailto:" ++ src, "")
|
|
|
|
autoLink :: GenParser Char ParserState Inline
|
|
autoLink = autoURI <|> autoEmail
|
|
|
|
-- For now, we assume that all substitution references are for images.
|
|
image :: GenParser Char ParserState Inline
|
|
image = try $ do
|
|
char '|'
|
|
ref <- manyTill inline (char '|')
|
|
state <- getState
|
|
let keyTable = stateKeys state
|
|
src <- case lookupKeySrc keyTable ref of
|
|
Nothing -> fail "no corresponding key"
|
|
Just target -> return target
|
|
return $ Image (normalizeSpaces ref) src
|
|
|