2006-12-20 20:54:23 +00:00
|
|
|
{-
|
|
|
|
Copyright (C) 2006 John MacFarlane <jgm at berkeley dot edu>
|
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
-}
|
|
|
|
|
2006-12-20 06:50:14 +00:00
|
|
|
{- |
|
|
|
|
Module : Text.Pandoc.Readers.RST
|
|
|
|
Copyright : Copyright (C) 2006 John MacFarlane
|
|
|
|
License : GNU GPL, version 2 or above
|
|
|
|
|
|
|
|
Maintainer : John MacFarlane <jgm at berkeley dot edu>
|
2006-12-20 20:20:10 +00:00
|
|
|
Stability : alpha
|
2006-12-20 06:50:14 +00:00
|
|
|
Portability : portable
|
|
|
|
|
|
|
|
Conversion from reStructuredText to 'Pandoc' document.
|
|
|
|
-}
|
2006-10-17 14:22:29 +00:00
|
|
|
module Text.Pandoc.Readers.RST (
|
|
|
|
readRST
|
|
|
|
) where
|
|
|
|
import Text.Pandoc.Definition
|
|
|
|
import Text.ParserCombinators.Pandoc
|
|
|
|
import Text.Pandoc.Shared
|
|
|
|
import Text.Pandoc.Readers.HTML ( anyHtmlBlockTag, anyHtmlInlineTag )
|
|
|
|
import Text.Regex ( matchRegex, mkRegex )
|
|
|
|
import Text.ParserCombinators.Parsec
|
|
|
|
import Data.Maybe ( fromMaybe )
|
|
|
|
import List ( findIndex )
|
|
|
|
import Char ( toUpper )
|
|
|
|
|
|
|
|
-- | Parse reStructuredText string and return Pandoc document.
|
|
|
|
readRST :: ParserState -> String -> Pandoc
|
|
|
|
readRST = readWith parseRST
|
|
|
|
|
|
|
|
-- | Parse a string and print result (for testing).
|
|
|
|
testString :: String -> IO ()
|
|
|
|
testString = testStringWith parseRST
|
|
|
|
|
|
|
|
--
|
|
|
|
-- Constants and data structure definitions
|
|
|
|
---
|
|
|
|
|
|
|
|
bulletListMarkers = "*+-"
|
|
|
|
underlineChars = "!\"#$&'()*+,-./:;<=>?@[\\]^_`{|}~"
|
|
|
|
|
|
|
|
-- treat these as potentially non-text when parsing inline:
|
|
|
|
specialChars = "\\`|*_<>$:[-"
|
|
|
|
|
|
|
|
--
|
|
|
|
-- parsing documents
|
|
|
|
--
|
|
|
|
|
|
|
|
isAnonKeyBlock block = case block of
|
|
|
|
(Key [Str "_"] str) -> True
|
|
|
|
otherwise -> False
|
|
|
|
|
|
|
|
isNotAnonKeyBlock block = not (isAnonKeyBlock block)
|
|
|
|
|
|
|
|
isHeader1 :: Block -> Bool
|
|
|
|
isHeader1 (Header 1 _) = True
|
|
|
|
isHeader1 _ = False
|
|
|
|
|
|
|
|
isHeader2 :: Block -> Bool
|
|
|
|
isHeader2 (Header 2 _) = True
|
|
|
|
isHeader2 _ = False
|
|
|
|
|
|
|
|
-- | Promote all headers in a list of blocks. (Part of
|
|
|
|
-- title transformation for RST.)
|
|
|
|
promoteHeaders :: Int -> [Block] -> [Block]
|
|
|
|
promoteHeaders num ((Header level text):rest) =
|
|
|
|
(Header (level - num) text):(promoteHeaders num rest)
|
|
|
|
promoteHeaders num (other:rest) = other:(promoteHeaders num rest)
|
|
|
|
promoteHeaders num [] = []
|
|
|
|
|
|
|
|
-- | If list of blocks starts with a header (or a header and subheader)
|
|
|
|
-- of level that are not found elsewhere, return it as a title and
|
|
|
|
-- promote all the other headers.
|
|
|
|
titleTransform :: [Block] -- ^ list of blocks
|
|
|
|
-> ([Block], [Inline]) -- ^ modified list of blocks, title
|
2006-12-20 06:50:14 +00:00
|
|
|
titleTransform ((Header 1 head1):(Header 2 head2):rest) = -- title subtitle
|
|
|
|
if (any isHeader1 rest) || (any isHeader2 rest)
|
|
|
|
then ((Header 1 head1):(Header 2 head2):rest, [])
|
|
|
|
else ((promoteHeaders 2 rest), head1 ++ [Str ":", Space] ++ head2)
|
2006-10-17 14:22:29 +00:00
|
|
|
titleTransform ((Header 1 head1):rest) = -- title, no subtitle
|
2006-12-20 06:50:14 +00:00
|
|
|
if (any isHeader1 rest)
|
|
|
|
then ((Header 1 head1):rest, [])
|
|
|
|
else ((promoteHeaders 1 rest), head1)
|
2006-10-17 14:22:29 +00:00
|
|
|
titleTransform blocks = (blocks, [])
|
|
|
|
|
|
|
|
parseRST = do
|
2006-12-31 17:34:26 +00:00
|
|
|
-- first pass: get anonymous keys
|
2007-02-15 01:33:35 +00:00
|
|
|
keyBlocks <- lookAhead $ manyTill (anonymousKey <|> (do{anyLine; return Null})) eof
|
2006-12-31 17:34:26 +00:00
|
|
|
let anonymousKeys = filter (/= Null) keyBlocks
|
|
|
|
-- run parser again to fill in anonymous links...
|
2007-02-15 01:43:12 +00:00
|
|
|
updateState (\st -> st { stateKeyBlocks = anonymousKeys })
|
|
|
|
state <- getState
|
2006-12-31 17:34:26 +00:00
|
|
|
blocks <- parseBlocks
|
|
|
|
let blocks' = filter isNotAnonKeyBlock blocks
|
2006-12-20 06:50:14 +00:00
|
|
|
let (blocks'', title) = if stateStandalone state
|
2006-12-31 17:34:26 +00:00
|
|
|
then titleTransform blocks'
|
|
|
|
else (blocks', [])
|
|
|
|
state' <- getState
|
|
|
|
let authors = stateAuthors state'
|
|
|
|
let date = stateDate state'
|
|
|
|
let title' = if (null title) then (stateTitle state') else title
|
2006-10-17 14:22:29 +00:00
|
|
|
return (Pandoc (Meta title' authors date) blocks'')
|
|
|
|
|
|
|
|
--
|
|
|
|
-- parsing blocks
|
|
|
|
--
|
|
|
|
|
2006-12-31 17:34:26 +00:00
|
|
|
parseBlocks = manyTill block eof
|
2006-10-17 14:22:29 +00:00
|
|
|
|
2006-12-20 06:50:14 +00:00
|
|
|
block = choice [ codeBlock, rawHtmlBlock, rawLaTeXBlock, blockQuote,
|
|
|
|
referenceKey, imageBlock, unknownDirective, header,
|
|
|
|
hrule, list, fieldList, lineBlock, para, plain,
|
2007-03-17 20:23:47 +00:00
|
|
|
nullBlock ] <?> "block"
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
--
|
|
|
|
-- field list
|
|
|
|
--
|
|
|
|
|
|
|
|
fieldListItem = try (do
|
|
|
|
char ':'
|
|
|
|
name <- many1 alphaNum
|
|
|
|
string ": "
|
|
|
|
skipSpaces
|
|
|
|
first <- manyTill anyChar newline
|
2006-12-20 06:50:14 +00:00
|
|
|
rest <- many (do
|
|
|
|
notFollowedBy (char ':')
|
|
|
|
notFollowedBy blankline
|
|
|
|
skipSpaces
|
|
|
|
manyTill anyChar newline )
|
2006-10-17 14:22:29 +00:00
|
|
|
return (name, (joinWithSep " " (first:rest))))
|
|
|
|
|
|
|
|
fieldList = try (do
|
|
|
|
items <- many1 fieldListItem
|
|
|
|
blanklines
|
|
|
|
let authors = case (lookup "Authors" items) of
|
2006-12-20 06:50:14 +00:00
|
|
|
Just auth -> [auth]
|
|
|
|
Nothing -> map snd (filter (\(x,y) -> x == "Author") items)
|
2006-10-17 14:22:29 +00:00
|
|
|
let date = case (lookup "Date" items) of
|
2006-12-20 06:50:14 +00:00
|
|
|
Just dat -> dat
|
|
|
|
Nothing -> ""
|
2006-10-17 14:22:29 +00:00
|
|
|
let title = case (lookup "Title" items) of
|
2006-12-20 06:50:14 +00:00
|
|
|
Just tit -> [Str tit]
|
|
|
|
Nothing -> []
|
|
|
|
let remaining = filter (\(x,y) -> (x /= "Authors") && (x /= "Author") &&
|
|
|
|
(x /= "Date") && (x /= "Title")) items
|
|
|
|
let result = map (\(x,y) ->
|
|
|
|
Para [Strong [Str x], Str ":", Space, Str y]) remaining
|
|
|
|
updateState (\st -> st { stateAuthors = authors,
|
|
|
|
stateDate = date,
|
|
|
|
stateTitle = title })
|
2006-10-17 14:22:29 +00:00
|
|
|
return (BlockQuote result))
|
|
|
|
|
|
|
|
--
|
|
|
|
-- line block
|
|
|
|
--
|
|
|
|
|
|
|
|
lineBlockLine = try (do
|
|
|
|
string "| "
|
|
|
|
white <- many (oneOf " \t")
|
|
|
|
line <- manyTill inline newline
|
|
|
|
let line' = (if null white then [] else [Str white]) ++ line ++ [LineBreak]
|
|
|
|
return line')
|
|
|
|
|
|
|
|
lineBlock = try (do
|
|
|
|
lines <- many1 lineBlockLine
|
|
|
|
blanklines
|
|
|
|
return $ Para (concat lines))
|
|
|
|
|
|
|
|
--
|
|
|
|
-- paragraph block
|
|
|
|
--
|
|
|
|
|
|
|
|
para = choice [ paraBeforeCodeBlock, paraNormal ] <?> "paragraph"
|
|
|
|
|
|
|
|
codeBlockStart = try (do
|
2006-12-20 06:50:14 +00:00
|
|
|
string "::"
|
|
|
|
blankline
|
|
|
|
blankline)
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
-- paragraph that ends in a :: starting a code block
|
|
|
|
paraBeforeCodeBlock = try (do
|
|
|
|
result <- many1 (do {notFollowedBy' codeBlockStart; inline})
|
2007-02-15 01:10:15 +00:00
|
|
|
lookAhead (string "::")
|
2006-12-20 06:50:14 +00:00
|
|
|
return (Para (if (last result == Space)
|
|
|
|
then normalizeSpaces result
|
|
|
|
else (normalizeSpaces result) ++ [Str ":"])))
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
-- regular paragraph
|
|
|
|
paraNormal = try (do
|
|
|
|
result <- many1 inline
|
|
|
|
newline
|
|
|
|
blanklines
|
|
|
|
let result' = normalizeSpaces result
|
|
|
|
return (Para result'))
|
|
|
|
|
|
|
|
plain = do
|
|
|
|
result <- many1 inline
|
|
|
|
let result' = normalizeSpaces result
|
|
|
|
return (Plain result')
|
|
|
|
|
|
|
|
--
|
|
|
|
-- image block
|
|
|
|
--
|
|
|
|
|
|
|
|
imageBlock = try (do
|
2006-12-20 06:50:14 +00:00
|
|
|
string ".. image:: "
|
|
|
|
src <- manyTill anyChar newline
|
|
|
|
return (Plain [Image [Str "image"] (Src src "")]))
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
--
|
|
|
|
-- header blocks
|
|
|
|
--
|
|
|
|
|
|
|
|
header = choice [ doubleHeader, singleHeader ] <?> "header"
|
|
|
|
|
|
|
|
-- a header with lines on top and bottom
|
|
|
|
doubleHeader = try (do
|
2006-12-20 06:50:14 +00:00
|
|
|
c <- oneOf underlineChars
|
|
|
|
rest <- many (char c) -- the top line
|
|
|
|
let lenTop = length (c:rest)
|
|
|
|
skipSpaces
|
|
|
|
newline
|
|
|
|
txt <- many1 (do {notFollowedBy blankline; inline})
|
|
|
|
pos <- getPosition
|
|
|
|
let len = (sourceColumn pos) - 1
|
|
|
|
if (len > lenTop) then fail "title longer than border" else (do {return ()})
|
|
|
|
blankline -- spaces and newline
|
|
|
|
count lenTop (char c) -- the bottom line
|
|
|
|
blanklines
|
|
|
|
-- check to see if we've had this kind of header before.
|
|
|
|
-- if so, get appropriate level. if not, add to list.
|
|
|
|
state <- getState
|
|
|
|
let headerTable = stateHeaderTable state
|
|
|
|
let (headerTable',level) = case findIndex (== DoubleHeader c) headerTable of
|
|
|
|
Just ind -> (headerTable, ind + 1)
|
|
|
|
Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1)
|
|
|
|
setState (state { stateHeaderTable = headerTable' })
|
|
|
|
return (Header level (normalizeSpaces txt)))
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
-- a header with line on the bottom only
|
|
|
|
singleHeader = try (do
|
2006-12-20 06:50:14 +00:00
|
|
|
notFollowedBy' whitespace
|
|
|
|
txt <- many1 (do {notFollowedBy blankline; inline})
|
|
|
|
pos <- getPosition
|
|
|
|
let len = (sourceColumn pos) - 1
|
|
|
|
blankline
|
|
|
|
c <- oneOf underlineChars
|
|
|
|
rest <- count (len - 1) (char c)
|
|
|
|
many (char c)
|
|
|
|
blanklines
|
|
|
|
state <- getState
|
|
|
|
let headerTable = stateHeaderTable state
|
|
|
|
let (headerTable',level) = case findIndex (== SingleHeader c) headerTable of
|
|
|
|
Just ind -> (headerTable, ind + 1)
|
|
|
|
Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1)
|
|
|
|
setState (state { stateHeaderTable = headerTable' })
|
|
|
|
return (Header level (normalizeSpaces txt)))
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
--
|
|
|
|
-- hrule block
|
|
|
|
--
|
|
|
|
|
2006-12-20 06:50:14 +00:00
|
|
|
hruleWith chr = try (do
|
|
|
|
count 4 (char chr)
|
|
|
|
skipMany (char chr)
|
|
|
|
skipSpaces
|
|
|
|
newline
|
|
|
|
blanklines
|
|
|
|
return HorizontalRule)
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
hrule = choice (map hruleWith underlineChars) <?> "hrule"
|
|
|
|
|
|
|
|
--
|
|
|
|
-- code blocks
|
|
|
|
--
|
|
|
|
|
|
|
|
-- read a line indented by a given string
|
|
|
|
indentedLine indents = try (do
|
2006-12-20 06:50:14 +00:00
|
|
|
string indents
|
|
|
|
result <- manyTill anyChar newline
|
|
|
|
return (result ++ "\n"))
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
-- two or more indented lines, possibly separated by blank lines
|
|
|
|
-- if variable = True, then any indent will work, but it must be consistent through the block
|
|
|
|
-- if variable = False, indent should be one tab or equivalent in spaces
|
|
|
|
indentedBlock variable = try (do
|
|
|
|
state <- getState
|
|
|
|
let tabStop = stateTabStop state
|
2006-12-20 06:50:14 +00:00
|
|
|
indents <- if variable
|
|
|
|
then many1 (oneOf " \t")
|
|
|
|
else oneOfStrings ["\t", (replicate tabStop ' ')]
|
2006-10-17 14:22:29 +00:00
|
|
|
firstline <- manyTill anyChar newline
|
|
|
|
rest <- many (choice [ indentedLine indents,
|
2006-12-20 06:50:14 +00:00
|
|
|
try (do
|
|
|
|
b <- blanklines
|
|
|
|
l <- indentedLine indents
|
|
|
|
return (b ++ l))])
|
2006-10-17 14:22:29 +00:00
|
|
|
option "" blanklines
|
|
|
|
return (firstline ++ "\n" ++ (concat rest)))
|
|
|
|
|
|
|
|
codeBlock = try (do
|
2006-12-20 06:50:14 +00:00
|
|
|
codeBlockStart
|
|
|
|
result <- indentedBlock False
|
|
|
|
-- the False means we want one tab stop indent on each line
|
|
|
|
return (CodeBlock (stripTrailingNewlines result)))
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
--
|
|
|
|
-- raw html
|
|
|
|
--
|
|
|
|
|
|
|
|
rawHtmlBlock = try (do
|
2006-12-20 06:50:14 +00:00
|
|
|
string ".. raw:: html"
|
|
|
|
blanklines
|
|
|
|
result <- indentedBlock True
|
|
|
|
return (RawHtml result))
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
--
|
|
|
|
-- raw latex
|
|
|
|
--
|
|
|
|
|
|
|
|
rawLaTeXBlock = try (do
|
2006-12-20 06:50:14 +00:00
|
|
|
string ".. raw:: latex"
|
|
|
|
blanklines
|
|
|
|
result <- indentedBlock True
|
|
|
|
return (Para [(TeX result)]))
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
--
|
|
|
|
-- block quotes
|
|
|
|
--
|
|
|
|
|
|
|
|
blockQuote = try (do
|
2006-12-21 09:02:06 +00:00
|
|
|
raw <- indentedBlock True
|
2006-12-20 06:50:14 +00:00
|
|
|
-- parse the extracted block, which may contain various block elements:
|
2006-12-21 09:02:06 +00:00
|
|
|
rest <- getInput
|
|
|
|
setInput $ raw ++ "\n\n"
|
|
|
|
contents <- parseBlocks
|
|
|
|
setInput rest
|
|
|
|
return (BlockQuote contents))
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
--
|
|
|
|
-- list blocks
|
|
|
|
--
|
|
|
|
|
|
|
|
list = choice [ bulletList, orderedList ] <?> "list"
|
|
|
|
|
|
|
|
-- parses bullet list start and returns its length (inc. following whitespace)
|
2006-12-20 06:50:14 +00:00
|
|
|
bulletListStart = try (do
|
|
|
|
notFollowedBy' hrule -- because hrules start out just like lists
|
|
|
|
marker <- oneOf bulletListMarkers
|
|
|
|
white <- many1 spaceChar
|
|
|
|
let len = length (marker:white)
|
|
|
|
return len)
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
withPeriodSuffix parser = try (do
|
2006-12-20 06:50:14 +00:00
|
|
|
a <- parser
|
|
|
|
b <- char '.'
|
|
|
|
return (a ++ [b]))
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
withParentheses parser = try (do
|
2006-12-20 06:50:14 +00:00
|
|
|
a <- char '('
|
|
|
|
b <- parser
|
|
|
|
c <- char ')'
|
|
|
|
return ([a] ++ b ++ [c]))
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
withRightParen parser = try (do
|
2006-12-20 06:50:14 +00:00
|
|
|
a <- parser
|
|
|
|
b <- char ')'
|
|
|
|
return (a ++ [b]))
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
upcaseWord = map toUpper
|
|
|
|
|
|
|
|
romanNumeral = do
|
2006-12-20 06:50:14 +00:00
|
|
|
let lowerNumerals = ["i", "ii", "iii", "iiii", "iv", "v", "vi",
|
|
|
|
"vii", "viii", "ix", "x", "xi", "xii", "xiii",
|
|
|
|
"xiv", "xv", "xvi", "xvii", "xviii", "xix", "xx",
|
|
|
|
"xxi", "xxii", "xxiii", "xxiv" ]
|
2006-10-17 14:22:29 +00:00
|
|
|
let upperNumerals = map upcaseWord lowerNumerals
|
|
|
|
result <- choice $ map string (lowerNumerals ++ upperNumerals)
|
|
|
|
return result
|
|
|
|
|
|
|
|
orderedListEnumerator = choice [ many1 digit,
|
2007-02-12 03:53:14 +00:00
|
|
|
count 1 (char '#'),
|
|
|
|
count 1 letter,
|
2006-10-17 14:22:29 +00:00
|
|
|
romanNumeral ]
|
|
|
|
|
2006-12-20 06:50:14 +00:00
|
|
|
-- parses ordered list start and returns its length (inc following whitespace)
|
|
|
|
orderedListStart = try (do
|
|
|
|
marker <- choice [ withPeriodSuffix orderedListEnumerator,
|
|
|
|
withParentheses orderedListEnumerator,
|
|
|
|
withRightParen orderedListEnumerator ]
|
|
|
|
white <- many1 spaceChar
|
|
|
|
let len = length (marker ++ white)
|
|
|
|
return len)
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
-- parse a line of a list item
|
|
|
|
listLine markerLength = try (do
|
|
|
|
notFollowedBy blankline
|
|
|
|
indentWith markerLength
|
|
|
|
line <- manyTill anyChar newline
|
|
|
|
return (line ++ "\n"))
|
|
|
|
|
|
|
|
-- indent by specified number of spaces (or equiv. tabs)
|
|
|
|
indentWith num = do
|
|
|
|
state <- getState
|
|
|
|
let tabStop = stateTabStop state
|
2006-12-20 06:50:14 +00:00
|
|
|
if (num < tabStop)
|
|
|
|
then count num (char ' ')
|
|
|
|
else choice [ try (count num (char ' ')),
|
|
|
|
(try (do {char '\t'; count (num - tabStop) (char ' ')})) ]
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
-- parse raw text for one list item, excluding start marker and continuations
|
2006-12-20 06:50:14 +00:00
|
|
|
rawListItem start = try (do
|
|
|
|
markerLength <- start
|
|
|
|
firstLine <- manyTill anyChar newline
|
|
|
|
restLines <- many (listLine markerLength)
|
|
|
|
return (markerLength, (firstLine ++ "\n" ++ (concat restLines))))
|
|
|
|
|
|
|
|
-- continuation of a list item - indented and separated by blankline or
|
|
|
|
-- (in compact lists) endline.
|
|
|
|
-- Note: nested lists are parsed as continuations.
|
|
|
|
listContinuation markerLength = try (do
|
|
|
|
blanks <- many1 blankline
|
|
|
|
result <- many1 (listLine markerLength)
|
|
|
|
return (blanks ++ (concat result)))
|
|
|
|
|
|
|
|
listItem start = try (do
|
|
|
|
(markerLength, first) <- rawListItem start
|
|
|
|
rest <- many (listContinuation markerLength)
|
|
|
|
blanks <- choice [ try (do
|
|
|
|
b <- many blankline
|
2007-02-15 01:10:15 +00:00
|
|
|
lookAhead start
|
2006-12-20 06:50:14 +00:00
|
|
|
return b),
|
|
|
|
many1 blankline ] -- whole list must end with blank
|
|
|
|
-- parsing with ListItemState forces markers at beginning of lines to
|
|
|
|
-- count as list item markers, even if not separated by blank space.
|
|
|
|
-- see definition of "endline"
|
|
|
|
state <- getState
|
2006-12-31 18:14:51 +00:00
|
|
|
let oldContext = stateParserContext state
|
|
|
|
remaining <- getInput
|
|
|
|
setState $ state {stateParserContext = ListItemState}
|
|
|
|
-- parse the extracted block, which may itself contain block elements
|
|
|
|
setInput $ concat (first:rest) ++ blanks
|
|
|
|
parsed <- parseBlocks
|
|
|
|
setInput remaining
|
|
|
|
updateState (\st -> st {stateParserContext = oldContext})
|
2006-12-20 06:50:14 +00:00
|
|
|
return parsed)
|
|
|
|
|
|
|
|
orderedList = try (do
|
|
|
|
items <- many1 (listItem orderedListStart)
|
|
|
|
let items' = compactify items
|
|
|
|
return (OrderedList items'))
|
|
|
|
|
|
|
|
bulletList = try (do
|
|
|
|
items <- many1 (listItem bulletListStart)
|
|
|
|
let items' = compactify items
|
|
|
|
return (BulletList items'))
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
--
|
|
|
|
-- unknown directive (e.g. comment)
|
|
|
|
--
|
|
|
|
|
|
|
|
unknownDirective = try (do
|
2006-12-20 06:50:14 +00:00
|
|
|
string ".. "
|
|
|
|
manyTill anyChar newline
|
|
|
|
many (do
|
|
|
|
string " "
|
|
|
|
char ':'
|
|
|
|
many1 (noneOf "\n:")
|
|
|
|
char ':'
|
|
|
|
many1 (noneOf "\n")
|
|
|
|
newline)
|
|
|
|
option "" blanklines
|
|
|
|
return Null)
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
--
|
|
|
|
-- reference key
|
|
|
|
--
|
|
|
|
|
|
|
|
referenceKey = choice [imageKey, anonymousKey, regularKeyQuoted, regularKey]
|
|
|
|
|
|
|
|
imageKey = try (do
|
2006-12-20 06:50:14 +00:00
|
|
|
string ".. |"
|
|
|
|
ref <- manyTill inline (char '|')
|
|
|
|
skipSpaces
|
|
|
|
string "image::"
|
|
|
|
src <- manyTill anyChar newline
|
|
|
|
return (Key (normalizeSpaces ref)
|
|
|
|
(Src (removeLeadingTrailingSpace src) "")))
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
anonymousKey = try (do
|
2007-02-12 03:53:14 +00:00
|
|
|
oneOfStrings [".. __:", "__"]
|
2006-12-20 06:50:14 +00:00
|
|
|
skipSpaces
|
2007-02-12 02:46:39 +00:00
|
|
|
option ' ' newline
|
2006-12-20 06:50:14 +00:00
|
|
|
src <- manyTill anyChar newline
|
|
|
|
state <- getState
|
|
|
|
return (Key [Str "_"] (Src (removeLeadingTrailingSpace src) "")))
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
regularKeyQuoted = try (do
|
2006-12-20 06:50:14 +00:00
|
|
|
string ".. _`"
|
2007-02-12 03:53:14 +00:00
|
|
|
ref <- manyTill inline (char '`')
|
|
|
|
char ':'
|
2006-12-20 06:50:14 +00:00
|
|
|
skipSpaces
|
2007-02-12 02:46:39 +00:00
|
|
|
option ' ' newline
|
2006-12-20 06:50:14 +00:00
|
|
|
src <- manyTill anyChar newline
|
|
|
|
return (Key (normalizeSpaces ref)
|
|
|
|
(Src (removeLeadingTrailingSpace src) "")))
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
regularKey = try (do
|
2006-12-20 06:50:14 +00:00
|
|
|
string ".. _"
|
|
|
|
ref <- manyTill inline (char ':')
|
|
|
|
skipSpaces
|
2007-02-12 02:46:39 +00:00
|
|
|
option ' ' newline
|
2006-12-20 06:50:14 +00:00
|
|
|
src <- manyTill anyChar newline
|
|
|
|
return (Key (normalizeSpaces ref)
|
|
|
|
(Src (removeLeadingTrailingSpace src) "")))
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
--
|
|
|
|
-- inline
|
|
|
|
--
|
|
|
|
|
2006-12-20 06:50:14 +00:00
|
|
|
text = choice [ strong, emph, code, str, tabchar, whitespace,
|
|
|
|
endline ] <?> "text"
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
inline = choice [ escapedChar, special, hyphens, text, symbol ] <?> "inline"
|
|
|
|
|
|
|
|
special = choice [ link, image ] <?> "link, inline html, or image"
|
|
|
|
|
|
|
|
hyphens = try (do
|
|
|
|
result <- many1 (char '-')
|
2006-12-20 06:50:14 +00:00
|
|
|
option Space endline
|
|
|
|
-- don't want to treat endline after hyphen or dash as a space
|
2006-10-17 14:22:29 +00:00
|
|
|
return (Str result))
|
|
|
|
|
|
|
|
escapedChar = escaped anyChar
|
|
|
|
|
|
|
|
symbol = do
|
|
|
|
result <- oneOf specialChars
|
|
|
|
return (Str [result])
|
|
|
|
|
|
|
|
-- parses inline code, between codeStart and codeEnd
|
2006-12-20 06:50:14 +00:00
|
|
|
code = try (do
|
|
|
|
string "``"
|
2007-02-12 03:53:14 +00:00
|
|
|
result <- manyTill anyChar (try (string "``"))
|
2006-12-20 06:50:14 +00:00
|
|
|
let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result
|
|
|
|
return (Code result'))
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
emph = do
|
|
|
|
result <- enclosed (char '*') (char '*') inline
|
|
|
|
return (Emph (normalizeSpaces result))
|
|
|
|
|
|
|
|
strong = do
|
2007-02-12 17:14:11 +00:00
|
|
|
result <- enclosed (try (string "**")) (try (string "**")) inline
|
2006-10-17 14:22:29 +00:00
|
|
|
return (Strong (normalizeSpaces result))
|
|
|
|
|
|
|
|
whitespace = do
|
|
|
|
many1 spaceChar <?> "whitespace"
|
|
|
|
return Space
|
|
|
|
|
|
|
|
tabchar = do
|
|
|
|
tab
|
|
|
|
return (Str "\t")
|
|
|
|
|
|
|
|
str = do
|
|
|
|
notFollowedBy' oneWordReferenceLink
|
|
|
|
result <- many1 (noneOf (specialChars ++ "\t\n "))
|
|
|
|
return (Str result)
|
|
|
|
|
|
|
|
-- an endline character that can be treated as a space, not a structural break
|
2006-12-20 06:50:14 +00:00
|
|
|
endline = try (do
|
|
|
|
newline
|
|
|
|
notFollowedBy blankline
|
|
|
|
-- parse potential list-starts at beginning of line differently in a list:
|
|
|
|
st <- getState
|
|
|
|
if ((stateParserContext st) == ListItemState)
|
|
|
|
then notFollowedBy' (choice [orderedListStart, bulletListStart])
|
|
|
|
else option () pzero
|
|
|
|
return Space)
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
--
|
|
|
|
-- links
|
|
|
|
--
|
|
|
|
|
2006-12-20 06:50:14 +00:00
|
|
|
link = choice [explicitLink, referenceLink, autoLink,
|
|
|
|
oneWordReferenceLink] <?> "link"
|
|
|
|
|
|
|
|
explicitLink = try (do
|
|
|
|
char '`'
|
2007-02-14 06:57:23 +00:00
|
|
|
notFollowedBy (char '`') -- `` is marks start of inline code
|
2006-12-20 06:50:14 +00:00
|
|
|
label <- manyTill inline (try (do {spaces; char '<'}))
|
|
|
|
src <- manyTill (noneOf ">\n ") (char '>')
|
|
|
|
skipSpaces
|
|
|
|
string "`_"
|
|
|
|
return (Link (normalizeSpaces label)
|
|
|
|
(Src (removeLeadingTrailingSpace src) "")))
|
|
|
|
|
|
|
|
anonymousLinkEnding = try (do
|
|
|
|
char '_'
|
|
|
|
state <- getState
|
|
|
|
let anonKeys = stateKeyBlocks state
|
|
|
|
-- if there's a list of anon key refs (from previous pass), pop one off.
|
|
|
|
-- otherwise return an anon key ref for the next pass to take care of...
|
|
|
|
case anonKeys of
|
|
|
|
(Key [Str "_"] src):rest ->
|
|
|
|
do
|
|
|
|
setState (state { stateKeyBlocks = rest })
|
|
|
|
return src
|
|
|
|
otherwise -> return (Ref [Str "_"]))
|
|
|
|
|
|
|
|
referenceLink = try (do
|
|
|
|
char '`'
|
2007-02-14 06:57:23 +00:00
|
|
|
notFollowedBy (char '`')
|
2007-02-12 03:53:14 +00:00
|
|
|
label <- manyTill inline (char '`')
|
|
|
|
char '_'
|
2006-12-20 06:50:14 +00:00
|
|
|
src <- option (Ref []) anonymousLinkEnding
|
|
|
|
return (Link (normalizeSpaces label) src))
|
|
|
|
|
|
|
|
oneWordReferenceLink = try (do
|
|
|
|
label <- many1 alphaNum
|
|
|
|
char '_'
|
|
|
|
src <- option (Ref []) anonymousLinkEnding
|
|
|
|
notFollowedBy alphaNum -- because this_is_not a link
|
|
|
|
return (Link [Str label] src))
|
|
|
|
|
|
|
|
uriScheme = oneOfStrings [ "http://", "https://", "ftp://", "file://",
|
|
|
|
"mailto:", "news:", "telnet:" ]
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
uri = try (do
|
2006-12-20 06:50:14 +00:00
|
|
|
scheme <- uriScheme
|
|
|
|
identifier <- many1 (noneOf " \t\n")
|
|
|
|
return (scheme ++ identifier))
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
autoURI = try (do
|
2006-12-20 06:50:14 +00:00
|
|
|
src <- uri
|
|
|
|
return (Link [Str src] (Src src "")))
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
emailChar = alphaNum <|> oneOf "-+_."
|
|
|
|
|
|
|
|
emailAddress = try (do
|
2006-12-20 06:50:14 +00:00
|
|
|
firstLetter <- alphaNum
|
|
|
|
restAddr <- many emailChar
|
|
|
|
let addr = firstLetter:restAddr
|
|
|
|
char '@'
|
|
|
|
dom <- domain
|
|
|
|
return (addr ++ '@':dom))
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
domainChar = alphaNum <|> char '-'
|
|
|
|
|
|
|
|
domain = try (do
|
2006-12-20 06:50:14 +00:00
|
|
|
first <- many1 domainChar
|
|
|
|
dom <- many1 (try (do{ char '.'; many1 domainChar }))
|
|
|
|
return (joinWithSep "." (first:dom)))
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
autoEmail = try (do
|
2006-12-20 06:50:14 +00:00
|
|
|
src <- emailAddress
|
|
|
|
return (Link [Str src] (Src ("mailto:" ++ src) "")))
|
2006-10-17 14:22:29 +00:00
|
|
|
|
|
|
|
autoLink = autoURI <|> autoEmail
|
|
|
|
|
|
|
|
-- For now, we assume that all substitution references are for images.
|
2006-12-20 06:50:14 +00:00
|
|
|
image = try (do
|
|
|
|
char '|'
|
|
|
|
ref <- manyTill inline (char '|')
|
|
|
|
return (Image (normalizeSpaces ref) (Ref ref)))
|