c8a56a2864
Resolves Issue #73. Also made some structural changes to parsing of raw LaTeX environments. Previously there was a special block parser for LaTeX environments. It returned a Para element containing the raw TeX inline. This has been removed, and the raw LaTeX environment parser is now used in the rawLaTeXInline parser. The effect is exactly the same, except that we can now handle consecutive LaTeX and ConTeXt environments not separated by spaces. This new flexibility is required by the example in Issue #73: \placeformula \startformula L_{1} = L_{2} \stopformula API change: The LaTeX reader now exports rawLaTeXEnvironment' (which returns a string) rather than rawLaTeXEnvironment (which returns a block element). This is more likely to be useful in other applications. Added test cases for raw ConTeXt environments to markdown-reader-more.txt. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1405 788f1e2b-df1e-0410-8736-df70ead52e1b
1210 lines
40 KiB
Haskell
1210 lines
40 KiB
Haskell
{-# LANGUAGE CPP #-}
|
|
{-
|
|
Copyright (C) 2006-8 John MacFarlane <jgm@berkeley.edu>
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
-}
|
|
|
|
{- |
|
|
Module : Text.Pandoc.Readers.Markdown
|
|
Copyright : Copyright (C) 2006-8 John MacFarlane
|
|
License : GNU GPL, version 2 or above
|
|
|
|
Maintainer : John MacFarlane <jgm@berkeley.edu>
|
|
Stability : alpha
|
|
Portability : portable
|
|
|
|
Conversion of markdown-formatted plain text to 'Pandoc' document.
|
|
-}
|
|
module Text.Pandoc.Readers.Markdown (
|
|
readMarkdown
|
|
) where
|
|
|
|
import Data.List ( transpose, isPrefixOf, isSuffixOf, lookup, sortBy, findIndex )
|
|
import Data.Ord ( comparing )
|
|
import Data.Char ( isAlphaNum, isAlpha, isLower, isDigit )
|
|
import Data.Maybe
|
|
import Text.Pandoc.Definition
|
|
import Text.Pandoc.Shared
|
|
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment' )
|
|
import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag,
|
|
anyHtmlInlineTag, anyHtmlTag,
|
|
anyHtmlEndTag, htmlEndTag, extractTagType,
|
|
htmlBlockElement, unsanitaryURI )
|
|
import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
|
|
import Text.ParserCombinators.Parsec
|
|
|
|
-- | Read markdown from an input string and return a Pandoc document.
|
|
readMarkdown :: ParserState -> String -> Pandoc
|
|
readMarkdown state s = (readWith parseMarkdown) state (s ++ "\n\n")
|
|
|
|
--
|
|
-- Constants and data structure definitions
|
|
--
|
|
|
|
spaceChars :: [Char]
|
|
spaceChars = " \t"
|
|
|
|
bulletListMarkers :: [Char]
|
|
bulletListMarkers = "*+-"
|
|
|
|
hruleChars :: [Char]
|
|
hruleChars = "*-_"
|
|
|
|
setextHChars :: [Char]
|
|
setextHChars = "=-"
|
|
|
|
-- treat these as potentially non-text when parsing inline:
|
|
specialChars :: [Char]
|
|
specialChars = "\\[]*_~`<>$!^-.&'\"\8216\8217\8220\8221"
|
|
|
|
--
|
|
-- auxiliary functions
|
|
--
|
|
|
|
indentSpaces :: GenParser Char ParserState [Char]
|
|
indentSpaces = try $ do
|
|
state <- getState
|
|
let tabStop = stateTabStop state
|
|
try (count tabStop (char ' ')) <|>
|
|
(many (char ' ') >> string "\t") <?> "indentation"
|
|
|
|
nonindentSpaces :: GenParser Char ParserState [Char]
|
|
nonindentSpaces = do
|
|
state <- getState
|
|
let tabStop = stateTabStop state
|
|
sps <- many (char ' ')
|
|
if length sps < tabStop
|
|
then return sps
|
|
else unexpected "indented line"
|
|
|
|
-- | Fail unless we're at beginning of a line.
|
|
failUnlessBeginningOfLine :: GenParser tok st ()
|
|
failUnlessBeginningOfLine = do
|
|
pos <- getPosition
|
|
if sourceColumn pos == 1 then return () else fail "not beginning of line"
|
|
|
|
-- | Fail unless we're in "smart typography" mode.
|
|
failUnlessSmart :: GenParser tok ParserState ()
|
|
failUnlessSmart = do
|
|
state <- getState
|
|
if stateSmart state then return () else fail "Smart typography feature"
|
|
|
|
-- | Parse a sequence of inline elements between square brackets,
|
|
-- including inlines between balanced pairs of square brackets.
|
|
inlinesInBalancedBrackets :: GenParser Char ParserState Inline
|
|
-> GenParser Char ParserState [Inline]
|
|
inlinesInBalancedBrackets parser = try $ do
|
|
char '['
|
|
result <- manyTill ( (do lookAhead $ try $ do (Str res) <- parser
|
|
if res == "["
|
|
then return ()
|
|
else pzero
|
|
bal <- inlinesInBalancedBrackets parser
|
|
return $ [Str "["] ++ bal ++ [Str "]"])
|
|
<|> (count 1 parser))
|
|
(char ']')
|
|
return $ concat result
|
|
|
|
--
|
|
-- document structure
|
|
--
|
|
|
|
titleLine :: GenParser Char ParserState [Inline]
|
|
titleLine = try $ char '%' >> skipSpaces >> manyTill inline newline
|
|
|
|
authorsLine :: GenParser Char st [String]
|
|
authorsLine = try $ do
|
|
char '%'
|
|
skipSpaces
|
|
authors <- sepEndBy (many1 (noneOf ",;\n")) (oneOf ",;")
|
|
newline
|
|
return $ map (decodeCharacterReferences . removeLeadingTrailingSpace) authors
|
|
|
|
dateLine :: GenParser Char st String
|
|
dateLine = try $ do
|
|
char '%'
|
|
skipSpaces
|
|
date <- many (noneOf "\n")
|
|
newline
|
|
return $ decodeCharacterReferences $ removeTrailingSpace date
|
|
|
|
titleBlock :: GenParser Char ParserState ([Inline], [String], [Char])
|
|
titleBlock = try $ do
|
|
failIfStrict
|
|
title <- option [] titleLine
|
|
author <- option [] authorsLine
|
|
date <- option "" dateLine
|
|
optional blanklines
|
|
return (title, author, date)
|
|
|
|
parseMarkdown :: GenParser Char ParserState Pandoc
|
|
parseMarkdown = do
|
|
-- markdown allows raw HTML
|
|
updateState (\state -> state { stateParseRaw = True })
|
|
startPos <- getPosition
|
|
-- go through once just to get list of reference keys
|
|
-- docMinusKeys is the raw document with blanks where the keys were...
|
|
docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>=
|
|
return . concat
|
|
setInput docMinusKeys
|
|
setPosition startPos
|
|
st <- getState
|
|
-- go through again for notes unless strict...
|
|
if stateStrict st
|
|
then return ()
|
|
else do docMinusNotes <- manyTill (noteBlock <|> lineClump) eof >>=
|
|
return . concat
|
|
st' <- getState
|
|
let reversedNotes = stateNotes st'
|
|
updateState $ \s -> s { stateNotes = reverse reversedNotes }
|
|
setInput docMinusNotes
|
|
setPosition startPos
|
|
-- now parse it for real...
|
|
(title, author, date) <- option ([],[],"") titleBlock
|
|
blocks <- parseBlocks
|
|
return $ Pandoc (Meta title author date) $ filter (/= Null) blocks
|
|
|
|
--
|
|
-- initial pass for references and notes
|
|
--
|
|
|
|
referenceKey :: GenParser Char ParserState [Char]
|
|
referenceKey = try $ do
|
|
startPos <- getPosition
|
|
nonindentSpaces
|
|
lab <- reference
|
|
char ':'
|
|
skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[')
|
|
src <- (char '<' >> many (noneOf "> \n\t") >>~ char '>')
|
|
<|> many (noneOf " \n\t")
|
|
tit <- option "" referenceTitle
|
|
blanklines
|
|
endPos <- getPosition
|
|
let newkey = (lab, (removeTrailingSpace src, tit))
|
|
st <- getState
|
|
let oldkeys = stateKeys st
|
|
updateState $ \s -> s { stateKeys = newkey : oldkeys }
|
|
-- return blanks so line count isn't affected
|
|
return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
|
|
|
|
referenceTitle :: GenParser Char st String
|
|
referenceTitle = try $ do
|
|
skipSpaces >> optional newline >> skipSpaces
|
|
tit <- (charsInBalanced '(' ')' >>= return . unwords . words)
|
|
<|> do delim <- char '\'' <|> char '"'
|
|
manyTill anyChar (try (char delim >> skipSpaces >>
|
|
notFollowedBy (noneOf ")\n")))
|
|
return $ decodeCharacterReferences tit
|
|
|
|
noteMarker :: GenParser Char st [Char]
|
|
noteMarker = string "[^" >> manyTill (noneOf " \t\n") (char ']')
|
|
|
|
rawLine :: GenParser Char ParserState [Char]
|
|
rawLine = do
|
|
notFollowedBy blankline
|
|
notFollowedBy' noteMarker
|
|
contents <- many1 nonEndline
|
|
end <- option "" (newline >> optional indentSpaces >> return "\n")
|
|
return $ contents ++ end
|
|
|
|
rawLines :: GenParser Char ParserState [Char]
|
|
rawLines = many1 rawLine >>= return . concat
|
|
|
|
noteBlock :: GenParser Char ParserState [Char]
|
|
noteBlock = try $ do
|
|
startPos <- getPosition
|
|
ref <- noteMarker
|
|
char ':'
|
|
optional blankline
|
|
optional indentSpaces
|
|
raw <- sepBy rawLines (try (blankline >> indentSpaces))
|
|
optional blanklines
|
|
endPos <- getPosition
|
|
-- parse the extracted text, which may contain various block elements:
|
|
contents <- parseFromString parseBlocks $ (joinWithSep "\n" raw) ++ "\n\n"
|
|
let newnote = (ref, contents)
|
|
st <- getState
|
|
let oldnotes = stateNotes st
|
|
updateState $ \s -> s { stateNotes = newnote : oldnotes }
|
|
-- return blanks so line count isn't affected
|
|
return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
|
|
|
|
--
|
|
-- parsing blocks
|
|
--
|
|
|
|
parseBlocks :: GenParser Char ParserState [Block]
|
|
parseBlocks = manyTill block eof
|
|
|
|
block :: GenParser Char ParserState Block
|
|
block = do
|
|
st <- getState
|
|
choice (if stateStrict st
|
|
then [ header
|
|
, codeBlockIndented
|
|
, blockQuote
|
|
, hrule
|
|
, bulletList
|
|
, orderedList
|
|
, htmlBlock
|
|
, para
|
|
, plain
|
|
, nullBlock ]
|
|
else [ codeBlockDelimited
|
|
, header
|
|
, table
|
|
, codeBlockIndented
|
|
, blockQuote
|
|
, hrule
|
|
, bulletList
|
|
, orderedList
|
|
, definitionList
|
|
, para
|
|
, rawHtmlBlocks
|
|
, plain
|
|
, nullBlock ]) <?> "block"
|
|
|
|
--
|
|
-- header blocks
|
|
--
|
|
|
|
header :: GenParser Char ParserState Block
|
|
header = setextHeader <|> atxHeader <?> "header"
|
|
|
|
atxHeader :: GenParser Char ParserState Block
|
|
atxHeader = try $ do
|
|
level <- many1 (char '#') >>= return . length
|
|
notFollowedBy (char '.' <|> char ')') -- this would be a list
|
|
skipSpaces
|
|
text <- manyTill inline atxClosing >>= return . normalizeSpaces
|
|
return $ Header level text
|
|
|
|
atxClosing :: GenParser Char st [Char]
|
|
atxClosing = try $ skipMany (char '#') >> blanklines
|
|
|
|
setextHeader :: GenParser Char ParserState Block
|
|
setextHeader = try $ do
|
|
text <- many1Till inline newline
|
|
underlineChar <- oneOf setextHChars
|
|
many (char underlineChar)
|
|
blanklines
|
|
let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1
|
|
return $ Header level (normalizeSpaces text)
|
|
|
|
--
|
|
-- hrule block
|
|
--
|
|
|
|
hrule :: GenParser Char st Block
|
|
hrule = try $ do
|
|
skipSpaces
|
|
start <- oneOf hruleChars
|
|
count 2 (skipSpaces >> char start)
|
|
skipMany (skipSpaces >> char start)
|
|
newline
|
|
optional blanklines
|
|
return HorizontalRule
|
|
|
|
--
|
|
-- code blocks
|
|
--
|
|
|
|
indentedLine :: GenParser Char ParserState [Char]
|
|
indentedLine = indentSpaces >> manyTill anyChar newline >>= return . (++ "\n")
|
|
|
|
codeBlockDelimiter :: Maybe Int
|
|
-> GenParser Char st (Int, ([Char], [[Char]], [([Char], [Char])]))
|
|
codeBlockDelimiter len = try $ do
|
|
size <- case len of
|
|
Just l -> count l (char '~') >> many (char '~') >> return l
|
|
Nothing -> count 3 (char '~') >> many (char '~') >>=
|
|
return . (+ 3) . length
|
|
many spaceChar
|
|
attr <- option ([],[],[]) attributes
|
|
blankline
|
|
return (size, attr)
|
|
|
|
attributes :: GenParser Char st ([Char], [[Char]], [([Char], [Char])])
|
|
attributes = try $ do
|
|
char '{'
|
|
many spaceChar
|
|
attrs <- many (attribute >>~ many spaceChar)
|
|
char '}'
|
|
let (ids, classes, keyvals) = unzip3 attrs
|
|
let id' = if null ids then "" else head ids
|
|
return (id', concat classes, concat keyvals)
|
|
|
|
attribute :: GenParser Char st ([Char], [[Char]], [([Char], [Char])])
|
|
attribute = identifierAttr <|> classAttr <|> keyValAttr
|
|
|
|
identifier :: GenParser Char st [Char]
|
|
identifier = do
|
|
first <- letter
|
|
rest <- many alphaNum
|
|
return (first:rest)
|
|
|
|
identifierAttr :: GenParser Char st ([Char], [a], [a1])
|
|
identifierAttr = try $ do
|
|
char '#'
|
|
result <- identifier
|
|
return (result,[],[])
|
|
|
|
classAttr :: GenParser Char st ([Char], [[Char]], [a])
|
|
classAttr = try $ do
|
|
char '.'
|
|
result <- identifier
|
|
return ("",[result],[])
|
|
|
|
keyValAttr :: GenParser Char st ([Char], [a], [([Char], [Char])])
|
|
keyValAttr = try $ do
|
|
key <- identifier
|
|
char '='
|
|
char '"'
|
|
val <- manyTill (noneOf "\n") (char '"')
|
|
return ("",[],[(key,val)])
|
|
|
|
codeBlockDelimited :: GenParser Char st Block
|
|
codeBlockDelimited = try $ do
|
|
(size, attr) <- codeBlockDelimiter Nothing
|
|
contents <- manyTill anyLine (codeBlockDelimiter (Just size))
|
|
blanklines
|
|
return $ CodeBlock attr $ joinWithSep "\n" contents
|
|
|
|
codeBlockIndented :: GenParser Char ParserState Block
|
|
codeBlockIndented = do
|
|
contents <- many1 (indentedLine <|>
|
|
try (do b <- blanklines
|
|
l <- indentedLine
|
|
return $ b ++ l))
|
|
optional blanklines
|
|
return $ CodeBlock ("",[],[]) $ stripTrailingNewlines $ concat contents
|
|
|
|
--
|
|
-- block quotes
|
|
--
|
|
|
|
emailBlockQuoteStart :: GenParser Char ParserState Char
|
|
emailBlockQuoteStart = try $ nonindentSpaces >> char '>' >>~ optional (char ' ')
|
|
|
|
emailBlockQuote :: GenParser Char ParserState [[Char]]
|
|
emailBlockQuote = try $ do
|
|
emailBlockQuoteStart
|
|
raw <- sepBy (many (nonEndline <|>
|
|
(try (endline >> notFollowedBy emailBlockQuoteStart >>
|
|
return '\n'))))
|
|
(try (newline >> emailBlockQuoteStart))
|
|
newline <|> (eof >> return '\n')
|
|
optional blanklines
|
|
return raw
|
|
|
|
blockQuote :: GenParser Char ParserState Block
|
|
blockQuote = do
|
|
raw <- emailBlockQuote
|
|
-- parse the extracted block, which may contain various block elements:
|
|
contents <- parseFromString parseBlocks $ (joinWithSep "\n" raw) ++ "\n\n"
|
|
return $ BlockQuote contents
|
|
|
|
--
|
|
-- list blocks
|
|
--
|
|
|
|
bulletListStart :: GenParser Char ParserState ()
|
|
bulletListStart = try $ do
|
|
optional newline -- if preceded by a Plain block in a list context
|
|
nonindentSpaces
|
|
notFollowedBy' hrule -- because hrules start out just like lists
|
|
oneOf bulletListMarkers
|
|
spaceChar
|
|
skipSpaces
|
|
|
|
anyOrderedListStart :: GenParser Char ParserState (Int, ListNumberStyle, ListNumberDelim)
|
|
anyOrderedListStart = try $ do
|
|
optional newline -- if preceded by a Plain block in a list context
|
|
nonindentSpaces
|
|
notFollowedBy $ string "p." >> spaceChar >> digit -- page number
|
|
state <- getState
|
|
if stateStrict state
|
|
then do many1 digit
|
|
char '.'
|
|
spaceChar
|
|
return (1, DefaultStyle, DefaultDelim)
|
|
else anyOrderedListMarker >>~ spaceChar
|
|
|
|
orderedListStart :: ListNumberStyle
|
|
-> ListNumberDelim
|
|
-> GenParser Char ParserState ()
|
|
orderedListStart style delim = try $ do
|
|
optional newline -- if preceded by a Plain block in a list context
|
|
nonindentSpaces
|
|
state <- getState
|
|
num <- if stateStrict state
|
|
then do many1 digit
|
|
char '.'
|
|
return 1
|
|
else orderedListMarker style delim
|
|
if delim == Period && (style == UpperAlpha || (style == UpperRoman &&
|
|
num `elem` [1, 5, 10, 50, 100, 500, 1000]))
|
|
then char '\t' <|> (spaceChar >> spaceChar)
|
|
else spaceChar
|
|
skipSpaces
|
|
|
|
-- parse a line of a list item (start = parser for beginning of list item)
|
|
listLine :: GenParser Char ParserState ()
|
|
-> GenParser Char ParserState [Char]
|
|
listLine start = try $ do
|
|
notFollowedBy' start
|
|
notFollowedBy blankline
|
|
notFollowedBy' (do indentSpaces
|
|
many (spaceChar)
|
|
bulletListStart <|> (anyOrderedListStart >> return ()))
|
|
line <- manyTill anyChar newline
|
|
return $ line ++ "\n"
|
|
|
|
-- parse raw text for one list item, excluding start marker and continuations
|
|
rawListItem :: GenParser Char ParserState ()
|
|
-> GenParser Char ParserState [Char]
|
|
rawListItem start = try $ do
|
|
start
|
|
result <- many1 (listLine start)
|
|
blanks <- many blankline
|
|
return $ concat result ++ blanks
|
|
|
|
-- continuation of a list item - indented and separated by blankline
|
|
-- or (in compact lists) endline.
|
|
-- note: nested lists are parsed as continuations
|
|
listContinuation :: GenParser Char ParserState () -> GenParser Char ParserState [Char]
|
|
listContinuation start = try $ do
|
|
lookAhead indentSpaces
|
|
result <- many1 (listContinuationLine start)
|
|
blanks <- many blankline
|
|
return $ concat result ++ blanks
|
|
|
|
listContinuationLine :: GenParser Char ParserState ()
|
|
-> GenParser Char ParserState [Char]
|
|
listContinuationLine start = try $ do
|
|
notFollowedBy blankline
|
|
notFollowedBy' start
|
|
optional indentSpaces
|
|
result <- manyTill anyChar newline
|
|
return $ result ++ "\n"
|
|
|
|
listItem :: GenParser Char ParserState ()
|
|
-> GenParser Char ParserState [Block]
|
|
listItem start = try $ do
|
|
first <- rawListItem start
|
|
continuations <- many (listContinuation start)
|
|
-- parsing with ListItemState forces markers at beginning of lines to
|
|
-- count as list item markers, even if not separated by blank space.
|
|
-- see definition of "endline"
|
|
state <- getState
|
|
let oldContext = stateParserContext state
|
|
setState $ state {stateParserContext = ListItemState}
|
|
-- parse the extracted block, which may contain various block elements:
|
|
let raw = concat (first:continuations)
|
|
contents <- parseFromString parseBlocks raw
|
|
updateState (\st -> st {stateParserContext = oldContext})
|
|
return contents
|
|
|
|
orderedList :: GenParser Char ParserState Block
|
|
orderedList = try $ do
|
|
(start, style, delim) <- lookAhead anyOrderedListStart
|
|
items <- many1 (listItem (orderedListStart style delim))
|
|
return $ OrderedList (start, style, delim) $ compactify items
|
|
|
|
bulletList :: GenParser Char ParserState Block
|
|
bulletList = many1 (listItem bulletListStart) >>=
|
|
return . BulletList . compactify
|
|
|
|
-- definition lists
|
|
|
|
definitionListItem :: GenParser Char ParserState ([Inline], [Block])
|
|
definitionListItem = try $ do
|
|
notFollowedBy blankline
|
|
notFollowedBy' indentSpaces
|
|
-- first, see if this has any chance of being a definition list:
|
|
lookAhead (anyLine >> char ':')
|
|
term <- manyTill inline newline
|
|
raw <- many1 defRawBlock
|
|
state <- getState
|
|
let oldContext = stateParserContext state
|
|
-- parse the extracted block, which may contain various block elements:
|
|
contents <- parseFromString parseBlocks $ concat raw
|
|
updateState (\st -> st {stateParserContext = oldContext})
|
|
return ((normalizeSpaces term), contents)
|
|
|
|
defRawBlock :: GenParser Char ParserState [Char]
|
|
defRawBlock = try $ do
|
|
char ':'
|
|
state <- getState
|
|
let tabStop = stateTabStop state
|
|
try (count (tabStop - 1) (char ' ')) <|> (many (char ' ') >> string "\t")
|
|
firstline <- anyLine
|
|
rawlines <- many (notFollowedBy blankline >> indentSpaces >> anyLine)
|
|
trailing <- option "" blanklines
|
|
return $ firstline ++ "\n" ++ unlines rawlines ++ trailing
|
|
|
|
definitionList :: GenParser Char ParserState Block
|
|
definitionList = do
|
|
items <- many1 definitionListItem
|
|
let (terms, defs) = unzip items
|
|
let defs' = compactify defs
|
|
let items' = zip terms defs'
|
|
return $ DefinitionList items'
|
|
|
|
--
|
|
-- paragraph block
|
|
--
|
|
|
|
isHtmlOrBlank :: Inline -> Bool
|
|
isHtmlOrBlank (HtmlInline _) = True
|
|
isHtmlOrBlank (Space) = True
|
|
isHtmlOrBlank (LineBreak) = True
|
|
isHtmlOrBlank _ = False
|
|
|
|
para :: GenParser Char ParserState Block
|
|
para = try $ do
|
|
result <- many1 inline
|
|
if all isHtmlOrBlank result
|
|
then fail "treat as raw HTML"
|
|
else return ()
|
|
newline
|
|
blanklines <|> do st <- getState
|
|
if stateStrict st
|
|
then lookAhead (blockQuote <|> header) >> return ""
|
|
else pzero
|
|
return $ Para $ normalizeSpaces result
|
|
|
|
plain :: GenParser Char ParserState Block
|
|
plain = many1 inline >>= return . Plain . normalizeSpaces
|
|
|
|
--
|
|
-- raw html
|
|
--
|
|
|
|
htmlElement :: GenParser Char ParserState [Char]
|
|
htmlElement = strictHtmlBlock <|> htmlBlockElement <?> "html element"
|
|
|
|
htmlBlock :: GenParser Char ParserState Block
|
|
htmlBlock = try $ do
|
|
failUnlessBeginningOfLine
|
|
first <- htmlElement
|
|
finalSpace <- many (oneOf spaceChars)
|
|
finalNewlines <- many newline
|
|
return $ RawHtml $ first ++ finalSpace ++ finalNewlines
|
|
|
|
-- True if tag is self-closing
|
|
isSelfClosing :: [Char] -> Bool
|
|
isSelfClosing tag =
|
|
isSuffixOf "/>" $ filter (not . (`elem` " \n\t")) tag
|
|
|
|
strictHtmlBlock :: GenParser Char ParserState [Char]
|
|
strictHtmlBlock = try $ do
|
|
tag <- anyHtmlBlockTag
|
|
let tag' = extractTagType tag
|
|
if isSelfClosing tag || tag' == "hr"
|
|
then return tag
|
|
else do contents <- many (notFollowedBy' (htmlEndTag tag') >>
|
|
(htmlElement <|> (count 1 anyChar)))
|
|
end <- htmlEndTag tag'
|
|
return $ tag ++ concat contents ++ end
|
|
|
|
rawHtmlBlocks :: GenParser Char ParserState Block
|
|
rawHtmlBlocks = do
|
|
htmlBlocks <- many1 $ do (RawHtml blk) <- rawHtmlBlock
|
|
sps <- do sp1 <- many spaceChar
|
|
sp2 <- option "" (blankline >> return "\n")
|
|
sp3 <- many spaceChar
|
|
sp4 <- option "" blanklines
|
|
return $ sp1 ++ sp2 ++ sp3 ++ sp4
|
|
-- note: we want raw html to be able to
|
|
-- precede a code block, when separated
|
|
-- by a blank line
|
|
return $ blk ++ sps
|
|
let combined = concat htmlBlocks
|
|
let combined' = if last combined == '\n' then init combined else combined
|
|
return $ RawHtml combined'
|
|
|
|
--
|
|
-- Tables
|
|
--
|
|
|
|
-- Parse a dashed line with optional trailing spaces; return its length
|
|
-- and the length including trailing space.
|
|
dashedLine :: Char
|
|
-> GenParser Char st (Int, Int)
|
|
dashedLine ch = do
|
|
dashes <- many1 (char ch)
|
|
sp <- many spaceChar
|
|
return $ (length dashes, length $ dashes ++ sp)
|
|
|
|
-- Parse a table header with dashed lines of '-' preceded by
|
|
-- one line of text.
|
|
simpleTableHeader :: GenParser Char ParserState ([[Char]], [Alignment], [Int])
|
|
simpleTableHeader = try $ do
|
|
rawContent <- anyLine
|
|
initSp <- nonindentSpaces
|
|
dashes <- many1 (dashedLine '-')
|
|
newline
|
|
let (lengths, lines') = unzip dashes
|
|
let indices = scanl (+) (length initSp) lines'
|
|
let rawHeads = tail $ splitByIndices (init indices) rawContent
|
|
let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths
|
|
return (rawHeads, aligns, indices)
|
|
|
|
-- Parse a table footer - dashed lines followed by blank line.
|
|
tableFooter :: GenParser Char ParserState [Char]
|
|
tableFooter = try $ nonindentSpaces >> many1 (dashedLine '-') >> blanklines
|
|
|
|
-- Parse a table separator - dashed line.
|
|
tableSep :: GenParser Char ParserState String
|
|
tableSep = try $ nonindentSpaces >> many1 (dashedLine '-') >> string "\n"
|
|
|
|
-- Parse a raw line and split it into chunks by indices.
|
|
rawTableLine :: [Int]
|
|
-> GenParser Char ParserState [String]
|
|
rawTableLine indices = do
|
|
notFollowedBy' (blanklines <|> tableFooter)
|
|
line <- many1Till anyChar newline
|
|
return $ map removeLeadingTrailingSpace $ tail $
|
|
splitByIndices (init indices) line
|
|
|
|
-- Parse a table line and return a list of lists of blocks (columns).
|
|
tableLine :: [Int]
|
|
-> GenParser Char ParserState [[Block]]
|
|
tableLine indices = rawTableLine indices >>= mapM (parseFromString (many plain))
|
|
|
|
-- Parse a multiline table row and return a list of blocks (columns).
|
|
multilineRow :: [Int]
|
|
-> GenParser Char ParserState [[Block]]
|
|
multilineRow indices = do
|
|
colLines <- many1 (rawTableLine indices)
|
|
optional blanklines
|
|
let cols = map unlines $ transpose colLines
|
|
mapM (parseFromString (many plain)) cols
|
|
|
|
-- Calculate relative widths of table columns, based on indices
|
|
widthsFromIndices :: Int -- Number of columns on terminal
|
|
-> [Int] -- Indices
|
|
-> [Float] -- Fractional relative sizes of columns
|
|
widthsFromIndices _ [] = []
|
|
widthsFromIndices numColumns indices =
|
|
let lengths = zipWith (-) indices (0:indices)
|
|
totLength = sum lengths
|
|
quotient = if totLength > numColumns
|
|
then fromIntegral totLength
|
|
else fromIntegral numColumns
|
|
fracs = map (\l -> (fromIntegral l) / quotient) lengths in
|
|
tail fracs
|
|
|
|
-- Parses a table caption: inlines beginning with 'Table:'
|
|
-- and followed by blank lines.
|
|
tableCaption :: GenParser Char ParserState [Inline]
|
|
tableCaption = try $ do
|
|
nonindentSpaces
|
|
string "Table:"
|
|
result <- many1 inline
|
|
blanklines
|
|
return $ normalizeSpaces result
|
|
|
|
-- Parse a table using 'headerParser', 'lineParser', and 'footerParser'.
|
|
tableWith :: GenParser Char ParserState ([[Char]], [Alignment], [Int])
|
|
-> ([Int] -> GenParser Char ParserState [[Block]])
|
|
-> GenParser Char ParserState end
|
|
-> GenParser Char ParserState Block
|
|
tableWith headerParser lineParser footerParser = try $ do
|
|
(rawHeads, aligns, indices) <- headerParser
|
|
lines' <- many1Till (lineParser indices) footerParser
|
|
caption <- option [] tableCaption
|
|
heads <- mapM (parseFromString (many plain)) rawHeads
|
|
state <- getState
|
|
let numColumns = stateColumns state
|
|
let widths = widthsFromIndices numColumns indices
|
|
return $ Table caption aligns widths heads lines'
|
|
|
|
-- Parse a simple table with '---' header and one line per row.
|
|
simpleTable :: GenParser Char ParserState Block
|
|
simpleTable = tableWith simpleTableHeader tableLine blanklines
|
|
|
|
-- Parse a multiline table: starts with row of '-' on top, then header
|
|
-- (which may be multiline), then the rows,
|
|
-- which may be multiline, separated by blank lines, and
|
|
-- ending with a footer (dashed line followed by blank line).
|
|
multilineTable :: GenParser Char ParserState Block
|
|
multilineTable = tableWith multilineTableHeader multilineRow tableFooter
|
|
|
|
multilineTableHeader :: GenParser Char ParserState ([String], [Alignment], [Int])
|
|
multilineTableHeader = try $ do
|
|
tableSep
|
|
rawContent <- many1 (notFollowedBy' tableSep >> many1Till anyChar newline)
|
|
initSp <- nonindentSpaces
|
|
dashes <- many1 (dashedLine '-')
|
|
newline
|
|
let (lengths, lines') = unzip dashes
|
|
let indices = scanl (+) (length initSp) lines'
|
|
let rawHeadsList = transpose $ map
|
|
(\ln -> tail $ splitByIndices (init indices) ln)
|
|
rawContent
|
|
let rawHeads = map (joinWithSep " ") rawHeadsList
|
|
let aligns = zipWith alignType rawHeadsList lengths
|
|
return ((map removeLeadingTrailingSpace rawHeads), aligns, indices)
|
|
|
|
-- Returns an alignment type for a table, based on a list of strings
|
|
-- (the rows of the column header) and a number (the length of the
|
|
-- dashed line under the rows.
|
|
alignType :: [String]
|
|
-> Int
|
|
-> Alignment
|
|
alignType [] _ = AlignDefault
|
|
alignType strLst len =
|
|
let s = head $ sortBy (comparing length) $
|
|
map removeTrailingSpace strLst
|
|
leftSpace = if null s then False else (s !! 0) `elem` " \t"
|
|
rightSpace = length s < len || (s !! (len - 1)) `elem` " \t"
|
|
in case (leftSpace, rightSpace) of
|
|
(True, False) -> AlignRight
|
|
(False, True) -> AlignLeft
|
|
(True, True) -> AlignCenter
|
|
(False, False) -> AlignDefault
|
|
|
|
table :: GenParser Char ParserState Block
|
|
table = simpleTable <|> multilineTable <?> "table"
|
|
|
|
--
|
|
-- inline
|
|
--
|
|
|
|
inline :: GenParser Char ParserState Inline
|
|
inline = choice inlineParsers <?> "inline"
|
|
|
|
inlineParsers :: [GenParser Char ParserState Inline]
|
|
inlineParsers = [ abbrev
|
|
, str
|
|
, smartPunctuation
|
|
, whitespace
|
|
, endline
|
|
, code
|
|
, charRef
|
|
, strong
|
|
, emph
|
|
, note
|
|
, inlineNote
|
|
, link
|
|
#ifdef _CITEPROC
|
|
, inlineCitation
|
|
#endif
|
|
, image
|
|
, math
|
|
, strikeout
|
|
, superscript
|
|
, subscript
|
|
, autoLink
|
|
, rawHtmlInline'
|
|
, rawLaTeXInline'
|
|
, escapedChar
|
|
, symbol
|
|
, ltSign ]
|
|
|
|
inlineNonLink :: GenParser Char ParserState Inline
|
|
inlineNonLink = (choice $
|
|
map (\parser -> try (parser >>= failIfLink)) inlineParsers)
|
|
<?> "inline (non-link)"
|
|
|
|
failIfLink :: Inline -> GenParser tok st Inline
|
|
failIfLink (Link _ _) = pzero
|
|
failIfLink elt = return elt
|
|
|
|
escapedChar :: GenParser Char ParserState Inline
|
|
escapedChar = do
|
|
char '\\'
|
|
state <- getState
|
|
result <- option '\\' $ if stateStrict state
|
|
then oneOf "\\`*_{}[]()>#+-.!~"
|
|
else satisfy (not . isAlphaNum)
|
|
let result' = if result == ' '
|
|
then '\160' -- '\ ' is a nonbreaking space
|
|
else result
|
|
return $ Str [result']
|
|
|
|
ltSign :: GenParser Char ParserState Inline
|
|
ltSign = do
|
|
st <- getState
|
|
if stateStrict st
|
|
then char '<'
|
|
else notFollowedBy' rawHtmlBlocks >> char '<' -- unless it starts html
|
|
return $ Str ['<']
|
|
|
|
specialCharsMinusLt :: [Char]
|
|
specialCharsMinusLt = filter (/= '<') specialChars
|
|
|
|
symbol :: GenParser Char ParserState Inline
|
|
symbol = do
|
|
result <- oneOf specialCharsMinusLt
|
|
return $ Str [result]
|
|
|
|
-- parses inline code, between n `s and n `s
|
|
code :: GenParser Char ParserState Inline
|
|
code = try $ do
|
|
starts <- many1 (char '`')
|
|
skipSpaces
|
|
result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|>
|
|
(char '\n' >> return " "))
|
|
(try (skipSpaces >> count (length starts) (char '`') >>
|
|
notFollowedBy (char '`')))
|
|
return $ Code $ removeLeadingTrailingSpace $ concat result
|
|
|
|
mathWord :: GenParser Char st [Char]
|
|
mathWord = many1 ((noneOf " \t\n\\$") <|>
|
|
(try (char '\\') >>~ notFollowedBy (char '$')))
|
|
|
|
math :: GenParser Char ParserState Inline
|
|
math = try $ do
|
|
failIfStrict
|
|
char '$'
|
|
notFollowedBy space
|
|
words' <- sepBy1 mathWord (many1 (spaceChar <|> (newline >>~ notFollowedBy' blankline)))
|
|
char '$'
|
|
notFollowedBy digit
|
|
return $ Math $ joinWithSep " " words'
|
|
|
|
emph :: GenParser Char ParserState Inline
|
|
emph = ((enclosed (char '*') (notFollowedBy' strong >> char '*') inline) <|>
|
|
(enclosed (char '_') (notFollowedBy' strong >> char '_' >>
|
|
notFollowedBy alphaNum) inline)) >>=
|
|
return . Emph . normalizeSpaces
|
|
|
|
strong :: GenParser Char ParserState Inline
|
|
strong = ((enclosed (string "**") (try $ string "**") inline) <|>
|
|
(enclosed (string "__") (try $ string "__") inline)) >>=
|
|
return . Strong . normalizeSpaces
|
|
|
|
strikeout :: GenParser Char ParserState Inline
|
|
strikeout = failIfStrict >> enclosed (string "~~") (try $ string "~~") inline >>=
|
|
return . Strikeout . normalizeSpaces
|
|
|
|
superscript :: GenParser Char ParserState Inline
|
|
superscript = failIfStrict >> enclosed (char '^') (char '^')
|
|
(notFollowedBy' whitespace >> inline) >>= -- may not contain Space
|
|
return . Superscript
|
|
|
|
subscript :: GenParser Char ParserState Inline
|
|
subscript = failIfStrict >> enclosed (char '~') (char '~')
|
|
(notFollowedBy' whitespace >> inline) >>= -- may not contain Space
|
|
return . Subscript
|
|
|
|
abbrev :: GenParser Char ParserState Inline
|
|
abbrev = failUnlessSmart >>
|
|
(assumedAbbrev <|> knownAbbrev) >>= return . Str . (++ ".\160")
|
|
|
|
-- an string of letters followed by a period that does not end a sentence
|
|
-- is assumed to be an abbreviation. It is assumed that sentences don't
|
|
-- start with lowercase letters or numerals.
|
|
assumedAbbrev :: GenParser Char ParserState [Char]
|
|
assumedAbbrev = try $ do
|
|
result <- many1 $ satisfy isAlpha
|
|
string ". "
|
|
lookAhead $ satisfy (\x -> isLower x || isDigit x)
|
|
return result
|
|
|
|
-- these strings are treated as abbreviations even if they are followed
|
|
-- by a capital letter (such as a name).
|
|
knownAbbrev :: GenParser Char ParserState [Char]
|
|
knownAbbrev = try $ do
|
|
result <- oneOfStrings [ "Mr", "Mrs", "Ms", "Capt", "Dr", "Prof", "Gen",
|
|
"Gov", "e.g", "i.e", "Sgt", "St", "vol", "vs",
|
|
"Sen", "Rep", "Pres", "Hon", "Rev" ]
|
|
string ". "
|
|
return result
|
|
|
|
smartPunctuation :: GenParser Char ParserState Inline
|
|
smartPunctuation = failUnlessSmart >>
|
|
choice [ quoted, apostrophe, dash, ellipses ]
|
|
|
|
apostrophe :: GenParser Char ParserState Inline
|
|
apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe
|
|
|
|
quoted :: GenParser Char ParserState Inline
|
|
quoted = doubleQuoted <|> singleQuoted
|
|
|
|
withQuoteContext :: QuoteContext
|
|
-> (GenParser Char ParserState Inline)
|
|
-> GenParser Char ParserState Inline
|
|
withQuoteContext context parser = do
|
|
oldState <- getState
|
|
let oldQuoteContext = stateQuoteContext oldState
|
|
setState oldState { stateQuoteContext = context }
|
|
result <- parser
|
|
newState <- getState
|
|
setState newState { stateQuoteContext = oldQuoteContext }
|
|
return result
|
|
|
|
singleQuoted :: GenParser Char ParserState Inline
|
|
singleQuoted = try $ do
|
|
singleQuoteStart
|
|
withQuoteContext InSingleQuote $ many1Till inline singleQuoteEnd >>=
|
|
return . Quoted SingleQuote . normalizeSpaces
|
|
|
|
doubleQuoted :: GenParser Char ParserState Inline
|
|
doubleQuoted = try $ do
|
|
doubleQuoteStart
|
|
withQuoteContext InDoubleQuote $ many1Till inline doubleQuoteEnd >>=
|
|
return . Quoted DoubleQuote . normalizeSpaces
|
|
|
|
failIfInQuoteContext :: QuoteContext -> GenParser tok ParserState ()
|
|
failIfInQuoteContext context = do
|
|
st <- getState
|
|
if stateQuoteContext st == context
|
|
then fail "already inside quotes"
|
|
else return ()
|
|
|
|
singleQuoteStart :: GenParser Char ParserState Char
|
|
singleQuoteStart = do
|
|
failIfInQuoteContext InSingleQuote
|
|
char '\8216' <|>
|
|
(try $ do char '\''
|
|
notFollowedBy (oneOf ")!],.;:-? \t\n")
|
|
notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >>
|
|
satisfy (not . isAlphaNum)))
|
|
-- possess/contraction
|
|
return '\'')
|
|
|
|
singleQuoteEnd :: GenParser Char st Char
|
|
singleQuoteEnd = try $ do
|
|
char '\8217' <|> char '\''
|
|
notFollowedBy alphaNum
|
|
return '\''
|
|
|
|
doubleQuoteStart :: GenParser Char ParserState Char
|
|
doubleQuoteStart = do
|
|
failIfInQuoteContext InDoubleQuote
|
|
char '\8220' <|>
|
|
(try $ do char '"'
|
|
notFollowedBy (oneOf " \t\n")
|
|
return '"')
|
|
|
|
doubleQuoteEnd :: GenParser Char st Char
|
|
doubleQuoteEnd = char '\8221' <|> char '"'
|
|
|
|
ellipses :: GenParser Char st Inline
|
|
ellipses = oneOfStrings ["...", " . . . ", ". . .", " . . ."] >> return Ellipses
|
|
|
|
dash :: GenParser Char st Inline
|
|
dash = enDash <|> emDash
|
|
|
|
enDash :: GenParser Char st Inline
|
|
enDash = try $ char '-' >> notFollowedBy (noneOf "0123456789") >> return EnDash
|
|
|
|
emDash :: GenParser Char st Inline
|
|
emDash = oneOfStrings ["---", "--"] >> return EmDash
|
|
|
|
whitespace :: GenParser Char ParserState Inline
|
|
whitespace = do
|
|
sps <- many1 (oneOf spaceChars)
|
|
if length sps >= 2
|
|
then option Space (endline >> return LineBreak)
|
|
else return Space <?> "whitespace"
|
|
|
|
nonEndline :: GenParser Char st Char
|
|
nonEndline = satisfy (/='\n')
|
|
|
|
strChar :: GenParser Char st Char
|
|
strChar = noneOf (specialChars ++ spaceChars ++ "\n")
|
|
|
|
str :: GenParser Char st Inline
|
|
str = many1 strChar >>= return . Str
|
|
|
|
-- an endline character that can be treated as a space, not a structural break
|
|
endline :: GenParser Char ParserState Inline
|
|
endline = try $ do
|
|
newline
|
|
notFollowedBy blankline
|
|
st <- getState
|
|
if stateStrict st
|
|
then do notFollowedBy emailBlockQuoteStart
|
|
notFollowedBy (char '#') -- atx header
|
|
else return ()
|
|
-- parse potential list-starts differently if in a list:
|
|
if stateParserContext st == ListItemState
|
|
then notFollowedBy' (bulletListStart <|>
|
|
(anyOrderedListStart >> return ()))
|
|
else return ()
|
|
return Space
|
|
|
|
--
|
|
-- links
|
|
--
|
|
|
|
-- a reference label for a link
|
|
reference :: GenParser Char ParserState [Inline]
|
|
reference = do notFollowedBy' (string "[^") -- footnote reference
|
|
result <- inlinesInBalancedBrackets inlineNonLink
|
|
return $ normalizeSpaces result
|
|
|
|
-- source for a link, with optional title
|
|
source :: GenParser Char st (String, [Char])
|
|
source =
|
|
(try $ charsInBalanced '(' ')' >>= parseFromString source') <|>
|
|
-- the following is needed for cases like: [ref](/url(a).
|
|
(enclosed (char '(') (char ')') anyChar >>=
|
|
parseFromString source')
|
|
|
|
-- auxiliary function for source
|
|
source' :: GenParser Char st (String, [Char])
|
|
source' = do
|
|
skipSpaces
|
|
src <- try (char '<' >>
|
|
many (optional (char '\\') >> noneOf "> \t\n") >>~
|
|
char '>')
|
|
<|> many (optional (char '\\') >> noneOf " \t\n")
|
|
tit <- option "" linkTitle
|
|
skipSpaces
|
|
eof
|
|
return (removeTrailingSpace src, tit)
|
|
|
|
linkTitle :: GenParser Char st String
|
|
linkTitle = try $ do
|
|
(many1 spaceChar >> option '\n' newline) <|> newline
|
|
skipSpaces
|
|
delim <- oneOf "'\""
|
|
tit <- manyTill (optional (char '\\') >> anyChar)
|
|
(try (char delim >> skipSpaces >> eof))
|
|
return $ decodeCharacterReferences tit
|
|
|
|
link :: GenParser Char ParserState Inline
|
|
link = try $ do
|
|
lab <- reference
|
|
src <- source <|> referenceLink lab
|
|
sanitize <- getState >>= return . stateSanitizeHTML
|
|
if sanitize && unsanitaryURI (fst src)
|
|
then fail "Unsanitary URI"
|
|
else return $ Link lab src
|
|
|
|
-- a link like [this][ref] or [this][] or [this]
|
|
referenceLink :: [Inline]
|
|
-> GenParser Char ParserState (String, [Char])
|
|
referenceLink lab = do
|
|
ref <- option [] (try (optional (char ' ') >>
|
|
optional (newline >> skipSpaces) >> reference))
|
|
let ref' = if null ref then lab else ref
|
|
state <- getState
|
|
case lookupKeySrc (stateKeys state) ref' of
|
|
Nothing -> fail "no corresponding key"
|
|
Just target -> return target
|
|
|
|
autoLink :: GenParser Char ParserState Inline
|
|
autoLink = try $ do
|
|
char '<'
|
|
src <- uri <|> (emailAddress >>= (return . ("mailto:" ++)))
|
|
char '>'
|
|
let src' = if "mailto:" `isPrefixOf` src
|
|
then drop 7 src
|
|
else src
|
|
st <- getState
|
|
let sanitize = stateSanitizeHTML st
|
|
if sanitize && unsanitaryURI src
|
|
then fail "Unsanitary URI"
|
|
else return $ if stateStrict st
|
|
then Link [Str src'] (src, "")
|
|
else Link [Code src'] (src, "")
|
|
|
|
image :: GenParser Char ParserState Inline
|
|
image = try $ do
|
|
char '!'
|
|
(Link lab src) <- link
|
|
return $ Image lab src
|
|
|
|
note :: GenParser Char ParserState Inline
|
|
note = try $ do
|
|
failIfStrict
|
|
ref <- noteMarker
|
|
state <- getState
|
|
let notes = stateNotes state
|
|
case lookup ref notes of
|
|
Nothing -> fail "note not found"
|
|
Just contents -> return $ Note contents
|
|
|
|
inlineNote :: GenParser Char ParserState Inline
|
|
inlineNote = try $ do
|
|
failIfStrict
|
|
char '^'
|
|
contents <- inlinesInBalancedBrackets inline
|
|
return $ Note [Para contents]
|
|
|
|
rawLaTeXInline' :: GenParser Char ParserState Inline
|
|
rawLaTeXInline' = do
|
|
failIfStrict
|
|
(rawConTeXtEnvironment' >>= return . TeX)
|
|
<|> (rawLaTeXEnvironment' >>= return . TeX)
|
|
<|> rawLaTeXInline
|
|
|
|
rawConTeXtEnvironment' :: GenParser Char st String
|
|
rawConTeXtEnvironment' = try $ do
|
|
string "\\start"
|
|
completion <- inBrackets (letter <|> digit <|> spaceChar)
|
|
<|> (many1 letter)
|
|
contents <- manyTill (rawConTeXtEnvironment' <|> (count 1 anyChar))
|
|
(try $ string "\\stop" >> string completion)
|
|
return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion
|
|
|
|
inBrackets :: (GenParser Char st Char) -> GenParser Char st String
|
|
inBrackets parser = do
|
|
char '['
|
|
contents <- many parser
|
|
char ']'
|
|
return $ "[" ++ contents ++ "]"
|
|
|
|
rawHtmlInline' :: GenParser Char ParserState Inline
|
|
rawHtmlInline' = do
|
|
st <- getState
|
|
result <- if stateStrict st
|
|
then choice [htmlBlockElement, anyHtmlTag, anyHtmlEndTag]
|
|
else anyHtmlInlineTag
|
|
return $ HtmlInline result
|
|
|
|
#ifdef _CITEPROC
|
|
inlineCitation :: GenParser Char ParserState Inline
|
|
inlineCitation = try $ do
|
|
failIfStrict
|
|
cit <- citeMarker
|
|
let citations = readWith parseCitation defaultParserState cit
|
|
mr <- mapM chkCit citations
|
|
if catMaybes mr /= []
|
|
then return $ Cite citations []
|
|
else fail "no citation found"
|
|
|
|
chkCit :: Target -> GenParser Char ParserState (Maybe Target)
|
|
chkCit t = do
|
|
st <- getState
|
|
case lookupKeySrc (stateKeys st) [Str $ fst t] of
|
|
Just _ -> fail "This is a link"
|
|
Nothing -> if elem (fst t) $ stateCitations st
|
|
then return $ Just t
|
|
else return $ Nothing
|
|
|
|
citeMarker :: GenParser Char ParserState String
|
|
citeMarker = char '[' >> manyTill ( noneOf "\n" <|> (newline >>~ notFollowedBy blankline) ) (char ']')
|
|
|
|
parseCitation :: GenParser Char ParserState [(String,String)]
|
|
parseCitation = try $ sepBy (parseLabel) (oneOf ";")
|
|
|
|
parseLabel :: GenParser Char ParserState (String,String)
|
|
parseLabel = try $ do
|
|
res <- sepBy (skipSpaces >> optional newline >> skipSpaces >> many1 (noneOf "@;")) (oneOf "@")
|
|
case res of
|
|
[lab,loc] -> return (lab, loc)
|
|
[lab] -> return (lab, "" )
|
|
_ -> return ("" , "" )
|
|
|
|
#endif
|