Refactored markdown reader so that Text.Regex is not used.

Replaced email regex test with a custom email autolink parser
(autoLinkEmail).  Also replaced 'selfClosingTag' with a
custom function 'isSelfClosingTag'.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@511 788f1e2b-df1e-0410-8736-df70ead52e1b
This commit is contained in:
fiddlosopher 2007-01-24 19:44:43 +00:00
parent 8f0cfe9bd0
commit 890fbe97ec

View file

@ -31,7 +31,7 @@ module Text.Pandoc.Readers.Markdown (
readMarkdown readMarkdown
) where ) where
import Data.List ( findIndex, sortBy, transpose ) import Data.List ( findIndex, sortBy, transpose, isSuffixOf, intersect )
import Data.Char ( isAlphaNum ) import Data.Char ( isAlphaNum )
import Text.ParserCombinators.Pandoc import Text.ParserCombinators.Pandoc
import Text.Pandoc.Definition import Text.Pandoc.Definition
@ -43,7 +43,6 @@ import Text.Pandoc.Readers.HTML ( rawHtmlBlock,
htmlEndTag, extractTagType, htmlEndTag, extractTagType,
htmlBlockElement ) htmlBlockElement )
import Text.Pandoc.Entities ( decodeEntities ) import Text.Pandoc.Entities ( decodeEntities )
import Text.Regex ( matchRegex, mkRegex )
import Text.ParserCombinators.Parsec import Text.ParserCombinators.Parsec
-- | Read markdown from an input string and return a Pandoc document. -- | Read markdown from an input string and return a Pandoc document.
@ -474,14 +473,13 @@ htmlBlock = do
else rawHtmlBlocks else rawHtmlBlocks
-- True if tag is self-closing -- True if tag is self-closing
selfClosing tag = case (matchRegex (mkRegex "\\/[[:space:]]*>$") tag) of isSelfClosing tag =
Just _ -> True isSuffixOf "/>" $ filter (\c -> (not (c `elem` " \n\t"))) tag
Nothing -> False
strictHtmlBlock = try (do strictHtmlBlock = try (do
tag <- anyHtmlBlockTag tag <- anyHtmlBlockTag
let tag' = extractTagType tag let tag' = extractTagType tag
if selfClosing tag || tag' == "hr" if isSelfClosing tag || tag' == "hr"
then return tag then return tag
else do else do
contents <- many (do{notFollowedBy' (htmlEndTag tag'); contents <- many (do{notFollowedBy' (htmlEndTag tag');
@ -930,17 +928,24 @@ referenceLinkSingle = try (do
else fail "no corresponding key" else fail "no corresponding key"
return (Link label (Ref label))) return (Link label (Ref label)))
-- a link <like.this.com> autoLink = do
autoLink = try (do
notFollowedBy' (anyHtmlTag <|> anyHtmlEndTag) notFollowedBy' (anyHtmlTag <|> anyHtmlEndTag)
autoLinkEmail <|> autoLinkRegular
-- a link <like@this.com>
autoLinkEmail = try $ do
char autoLinkStart
name <- many1Till (noneOf "/:<> \t\n") (char '@')
domain <- sepBy1 (many1 (noneOf "/:.@<> \t\n")) (char '.')
let src = name ++ "@" ++ (joinWithSep "." domain)
char autoLinkEnd
return $ Link [Str src] (Src ("mailto:" ++ src) "")
-- a link <like.this.com>
autoLinkRegular = try $ do
src <- between (char autoLinkStart) (char autoLinkEnd) src <- between (char autoLinkStart) (char autoLinkEnd)
(many (noneOf (spaceChars ++ endLineChars ++ [autoLinkEnd]))) (many (noneOf (spaceChars ++ endLineChars ++ [autoLinkEnd])))
case (matchRegex emailAddress src) of return $ Link [Str src] (Src src "")
Just _ -> return (Link [Str src] (Src ("mailto:" ++ src) ""))
Nothing -> return (Link [Str src] (Src src "")))
emailAddress =
mkRegex "([^@:/]+)@(([^.]+[.]?)*([^.]+))" -- presupposes no whitespace
image = try (do image = try (do
char imageStart char imageStart