Fixed handling of email addresses in markdown and reStructuredText.

Consolidated uri and email address parsers.  (Resolves Issue #37.)
+ New emailAddress and uri parsers in Text.Pandoc.Shared.
  uri parser uses parseURI from Network.URI.  emailAddress
  parser properly handles email addresses with periods in them.
+ Removed uri and emailAddress parsers from Text.Pandoc.Readers.RST.
+ Removed uri and emailAddress parsers from Text.Pandoc.Readers.Markdown.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1149 788f1e2b-df1e-0410-8736-df70ead52e1b
This commit is contained in:
fiddlosopher 2007-12-21 16:13:10 +00:00
parent 246e5f9ea3
commit 0681d1d3e7
3 changed files with 36 additions and 40 deletions

View file

@ -35,7 +35,6 @@ import Data.List ( transpose, isPrefixOf, isSuffixOf, lookup, sortBy, findIndex
import Data.Ord ( comparing )
import Data.Char ( isAlphaNum )
import Data.Maybe ( fromMaybe )
import Network.URI ( isURI )
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment )
@ -850,22 +849,9 @@ referenceLink label = do
Nothing -> fail "no corresponding key"
Just target -> return target
emailAddress = try $ do
name <- many1 (alphaNum <|> char '+')
char '@'
first <- many1 alphaNum
rest <- many1 (char '.' >> many1 alphaNum)
return $ "mailto:" ++ name ++ "@" ++ joinWithSep "." (first:rest)
uri = try $ do
str <- many1 (noneOf "\n\t >")
if isURI str
then return str
else fail "not a URI"
autoLink = try $ do
char '<'
src <- uri <|> emailAddress
src <- uri <|> (emailAddress >>= (return . ("mailto:" ++)))
char '>'
let src' = if "mailto:" `isPrefixOf` src
then drop 7 src

View file

@ -592,35 +592,10 @@ referenceLink = try $ do
setState $ state { stateKeys = keyTable' }
return $ Link (normalizeSpaces label) src
uriScheme = oneOfStrings [ "http://", "https://", "ftp://", "file://",
"mailto:", "news:", "telnet:" ]
uri = try $ do
scheme <- uriScheme
identifier <- many1 (noneOf " \t\n")
return $ scheme ++ identifier
autoURI = do
src <- uri
return $ Link [Str src] (src, "")
emailChar = alphaNum <|> oneOf "-+_."
emailAddress = try $ do
firstLetter <- alphaNum
restAddr <- many emailChar
let addr = firstLetter:restAddr
char '@'
dom <- domain
return $ addr ++ '@':dom
domainChar = alphaNum <|> char '-'
domain = do
first <- many1 domainChar
dom <- many1 (try (do{ char '.'; many1 domainChar }))
return $ joinWithSep "." (first:dom)
autoEmail = do
src <- emailAddress
return $ Link [Str src] ("mailto:" ++ src, "")

View file

@ -64,6 +64,8 @@ module Text.Pandoc.Shared (
charsInBalanced,
charsInBalanced',
romanNumeral,
emailAddress,
uri,
withHorizDisplacement,
nullBlock,
failIfStrict,
@ -105,6 +107,7 @@ import Text.Pandoc.CharacterReferences ( characterReference )
import Data.Char ( toLower, toUpper, ord, isLower, isUpper )
import Data.List ( find, isPrefixOf )
import Control.Monad ( join )
import Network.URI ( parseURI, URI (..), isAllowedInURI )
--
-- List processing
@ -404,6 +407,38 @@ romanNumeral upperCase = do
then fail "not a roman numeral"
else return total
-- Parsers for email addresses and URIs
emailChar = alphaNum <|> oneOf "-+_."
domainChar = alphaNum <|> char '-'
domain = do
first <- many1 domainChar
dom <- many1 (try (do{ char '.'; many1 domainChar }))
return $ joinWithSep "." (first:dom)
-- | Parses an email address; returns string.
emailAddress :: GenParser Char st [Char]
emailAddress = try $ do
firstLetter <- alphaNum
restAddr <- many emailChar
let addr = firstLetter:restAddr
char '@'
dom <- domain
return $ addr ++ '@':dom
-- | Parses a URI.
uri = try $ do
str <- many1 $ satisfy isAllowedInURI
case parseURI str of
Just uri' -> if uriScheme uri' `elem` [ "http:", "https:", "ftp:",
"file:", "mailto:",
"news:", "telnet:" ]
then return $ show uri'
else fail "not a URI"
Nothing -> fail "not a URI"
-- | Applies a parser, returns tuple of its results and its horizontal
-- displacement (the difference between the source column at the end
-- and the source column at the beginning). Vertical displacement