Fixed handling of email addresses in markdown and reStructuredText.
Consolidated uri and email address parsers. (Resolves Issue #37.) + New emailAddress and uri parsers in Text.Pandoc.Shared. uri parser uses parseURI from Network.URI. emailAddress parser properly handles email addresses with periods in them. + Removed uri and emailAddress parsers from Text.Pandoc.Readers.RST. + Removed uri and emailAddress parsers from Text.Pandoc.Readers.Markdown. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1149 788f1e2b-df1e-0410-8736-df70ead52e1b
This commit is contained in:
parent
246e5f9ea3
commit
0681d1d3e7
3 changed files with 36 additions and 40 deletions
|
@ -35,7 +35,6 @@ import Data.List ( transpose, isPrefixOf, isSuffixOf, lookup, sortBy, findIndex
|
|||
import Data.Ord ( comparing )
|
||||
import Data.Char ( isAlphaNum )
|
||||
import Data.Maybe ( fromMaybe )
|
||||
import Network.URI ( isURI )
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Shared
|
||||
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment )
|
||||
|
@ -850,22 +849,9 @@ referenceLink label = do
|
|||
Nothing -> fail "no corresponding key"
|
||||
Just target -> return target
|
||||
|
||||
emailAddress = try $ do
|
||||
name <- many1 (alphaNum <|> char '+')
|
||||
char '@'
|
||||
first <- many1 alphaNum
|
||||
rest <- many1 (char '.' >> many1 alphaNum)
|
||||
return $ "mailto:" ++ name ++ "@" ++ joinWithSep "." (first:rest)
|
||||
|
||||
uri = try $ do
|
||||
str <- many1 (noneOf "\n\t >")
|
||||
if isURI str
|
||||
then return str
|
||||
else fail "not a URI"
|
||||
|
||||
autoLink = try $ do
|
||||
char '<'
|
||||
src <- uri <|> emailAddress
|
||||
src <- uri <|> (emailAddress >>= (return . ("mailto:" ++)))
|
||||
char '>'
|
||||
let src' = if "mailto:" `isPrefixOf` src
|
||||
then drop 7 src
|
||||
|
|
|
@ -592,35 +592,10 @@ referenceLink = try $ do
|
|||
setState $ state { stateKeys = keyTable' }
|
||||
return $ Link (normalizeSpaces label) src
|
||||
|
||||
uriScheme = oneOfStrings [ "http://", "https://", "ftp://", "file://",
|
||||
"mailto:", "news:", "telnet:" ]
|
||||
|
||||
uri = try $ do
|
||||
scheme <- uriScheme
|
||||
identifier <- many1 (noneOf " \t\n")
|
||||
return $ scheme ++ identifier
|
||||
|
||||
autoURI = do
|
||||
src <- uri
|
||||
return $ Link [Str src] (src, "")
|
||||
|
||||
emailChar = alphaNum <|> oneOf "-+_."
|
||||
|
||||
emailAddress = try $ do
|
||||
firstLetter <- alphaNum
|
||||
restAddr <- many emailChar
|
||||
let addr = firstLetter:restAddr
|
||||
char '@'
|
||||
dom <- domain
|
||||
return $ addr ++ '@':dom
|
||||
|
||||
domainChar = alphaNum <|> char '-'
|
||||
|
||||
domain = do
|
||||
first <- many1 domainChar
|
||||
dom <- many1 (try (do{ char '.'; many1 domainChar }))
|
||||
return $ joinWithSep "." (first:dom)
|
||||
|
||||
autoEmail = do
|
||||
src <- emailAddress
|
||||
return $ Link [Str src] ("mailto:" ++ src, "")
|
||||
|
|
|
@ -64,6 +64,8 @@ module Text.Pandoc.Shared (
|
|||
charsInBalanced,
|
||||
charsInBalanced',
|
||||
romanNumeral,
|
||||
emailAddress,
|
||||
uri,
|
||||
withHorizDisplacement,
|
||||
nullBlock,
|
||||
failIfStrict,
|
||||
|
@ -105,6 +107,7 @@ import Text.Pandoc.CharacterReferences ( characterReference )
|
|||
import Data.Char ( toLower, toUpper, ord, isLower, isUpper )
|
||||
import Data.List ( find, isPrefixOf )
|
||||
import Control.Monad ( join )
|
||||
import Network.URI ( parseURI, URI (..), isAllowedInURI )
|
||||
|
||||
--
|
||||
-- List processing
|
||||
|
@ -404,6 +407,38 @@ romanNumeral upperCase = do
|
|||
then fail "not a roman numeral"
|
||||
else return total
|
||||
|
||||
-- Parsers for email addresses and URIs
|
||||
|
||||
emailChar = alphaNum <|> oneOf "-+_."
|
||||
|
||||
domainChar = alphaNum <|> char '-'
|
||||
|
||||
domain = do
|
||||
first <- many1 domainChar
|
||||
dom <- many1 (try (do{ char '.'; many1 domainChar }))
|
||||
return $ joinWithSep "." (first:dom)
|
||||
|
||||
-- | Parses an email address; returns string.
|
||||
emailAddress :: GenParser Char st [Char]
|
||||
emailAddress = try $ do
|
||||
firstLetter <- alphaNum
|
||||
restAddr <- many emailChar
|
||||
let addr = firstLetter:restAddr
|
||||
char '@'
|
||||
dom <- domain
|
||||
return $ addr ++ '@':dom
|
||||
|
||||
-- | Parses a URI.
|
||||
uri = try $ do
|
||||
str <- many1 $ satisfy isAllowedInURI
|
||||
case parseURI str of
|
||||
Just uri' -> if uriScheme uri' `elem` [ "http:", "https:", "ftp:",
|
||||
"file:", "mailto:",
|
||||
"news:", "telnet:" ]
|
||||
then return $ show uri'
|
||||
else fail "not a URI"
|
||||
Nothing -> fail "not a URI"
|
||||
|
||||
-- | Applies a parser, returns tuple of its results and its horizontal
|
||||
-- displacement (the difference between the source column at the end
|
||||
-- and the source column at the beginning). Vertical displacement
|
||||
|
|
Loading…
Reference in a new issue