Fixed handling of email addresses in markdown and reStructuredText.
Consolidated uri and email address parsers. (Resolves Issue #37.) + New emailAddress and uri parsers in Text.Pandoc.Shared. uri parser uses parseURI from Network.URI. emailAddress parser properly handles email addresses with periods in them. + Removed uri and emailAddress parsers from Text.Pandoc.Readers.RST. + Removed uri and emailAddress parsers from Text.Pandoc.Readers.Markdown. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1149 788f1e2b-df1e-0410-8736-df70ead52e1b
This commit is contained in:
parent
246e5f9ea3
commit
0681d1d3e7
3 changed files with 36 additions and 40 deletions
|
@ -35,7 +35,6 @@ import Data.List ( transpose, isPrefixOf, isSuffixOf, lookup, sortBy, findIndex
|
||||||
import Data.Ord ( comparing )
|
import Data.Ord ( comparing )
|
||||||
import Data.Char ( isAlphaNum )
|
import Data.Char ( isAlphaNum )
|
||||||
import Data.Maybe ( fromMaybe )
|
import Data.Maybe ( fromMaybe )
|
||||||
import Network.URI ( isURI )
|
|
||||||
import Text.Pandoc.Definition
|
import Text.Pandoc.Definition
|
||||||
import Text.Pandoc.Shared
|
import Text.Pandoc.Shared
|
||||||
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment )
|
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment )
|
||||||
|
@ -850,22 +849,9 @@ referenceLink label = do
|
||||||
Nothing -> fail "no corresponding key"
|
Nothing -> fail "no corresponding key"
|
||||||
Just target -> return target
|
Just target -> return target
|
||||||
|
|
||||||
emailAddress = try $ do
|
|
||||||
name <- many1 (alphaNum <|> char '+')
|
|
||||||
char '@'
|
|
||||||
first <- many1 alphaNum
|
|
||||||
rest <- many1 (char '.' >> many1 alphaNum)
|
|
||||||
return $ "mailto:" ++ name ++ "@" ++ joinWithSep "." (first:rest)
|
|
||||||
|
|
||||||
uri = try $ do
|
|
||||||
str <- many1 (noneOf "\n\t >")
|
|
||||||
if isURI str
|
|
||||||
then return str
|
|
||||||
else fail "not a URI"
|
|
||||||
|
|
||||||
autoLink = try $ do
|
autoLink = try $ do
|
||||||
char '<'
|
char '<'
|
||||||
src <- uri <|> emailAddress
|
src <- uri <|> (emailAddress >>= (return . ("mailto:" ++)))
|
||||||
char '>'
|
char '>'
|
||||||
let src' = if "mailto:" `isPrefixOf` src
|
let src' = if "mailto:" `isPrefixOf` src
|
||||||
then drop 7 src
|
then drop 7 src
|
||||||
|
|
|
@ -592,35 +592,10 @@ referenceLink = try $ do
|
||||||
setState $ state { stateKeys = keyTable' }
|
setState $ state { stateKeys = keyTable' }
|
||||||
return $ Link (normalizeSpaces label) src
|
return $ Link (normalizeSpaces label) src
|
||||||
|
|
||||||
uriScheme = oneOfStrings [ "http://", "https://", "ftp://", "file://",
|
|
||||||
"mailto:", "news:", "telnet:" ]
|
|
||||||
|
|
||||||
uri = try $ do
|
|
||||||
scheme <- uriScheme
|
|
||||||
identifier <- many1 (noneOf " \t\n")
|
|
||||||
return $ scheme ++ identifier
|
|
||||||
|
|
||||||
autoURI = do
|
autoURI = do
|
||||||
src <- uri
|
src <- uri
|
||||||
return $ Link [Str src] (src, "")
|
return $ Link [Str src] (src, "")
|
||||||
|
|
||||||
emailChar = alphaNum <|> oneOf "-+_."
|
|
||||||
|
|
||||||
emailAddress = try $ do
|
|
||||||
firstLetter <- alphaNum
|
|
||||||
restAddr <- many emailChar
|
|
||||||
let addr = firstLetter:restAddr
|
|
||||||
char '@'
|
|
||||||
dom <- domain
|
|
||||||
return $ addr ++ '@':dom
|
|
||||||
|
|
||||||
domainChar = alphaNum <|> char '-'
|
|
||||||
|
|
||||||
domain = do
|
|
||||||
first <- many1 domainChar
|
|
||||||
dom <- many1 (try (do{ char '.'; many1 domainChar }))
|
|
||||||
return $ joinWithSep "." (first:dom)
|
|
||||||
|
|
||||||
autoEmail = do
|
autoEmail = do
|
||||||
src <- emailAddress
|
src <- emailAddress
|
||||||
return $ Link [Str src] ("mailto:" ++ src, "")
|
return $ Link [Str src] ("mailto:" ++ src, "")
|
||||||
|
|
|
@ -64,6 +64,8 @@ module Text.Pandoc.Shared (
|
||||||
charsInBalanced,
|
charsInBalanced,
|
||||||
charsInBalanced',
|
charsInBalanced',
|
||||||
romanNumeral,
|
romanNumeral,
|
||||||
|
emailAddress,
|
||||||
|
uri,
|
||||||
withHorizDisplacement,
|
withHorizDisplacement,
|
||||||
nullBlock,
|
nullBlock,
|
||||||
failIfStrict,
|
failIfStrict,
|
||||||
|
@ -105,6 +107,7 @@ import Text.Pandoc.CharacterReferences ( characterReference )
|
||||||
import Data.Char ( toLower, toUpper, ord, isLower, isUpper )
|
import Data.Char ( toLower, toUpper, ord, isLower, isUpper )
|
||||||
import Data.List ( find, isPrefixOf )
|
import Data.List ( find, isPrefixOf )
|
||||||
import Control.Monad ( join )
|
import Control.Monad ( join )
|
||||||
|
import Network.URI ( parseURI, URI (..), isAllowedInURI )
|
||||||
|
|
||||||
--
|
--
|
||||||
-- List processing
|
-- List processing
|
||||||
|
@ -404,6 +407,38 @@ romanNumeral upperCase = do
|
||||||
then fail "not a roman numeral"
|
then fail "not a roman numeral"
|
||||||
else return total
|
else return total
|
||||||
|
|
||||||
|
-- Parsers for email addresses and URIs
|
||||||
|
|
||||||
|
emailChar = alphaNum <|> oneOf "-+_."
|
||||||
|
|
||||||
|
domainChar = alphaNum <|> char '-'
|
||||||
|
|
||||||
|
domain = do
|
||||||
|
first <- many1 domainChar
|
||||||
|
dom <- many1 (try (do{ char '.'; many1 domainChar }))
|
||||||
|
return $ joinWithSep "." (first:dom)
|
||||||
|
|
||||||
|
-- | Parses an email address; returns string.
|
||||||
|
emailAddress :: GenParser Char st [Char]
|
||||||
|
emailAddress = try $ do
|
||||||
|
firstLetter <- alphaNum
|
||||||
|
restAddr <- many emailChar
|
||||||
|
let addr = firstLetter:restAddr
|
||||||
|
char '@'
|
||||||
|
dom <- domain
|
||||||
|
return $ addr ++ '@':dom
|
||||||
|
|
||||||
|
-- | Parses a URI.
|
||||||
|
uri = try $ do
|
||||||
|
str <- many1 $ satisfy isAllowedInURI
|
||||||
|
case parseURI str of
|
||||||
|
Just uri' -> if uriScheme uri' `elem` [ "http:", "https:", "ftp:",
|
||||||
|
"file:", "mailto:",
|
||||||
|
"news:", "telnet:" ]
|
||||||
|
then return $ show uri'
|
||||||
|
else fail "not a URI"
|
||||||
|
Nothing -> fail "not a URI"
|
||||||
|
|
||||||
-- | Applies a parser, returns tuple of its results and its horizontal
|
-- | Applies a parser, returns tuple of its results and its horizontal
|
||||||
-- displacement (the difference between the source column at the end
|
-- displacement (the difference between the source column at the end
|
||||||
-- and the source column at the beginning). Vertical displacement
|
-- and the source column at the beginning). Vertical displacement
|
||||||
|
|
Loading…
Reference in a new issue