diff --git a/src/Tests/Readers/RST.hs b/src/Tests/Readers/RST.hs index d67778f08..3ca4e42bf 100644 --- a/src/Tests/Readers/RST.hs +++ b/src/Tests/Readers/RST.hs @@ -45,5 +45,13 @@ tests = [ "field list" =: , (str "Parameter i", [para "integer"]) , (str "Final", [para "item on two lines"]) ]) + , "URLs with following punctuation" =: + ("http://google.com, http://yahoo.com; http://foo.bar.baz.\n" ++ + "http://foo.bar/baz_(bam) (http://foo.bar)") =?> + para (link "http://google.com" "" "http://google.com" +++ ", " +++ + link "http://yahoo.com" "" "http://yahoo.com" +++ "; " +++ + link "http://foo.bar.baz" "" "http://foo.bar.baz" +++ ". " +++ + link "http://foo.bar/baz_(bam)" "" "http://foo.bar/baz_(bam)" + +++ " (" +++ link "http://foo.bar" "" "http://foo.bar" +++ ")") ] diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 9ce064f91..187343f9c 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -78,7 +78,7 @@ import Text.Pandoc.Generic import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn) import Text.ParserCombinators.Parsec import Text.Pandoc.CharacterReferences ( characterReference ) -import Data.Char ( toLower, toUpper, ord, isAscii, isAlphaNum, isDigit ) +import Data.Char ( toLower, toUpper, ord, isAscii, isAlphaNum, isDigit, isPunctuation ) import Data.List ( intercalate, transpose ) import Network.URI ( parseURI, URI (..), isAllowedInURI ) import Control.Monad ( join, liftM, guard ) @@ -264,8 +264,24 @@ uri = try $ do let protocols = [ "http:", "https:", "ftp:", "file:", "mailto:", "news:", "telnet:" ] lookAhead $ oneOfStrings protocols - -- scan non-ascii characters and ascii characters allowed in a URI - str <- many1 $ satisfy (\c -> not (isAscii c) || isAllowedInURI c) + -- Scan non-ascii characters and ascii characters allowed in a URI. + -- We allow punctuation except when followed by a space, since + -- we don't want the trailing '.' in 'http://google.com.' + let innerPunct = try $ satisfy isPunctuation >>~ + notFollowedBy (newline <|> spaceChar) + let uriChar = innerPunct <|> + satisfy (\c -> not (isPunctuation c) && + (not (isAscii c) || isAllowedInURI c)) + -- We want to allow + -- http://en.wikipedia.org/wiki/State_of_emergency_(disambiguation) + -- as a URL, while NOT picking up the closing paren in + -- (http://wikipedia.org) + -- So we include balanced parens in the URL. + let inParens = try $ do char '(' + res <- many uriChar + char ')' + return $ '(' : res ++ ")" + str <- liftM concat $ many1 $ inParens <|> count 1 (innerPunct <|> uriChar) -- now see if they amount to an absolute URI case parseURI (escapeURI str) of Just uri' -> if uriScheme uri' `elem` protocols diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 2cfab0f53..736219d68 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -58,7 +58,7 @@ underlineChars = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" -- treat these as potentially non-text when parsing inline: specialChars :: [Char] -specialChars = "\\`|*_<>$:[-.\"'\8216\8217\8220\8221" +specialChars = "\\`|*_<>$:[]()-.\"'\8216\8217\8220\8221" -- -- parsing documents diff --git a/tests/rst-reader.native b/tests/rst-reader.native index a88d499b0..3e521cb7c 100644 --- a/tests/rst-reader.native +++ b/tests/rst-reader.native @@ -222,7 +222,7 @@ Pandoc (Meta {docTitle = [Str "Pandoc",Space,Str "Test",Space,Str "Suite",Str ": ,Para [Str "But",Space,Str "not",Space,Str "here",Str ":"] ,CodeBlock ("",[],[]) "http://example.com/" ,Header 1 [Str "Images"] -,Para [Str "From",Space,Quoted DoubleQuote [Str "Voyage",Space,Str "dans",Space,Str "la",Space,Str "Lune"],Space,Str "by",Space,Str "Georges",Space,Str "Melies",Space,Str "(1902)",Str ":"] +,Para [Str "From",Space,Quoted DoubleQuote [Str "Voyage",Space,Str "dans",Space,Str "la",Space,Str "Lune"],Space,Str "by",Space,Str "Georges",Space,Str "Melies",Space,Str "(",Str "1902",Str ")",Str ":"] ,Plain [Image [Str "image"] ("lalune.jpg","")] ,Plain [Image [Str "Voyage dans la Lune"] ("lalune.jpg","")] ,Para [Str "Here",Space,Str "is",Space,Str "a",Space,Str "movie",Space,Image [Str "movie"] ("movie.jpg",""),Space,Str "icon",Str "."]