Parsing: Fixed the `uri` parser -- escape Unicode URLs.

Otherwise Network.URI.parseURI fails on URLs containing non-ASCII
characters (e.g. Chinese URLs).  Also corrected an incorrect test in markdown-reader-more.
This commit is contained in:
John MacFarlane 2013-01-14 17:38:34 -08:00
parent 0ef24d1518
commit 51e0bd277a
2 changed files with 3 additions and 3 deletions

View file

@@ -153,7 +153,7 @@ import Text.Parsec
import Text.Parsec.Pos (newPos)
import Data.Char ( toLower, toUpper, ord, isAscii, isAlphaNum, isDigit, isPunctuation )
import Data.List ( intercalate, transpose )
import Network.URI ( parseURI, URI (..), isAllowedInURI )
import Network.URI ( parseURI, URI (..), isAllowedInURI, isUnescapedInURI, escapeURIString )
import Text.Pandoc.Shared
import qualified Data.Map as M
import Text.TeXMath.Macros (applyMacros, Macro, parseMacroDefinitions)
@@ -380,7 +380,7 @@ uri = try $ do
str <- liftM concat $ many1 $ inParens <|> count 1 (innerPunct <|> uriChar)
str' <- option str $ char '/' >> return (str ++ "/")
-- now see if they amount to an absolute URI
case parseURI (escapeURI str') of
case parseURI (escapeURIString isUnescapedInURI str') of
Just uri' -> if uriScheme uri' `elem` protocols
then return (str', show uri')
else fail "not a URI"

View file

@@ -29,7 +29,7 @@
,Para [Str "`hi"]
,Para [Str "there`"]
,Header 2 ("multilingual-urls",[],[]) [Str "Multilingual",Space,Str "URLs"]
,Para [RawInline "html" "<http://\27979.com?\27979=\27979>"]
,Para [Link [Str "http://\27979.com?\27979=\27979"] ("http://%E6%B5%8B.com?%E6%B5%8B=%E6%B5%8B","")]
,Para [Link [Str "foo"] ("/bar/\27979?x=\27979","title")]
,Para [Link [Str "\27979@foo.\27979.baz"] ("mailto:\27979@foo.\27979.baz","")]
,Header 2 ("numbered-examples",[],[]) [Str "Numbered",Space,Str "examples"]