Shared: Removed unescapeURI, modified escapeURI.

escapeURI now only escapes space characters, leaving unicode characters
as they are, instead of converting them to octets and URL-encoding them,
as before.  This gives more readable URIs.  User agents now do the
percent-encoding themselves.

URIs are no longer unescaped at all on conversion to markdown, asciidoc,
rst, org.

Closes #349.
This commit is contained in:
John MacFarlane 2011-12-02 19:39:30 -08:00
parent c6456ef8a4
commit bdec07bac9
6 changed files with 18 additions and 33 deletions

View file

@ -44,7 +44,6 @@ module Text.Pandoc.Shared (
camelCaseToHyphenated, camelCaseToHyphenated,
toRomanNumeral, toRomanNumeral,
escapeURI, escapeURI,
unescapeURI,
tabFilter, tabFilter,
-- * Pandoc block and inline list processing -- * Pandoc block and inline list processing
orderedListMarkers, orderedListMarkers,
@ -73,11 +72,10 @@ module Text.Pandoc.Shared (
import Text.Pandoc.Definition import Text.Pandoc.Definition
import Text.Pandoc.Generic import Text.Pandoc.Generic
import qualified Text.Pandoc.UTF8 as UTF8 (readFile) import qualified Text.Pandoc.UTF8 as UTF8 (readFile)
import Data.Char ( toLower, isLower, isUpper, isAlpha, isAscii, import Data.Char ( toLower, isLower, isUpper, isAlpha,
isLetter, isDigit ) isLetter, isDigit, isSpace )
import Data.List ( find, isPrefixOf, intercalate ) import Data.List ( find, isPrefixOf, intercalate )
import Network.URI ( isAllowedInURI, escapeURIString, unEscapeString ) import Network.URI ( escapeURIString )
import Codec.Binary.UTF8.String ( encodeString, decodeString )
import System.Directory import System.Directory
import System.FilePath ( (</>) ) import System.FilePath ( (</>) )
import Data.Generics (Typeable, Data) import Data.Generics (Typeable, Data)
@ -181,16 +179,9 @@ toRomanNumeral x =
_ | x >= 1 -> "I" ++ toRomanNumeral (x - 1) _ | x >= 1 -> "I" ++ toRomanNumeral (x - 1)
_ -> "" _ -> ""
-- | Escape unicode characters in a URI. Characters that are -- | Escape whitespace in URI.
-- already valid in a URI, including % and ?, are left alone.
escapeURI :: String -> String escapeURI :: String -> String
escapeURI = escapeURIString isAllowedInURI . encodeString escapeURI = escapeURIString (not . isSpace)
-- | Unescape unicode and some special characters in a URI, but
-- without introducing spaces.
unescapeURI :: String -> String
unescapeURI = escapeURIString (\c -> isAllowedInURI c || not (isAscii c)) .
decodeString . unEscapeString
-- | Convert tabs to spaces and filter out DOS line endings. -- | Convert tabs to spaces and filter out DOS line endings.
-- Tabs will be preserved if tab stop is set to 0. -- Tabs will be preserved if tab stop is set to 0.
@ -304,9 +295,9 @@ consolidateInlines (Str x : ys) =
fromStr (Str z) = z fromStr (Str z) = z
fromStr _ = error "consolidateInlines - fromStr - not a Str" fromStr _ = error "consolidateInlines - fromStr - not a Str"
consolidateInlines (Space : ys) = Space : rest consolidateInlines (Space : ys) = Space : rest
where isSpace Space = True where isSp Space = True
isSpace _ = False isSp _ = False
rest = consolidateInlines $ dropWhile isSpace ys rest = consolidateInlines $ dropWhile isSp ys
consolidateInlines (Emph xs : Emph ys : zs) = consolidateInlines $ consolidateInlines (Emph xs : Emph ys : zs) = consolidateInlines $
Emph (xs ++ ys) : zs Emph (xs ++ ys) : zs
consolidateInlines (Strong xs : Strong ys : zs) = consolidateInlines $ consolidateInlines (Strong xs : Strong ys : zs) = consolidateInlines $

View file

@ -336,12 +336,11 @@ inlineToAsciiDoc _ (RawInline _ _) = return empty
inlineToAsciiDoc _ (LineBreak) = return $ " +" <> cr inlineToAsciiDoc _ (LineBreak) = return $ " +" <> cr
inlineToAsciiDoc _ Space = return space inlineToAsciiDoc _ Space = return space
inlineToAsciiDoc opts (Cite _ lst) = inlineListToAsciiDoc opts lst inlineToAsciiDoc opts (Cite _ lst) = inlineListToAsciiDoc opts lst
inlineToAsciiDoc opts (Link txt (src', _tit)) = do inlineToAsciiDoc opts (Link txt (src, _tit)) = do
-- relative: link:downloads/foo.zip[download foo.zip] -- relative: link:downloads/foo.zip[download foo.zip]
-- abs: http://google.cod[Google] -- abs: http://google.cod[Google]
-- or my@email.com[email john] -- or my@email.com[email john]
linktext <- inlineListToAsciiDoc opts txt linktext <- inlineListToAsciiDoc opts txt
let src = unescapeURI src'
let isRelative = ':' `notElem` src let isRelative = ':' `notElem` src
let prefix = if isRelative let prefix = if isRelative
then text "link:" then text "link:"
@ -353,7 +352,7 @@ inlineToAsciiDoc opts (Link txt (src', _tit)) = do
return $ if useAuto return $ if useAuto
then text srcSuffix then text srcSuffix
else prefix <> text src <> "[" <> linktext <> "]" else prefix <> text src <> "[" <> linktext <> "]"
inlineToAsciiDoc opts (Image alternate (src', tit)) = do inlineToAsciiDoc opts (Image alternate (src, tit)) = do
-- image:images/logo.png[Company logo, title="blah"] -- image:images/logo.png[Company logo, title="blah"]
let txt = if (null alternate) || (alternate == [Str ""]) let txt = if (null alternate) || (alternate == [Str ""])
then [Str "image"] then [Str "image"]
@ -362,7 +361,6 @@ inlineToAsciiDoc opts (Image alternate (src', tit)) = do
let linktitle = if null tit let linktitle = if null tit
then empty then empty
else text $ ",title=\"" ++ tit ++ "\"" else text $ ",title=\"" ++ tit ++ "\""
let src = unescapeURI src'
return $ "image:" <> text src <> "[" <> linktext <> linktitle <> "]" return $ "image:" <> text src <> "[" <> linktext <> linktitle <> "]"
inlineToAsciiDoc opts (Note [Para inlines]) = inlineToAsciiDoc opts (Note [Para inlines]) =
inlineToAsciiDoc opts (Note [Plain inlines]) inlineToAsciiDoc opts (Note [Plain inlines])

View file

@ -493,12 +493,11 @@ inlineToMarkdown opts (Cite (c:cs) lst)
modekey SuppressAuthor = "-" modekey SuppressAuthor = "-"
modekey _ = "" modekey _ = ""
inlineToMarkdown _ (Cite _ _) = return $ text "" inlineToMarkdown _ (Cite _ _) = return $ text ""
inlineToMarkdown opts (Link txt (src', tit)) = do inlineToMarkdown opts (Link txt (src, tit)) = do
linktext <- inlineListToMarkdown opts txt linktext <- inlineListToMarkdown opts txt
let linktitle = if null tit let linktitle = if null tit
then empty then empty
else text $ " \"" ++ tit ++ "\"" else text $ " \"" ++ tit ++ "\""
let src = unescapeURI src'
let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
let useRefLinks = writerReferenceLinks opts let useRefLinks = writerReferenceLinks opts
let useAuto = case (tit,txt) of let useAuto = case (tit,txt) of

View file

@ -272,8 +272,7 @@ inlineToOrg (Link txt (src, _)) = do
_ -> do contents <- inlineListToOrg txt _ -> do contents <- inlineListToOrg txt
modify $ \s -> s{ stLinks = True } modify $ \s -> s{ stLinks = True }
return $ "[[" <> text src <> "][" <> contents <> "]]" return $ "[[" <> text src <> "][" <> contents <> "]]"
inlineToOrg (Image _ (source', _)) = do inlineToOrg (Image _ (source, _)) = do
let source = unescapeURI source'
modify $ \s -> s{ stImages = True } modify $ \s -> s{ stImages = True }
return $ "[[" <> text source <> "]]" return $ "[[" <> text source <> "]]"
inlineToOrg (Note contents) = do inlineToOrg (Note contents) = do

View file

@ -298,9 +298,8 @@ inlineToRST Space = return space
inlineToRST (Link [Code _ str] (src, _)) | src == str || inlineToRST (Link [Code _ str] (src, _)) | src == str ||
src == "mailto:" ++ str = do src == "mailto:" ++ str = do
let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
return $ text $ unescapeURI srcSuffix return $ text srcSuffix
inlineToRST (Link txt (src', tit)) = do inlineToRST (Link txt (src, tit)) = do
let src = unescapeURI src'
useReferenceLinks <- get >>= return . writerReferenceLinks . stOptions useReferenceLinks <- get >>= return . writerReferenceLinks . stOptions
linktext <- inlineListToRST $ normalizeSpaces txt linktext <- inlineListToRST $ normalizeSpaces txt
if useReferenceLinks if useReferenceLinks
@ -311,8 +310,7 @@ inlineToRST (Link txt (src', tit)) = do
modify $ \st -> st { stLinks = refs' } modify $ \st -> st { stLinks = refs' }
return $ "`" <> linktext <> "`_" return $ "`" <> linktext <> "`_"
else return $ "`" <> linktext <> " <" <> text src <> ">`_" else return $ "`" <> linktext <> " <" <> text src <> ">`_"
inlineToRST (Image alternate (source', tit)) = do inlineToRST (Image alternate (source, tit)) = do
let source = unescapeURI source'
pics <- get >>= return . stImages pics <- get >>= return . stImages
let labelsUsed = map fst pics let labelsUsed = map fst pics
let txt = if null alternate || alternate == [Str ""] || let txt = if null alternate || alternate == [Str ""] ||

View file

@ -29,9 +29,9 @@
,Para [Str "`",Str "hi"] ,Para [Str "`",Str "hi"]
,Para [Str "there",Str "`"] ,Para [Str "there",Str "`"]
,Header 2 [Str "Multilingual",Space,Str "URLs"] ,Header 2 [Str "Multilingual",Space,Str "URLs"]
,Para [Link [Code ("",["url"],[]) "http://\27979.com?\27979=\27979"] ("http://%E6%B5%8B.com?%E6%B5%8B=%E6%B5%8B","")] ,Plain [RawInline "html" "<http://\27979.com?\27979=\27979>"]
,Para [Link [Str "foo"] ("/bar/%E6%B5%8B?x=%E6%B5%8B","title")] ,Para [Link [Str "foo"] ("/bar/\27979?x=\27979","title")]
,Para [Link [Code ("",["url"],[]) "\27979@foo.\27979.baz"] ("mailto:%E6%B5%8B@foo.%E6%B5%8B.baz","")] ,Para [Link [Code ("",["url"],[]) "\27979@foo.\27979.baz"] ("mailto:\27979@foo.\27979.baz","")]
,Header 2 [Str "Numbered",Space,Str "examples"] ,Header 2 [Str "Numbered",Space,Str "examples"]
,OrderedList (1,Example,TwoParens) ,OrderedList (1,Example,TwoParens)
[[Plain [Str "First",Space,Str "example",Str "."]] [[Plain [Str "First",Space,Str "example",Str "."]]