Shared: Removed unescapeURI, modified escapeURI.

escapeURI now only escapes space characters, leaving unicode characters
as they are, instead of converting them to octets and URL-encoding them,
as before.  This gives more readable URIs.  User agents now do the
percent-encoding themselves.

URIs are no longer unescaped at all on conversion to markdown, asciidoc,
rst, org.

Closes #349.
This commit is contained in:
John MacFarlane 2011-12-02 19:39:30 -08:00
parent c6456ef8a4
commit bdec07bac9
6 changed files with 18 additions and 33 deletions

View file

@ -44,7 +44,6 @@ module Text.Pandoc.Shared (
camelCaseToHyphenated,
toRomanNumeral,
escapeURI,
unescapeURI,
tabFilter,
-- * Pandoc block and inline list processing
orderedListMarkers,
@ -73,11 +72,10 @@ module Text.Pandoc.Shared (
import Text.Pandoc.Definition
import Text.Pandoc.Generic
import qualified Text.Pandoc.UTF8 as UTF8 (readFile)
import Data.Char ( toLower, isLower, isUpper, isAlpha, isAscii,
isLetter, isDigit )
import Data.Char ( toLower, isLower, isUpper, isAlpha,
isLetter, isDigit, isSpace )
import Data.List ( find, isPrefixOf, intercalate )
import Network.URI ( isAllowedInURI, escapeURIString, unEscapeString )
import Codec.Binary.UTF8.String ( encodeString, decodeString )
import Network.URI ( escapeURIString )
import System.Directory
import System.FilePath ( (</>) )
import Data.Generics (Typeable, Data)
@ -181,16 +179,9 @@ toRomanNumeral x =
_ | x >= 1 -> "I" ++ toRomanNumeral (x - 1)
_ -> ""
-- | Escape unicode characters in a URI. Characters that are
-- already valid in a URI, including % and ?, are left alone.
-- | Escape whitespace in URI.
escapeURI :: String -> String
escapeURI = escapeURIString isAllowedInURI . encodeString
-- | Unescape unicode and some special characters in a URI, but
-- without introducing spaces.
unescapeURI :: String -> String
unescapeURI = escapeURIString (\c -> isAllowedInURI c || not (isAscii c)) .
decodeString . unEscapeString
escapeURI = escapeURIString (not . isSpace)
-- | Convert tabs to spaces and filter out DOS line endings.
-- Tabs will be preserved if tab stop is set to 0.
@ -304,9 +295,9 @@ consolidateInlines (Str x : ys) =
fromStr (Str z) = z
fromStr _ = error "consolidateInlines - fromStr - not a Str"
consolidateInlines (Space : ys) = Space : rest
where isSpace Space = True
isSpace _ = False
rest = consolidateInlines $ dropWhile isSpace ys
where isSp Space = True
isSp _ = False
rest = consolidateInlines $ dropWhile isSp ys
consolidateInlines (Emph xs : Emph ys : zs) = consolidateInlines $
Emph (xs ++ ys) : zs
consolidateInlines (Strong xs : Strong ys : zs) = consolidateInlines $

View file

@ -336,12 +336,11 @@ inlineToAsciiDoc _ (RawInline _ _) = return empty
inlineToAsciiDoc _ (LineBreak) = return $ " +" <> cr
inlineToAsciiDoc _ Space = return space
inlineToAsciiDoc opts (Cite _ lst) = inlineListToAsciiDoc opts lst
inlineToAsciiDoc opts (Link txt (src', _tit)) = do
inlineToAsciiDoc opts (Link txt (src, _tit)) = do
-- relative: link:downloads/foo.zip[download foo.zip]
-- abs: http://google.cod[Google]
-- or my@email.com[email john]
linktext <- inlineListToAsciiDoc opts txt
let src = unescapeURI src'
let isRelative = ':' `notElem` src
let prefix = if isRelative
then text "link:"
@ -353,7 +352,7 @@ inlineToAsciiDoc opts (Link txt (src', _tit)) = do
return $ if useAuto
then text srcSuffix
else prefix <> text src <> "[" <> linktext <> "]"
inlineToAsciiDoc opts (Image alternate (src', tit)) = do
inlineToAsciiDoc opts (Image alternate (src, tit)) = do
-- image:images/logo.png[Company logo, title="blah"]
let txt = if (null alternate) || (alternate == [Str ""])
then [Str "image"]
@ -362,7 +361,6 @@ inlineToAsciiDoc opts (Image alternate (src', tit)) = do
let linktitle = if null tit
then empty
else text $ ",title=\"" ++ tit ++ "\""
let src = unescapeURI src'
return $ "image:" <> text src <> "[" <> linktext <> linktitle <> "]"
inlineToAsciiDoc opts (Note [Para inlines]) =
inlineToAsciiDoc opts (Note [Plain inlines])

View file

@ -493,12 +493,11 @@ inlineToMarkdown opts (Cite (c:cs) lst)
modekey SuppressAuthor = "-"
modekey _ = ""
inlineToMarkdown _ (Cite _ _) = return $ text ""
inlineToMarkdown opts (Link txt (src', tit)) = do
inlineToMarkdown opts (Link txt (src, tit)) = do
linktext <- inlineListToMarkdown opts txt
let linktitle = if null tit
then empty
else text $ " \"" ++ tit ++ "\""
let src = unescapeURI src'
let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
let useRefLinks = writerReferenceLinks opts
let useAuto = case (tit,txt) of

View file

@ -272,8 +272,7 @@ inlineToOrg (Link txt (src, _)) = do
_ -> do contents <- inlineListToOrg txt
modify $ \s -> s{ stLinks = True }
return $ "[[" <> text src <> "][" <> contents <> "]]"
inlineToOrg (Image _ (source', _)) = do
let source = unescapeURI source'
inlineToOrg (Image _ (source, _)) = do
modify $ \s -> s{ stImages = True }
return $ "[[" <> text source <> "]]"
inlineToOrg (Note contents) = do

View file

@ -298,9 +298,8 @@ inlineToRST Space = return space
inlineToRST (Link [Code _ str] (src, _)) | src == str ||
src == "mailto:" ++ str = do
let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
return $ text $ unescapeURI srcSuffix
inlineToRST (Link txt (src', tit)) = do
let src = unescapeURI src'
return $ text srcSuffix
inlineToRST (Link txt (src, tit)) = do
useReferenceLinks <- get >>= return . writerReferenceLinks . stOptions
linktext <- inlineListToRST $ normalizeSpaces txt
if useReferenceLinks
@ -311,8 +310,7 @@ inlineToRST (Link txt (src', tit)) = do
modify $ \st -> st { stLinks = refs' }
return $ "`" <> linktext <> "`_"
else return $ "`" <> linktext <> " <" <> text src <> ">`_"
inlineToRST (Image alternate (source', tit)) = do
let source = unescapeURI source'
inlineToRST (Image alternate (source, tit)) = do
pics <- get >>= return . stImages
let labelsUsed = map fst pics
let txt = if null alternate || alternate == [Str ""] ||

View file

@ -29,9 +29,9 @@
,Para [Str "`",Str "hi"]
,Para [Str "there",Str "`"]
,Header 2 [Str "Multilingual",Space,Str "URLs"]
,Para [Link [Code ("",["url"],[]) "http://\27979.com?\27979=\27979"] ("http://%E6%B5%8B.com?%E6%B5%8B=%E6%B5%8B","")]
,Para [Link [Str "foo"] ("/bar/%E6%B5%8B?x=%E6%B5%8B","title")]
,Para [Link [Code ("",["url"],[]) "\27979@foo.\27979.baz"] ("mailto:%E6%B5%8B@foo.%E6%B5%8B.baz","")]
,Plain [RawInline "html" "<http://\27979.com?\27979=\27979>"]
,Para [Link [Str "foo"] ("/bar/\27979?x=\27979","title")]
,Para [Link [Code ("",["url"],[]) "\27979@foo.\27979.baz"] ("mailto:\27979@foo.\27979.baz","")]
,Header 2 [Str "Numbered",Space,Str "examples"]
,OrderedList (1,Example,TwoParens)
[[Plain [Str "First",Space,Str "example",Str "."]]