Shared: Removed unescapeURI, modified escapeURI.
escapeURI now only escapes space characters, leaving unicode characters as they are, instead of converting them to octets and URL-encoding them, as before. This gives more readable URIs. User agents now do the percent-encoding themselves. URIs are no longer unescaped at all on conversion to markdown, asciidoc, rst, org. Closes #349.
This commit is contained in:
parent
c6456ef8a4
commit
bdec07bac9
6 changed files with 18 additions and 33 deletions
|
@ -44,7 +44,6 @@ module Text.Pandoc.Shared (
|
|||
camelCaseToHyphenated,
|
||||
toRomanNumeral,
|
||||
escapeURI,
|
||||
unescapeURI,
|
||||
tabFilter,
|
||||
-- * Pandoc block and inline list processing
|
||||
orderedListMarkers,
|
||||
|
@ -73,11 +72,10 @@ module Text.Pandoc.Shared (
|
|||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Generic
|
||||
import qualified Text.Pandoc.UTF8 as UTF8 (readFile)
|
||||
import Data.Char ( toLower, isLower, isUpper, isAlpha, isAscii,
|
||||
isLetter, isDigit )
|
||||
import Data.Char ( toLower, isLower, isUpper, isAlpha,
|
||||
isLetter, isDigit, isSpace )
|
||||
import Data.List ( find, isPrefixOf, intercalate )
|
||||
import Network.URI ( isAllowedInURI, escapeURIString, unEscapeString )
|
||||
import Codec.Binary.UTF8.String ( encodeString, decodeString )
|
||||
import Network.URI ( escapeURIString )
|
||||
import System.Directory
|
||||
import System.FilePath ( (</>) )
|
||||
import Data.Generics (Typeable, Data)
|
||||
|
@ -181,16 +179,9 @@ toRomanNumeral x =
|
|||
_ | x >= 1 -> "I" ++ toRomanNumeral (x - 1)
|
||||
_ -> ""
|
||||
|
||||
-- | Escape unicode characters in a URI. Characters that are
|
||||
-- already valid in a URI, including % and ?, are left alone.
|
||||
-- | Escape whitespace in URI.
|
||||
escapeURI :: String -> String
|
||||
escapeURI = escapeURIString isAllowedInURI . encodeString
|
||||
|
||||
-- | Unescape unicode and some special characters in a URI, but
|
||||
-- without introducing spaces.
|
||||
unescapeURI :: String -> String
|
||||
unescapeURI = escapeURIString (\c -> isAllowedInURI c || not (isAscii c)) .
|
||||
decodeString . unEscapeString
|
||||
escapeURI = escapeURIString (not . isSpace)
|
||||
|
||||
-- | Convert tabs to spaces and filter out DOS line endings.
|
||||
-- Tabs will be preserved if tab stop is set to 0.
|
||||
|
@ -304,9 +295,9 @@ consolidateInlines (Str x : ys) =
|
|||
fromStr (Str z) = z
|
||||
fromStr _ = error "consolidateInlines - fromStr - not a Str"
|
||||
consolidateInlines (Space : ys) = Space : rest
|
||||
where isSpace Space = True
|
||||
isSpace _ = False
|
||||
rest = consolidateInlines $ dropWhile isSpace ys
|
||||
where isSp Space = True
|
||||
isSp _ = False
|
||||
rest = consolidateInlines $ dropWhile isSp ys
|
||||
consolidateInlines (Emph xs : Emph ys : zs) = consolidateInlines $
|
||||
Emph (xs ++ ys) : zs
|
||||
consolidateInlines (Strong xs : Strong ys : zs) = consolidateInlines $
|
||||
|
|
|
@ -336,12 +336,11 @@ inlineToAsciiDoc _ (RawInline _ _) = return empty
|
|||
inlineToAsciiDoc _ (LineBreak) = return $ " +" <> cr
|
||||
inlineToAsciiDoc _ Space = return space
|
||||
inlineToAsciiDoc opts (Cite _ lst) = inlineListToAsciiDoc opts lst
|
||||
inlineToAsciiDoc opts (Link txt (src', _tit)) = do
|
||||
inlineToAsciiDoc opts (Link txt (src, _tit)) = do
|
||||
-- relative: link:downloads/foo.zip[download foo.zip]
|
||||
-- abs: http://google.cod[Google]
|
||||
-- or my@email.com[email john]
|
||||
linktext <- inlineListToAsciiDoc opts txt
|
||||
let src = unescapeURI src'
|
||||
let isRelative = ':' `notElem` src
|
||||
let prefix = if isRelative
|
||||
then text "link:"
|
||||
|
@ -353,7 +352,7 @@ inlineToAsciiDoc opts (Link txt (src', _tit)) = do
|
|||
return $ if useAuto
|
||||
then text srcSuffix
|
||||
else prefix <> text src <> "[" <> linktext <> "]"
|
||||
inlineToAsciiDoc opts (Image alternate (src', tit)) = do
|
||||
inlineToAsciiDoc opts (Image alternate (src, tit)) = do
|
||||
-- image:images/logo.png[Company logo, title="blah"]
|
||||
let txt = if (null alternate) || (alternate == [Str ""])
|
||||
then [Str "image"]
|
||||
|
@ -362,7 +361,6 @@ inlineToAsciiDoc opts (Image alternate (src', tit)) = do
|
|||
let linktitle = if null tit
|
||||
then empty
|
||||
else text $ ",title=\"" ++ tit ++ "\""
|
||||
let src = unescapeURI src'
|
||||
return $ "image:" <> text src <> "[" <> linktext <> linktitle <> "]"
|
||||
inlineToAsciiDoc opts (Note [Para inlines]) =
|
||||
inlineToAsciiDoc opts (Note [Plain inlines])
|
||||
|
|
|
@ -493,12 +493,11 @@ inlineToMarkdown opts (Cite (c:cs) lst)
|
|||
modekey SuppressAuthor = "-"
|
||||
modekey _ = ""
|
||||
inlineToMarkdown _ (Cite _ _) = return $ text ""
|
||||
inlineToMarkdown opts (Link txt (src', tit)) = do
|
||||
inlineToMarkdown opts (Link txt (src, tit)) = do
|
||||
linktext <- inlineListToMarkdown opts txt
|
||||
let linktitle = if null tit
|
||||
then empty
|
||||
else text $ " \"" ++ tit ++ "\""
|
||||
let src = unescapeURI src'
|
||||
let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
|
||||
let useRefLinks = writerReferenceLinks opts
|
||||
let useAuto = case (tit,txt) of
|
||||
|
|
|
@ -272,8 +272,7 @@ inlineToOrg (Link txt (src, _)) = do
|
|||
_ -> do contents <- inlineListToOrg txt
|
||||
modify $ \s -> s{ stLinks = True }
|
||||
return $ "[[" <> text src <> "][" <> contents <> "]]"
|
||||
inlineToOrg (Image _ (source', _)) = do
|
||||
let source = unescapeURI source'
|
||||
inlineToOrg (Image _ (source, _)) = do
|
||||
modify $ \s -> s{ stImages = True }
|
||||
return $ "[[" <> text source <> "]]"
|
||||
inlineToOrg (Note contents) = do
|
||||
|
|
|
@ -298,9 +298,8 @@ inlineToRST Space = return space
|
|||
inlineToRST (Link [Code _ str] (src, _)) | src == str ||
|
||||
src == "mailto:" ++ str = do
|
||||
let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
|
||||
return $ text $ unescapeURI srcSuffix
|
||||
inlineToRST (Link txt (src', tit)) = do
|
||||
let src = unescapeURI src'
|
||||
return $ text srcSuffix
|
||||
inlineToRST (Link txt (src, tit)) = do
|
||||
useReferenceLinks <- get >>= return . writerReferenceLinks . stOptions
|
||||
linktext <- inlineListToRST $ normalizeSpaces txt
|
||||
if useReferenceLinks
|
||||
|
@ -311,8 +310,7 @@ inlineToRST (Link txt (src', tit)) = do
|
|||
modify $ \st -> st { stLinks = refs' }
|
||||
return $ "`" <> linktext <> "`_"
|
||||
else return $ "`" <> linktext <> " <" <> text src <> ">`_"
|
||||
inlineToRST (Image alternate (source', tit)) = do
|
||||
let source = unescapeURI source'
|
||||
inlineToRST (Image alternate (source, tit)) = do
|
||||
pics <- get >>= return . stImages
|
||||
let labelsUsed = map fst pics
|
||||
let txt = if null alternate || alternate == [Str ""] ||
|
||||
|
|
|
@ -29,9 +29,9 @@
|
|||
,Para [Str "`",Str "hi"]
|
||||
,Para [Str "there",Str "`"]
|
||||
,Header 2 [Str "Multilingual",Space,Str "URLs"]
|
||||
,Para [Link [Code ("",["url"],[]) "http://\27979.com?\27979=\27979"] ("http://%E6%B5%8B.com?%E6%B5%8B=%E6%B5%8B","")]
|
||||
,Para [Link [Str "foo"] ("/bar/%E6%B5%8B?x=%E6%B5%8B","title")]
|
||||
,Para [Link [Code ("",["url"],[]) "\27979@foo.\27979.baz"] ("mailto:%E6%B5%8B@foo.%E6%B5%8B.baz","")]
|
||||
,Plain [RawInline "html" "<http://\27979.com?\27979=\27979>"]
|
||||
,Para [Link [Str "foo"] ("/bar/\27979?x=\27979","title")]
|
||||
,Para [Link [Code ("",["url"],[]) "\27979@foo.\27979.baz"] ("mailto:\27979@foo.\27979.baz","")]
|
||||
,Header 2 [Str "Numbered",Space,Str "examples"]
|
||||
,OrderedList (1,Example,TwoParens)
|
||||
[[Plain [Str "First",Space,Str "example",Str "."]]
|
||||
|
|
Loading…
Reference in a new issue