Shared: Removed unescapeURI, modified escapeURI.
escapeURI now only escapes space characters, leaving unicode characters as they are, instead of converting them to octets and URL-encoding them, as before. This gives more readable URIs. User agents now do the percent-encoding themselves. URIs are no longer unescaped at all on conversion to markdown, asciidoc, rst, org. Closes #349.
This commit is contained in:
parent
c6456ef8a4
commit
bdec07bac9
6 changed files with 18 additions and 33 deletions
|
@ -44,7 +44,6 @@ module Text.Pandoc.Shared (
|
||||||
camelCaseToHyphenated,
|
camelCaseToHyphenated,
|
||||||
toRomanNumeral,
|
toRomanNumeral,
|
||||||
escapeURI,
|
escapeURI,
|
||||||
unescapeURI,
|
|
||||||
tabFilter,
|
tabFilter,
|
||||||
-- * Pandoc block and inline list processing
|
-- * Pandoc block and inline list processing
|
||||||
orderedListMarkers,
|
orderedListMarkers,
|
||||||
|
@ -73,11 +72,10 @@ module Text.Pandoc.Shared (
|
||||||
import Text.Pandoc.Definition
|
import Text.Pandoc.Definition
|
||||||
import Text.Pandoc.Generic
|
import Text.Pandoc.Generic
|
||||||
import qualified Text.Pandoc.UTF8 as UTF8 (readFile)
|
import qualified Text.Pandoc.UTF8 as UTF8 (readFile)
|
||||||
import Data.Char ( toLower, isLower, isUpper, isAlpha, isAscii,
|
import Data.Char ( toLower, isLower, isUpper, isAlpha,
|
||||||
isLetter, isDigit )
|
isLetter, isDigit, isSpace )
|
||||||
import Data.List ( find, isPrefixOf, intercalate )
|
import Data.List ( find, isPrefixOf, intercalate )
|
||||||
import Network.URI ( isAllowedInURI, escapeURIString, unEscapeString )
|
import Network.URI ( escapeURIString )
|
||||||
import Codec.Binary.UTF8.String ( encodeString, decodeString )
|
|
||||||
import System.Directory
|
import System.Directory
|
||||||
import System.FilePath ( (</>) )
|
import System.FilePath ( (</>) )
|
||||||
import Data.Generics (Typeable, Data)
|
import Data.Generics (Typeable, Data)
|
||||||
|
@ -181,16 +179,9 @@ toRomanNumeral x =
|
||||||
_ | x >= 1 -> "I" ++ toRomanNumeral (x - 1)
|
_ | x >= 1 -> "I" ++ toRomanNumeral (x - 1)
|
||||||
_ -> ""
|
_ -> ""
|
||||||
|
|
||||||
-- | Escape unicode characters in a URI. Characters that are
|
-- | Escape whitespace in URI.
|
||||||
-- already valid in a URI, including % and ?, are left alone.
|
|
||||||
escapeURI :: String -> String
|
escapeURI :: String -> String
|
||||||
escapeURI = escapeURIString isAllowedInURI . encodeString
|
escapeURI = escapeURIString (not . isSpace)
|
||||||
|
|
||||||
-- | Unescape unicode and some special characters in a URI, but
|
|
||||||
-- without introducing spaces.
|
|
||||||
unescapeURI :: String -> String
|
|
||||||
unescapeURI = escapeURIString (\c -> isAllowedInURI c || not (isAscii c)) .
|
|
||||||
decodeString . unEscapeString
|
|
||||||
|
|
||||||
-- | Convert tabs to spaces and filter out DOS line endings.
|
-- | Convert tabs to spaces and filter out DOS line endings.
|
||||||
-- Tabs will be preserved if tab stop is set to 0.
|
-- Tabs will be preserved if tab stop is set to 0.
|
||||||
|
@ -304,9 +295,9 @@ consolidateInlines (Str x : ys) =
|
||||||
fromStr (Str z) = z
|
fromStr (Str z) = z
|
||||||
fromStr _ = error "consolidateInlines - fromStr - not a Str"
|
fromStr _ = error "consolidateInlines - fromStr - not a Str"
|
||||||
consolidateInlines (Space : ys) = Space : rest
|
consolidateInlines (Space : ys) = Space : rest
|
||||||
where isSpace Space = True
|
where isSp Space = True
|
||||||
isSpace _ = False
|
isSp _ = False
|
||||||
rest = consolidateInlines $ dropWhile isSpace ys
|
rest = consolidateInlines $ dropWhile isSp ys
|
||||||
consolidateInlines (Emph xs : Emph ys : zs) = consolidateInlines $
|
consolidateInlines (Emph xs : Emph ys : zs) = consolidateInlines $
|
||||||
Emph (xs ++ ys) : zs
|
Emph (xs ++ ys) : zs
|
||||||
consolidateInlines (Strong xs : Strong ys : zs) = consolidateInlines $
|
consolidateInlines (Strong xs : Strong ys : zs) = consolidateInlines $
|
||||||
|
|
|
@ -336,12 +336,11 @@ inlineToAsciiDoc _ (RawInline _ _) = return empty
|
||||||
inlineToAsciiDoc _ (LineBreak) = return $ " +" <> cr
|
inlineToAsciiDoc _ (LineBreak) = return $ " +" <> cr
|
||||||
inlineToAsciiDoc _ Space = return space
|
inlineToAsciiDoc _ Space = return space
|
||||||
inlineToAsciiDoc opts (Cite _ lst) = inlineListToAsciiDoc opts lst
|
inlineToAsciiDoc opts (Cite _ lst) = inlineListToAsciiDoc opts lst
|
||||||
inlineToAsciiDoc opts (Link txt (src', _tit)) = do
|
inlineToAsciiDoc opts (Link txt (src, _tit)) = do
|
||||||
-- relative: link:downloads/foo.zip[download foo.zip]
|
-- relative: link:downloads/foo.zip[download foo.zip]
|
||||||
-- abs: http://google.cod[Google]
|
-- abs: http://google.cod[Google]
|
||||||
-- or my@email.com[email john]
|
-- or my@email.com[email john]
|
||||||
linktext <- inlineListToAsciiDoc opts txt
|
linktext <- inlineListToAsciiDoc opts txt
|
||||||
let src = unescapeURI src'
|
|
||||||
let isRelative = ':' `notElem` src
|
let isRelative = ':' `notElem` src
|
||||||
let prefix = if isRelative
|
let prefix = if isRelative
|
||||||
then text "link:"
|
then text "link:"
|
||||||
|
@ -353,7 +352,7 @@ inlineToAsciiDoc opts (Link txt (src', _tit)) = do
|
||||||
return $ if useAuto
|
return $ if useAuto
|
||||||
then text srcSuffix
|
then text srcSuffix
|
||||||
else prefix <> text src <> "[" <> linktext <> "]"
|
else prefix <> text src <> "[" <> linktext <> "]"
|
||||||
inlineToAsciiDoc opts (Image alternate (src', tit)) = do
|
inlineToAsciiDoc opts (Image alternate (src, tit)) = do
|
||||||
-- image:images/logo.png[Company logo, title="blah"]
|
-- image:images/logo.png[Company logo, title="blah"]
|
||||||
let txt = if (null alternate) || (alternate == [Str ""])
|
let txt = if (null alternate) || (alternate == [Str ""])
|
||||||
then [Str "image"]
|
then [Str "image"]
|
||||||
|
@ -362,7 +361,6 @@ inlineToAsciiDoc opts (Image alternate (src', tit)) = do
|
||||||
let linktitle = if null tit
|
let linktitle = if null tit
|
||||||
then empty
|
then empty
|
||||||
else text $ ",title=\"" ++ tit ++ "\""
|
else text $ ",title=\"" ++ tit ++ "\""
|
||||||
let src = unescapeURI src'
|
|
||||||
return $ "image:" <> text src <> "[" <> linktext <> linktitle <> "]"
|
return $ "image:" <> text src <> "[" <> linktext <> linktitle <> "]"
|
||||||
inlineToAsciiDoc opts (Note [Para inlines]) =
|
inlineToAsciiDoc opts (Note [Para inlines]) =
|
||||||
inlineToAsciiDoc opts (Note [Plain inlines])
|
inlineToAsciiDoc opts (Note [Plain inlines])
|
||||||
|
|
|
@ -493,12 +493,11 @@ inlineToMarkdown opts (Cite (c:cs) lst)
|
||||||
modekey SuppressAuthor = "-"
|
modekey SuppressAuthor = "-"
|
||||||
modekey _ = ""
|
modekey _ = ""
|
||||||
inlineToMarkdown _ (Cite _ _) = return $ text ""
|
inlineToMarkdown _ (Cite _ _) = return $ text ""
|
||||||
inlineToMarkdown opts (Link txt (src', tit)) = do
|
inlineToMarkdown opts (Link txt (src, tit)) = do
|
||||||
linktext <- inlineListToMarkdown opts txt
|
linktext <- inlineListToMarkdown opts txt
|
||||||
let linktitle = if null tit
|
let linktitle = if null tit
|
||||||
then empty
|
then empty
|
||||||
else text $ " \"" ++ tit ++ "\""
|
else text $ " \"" ++ tit ++ "\""
|
||||||
let src = unescapeURI src'
|
|
||||||
let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
|
let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
|
||||||
let useRefLinks = writerReferenceLinks opts
|
let useRefLinks = writerReferenceLinks opts
|
||||||
let useAuto = case (tit,txt) of
|
let useAuto = case (tit,txt) of
|
||||||
|
|
|
@ -272,8 +272,7 @@ inlineToOrg (Link txt (src, _)) = do
|
||||||
_ -> do contents <- inlineListToOrg txt
|
_ -> do contents <- inlineListToOrg txt
|
||||||
modify $ \s -> s{ stLinks = True }
|
modify $ \s -> s{ stLinks = True }
|
||||||
return $ "[[" <> text src <> "][" <> contents <> "]]"
|
return $ "[[" <> text src <> "][" <> contents <> "]]"
|
||||||
inlineToOrg (Image _ (source', _)) = do
|
inlineToOrg (Image _ (source, _)) = do
|
||||||
let source = unescapeURI source'
|
|
||||||
modify $ \s -> s{ stImages = True }
|
modify $ \s -> s{ stImages = True }
|
||||||
return $ "[[" <> text source <> "]]"
|
return $ "[[" <> text source <> "]]"
|
||||||
inlineToOrg (Note contents) = do
|
inlineToOrg (Note contents) = do
|
||||||
|
|
|
@ -298,9 +298,8 @@ inlineToRST Space = return space
|
||||||
inlineToRST (Link [Code _ str] (src, _)) | src == str ||
|
inlineToRST (Link [Code _ str] (src, _)) | src == str ||
|
||||||
src == "mailto:" ++ str = do
|
src == "mailto:" ++ str = do
|
||||||
let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
|
let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
|
||||||
return $ text $ unescapeURI srcSuffix
|
return $ text srcSuffix
|
||||||
inlineToRST (Link txt (src', tit)) = do
|
inlineToRST (Link txt (src, tit)) = do
|
||||||
let src = unescapeURI src'
|
|
||||||
useReferenceLinks <- get >>= return . writerReferenceLinks . stOptions
|
useReferenceLinks <- get >>= return . writerReferenceLinks . stOptions
|
||||||
linktext <- inlineListToRST $ normalizeSpaces txt
|
linktext <- inlineListToRST $ normalizeSpaces txt
|
||||||
if useReferenceLinks
|
if useReferenceLinks
|
||||||
|
@ -311,8 +310,7 @@ inlineToRST (Link txt (src', tit)) = do
|
||||||
modify $ \st -> st { stLinks = refs' }
|
modify $ \st -> st { stLinks = refs' }
|
||||||
return $ "`" <> linktext <> "`_"
|
return $ "`" <> linktext <> "`_"
|
||||||
else return $ "`" <> linktext <> " <" <> text src <> ">`_"
|
else return $ "`" <> linktext <> " <" <> text src <> ">`_"
|
||||||
inlineToRST (Image alternate (source', tit)) = do
|
inlineToRST (Image alternate (source, tit)) = do
|
||||||
let source = unescapeURI source'
|
|
||||||
pics <- get >>= return . stImages
|
pics <- get >>= return . stImages
|
||||||
let labelsUsed = map fst pics
|
let labelsUsed = map fst pics
|
||||||
let txt = if null alternate || alternate == [Str ""] ||
|
let txt = if null alternate || alternate == [Str ""] ||
|
||||||
|
|
|
@ -29,9 +29,9 @@
|
||||||
,Para [Str "`",Str "hi"]
|
,Para [Str "`",Str "hi"]
|
||||||
,Para [Str "there",Str "`"]
|
,Para [Str "there",Str "`"]
|
||||||
,Header 2 [Str "Multilingual",Space,Str "URLs"]
|
,Header 2 [Str "Multilingual",Space,Str "URLs"]
|
||||||
,Para [Link [Code ("",["url"],[]) "http://\27979.com?\27979=\27979"] ("http://%E6%B5%8B.com?%E6%B5%8B=%E6%B5%8B","")]
|
,Plain [RawInline "html" "<http://\27979.com?\27979=\27979>"]
|
||||||
,Para [Link [Str "foo"] ("/bar/%E6%B5%8B?x=%E6%B5%8B","title")]
|
,Para [Link [Str "foo"] ("/bar/\27979?x=\27979","title")]
|
||||||
,Para [Link [Code ("",["url"],[]) "\27979@foo.\27979.baz"] ("mailto:%E6%B5%8B@foo.%E6%B5%8B.baz","")]
|
,Para [Link [Code ("",["url"],[]) "\27979@foo.\27979.baz"] ("mailto:\27979@foo.\27979.baz","")]
|
||||||
,Header 2 [Str "Numbered",Space,Str "examples"]
|
,Header 2 [Str "Numbered",Space,Str "examples"]
|
||||||
,OrderedList (1,Example,TwoParens)
|
,OrderedList (1,Example,TwoParens)
|
||||||
[[Plain [Str "First",Space,Str "example",Str "."]]
|
[[Plain [Str "First",Space,Str "example",Str "."]]
|
||||||
|
|
Loading…
Add table
Reference in a new issue