diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index be34bc823..86797dcf2 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -44,7 +44,6 @@ module Text.Pandoc.Shared ( camelCaseToHyphenated, toRomanNumeral, escapeURI, - unescapeURI, tabFilter, -- * Pandoc block and inline list processing orderedListMarkers, @@ -73,11 +72,10 @@ module Text.Pandoc.Shared ( import Text.Pandoc.Definition import Text.Pandoc.Generic import qualified Text.Pandoc.UTF8 as UTF8 (readFile) -import Data.Char ( toLower, isLower, isUpper, isAlpha, isAscii, - isLetter, isDigit ) +import Data.Char ( toLower, isLower, isUpper, isAlpha, + isLetter, isDigit, isSpace ) import Data.List ( find, isPrefixOf, intercalate ) -import Network.URI ( isAllowedInURI, escapeURIString, unEscapeString ) -import Codec.Binary.UTF8.String ( encodeString, decodeString ) +import Network.URI ( escapeURIString ) import System.Directory import System.FilePath ( () ) import Data.Generics (Typeable, Data) @@ -181,16 +179,9 @@ toRomanNumeral x = _ | x >= 1 -> "I" ++ toRomanNumeral (x - 1) _ -> "" --- | Escape unicode characters in a URI. Characters that are --- already valid in a URI, including % and ?, are left alone. +-- | Escape whitespace in URI. escapeURI :: String -> String -escapeURI = escapeURIString isAllowedInURI . encodeString - --- | Unescape unicode and some special characters in a URI, but --- without introducing spaces. -unescapeURI :: String -> String -unescapeURI = escapeURIString (\c -> isAllowedInURI c || not (isAscii c)) . - decodeString . unEscapeString +escapeURI = escapeURIString (not . isSpace) -- | Convert tabs to spaces and filter out DOS line endings. -- Tabs will be preserved if tab stop is set to 0. @@ -304,9 +295,9 @@ consolidateInlines (Str x : ys) = fromStr (Str z) = z fromStr _ = error "consolidateInlines - fromStr - not a Str" consolidateInlines (Space : ys) = Space : rest - where isSpace Space = True - isSpace _ = False - rest = consolidateInlines $ dropWhile isSpace ys + where isSp Space = True + isSp _ = False + rest = consolidateInlines $ dropWhile isSp ys consolidateInlines (Emph xs : Emph ys : zs) = consolidateInlines $ Emph (xs ++ ys) : zs consolidateInlines (Strong xs : Strong ys : zs) = consolidateInlines $ diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index ab2b01656..f45c20e9e 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -336,12 +336,11 @@ inlineToAsciiDoc _ (RawInline _ _) = return empty inlineToAsciiDoc _ (LineBreak) = return $ " +" <> cr inlineToAsciiDoc _ Space = return space inlineToAsciiDoc opts (Cite _ lst) = inlineListToAsciiDoc opts lst -inlineToAsciiDoc opts (Link txt (src', _tit)) = do +inlineToAsciiDoc opts (Link txt (src, _tit)) = do -- relative: link:downloads/foo.zip[download foo.zip] -- abs: http://google.cod[Google] -- or my@email.com[email john] linktext <- inlineListToAsciiDoc opts txt - let src = unescapeURI src' let isRelative = ':' `notElem` src let prefix = if isRelative then text "link:" @@ -353,7 +352,7 @@ inlineToAsciiDoc opts (Link txt (src', _tit)) = do return $ if useAuto then text srcSuffix else prefix <> text src <> "[" <> linktext <> "]" -inlineToAsciiDoc opts (Image alternate (src', tit)) = do +inlineToAsciiDoc opts (Image alternate (src, tit)) = do -- image:images/logo.png[Company logo, title="blah"] let txt = if (null alternate) || (alternate == [Str ""]) then [Str "image"] @@ -362,7 +361,6 @@ inlineToAsciiDoc opts (Image alternate (src', tit)) = do let linktitle = if null tit then empty else text $ ",title=\"" ++ tit ++ "\"" - let src = unescapeURI src' return $ "image:" <> text src <> "[" <> linktext <> linktitle <> "]" inlineToAsciiDoc opts (Note [Para inlines]) = inlineToAsciiDoc opts (Note [Plain inlines]) diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 42a59cc5f..70202294f 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -493,12 +493,11 @@ inlineToMarkdown opts (Cite (c:cs) lst) modekey SuppressAuthor = "-" modekey _ = "" inlineToMarkdown _ (Cite _ _) = return $ text "" -inlineToMarkdown opts (Link txt (src', tit)) = do +inlineToMarkdown opts (Link txt (src, tit)) = do linktext <- inlineListToMarkdown opts txt let linktitle = if null tit then empty else text $ " \"" ++ tit ++ "\"" - let src = unescapeURI src' let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src let useRefLinks = writerReferenceLinks opts let useAuto = case (tit,txt) of diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index f7f314428..6e0fb98e1 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -272,8 +272,7 @@ inlineToOrg (Link txt (src, _)) = do _ -> do contents <- inlineListToOrg txt modify $ \s -> s{ stLinks = True } return $ "[[" <> text src <> "][" <> contents <> "]]" -inlineToOrg (Image _ (source', _)) = do - let source = unescapeURI source' +inlineToOrg (Image _ (source, _)) = do modify $ \s -> s{ stImages = True } return $ "[[" <> text source <> "]]" inlineToOrg (Note contents) = do diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 4cf64c267..0f0479e16 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -298,9 +298,8 @@ inlineToRST Space = return space inlineToRST (Link [Code _ str] (src, _)) | src == str || src == "mailto:" ++ str = do let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src - return $ text $ unescapeURI srcSuffix -inlineToRST (Link txt (src', tit)) = do - let src = unescapeURI src' + return $ text srcSuffix +inlineToRST (Link txt (src, tit)) = do useReferenceLinks <- get >>= return . writerReferenceLinks . stOptions linktext <- inlineListToRST $ normalizeSpaces txt if useReferenceLinks @@ -311,8 +310,7 @@ inlineToRST (Link txt (src', tit)) = do modify $ \st -> st { stLinks = refs' } return $ "`" <> linktext <> "`_" else return $ "`" <> linktext <> " <" <> text src <> ">`_" -inlineToRST (Image alternate (source', tit)) = do - let source = unescapeURI source' +inlineToRST (Image alternate (source, tit)) = do pics <- get >>= return . stImages let labelsUsed = map fst pics let txt = if null alternate || alternate == [Str ""] || diff --git a/tests/markdown-reader-more.native b/tests/markdown-reader-more.native index e5e079e9f..3b2221d7b 100644 --- a/tests/markdown-reader-more.native +++ b/tests/markdown-reader-more.native @@ -29,9 +29,9 @@ ,Para [Str "`",Str "hi"] ,Para [Str "there",Str "`"] ,Header 2 [Str "Multilingual",Space,Str "URLs"] -,Para [Link [Code ("",["url"],[]) "http://\27979.com?\27979=\27979"] ("http://%E6%B5%8B.com?%E6%B5%8B=%E6%B5%8B","")] -,Para [Link [Str "foo"] ("/bar/%E6%B5%8B?x=%E6%B5%8B","title")] -,Para [Link [Code ("",["url"],[]) "\27979@foo.\27979.baz"] ("mailto:%E6%B5%8B@foo.%E6%B5%8B.baz","")] +,Plain [RawInline "html" ""] +,Para [Link [Str "foo"] ("/bar/\27979?x=\27979","title")] +,Para [Link [Code ("",["url"],[]) "\27979@foo.\27979.baz"] ("mailto:\27979@foo.\27979.baz","")] ,Header 2 [Str "Numbered",Space,Str "examples"] ,OrderedList (1,Example,TwoParens) [[Plain [Str "First",Space,Str "example",Str "."]]