Shared: Provide custom isURI that rejects unknown schemes [isURI]
We also export the set of known `schemes`. The new function replaces the function of the same name from `Network.URI`, as the latter did not check whether a scheme is well-known. For example, MediaWiki wikis frequently feature pages with names like `User:John`. Such links were interpreted as URIs, thus turning internal links into global links. This is prevented by also checking whether the scheme of a URI is frequently used (i.e. is IANA-registered or an otherwise well-known scheme).
Fixes: #2713
Update set of well-known URI schemes from IANA list: All official IANA schemes (as of 2017-05-22) are included in the set of known schemes. The four unofficial schemes doi, isbn, javascript, and pmid are kept.
This commit is contained in:
parent
4d1e9b8e41
commit
5debb0da0f
16 changed files with 81 additions and 48 deletions
|
@ -57,7 +57,7 @@ import Data.Maybe (fromMaybe, isJust, isNothing)
|
|||
import qualified Data.Text as T
|
||||
import Data.Yaml (decode)
|
||||
import qualified Data.Yaml as Yaml
|
||||
import Network.URI (URI (..), isURI, parseURI)
|
||||
import Network.URI (URI (..), parseURI)
|
||||
import Paths_pandoc (getDataDir)
|
||||
import Skylighting (Style, Syntax (..), defaultSyntaxMap, parseTheme)
|
||||
import Skylighting.Parser (missingIncludes, parseSyntaxDefinition,
|
||||
|
@ -80,7 +80,7 @@ import Text.Pandoc.Lua ( runLuaFilter )
|
|||
import Text.Pandoc.PDF (makePDF)
|
||||
import Text.Pandoc.Process (pipeProcess)
|
||||
import Text.Pandoc.SelfContained (makeSelfContained, makeDataURI)
|
||||
import Text.Pandoc.Shared (headerShift, openURL, readDataFile,
|
||||
import Text.Pandoc.Shared (isURI, headerShift, openURL, readDataFile,
|
||||
readDataFileUTF8, safeRead, tabFilter)
|
||||
import qualified Text.Pandoc.UTF8 as UTF8
|
||||
import Text.Pandoc.XML (toEntities)
|
||||
|
|
|
@ -465,33 +465,8 @@ emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain)
|
|||
sepby1 p sep = (:) <$> p <*> (many (try $ sep >> p))
|
||||
|
||||
|
||||
-- Schemes from http://www.iana.org/assignments/uri-schemes.html plus
|
||||
-- the unofficial schemes coap, doi, javascript, isbn, pmid
|
||||
schemes :: [String]
|
||||
schemes = ["coap","doi","javascript","aaa","aaas","about","acap","cap","cid",
|
||||
"crid","data","dav","dict","dns","file","ftp","geo","go","gopher",
|
||||
"h323","http","https","iax","icap","im","imap","info","ipp","iris",
|
||||
"iris.beep","iris.xpc","iris.xpcs","iris.lwz","ldap","mailto","mid",
|
||||
"msrp","msrps","mtqp","mupdate","news","nfs","ni","nih","nntp",
|
||||
"opaquelocktoken","pop","pres","rtsp","service","session","shttp","sieve",
|
||||
"sip","sips","sms","snmp","soap.beep","soap.beeps","tag","tel","telnet",
|
||||
"tftp","thismessage","tn3270","tip","tv","urn","vemmi","ws","wss","xcon",
|
||||
"xcon-userid","xmlrpc.beep","xmlrpc.beeps","xmpp","z39.50r","z39.50s",
|
||||
"adiumxtra","afp","afs","aim","apt","attachment","aw","beshare","bitcoin",
|
||||
"bolo","callto","chrome","chrome-extension","com-eventbrite-attendee",
|
||||
"content", "cvs","dlna-playsingle","dlna-playcontainer","dtn","dvb",
|
||||
"ed2k","facetime","feed","finger","fish","gg","git","gizmoproject",
|
||||
"gtalk","hcp","icon","ipn","irc","irc6","ircs","itms","jar","jms",
|
||||
"keyparc","lastfm","ldaps","magnet","maps","market","message","mms",
|
||||
"ms-help","msnim","mumble","mvn","notes","oid","palm","paparazzi",
|
||||
"platform","proxy","psyc","query","res","resource","rmi","rsync",
|
||||
"rtmp","secondlife","sftp","sgn","skype","smb","soldat","spotify",
|
||||
"ssh","steam","svn","teamspeak","things","udp","unreal","ut2004",
|
||||
"ventrilo","view-source","webcal","wtai","wyciwyg","xfire","xri",
|
||||
"ymsgr", "isbn", "pmid"]
|
||||
|
||||
uriScheme :: Stream s m Char => ParserT s st m String
|
||||
uriScheme = oneOfStringsCI schemes
|
||||
uriScheme = oneOfStringsCI (Set.toList schemes)
|
||||
|
||||
-- | Parses a URI. Returns pair of original and URI-escaped version.
|
||||
uri :: Stream [Char] m Char => ParserT [Char] st m (String, String)
|
||||
|
|
|
@ -42,7 +42,6 @@ import Text.Pandoc.Definition
|
|||
import Text.Pandoc.Options
|
||||
import Text.Pandoc.Parsing hiding (macro, space, spaces, uri)
|
||||
import Text.Pandoc.Shared (compactify, compactifyDL, escapeURI)
|
||||
--import Network.URI (isURI) -- Not sure whether to use this function
|
||||
import Control.Monad (guard, void, when)
|
||||
import Control.Monad.Reader (Reader, asks, runReader)
|
||||
import Data.Default
|
||||
|
|
|
@ -42,7 +42,7 @@ import qualified Data.ByteString.Char8 as B
|
|||
import qualified Data.ByteString.Lazy as L
|
||||
import Data.Char (isAlphaNum, isAscii, toLower)
|
||||
import Data.List (isPrefixOf)
|
||||
import Network.URI (URI (..), escapeURIString, isURI, parseURI)
|
||||
import Network.URI (URI (..), escapeURIString, parseURI)
|
||||
import System.FilePath (takeDirectory, takeExtension, (</>))
|
||||
import Text.HTML.TagSoup
|
||||
import Text.Pandoc.Class (PandocMonad (..), fetchItem, report)
|
||||
|
@ -50,7 +50,7 @@ import Text.Pandoc.Error
|
|||
import Text.Pandoc.Logging
|
||||
import Text.Pandoc.MIME (MimeType)
|
||||
import Text.Pandoc.Options (WriterOptions (..))
|
||||
import Text.Pandoc.Shared (renderTags', trim)
|
||||
import Text.Pandoc.Shared (isURI, renderTags', trim)
|
||||
import Text.Pandoc.UTF8 (toString)
|
||||
import Text.Parsec (ParsecT, runParserT)
|
||||
import qualified Text.Parsec as P
|
||||
|
|
|
@ -81,6 +81,9 @@ module Text.Pandoc.Shared (
|
|||
openURL,
|
||||
collapseFilePath,
|
||||
filteredFilesFromArchive,
|
||||
-- * URI handling
|
||||
schemes,
|
||||
isURI,
|
||||
-- * Error handling
|
||||
mapLeft,
|
||||
-- * for squashing blocks
|
||||
|
@ -104,7 +107,7 @@ import Data.List ( find, stripPrefix, intercalate )
|
|||
import Data.Maybe (mapMaybe)
|
||||
import Data.Version ( showVersion )
|
||||
import qualified Data.Map as M
|
||||
import Network.URI ( escapeURIString, unEscapeString )
|
||||
import Network.URI ( URI(uriScheme), escapeURIString, unEscapeString, parseURI )
|
||||
import qualified Data.Set as Set
|
||||
import System.Directory
|
||||
import System.FilePath (splitDirectories, isPathSeparator)
|
||||
|
@ -774,6 +777,70 @@ filteredFilesFromArchive zf f =
|
|||
fileAndBinary :: Archive -> FilePath -> Maybe (FilePath, BL.ByteString)
|
||||
fileAndBinary a fp = findEntryByPath fp a >>= \e -> Just (fp, fromEntry e)
|
||||
|
||||
|
||||
--
|
||||
-- IANA URIs
|
||||
--
|
||||
|
||||
-- | Schemes from http://www.iana.org/assignments/uri-schemes.html plus
|
||||
-- the unofficial schemes doi, javascript, isbn, pmid.
|
||||
schemes :: Set.Set String
|
||||
schemes = Set.fromList
|
||||
-- Official IANA schemes
|
||||
[ "aaa", "aaas", "about", "acap", "acct", "acr", "adiumxtra", "afp", "afs"
|
||||
, "aim", "appdata", "apt", "attachment", "aw", "barion", "beshare", "bitcoin"
|
||||
, "blob", "bolo", "browserext", "callto", "cap", "chrome", "chrome-extension"
|
||||
, "cid", "coap", "coaps", "com-eventbrite-attendee", "content", "crid", "cvs"
|
||||
, "data", "dav", "dict", "dis", "dlna-playcontainer", "dlna-playsingle"
|
||||
, "dns", "dntp", "dtn", "dvb", "ed2k", "example", "facetime", "fax", "feed"
|
||||
, "feedready", "file", "filesystem", "finger", "fish", "ftp", "geo", "gg"
|
||||
, "git", "gizmoproject", "go", "gopher", "graph", "gtalk", "h323", "ham"
|
||||
, "hcp", "http", "https", "hxxp", "hxxps", "hydrazone", "iax", "icap", "icon"
|
||||
, "im", "imap", "info", "iotdisco", "ipn", "ipp", "ipps", "irc", "irc6"
|
||||
, "ircs", "iris", "iris.beep", "iris.lwz", "iris.xpc", "iris.xpcs"
|
||||
, "isostore", "itms", "jabber", "jar", "jms", "keyparc", "lastfm", "ldap"
|
||||
, "ldaps", "lvlt", "magnet", "mailserver", "mailto", "maps", "market"
|
||||
, "message", "mid", "mms", "modem", "mongodb", "moz", "ms-access"
|
||||
, "ms-browser-extension", "ms-drive-to", "ms-enrollment", "ms-excel"
|
||||
, "ms-gamebarservices", "ms-getoffice", "ms-help", "ms-infopath"
|
||||
, "ms-media-stream-id", "ms-officeapp", "ms-project", "ms-powerpoint"
|
||||
, "ms-publisher", "ms-search-repair", "ms-secondary-screen-controller"
|
||||
, "ms-secondary-screen-setup", "ms-settings", "ms-settings-airplanemode"
|
||||
, "ms-settings-bluetooth", "ms-settings-camera", "ms-settings-cellular"
|
||||
, "ms-settings-cloudstorage", "ms-settings-connectabledevices"
|
||||
, "ms-settings-displays-topology", "ms-settings-emailandaccounts"
|
||||
, "ms-settings-language", "ms-settings-location", "ms-settings-lock"
|
||||
, "ms-settings-nfctransactions", "ms-settings-notifications"
|
||||
, "ms-settings-power", "ms-settings-privacy", "ms-settings-proximity"
|
||||
, "ms-settings-screenrotation", "ms-settings-wifi", "ms-settings-workplace"
|
||||
, "ms-spd", "ms-sttoverlay", "ms-transit-to", "ms-virtualtouchpad"
|
||||
, "ms-visio", "ms-walk-to", "ms-whiteboard", "ms-whiteboard-cmd", "ms-word"
|
||||
, "msnim", "msrp", "msrps", "mtqp", "mumble", "mupdate", "mvn", "news", "nfs"
|
||||
, "ni", "nih", "nntp", "notes", "ocf", "oid", "onenote", "onenote-cmd"
|
||||
, "opaquelocktoken", "pack", "palm", "paparazzi", "pkcs11", "platform", "pop"
|
||||
, "pres", "prospero", "proxy", "pwid", "psyc", "qb", "query", "redis"
|
||||
, "rediss", "reload", "res", "resource", "rmi", "rsync", "rtmfp", "rtmp"
|
||||
, "rtsp", "rtsps", "rtspu", "secondlife", "service", "session", "sftp", "sgn"
|
||||
, "shttp", "sieve", "sip", "sips", "skype", "smb", "sms", "smtp", "snews"
|
||||
, "snmp", "soap.beep", "soap.beeps", "soldat", "spotify", "ssh", "steam"
|
||||
, "stun", "stuns", "submit", "svn", "tag", "teamspeak", "tel", "teliaeid"
|
||||
, "telnet", "tftp", "things", "thismessage", "tip", "tn3270", "tool", "turn"
|
||||
, "turns", "tv", "udp", "unreal", "urn", "ut2004", "v-event", "vemmi"
|
||||
, "ventrilo", "videotex", "vnc", "view-source", "wais", "webcal", "wpid"
|
||||
, "ws", "wss", "wtai", "wyciwyg", "xcon", "xcon-userid", "xfire"
|
||||
, "xmlrpc.beep", "xmlrpc.beeps", "xmpp", "xri", "ymsgr", "z39.50", "z39.50r"
|
||||
, "z39.50s"
|
||||
-- Unofficial schemes
|
||||
, "doi", "isbn", "javascript", "pmid"
|
||||
]
|
||||
|
||||
-- | Check if the string is a valid URI with an IANA-registered or frequently used but
|
||||
-- unofficial scheme (see @schemes@).
|
||||
isURI :: String -> Bool
|
||||
isURI = maybe False hasKnownScheme . parseURI
|
||||
where
|
||||
hasKnownScheme = (`Set.member` schemes) . filter (/= ':') . uriScheme
|
||||
|
||||
---
|
||||
--- Squash blocks into inlines
|
||||
---
|
||||
|
|
|
@ -33,7 +33,7 @@ import Control.Monad.State
|
|||
import Data.Char (ord)
|
||||
import Data.List (intercalate, intersperse)
|
||||
import Data.Maybe (catMaybes)
|
||||
import Network.URI (isURI, unEscapeString)
|
||||
import Network.URI (unEscapeString)
|
||||
import Text.Pandoc.Class (PandocMonad, report)
|
||||
import Text.Pandoc.Logging
|
||||
import Text.Pandoc.Definition
|
||||
|
|
|
@ -44,13 +44,12 @@ import Control.Monad.Reader (ReaderT, ask, local, runReaderT)
|
|||
import Control.Monad.State (StateT, evalStateT, gets, modify)
|
||||
import Data.Default (Default (..))
|
||||
import Data.List (intercalate, intersect, isPrefixOf, transpose)
|
||||
import Network.URI (isURI)
|
||||
import Text.Pandoc.Class (PandocMonad, report)
|
||||
import Text.Pandoc.Logging
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.ImageSize
|
||||
import Text.Pandoc.Options (WrapOption (..), WriterOptions (writerTableOfContents, writerTemplate, writerWrapText))
|
||||
import Text.Pandoc.Shared (camelCaseToHyphenated, escapeURI, linesToPara,
|
||||
import Text.Pandoc.Shared (camelCaseToHyphenated, escapeURI, isURI, linesToPara,
|
||||
removeFormatting, substitute, trimr)
|
||||
import Text.Pandoc.Templates (renderTemplate')
|
||||
import Text.Pandoc.Writers.Shared (defField, metaToJSON)
|
||||
|
|
|
@ -46,7 +46,6 @@ import Data.Char (isAscii, isControl, isSpace, toLower)
|
|||
import Data.Either (lefts, rights)
|
||||
import Data.List (intercalate, intersperse, isPrefixOf, stripPrefix)
|
||||
import Network.HTTP (urlEncode)
|
||||
import Network.URI (isURI)
|
||||
import Text.XML.Light
|
||||
import qualified Text.XML.Light as X
|
||||
import qualified Text.XML.Light.Cursor as XC
|
||||
|
@ -57,7 +56,7 @@ import Text.Pandoc.Definition
|
|||
import Text.Pandoc.Error
|
||||
import Text.Pandoc.Logging
|
||||
import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..), def)
|
||||
import Text.Pandoc.Shared (capitalize, isHeaderBlock, linesToPara,
|
||||
import Text.Pandoc.Shared (capitalize, isHeaderBlock, isURI, linesToPara,
|
||||
orderedListMarkers)
|
||||
|
||||
-- | Data to be written at the end of the document:
|
||||
|
|
|
@ -36,7 +36,6 @@ module Text.Pandoc.Writers.Haddock (writeHaddock) where
|
|||
import Control.Monad.State
|
||||
import Data.Default
|
||||
import Data.List (intersperse, transpose)
|
||||
import Network.URI (isURI)
|
||||
import Text.Pandoc.Class (PandocMonad, report)
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Logging
|
||||
|
|
|
@ -21,7 +21,6 @@ import Control.Monad.State
|
|||
import Data.List (intersperse, isInfixOf, isPrefixOf, stripPrefix)
|
||||
import qualified Data.Set as Set
|
||||
import Data.Text as Text (breakOnAll, pack)
|
||||
import Network.URI (isURI)
|
||||
import Text.Pandoc.Class (PandocMonad, report)
|
||||
import qualified Text.Pandoc.Class as P
|
||||
import Text.Pandoc.Definition
|
||||
|
@ -29,7 +28,7 @@ import Text.Pandoc.ImageSize
|
|||
import Text.Pandoc.Logging
|
||||
import Text.Pandoc.Options
|
||||
import Text.Pandoc.Pretty
|
||||
import Text.Pandoc.Shared (linesToPara, splitBy)
|
||||
import Text.Pandoc.Shared (isURI, linesToPara, splitBy)
|
||||
import Text.Pandoc.Templates (renderTemplate')
|
||||
import Text.Pandoc.Writers.Math (texMathToInlines)
|
||||
import Text.Pandoc.Writers.Shared
|
||||
|
|
|
@ -43,7 +43,7 @@ import Data.List (foldl', intercalate, intersperse, isInfixOf, nub, nubBy,
|
|||
stripPrefix, (\\))
|
||||
import Data.Maybe (catMaybes, fromMaybe, isJust)
|
||||
import qualified Data.Text as T
|
||||
import Network.URI (isURI, unEscapeString)
|
||||
import Network.URI (unEscapeString)
|
||||
import Text.Pandoc.Class (PandocMonad, report)
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Highlighting (formatLaTeXBlock, formatLaTeXInline, highlight,
|
||||
|
|
|
@ -49,7 +49,6 @@ import qualified Data.Text as T
|
|||
import qualified Data.Vector as V
|
||||
import Data.Yaml (Value (Array, Bool, Number, Object, String))
|
||||
import Network.HTTP (urlEncode)
|
||||
import Network.URI (isURI)
|
||||
import Text.HTML.TagSoup (Tag (..), isTagText, parseTags)
|
||||
import Text.Pandoc.Class (PandocMonad, report)
|
||||
import Text.Pandoc.Definition
|
||||
|
|
|
@ -34,7 +34,6 @@ import Control.Monad.Reader
|
|||
import Control.Monad.State
|
||||
import Data.List (intercalate)
|
||||
import qualified Data.Set as Set
|
||||
import Network.URI (isURI)
|
||||
import Text.Pandoc.Class (PandocMonad, report)
|
||||
import Text.Pandoc.Logging
|
||||
import Text.Pandoc.Definition
|
||||
|
|
|
@ -35,7 +35,6 @@ import Control.Monad.State
|
|||
import Data.Char (isSpace, toLower)
|
||||
import Data.List (isPrefixOf, stripPrefix)
|
||||
import Data.Maybe (fromMaybe)
|
||||
import Network.URI (isURI)
|
||||
import qualified Text.Pandoc.Builder as B
|
||||
import Text.Pandoc.Class (PandocMonad, report)
|
||||
import Text.Pandoc.Logging
|
||||
|
|
|
@ -37,7 +37,7 @@ import Data.Char (chr, ord)
|
|||
import Data.List (maximumBy, transpose)
|
||||
import Data.Ord (comparing)
|
||||
import qualified Data.Set as Set
|
||||
import Network.URI (isURI, unEscapeString)
|
||||
import Network.URI (unEscapeString)
|
||||
import System.FilePath
|
||||
import Text.Pandoc.Class (PandocMonad, report)
|
||||
import Text.Pandoc.Definition
|
||||
|
|
|
@ -38,14 +38,13 @@ import Data.Default (Default (..))
|
|||
import Data.List (intercalate, isInfixOf, isPrefixOf, transpose)
|
||||
import qualified Data.Map as Map
|
||||
import Data.Text (breakOnAll, pack)
|
||||
import Network.URI (isURI)
|
||||
import Text.Pandoc.Class (PandocMonad, report)
|
||||
import Text.Pandoc.Logging
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.ImageSize
|
||||
import Text.Pandoc.Options (WrapOption (..), WriterOptions (writerTableOfContents, writerTemplate, writerWrapText))
|
||||
import Text.Pandoc.Shared (escapeURI, linesToPara, removeFormatting, substitute,
|
||||
trimr)
|
||||
import Text.Pandoc.Shared (isURI, escapeURI, linesToPara, removeFormatting,
|
||||
substitute, trimr)
|
||||
import Text.Pandoc.Templates (renderTemplate')
|
||||
import Text.Pandoc.Writers.Shared (defField, metaToJSON)
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue