Be explicit about scheme_chars -- string.letters is locale dependent

string.letters is locale dependent, so we can't use it to define scheme_chars. While I'm at it, got rid of all use of the string module. (Found several new hard special cases for a hypothetical conversion tool: "from string import join, split, rfind", and a local assignment "find = string.find".)
2025-11-25 04:34:37 +00:00 · 2000-12-19 16:48:13 +00:00 · 2000-12-19 16:48:13 +00:00 · fad81f0838
commit fad81f0838
parent 68abe832b9
1 changed file with 18 additions and 20 deletions
--- a/Lib/urlparse.py
+++ b/Lib/urlparse.py
@@ -4,10 +4,6 @@ See RFC 1808: "Relative Uniform Resource Locators", by R. Fielding,
 UC Irvine, June 1995.
 """

-# Standard/builtin Python modules
-import string
-from string import join, split, rfind
-
 # A classification of schemes ('' means apply by default)
 uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'wais', 'file',
 		 'https', 'shttp',
@@ -31,7 +27,10 @@ uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news', 'nntp', 'wais',
 		 'file', 'prospero', '']

 # Characters valid in scheme names
-scheme_chars = string.letters + string.digits + '+-.'
+scheme_chars = ('abcdefghijklmnopqrstuvwxyz'
+                'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+                '0123456789'
+                '+-.')

 MAX_CACHE_SIZE = 20
 _parse_cache = {}
@@ -54,29 +53,28 @@ def urlparse(url, scheme = '', allow_fragments = 1):
 		return cached
 	if len(_parse_cache) >= MAX_CACHE_SIZE:	# avoid runaway growth
 		clear_cache()
-	find = string.find
 	netloc = path = params = query = fragment = ''
-	i = find(url, ':')
+	i = url.find(':')
 	if i > 0:
 		if url[:i] == 'http': # optimize the common case
-			scheme = string.lower(url[:i])
+			scheme = url[:i].lower()
 			url = url[i+1:]
 			if url[:2] == '//':
-				i = find(url, '/', 2)
+				i = url.find('/', 2)
 				if i < 0:
 					i = len(url)
 				netloc = url[2:i]
 				url = url[i:]
 			if allow_fragments:
-				i = string.rfind(url, '#')
+				i = url.rfind('#')
 				if i >= 0:
 					fragment = url[i+1:]
 					url = url[:i]
-			i = find(url, '?')
+			i = url.find('?')
 			if i >= 0:
 				query = url[i+1:]
 				url = url[:i]
-			i = find(url, ';')
+			i = url.find(';')
 			if i >= 0:
 				params = url[i+1:]
 				url = url[:i]
@@ -87,23 +85,23 @@ def urlparse(url, scheme = '', allow_fragments = 1):
 			if c not in scheme_chars:
 				break
 		else:
-			scheme, url = string.lower(url[:i]), url[i+1:]
+			scheme, url = url[:i].lower(), url[i+1:]
 	if scheme in uses_netloc:
 		if url[:2] == '//':
-			i = find(url, '/', 2)
+			i = url.find('/', 2)
 			if i < 0:
 				i = len(url)
 			netloc, url = url[2:i], url[i:]
 	if allow_fragments and scheme in uses_fragment:
-		i = string.rfind(url, '#')
+		i = url.rfind('#')
 		if i >= 0:
 			url, fragment = url[:i], url[i+1:]
 	if scheme in uses_query:
-		i = find(url, '?')
+		i = url.find('?')
 		if i >= 0:
 			url, query = url[:i], url[i+1:]
 	if scheme in uses_params:
-		i = find(url, ';')
+		i = url.find(';')
 		if i >= 0:
 			url, params = url[:i], url[i+1:]
 	tuple = scheme, netloc, url, params, query, fragment
@@ -151,7 +149,7 @@ def urljoin(base, url, allow_fragments = 1):
 	if not path:
 		return urlunparse((scheme, netloc, bpath,
 				   params, query or bquery, fragment))
-	segments = split(bpath, '/')[:-1] + split(path, '/')
+	segments = bpath.split('/')[:-1] + path.split('/')
 	# XXX The stuff below is bogus in various ways...
 	if segments[-1] == '.':
 		segments[-1] = ''
@@ -171,7 +169,7 @@ def urljoin(base, url, allow_fragments = 1):
 		segments[-1] = ''
 	elif len(segments) >= 2 and segments[-1] == '..':
 		segments[-2:] = ['']
-	return urlunparse((scheme, netloc, join(segments, '/'),
+	return urlunparse((scheme, netloc, '/'.join(segments),
 			   params, query, fragment))

 def urldefrag(url):
@@ -236,7 +234,7 @@ def test():
 	while 1:
 		line = fp.readline()
 		if not line: break
-		words = string.split(line)
+		words = line.split()
 		if not words:
 			continue
 		url = words[0]