mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
Be explicit about scheme_chars -- string.letters is locale dependent
so we can't use it. While I'm at it, I got rid of all uses of the string module. (Found several new hard special cases for a hypothetical conversion tool: "from string import join, split, rfind"; and a local assignment "find = string.find".)
This commit is contained in:
parent
68abe832b9
commit
fad81f0838
1 changed files with 18 additions and 20 deletions
|
@ -4,10 +4,6 @@ See RFC 1808: "Relative Uniform Resource Locators", by R. Fielding,
|
||||||
UC Irvine, June 1995.
|
UC Irvine, June 1995.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Standard/builtin Python modules
|
|
||||||
import string
|
|
||||||
from string import join, split, rfind
|
|
||||||
|
|
||||||
# A classification of schemes ('' means apply by default)
|
# A classification of schemes ('' means apply by default)
|
||||||
uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'wais', 'file',
|
uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'wais', 'file',
|
||||||
'https', 'shttp',
|
'https', 'shttp',
|
||||||
|
@ -31,7 +27,10 @@ uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news', 'nntp', 'wais',
|
||||||
'file', 'prospero', '']
|
'file', 'prospero', '']
|
||||||
|
|
||||||
# Characters valid in scheme names
|
# Characters valid in scheme names
|
||||||
scheme_chars = string.letters + string.digits + '+-.'
|
scheme_chars = ('abcdefghijklmnopqrstuvwxyz'
|
||||||
|
'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
|
||||||
|
'0123456789'
|
||||||
|
'+-.')
|
||||||
|
|
||||||
MAX_CACHE_SIZE = 20
|
MAX_CACHE_SIZE = 20
|
||||||
_parse_cache = {}
|
_parse_cache = {}
|
||||||
|
@ -54,29 +53,28 @@ def urlparse(url, scheme = '', allow_fragments = 1):
|
||||||
return cached
|
return cached
|
||||||
if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
|
if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
|
||||||
clear_cache()
|
clear_cache()
|
||||||
find = string.find
|
|
||||||
netloc = path = params = query = fragment = ''
|
netloc = path = params = query = fragment = ''
|
||||||
i = find(url, ':')
|
i = url.find(':')
|
||||||
if i > 0:
|
if i > 0:
|
||||||
if url[:i] == 'http': # optimize the common case
|
if url[:i] == 'http': # optimize the common case
|
||||||
scheme = string.lower(url[:i])
|
scheme = url[:i].lower()
|
||||||
url = url[i+1:]
|
url = url[i+1:]
|
||||||
if url[:2] == '//':
|
if url[:2] == '//':
|
||||||
i = find(url, '/', 2)
|
i = url.find('/', 2)
|
||||||
if i < 0:
|
if i < 0:
|
||||||
i = len(url)
|
i = len(url)
|
||||||
netloc = url[2:i]
|
netloc = url[2:i]
|
||||||
url = url[i:]
|
url = url[i:]
|
||||||
if allow_fragments:
|
if allow_fragments:
|
||||||
i = string.rfind(url, '#')
|
i = url.rfind('#')
|
||||||
if i >= 0:
|
if i >= 0:
|
||||||
fragment = url[i+1:]
|
fragment = url[i+1:]
|
||||||
url = url[:i]
|
url = url[:i]
|
||||||
i = find(url, '?')
|
i = url.find('?')
|
||||||
if i >= 0:
|
if i >= 0:
|
||||||
query = url[i+1:]
|
query = url[i+1:]
|
||||||
url = url[:i]
|
url = url[:i]
|
||||||
i = find(url, ';')
|
i = url.find(';')
|
||||||
if i >= 0:
|
if i >= 0:
|
||||||
params = url[i+1:]
|
params = url[i+1:]
|
||||||
url = url[:i]
|
url = url[:i]
|
||||||
|
@ -87,23 +85,23 @@ def urlparse(url, scheme = '', allow_fragments = 1):
|
||||||
if c not in scheme_chars:
|
if c not in scheme_chars:
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
scheme, url = string.lower(url[:i]), url[i+1:]
|
scheme, url = url[:i].lower(), url[i+1:]
|
||||||
if scheme in uses_netloc:
|
if scheme in uses_netloc:
|
||||||
if url[:2] == '//':
|
if url[:2] == '//':
|
||||||
i = find(url, '/', 2)
|
i = url.find('/', 2)
|
||||||
if i < 0:
|
if i < 0:
|
||||||
i = len(url)
|
i = len(url)
|
||||||
netloc, url = url[2:i], url[i:]
|
netloc, url = url[2:i], url[i:]
|
||||||
if allow_fragments and scheme in uses_fragment:
|
if allow_fragments and scheme in uses_fragment:
|
||||||
i = string.rfind(url, '#')
|
i = url.rfind('#')
|
||||||
if i >= 0:
|
if i >= 0:
|
||||||
url, fragment = url[:i], url[i+1:]
|
url, fragment = url[:i], url[i+1:]
|
||||||
if scheme in uses_query:
|
if scheme in uses_query:
|
||||||
i = find(url, '?')
|
i = url.find('?')
|
||||||
if i >= 0:
|
if i >= 0:
|
||||||
url, query = url[:i], url[i+1:]
|
url, query = url[:i], url[i+1:]
|
||||||
if scheme in uses_params:
|
if scheme in uses_params:
|
||||||
i = find(url, ';')
|
i = url.find(';')
|
||||||
if i >= 0:
|
if i >= 0:
|
||||||
url, params = url[:i], url[i+1:]
|
url, params = url[:i], url[i+1:]
|
||||||
tuple = scheme, netloc, url, params, query, fragment
|
tuple = scheme, netloc, url, params, query, fragment
|
||||||
|
@ -151,7 +149,7 @@ def urljoin(base, url, allow_fragments = 1):
|
||||||
if not path:
|
if not path:
|
||||||
return urlunparse((scheme, netloc, bpath,
|
return urlunparse((scheme, netloc, bpath,
|
||||||
params, query or bquery, fragment))
|
params, query or bquery, fragment))
|
||||||
segments = split(bpath, '/')[:-1] + split(path, '/')
|
segments = bpath.split('/')[:-1] + path.split('/')
|
||||||
# XXX The stuff below is bogus in various ways...
|
# XXX The stuff below is bogus in various ways...
|
||||||
if segments[-1] == '.':
|
if segments[-1] == '.':
|
||||||
segments[-1] = ''
|
segments[-1] = ''
|
||||||
|
@ -171,7 +169,7 @@ def urljoin(base, url, allow_fragments = 1):
|
||||||
segments[-1] = ''
|
segments[-1] = ''
|
||||||
elif len(segments) >= 2 and segments[-1] == '..':
|
elif len(segments) >= 2 and segments[-1] == '..':
|
||||||
segments[-2:] = ['']
|
segments[-2:] = ['']
|
||||||
return urlunparse((scheme, netloc, join(segments, '/'),
|
return urlunparse((scheme, netloc, '/'.join(segments),
|
||||||
params, query, fragment))
|
params, query, fragment))
|
||||||
|
|
||||||
def urldefrag(url):
|
def urldefrag(url):
|
||||||
|
@ -236,7 +234,7 @@ def test():
|
||||||
while 1:
|
while 1:
|
||||||
line = fp.readline()
|
line = fp.readline()
|
||||||
if not line: break
|
if not line: break
|
||||||
words = string.split(line)
|
words = line.split()
|
||||||
if not words:
|
if not words:
|
||||||
continue
|
continue
|
||||||
url = words[0]
|
url = words[0]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue