Patch #712317: In URLs such as http://www.example.com?query=spam, treat '?' as a delimiter.

Previously, the 'network location' (<authority> in RFC 2396) would
become 'www.example.com?query=spam', while RFC 2396 does not allow a '?' in
<authority>. See bug #548176 for further discussion.
This commit is contained in:
Johannes Gijsbers 2005-01-09 15:29:10 +00:00
parent cdd625a770
commit 41e4faa82b
2 changed files with 62 additions and 40 deletions

View file

@ -63,6 +63,15 @@ def _splitparams(url):
i = url.find(';')
return url[:i], url[i+1:]
def _splitnetloc(url, start=0):
    """Split the network location off the front of a URL remainder.

    The netloc is taken to begin at ``url[start]`` (callers pass 2 to skip
    a leading '//') and to end at the earliest '/', '?' or '#' found at or
    after ``start``, or at the end of the string when none is present.

    Returns a ``(netloc, rest)`` tuple where ``rest`` starts with the
    delimiter that ended the netloc (or is empty).
    """
    delim = len(url)  # default: the netloc runs to the end of the string
    for c in '/?#':
        pos = url.find(c, start)
        # Keep the earliest delimiter position, not the first delimiter
        # type that happens to occur: a '/' inside the query or fragment
        # must not pull a preceding '?' or '#' into the netloc.
        if pos >= 0:
            delim = min(delim, pos)
    return url[start:delim], url[delim:]
def urlsplit(url, scheme='', allow_fragments=1):
"""Parse a URL into 5 components:
<scheme>://<netloc>/<path>?<query>#<fragment>
@ -82,13 +91,7 @@ def urlsplit(url, scheme='', allow_fragments=1):
scheme = url[:i].lower()
url = url[i+1:]
if url[:2] == '//':
i = url.find('/', 2)
if i < 0:
i = url.find('#')
if i < 0:
i = len(url)
netloc = url[2:i]
url = url[i:]
netloc, url = _splitnetloc(url, 2)
if allow_fragments and '#' in url:
url, fragment = url.split('#', 1)
if '?' in url:
@ -101,12 +104,8 @@ def urlsplit(url, scheme='', allow_fragments=1):
break
else:
scheme, url = url[:i].lower(), url[i+1:]
if scheme in uses_netloc:
if url[:2] == '//':
i = url.find('/', 2)
if i < 0:
i = len(url)
netloc, url = url[2:i], url[i:]
if scheme in uses_netloc and url[:2] == '//':
netloc, url = _splitnetloc(url, 2)
if allow_fragments and scheme in uses_fragment and '#' in url:
url, fragment = url.split('#', 1)
if scheme in uses_query and '?' in url: