mirror of
https://github.com/python/cpython.git
synced 2025-11-26 13:22:51 +00:00
Patch #712317: In URLs such as http://www.example.com?query=spam, treat '?' as
a delimiter. Previously, the 'network location' (<authority> in RFC 2396) would become 'www.example.com?query=spam', while RFC 2396 does not allow a '?' in <authority>. See bug #548176 for further discussion.
This commit is contained in:
parent
cdd625a770
commit
41e4faa82b
2 changed files with 62 additions and 40 deletions
|
|
@ -8,20 +8,22 @@ RFC1808_BASE = "http://a/b/c/d;p?q#f"
|
||||||
RFC2396_BASE = "http://a/b/c/d;p?q"
|
RFC2396_BASE = "http://a/b/c/d;p?q"
|
||||||
|
|
||||||
class UrlParseTestCase(unittest.TestCase):
|
class UrlParseTestCase(unittest.TestCase):
|
||||||
def test_frags(self):
|
|
||||||
for url, parsed, split in [
|
def checkRoundtrips(self, url, parsed, split):
|
||||||
('http://www.python.org',
|
result = urlparse.urlparse(url)
|
||||||
('http', 'www.python.org', '', '', '', ''),
|
self.assertEqual(result, parsed)
|
||||||
('http', 'www.python.org', '', '', '')),
|
# put it back together and it should be the same
|
||||||
('http://www.python.org#abc',
|
result2 = urlparse.urlunparse(result)
|
||||||
('http', 'www.python.org', '', '', '', 'abc'),
|
self.assertEqual(result2, url)
|
||||||
('http', 'www.python.org', '', '', 'abc')),
|
|
||||||
('http://www.python.org/#abc',
|
# check the roundtrip using urlsplit() as well
|
||||||
('http', 'www.python.org', '/', '', '', 'abc'),
|
result = urlparse.urlsplit(url)
|
||||||
('http', 'www.python.org', '/', '', 'abc')),
|
self.assertEqual(result, split)
|
||||||
(RFC1808_BASE,
|
result2 = urlparse.urlunsplit(result)
|
||||||
('http', 'a', '/b/c/d', 'p', 'q', 'f'),
|
self.assertEqual(result2, url)
|
||||||
('http', 'a', '/b/c/d;p', 'q', 'f')),
|
|
||||||
|
def test_roundtrips(self):
|
||||||
|
testcases = [
|
||||||
('file:///tmp/junk.txt',
|
('file:///tmp/junk.txt',
|
||||||
('file', '', '/tmp/junk.txt', '', '', ''),
|
('file', '', '/tmp/junk.txt', '', '', ''),
|
||||||
('file', '', '/tmp/junk.txt', '', '')),
|
('file', '', '/tmp/junk.txt', '', '')),
|
||||||
|
|
@ -29,20 +31,41 @@ class UrlParseTestCase(unittest.TestCase):
|
||||||
('imap', 'mail.python.org', '/mbox1', '', '', ''),
|
('imap', 'mail.python.org', '/mbox1', '', '', ''),
|
||||||
('imap', 'mail.python.org', '/mbox1', '', '')),
|
('imap', 'mail.python.org', '/mbox1', '', '')),
|
||||||
('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
|
('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
|
||||||
('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf', '', '', ''),
|
('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
|
||||||
('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf', '', '')),
|
'', '', ''),
|
||||||
]:
|
('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
|
||||||
result = urlparse.urlparse(url)
|
'', '')),
|
||||||
self.assertEqual(result, parsed)
|
]
|
||||||
# put it back together and it should be the same
|
for url, parsed, split in testcases:
|
||||||
result2 = urlparse.urlunparse(result)
|
self.checkRoundtrips(url, parsed, split)
|
||||||
self.assertEqual(result2, url)
|
|
||||||
|
|
||||||
# check the roundtrip using urlsplit() as well
|
def test_http_roundtrips(self):
|
||||||
result = urlparse.urlsplit(url)
|
# urlparse.urlsplit treats 'http:' as an optimized special case,
|
||||||
self.assertEqual(result, split)
|
# so we test both 'http:' and 'https:' in all the following.
|
||||||
result2 = urlparse.urlunsplit(result)
|
# Three cheers for white box knowledge!
|
||||||
self.assertEqual(result2, url)
|
testcases = [
|
||||||
|
('://www.python.org',
|
||||||
|
('www.python.org', '', '', '', ''),
|
||||||
|
('www.python.org', '', '', '')),
|
||||||
|
('://www.python.org#abc',
|
||||||
|
('www.python.org', '', '', '', 'abc'),
|
||||||
|
('www.python.org', '', '', 'abc')),
|
||||||
|
('://www.python.org?q=abc',
|
||||||
|
('www.python.org', '', '', 'q=abc', ''),
|
||||||
|
('www.python.org', '', 'q=abc', '')),
|
||||||
|
('://www.python.org/#abc',
|
||||||
|
('www.python.org', '/', '', '', 'abc'),
|
||||||
|
('www.python.org', '/', '', 'abc')),
|
||||||
|
('://a/b/c/d;p?q#f',
|
||||||
|
('a', '/b/c/d', 'p', 'q', 'f'),
|
||||||
|
('a', '/b/c/d;p', 'q', 'f')),
|
||||||
|
]
|
||||||
|
for scheme in ('http', 'https'):
|
||||||
|
for url, parsed, split in testcases:
|
||||||
|
url = scheme + url
|
||||||
|
parsed = (scheme,) + parsed
|
||||||
|
split = (scheme,) + split
|
||||||
|
self.checkRoundtrips(url, parsed, split)
|
||||||
|
|
||||||
def checkJoin(self, base, relurl, expected):
|
def checkJoin(self, base, relurl, expected):
|
||||||
self.assertEqual(urlparse.urljoin(base, relurl), expected,
|
self.assertEqual(urlparse.urljoin(base, relurl), expected,
|
||||||
|
|
|
||||||
|
|
@ -63,6 +63,15 @@ def _splitparams(url):
|
||||||
i = url.find(';')
|
i = url.find(';')
|
||||||
return url[:i], url[i+1:]
|
return url[:i], url[i+1:]
|
||||||
|
|
||||||
|
def _splitnetloc(url, start=0):
|
||||||
|
for c in '/?#': # the order is important!
|
||||||
|
delim = url.find(c, start)
|
||||||
|
if delim >= 0:
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
delim = len(url)
|
||||||
|
return url[start:delim], url[delim:]
|
||||||
|
|
||||||
def urlsplit(url, scheme='', allow_fragments=1):
|
def urlsplit(url, scheme='', allow_fragments=1):
|
||||||
"""Parse a URL into 5 components:
|
"""Parse a URL into 5 components:
|
||||||
<scheme>://<netloc>/<path>?<query>#<fragment>
|
<scheme>://<netloc>/<path>?<query>#<fragment>
|
||||||
|
|
@ -82,13 +91,7 @@ def urlsplit(url, scheme='', allow_fragments=1):
|
||||||
scheme = url[:i].lower()
|
scheme = url[:i].lower()
|
||||||
url = url[i+1:]
|
url = url[i+1:]
|
||||||
if url[:2] == '//':
|
if url[:2] == '//':
|
||||||
i = url.find('/', 2)
|
netloc, url = _splitnetloc(url, 2)
|
||||||
if i < 0:
|
|
||||||
i = url.find('#')
|
|
||||||
if i < 0:
|
|
||||||
i = len(url)
|
|
||||||
netloc = url[2:i]
|
|
||||||
url = url[i:]
|
|
||||||
if allow_fragments and '#' in url:
|
if allow_fragments and '#' in url:
|
||||||
url, fragment = url.split('#', 1)
|
url, fragment = url.split('#', 1)
|
||||||
if '?' in url:
|
if '?' in url:
|
||||||
|
|
@ -101,12 +104,8 @@ def urlsplit(url, scheme='', allow_fragments=1):
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
scheme, url = url[:i].lower(), url[i+1:]
|
scheme, url = url[:i].lower(), url[i+1:]
|
||||||
if scheme in uses_netloc:
|
if scheme in uses_netloc and url[:2] == '//':
|
||||||
if url[:2] == '//':
|
netloc, url = _splitnetloc(url, 2)
|
||||||
i = url.find('/', 2)
|
|
||||||
if i < 0:
|
|
||||||
i = len(url)
|
|
||||||
netloc, url = url[2:i], url[i:]
|
|
||||||
if allow_fragments and scheme in uses_fragment and '#' in url:
|
if allow_fragments and scheme in uses_fragment and '#' in url:
|
||||||
url, fragment = url.split('#', 1)
|
url, fragment = url.split('#', 1)
|
||||||
if scheme in uses_query and '?' in url:
|
if scheme in uses_query and '?' in url:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue