mirror of
https://github.com/python/cpython.git
synced 2025-08-17 15:21:26 +00:00
[3.12] gh-67693: Fix urlunparse() and urlunsplit() for URIs with path starting with multiple slashes and no authority (GH-113563) (GH-119024)
(cherry picked from commit e237b25a4f)
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
parent 5bf7f5c6a8
commit 387ff96e95
3 changed files with 70 additions and 4 deletions
|
@@ -103,7 +103,9 @@ parse_qs_test_cases = [
|
||||||
|
|
||||||
class UrlParseTestCase(unittest.TestCase):
|
class UrlParseTestCase(unittest.TestCase):
|
||||||
|
|
||||||
def checkRoundtrips(self, url, parsed, split):
|
def checkRoundtrips(self, url, parsed, split, url2=None):
|
||||||
|
if url2 is None:
|
||||||
|
url2 = url
|
||||||
result = urllib.parse.urlparse(url)
|
result = urllib.parse.urlparse(url)
|
||||||
self.assertSequenceEqual(result, parsed)
|
self.assertSequenceEqual(result, parsed)
|
||||||
t = (result.scheme, result.netloc, result.path,
|
t = (result.scheme, result.netloc, result.path,
|
||||||
|
@@ -111,7 +113,7 @@ class UrlParseTestCase(unittest.TestCase):
|
||||||
self.assertSequenceEqual(t, parsed)
|
self.assertSequenceEqual(t, parsed)
|
||||||
# put it back together and it should be the same
|
# put it back together and it should be the same
|
||||||
result2 = urllib.parse.urlunparse(result)
|
result2 = urllib.parse.urlunparse(result)
|
||||||
self.assertSequenceEqual(result2, url)
|
self.assertSequenceEqual(result2, url2)
|
||||||
self.assertSequenceEqual(result2, result.geturl())
|
self.assertSequenceEqual(result2, result.geturl())
|
||||||
|
|
||||||
# the result of geturl() is a fixpoint; we can always parse it
|
# the result of geturl() is a fixpoint; we can always parse it
|
||||||
|
@@ -137,7 +139,7 @@ class UrlParseTestCase(unittest.TestCase):
|
||||||
result.query, result.fragment)
|
result.query, result.fragment)
|
||||||
self.assertSequenceEqual(t, split)
|
self.assertSequenceEqual(t, split)
|
||||||
result2 = urllib.parse.urlunsplit(result)
|
result2 = urllib.parse.urlunsplit(result)
|
||||||
self.assertSequenceEqual(result2, url)
|
self.assertSequenceEqual(result2, url2)
|
||||||
self.assertSequenceEqual(result2, result.geturl())
|
self.assertSequenceEqual(result2, result.geturl())
|
||||||
|
|
||||||
# check the fixpoint property of re-parsing the result of geturl()
|
# check the fixpoint property of re-parsing the result of geturl()
|
||||||
|
@@ -175,9 +177,39 @@ class UrlParseTestCase(unittest.TestCase):
|
||||||
|
|
||||||
def test_roundtrips(self):
|
def test_roundtrips(self):
|
||||||
str_cases = [
|
str_cases = [
|
||||||
|
('path/to/file',
|
||||||
|
('', '', 'path/to/file', '', '', ''),
|
||||||
|
('', '', 'path/to/file', '', '')),
|
||||||
|
('/path/to/file',
|
||||||
|
('', '', '/path/to/file', '', '', ''),
|
||||||
|
('', '', '/path/to/file', '', '')),
|
||||||
|
('//path/to/file',
|
||||||
|
('', 'path', '/to/file', '', '', ''),
|
||||||
|
('', 'path', '/to/file', '', '')),
|
||||||
|
('////path/to/file',
|
||||||
|
('', '', '//path/to/file', '', '', ''),
|
||||||
|
('', '', '//path/to/file', '', '')),
|
||||||
|
('scheme:path/to/file',
|
||||||
|
('scheme', '', 'path/to/file', '', '', ''),
|
||||||
|
('scheme', '', 'path/to/file', '', '')),
|
||||||
|
('scheme:/path/to/file',
|
||||||
|
('scheme', '', '/path/to/file', '', '', ''),
|
||||||
|
('scheme', '', '/path/to/file', '', '')),
|
||||||
|
('scheme://path/to/file',
|
||||||
|
('scheme', 'path', '/to/file', '', '', ''),
|
||||||
|
('scheme', 'path', '/to/file', '', '')),
|
||||||
|
('scheme:////path/to/file',
|
||||||
|
('scheme', '', '//path/to/file', '', '', ''),
|
||||||
|
('scheme', '', '//path/to/file', '', '')),
|
||||||
('file:///tmp/junk.txt',
|
('file:///tmp/junk.txt',
|
||||||
('file', '', '/tmp/junk.txt', '', '', ''),
|
('file', '', '/tmp/junk.txt', '', '', ''),
|
||||||
('file', '', '/tmp/junk.txt', '', '')),
|
('file', '', '/tmp/junk.txt', '', '')),
|
||||||
|
('file:////tmp/junk.txt',
|
||||||
|
('file', '', '//tmp/junk.txt', '', '', ''),
|
||||||
|
('file', '', '//tmp/junk.txt', '', '')),
|
||||||
|
('file://///tmp/junk.txt',
|
||||||
|
('file', '', '///tmp/junk.txt', '', '', ''),
|
||||||
|
('file', '', '///tmp/junk.txt', '', '')),
|
||||||
('imap://mail.python.org/mbox1',
|
('imap://mail.python.org/mbox1',
|
||||||
('imap', 'mail.python.org', '/mbox1', '', '', ''),
|
('imap', 'mail.python.org', '/mbox1', '', '', ''),
|
||||||
('imap', 'mail.python.org', '/mbox1', '', '')),
|
('imap', 'mail.python.org', '/mbox1', '', '')),
|
||||||
|
@@ -213,6 +245,38 @@ class UrlParseTestCase(unittest.TestCase):
|
||||||
for url, parsed, split in str_cases + bytes_cases:
|
for url, parsed, split in str_cases + bytes_cases:
|
||||||
self.checkRoundtrips(url, parsed, split)
|
self.checkRoundtrips(url, parsed, split)
|
||||||
|
|
||||||
|
def test_roundtrips_normalization(self):
|
||||||
|
str_cases = [
|
||||||
|
('///path/to/file',
|
||||||
|
'/path/to/file',
|
||||||
|
('', '', '/path/to/file', '', '', ''),
|
||||||
|
('', '', '/path/to/file', '', '')),
|
||||||
|
('scheme:///path/to/file',
|
||||||
|
'scheme:/path/to/file',
|
||||||
|
('scheme', '', '/path/to/file', '', '', ''),
|
||||||
|
('scheme', '', '/path/to/file', '', '')),
|
||||||
|
('file:/tmp/junk.txt',
|
||||||
|
'file:///tmp/junk.txt',
|
||||||
|
('file', '', '/tmp/junk.txt', '', '', ''),
|
||||||
|
('file', '', '/tmp/junk.txt', '', '')),
|
||||||
|
('http:/tmp/junk.txt',
|
||||||
|
'http:///tmp/junk.txt',
|
||||||
|
('http', '', '/tmp/junk.txt', '', '', ''),
|
||||||
|
('http', '', '/tmp/junk.txt', '', '')),
|
||||||
|
('https:/tmp/junk.txt',
|
||||||
|
'https:///tmp/junk.txt',
|
||||||
|
('https', '', '/tmp/junk.txt', '', '', ''),
|
||||||
|
('https', '', '/tmp/junk.txt', '', '')),
|
||||||
|
]
|
||||||
|
def _encode(t):
|
||||||
|
return (t[0].encode('ascii'),
|
||||||
|
t[1].encode('ascii'),
|
||||||
|
tuple(x.encode('ascii') for x in t[2]),
|
||||||
|
tuple(x.encode('ascii') for x in t[3]))
|
||||||
|
bytes_cases = [_encode(x) for x in str_cases]
|
||||||
|
for url, url2, parsed, split in str_cases + bytes_cases:
|
||||||
|
self.checkRoundtrips(url, parsed, split, url2)
|
||||||
|
|
||||||
def test_http_roundtrips(self):
|
def test_http_roundtrips(self):
|
||||||
# urllib.parse.urlsplit treats 'http:' as an optimized special case,
|
# urllib.parse.urlsplit treats 'http:' as an optimized special case,
|
||||||
# so we test both 'http:' and 'https:' in all the following.
|
# so we test both 'http:' and 'https:' in all the following.
|
||||||
|
|
|
@@ -525,7 +525,7 @@ def urlunsplit(components):
|
||||||
empty query; the RFC states that these are equivalent)."""
|
empty query; the RFC states that these are equivalent)."""
|
||||||
scheme, netloc, url, query, fragment, _coerce_result = (
|
scheme, netloc, url, query, fragment, _coerce_result = (
|
||||||
_coerce_args(*components))
|
_coerce_args(*components))
|
||||||
if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'):
|
if netloc or (scheme and scheme in uses_netloc) or url[:2] == '//':
|
||||||
if url and url[:1] != '/': url = '/' + url
|
if url and url[:1] != '/': url = '/' + url
|
||||||
url = '//' + (netloc or '') + url
|
url = '//' + (netloc or '') + url
|
||||||
if scheme:
|
if scheme:
|
||||||
|
|
|
@@ -0,0 +1,2 @@
|
||||||
|
Fix :func:`urllib.parse.urlunparse` and :func:`urllib.parse.urlunsplit` for URIs with path starting with multiple slashes and no authority.
|
||||||
|
Based on patch by Ashwin Ramaswami.
|
Loading…
Add table
Add a link
Reference in a new issue