[3.12] gh-67693: Fix urlunparse() and urlunsplit() for URIs with path starting with multiple slashes and no authority (GH-113563) (GH-119024)

(cherry picked from commit e237b25a4f) Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
2025-08-17 15:21:26 +00:00 · 2024-05-14 11:47:11 +02:00 · 2024-05-14 11:47:11 +02:00 · 387ff96e95
commit 387ff96e95
parent 5bf7f5c6a8
3 changed files with 70 additions and 4 deletions
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -103,7 +103,9 @@ parse_qs_test_cases = [
 class UrlParseTestCase(unittest.TestCase):
-    def checkRoundtrips(self, url, parsed, split):
+    def checkRoundtrips(self, url, parsed, split, url2=None):
        if url2 is None:
            url2 = url
        result = urllib.parse.urlparse(url)
        self.assertSequenceEqual(result, parsed)
        t = (result.scheme, result.netloc, result.path,
@@ -111,7 +113,7 @@ class UrlParseTestCase(unittest.TestCase):
        self.assertSequenceEqual(t, parsed)
        # put it back together and it should be the same
        result2 = urllib.parse.urlunparse(result)
-        self.assertSequenceEqual(result2, url)
+        self.assertSequenceEqual(result2, url2)
        self.assertSequenceEqual(result2, result.geturl())
        # the result of geturl() is a fixpoint; we can always parse it
@@ -137,7 +139,7 @@ class UrlParseTestCase(unittest.TestCase):
             result.query, result.fragment)
        self.assertSequenceEqual(t, split)
        result2 = urllib.parse.urlunsplit(result)
-        self.assertSequenceEqual(result2, url)
+        self.assertSequenceEqual(result2, url2)
        self.assertSequenceEqual(result2, result.geturl())
        # check the fixpoint property of re-parsing the result of geturl()
@@ -175,9 +177,39 @@ class UrlParseTestCase(unittest.TestCase):
    def test_roundtrips(self):
        str_cases = [
            ('path/to/file',
             ('', '', 'path/to/file', '', '', ''),
             ('', '', 'path/to/file', '', '')),
            ('/path/to/file',
             ('', '', '/path/to/file', '', '', ''),
             ('', '', '/path/to/file', '', '')),
            ('//path/to/file',
             ('', 'path', '/to/file', '', '', ''),
             ('', 'path', '/to/file', '', '')),
            ('////path/to/file',
             ('', '', '//path/to/file', '', '', ''),
             ('', '', '//path/to/file', '', '')),
            ('scheme:path/to/file',
             ('scheme', '', 'path/to/file', '', '', ''),
             ('scheme', '', 'path/to/file', '', '')),
            ('scheme:/path/to/file',
             ('scheme', '', '/path/to/file', '', '', ''),
             ('scheme', '', '/path/to/file', '', '')),
            ('scheme://path/to/file',
             ('scheme', 'path', '/to/file', '', '', ''),
             ('scheme', 'path', '/to/file', '', '')),
            ('scheme:////path/to/file',
             ('scheme', '', '//path/to/file', '', '', ''),
             ('scheme', '', '//path/to/file', '', '')),
            ('file:///tmp/junk.txt',
             ('file', '', '/tmp/junk.txt', '', '', ''),
             ('file', '', '/tmp/junk.txt', '', '')),
            ('file:////tmp/junk.txt',
             ('file', '', '//tmp/junk.txt', '', '', ''),
             ('file', '', '//tmp/junk.txt', '', '')),
            ('file://///tmp/junk.txt',
             ('file', '', '///tmp/junk.txt', '', '', ''),
             ('file', '', '///tmp/junk.txt', '', '')),
            ('imap://mail.python.org/mbox1',
             ('imap', 'mail.python.org', '/mbox1', '', '', ''),
             ('imap', 'mail.python.org', '/mbox1', '', '')),
@@ -213,6 +245,38 @@ class UrlParseTestCase(unittest.TestCase):
        for url, parsed, split in str_cases + bytes_cases:
            self.checkRoundtrips(url, parsed, split)
    def test_roundtrips_normalization(self):
        # URLs whose reassembled form is expected to differ from the input.
        # Each case is (input url, expected rebuilt url, expected urlparse()
        # components, expected urlsplit() components).
        text_cases = [
            ('///path/to/file', '/path/to/file',
             ('', '', '/path/to/file', '', '', ''),
             ('', '', '/path/to/file', '', '')),
            ('scheme:///path/to/file', 'scheme:/path/to/file',
             ('scheme', '', '/path/to/file', '', '', ''),
             ('scheme', '', '/path/to/file', '', '')),
            ('file:/tmp/junk.txt', 'file:///tmp/junk.txt',
             ('file', '', '/tmp/junk.txt', '', '', ''),
             ('file', '', '/tmp/junk.txt', '', '')),
            ('http:/tmp/junk.txt', 'http:///tmp/junk.txt',
             ('http', '', '/tmp/junk.txt', '', '', ''),
             ('http', '', '/tmp/junk.txt', '', '')),
            ('https:/tmp/junk.txt', 'https:///tmp/junk.txt',
             ('https', '', '/tmp/junk.txt', '', '', ''),
             ('https', '', '/tmp/junk.txt', '', '')),
        ]

        def as_bytes(case):
            # Encode both URLs and every component of both expected tuples,
            # so the same case can be replayed with bytes arguments.
            url, rebuilt, parsed, split = case
            return (url.encode('ascii'),
                    rebuilt.encode('ascii'),
                    tuple(part.encode('ascii') for part in parsed),
                    tuple(part.encode('ascii') for part in split))

        # All str cases run first, followed by their bytes counterparts.
        every_case = text_cases + [as_bytes(case) for case in text_cases]
        for url, rebuilt, parsed, split in every_case:
            # checkRoundtrips() takes the expected rebuilt URL last (url2).
            self.checkRoundtrips(url, parsed, split, rebuilt)
    def test_http_roundtrips(self):
        # urllib.parse.urlsplit treats 'http:' as an optimized special case,
        # so we test both 'http:' and 'https:' in all the following.
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -525,7 +525,7 @@ def urlunsplit(components):
    empty query; the RFC states that these are equivalent)."""
    scheme, netloc, url, query, fragment, _coerce_result = (
                                          _coerce_args(*components))
-    if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'):
+    if netloc or (scheme and scheme in uses_netloc) or url[:2] == '//':
        if url and url[:1] != '/': url = '/' + url
        url = '//' + (netloc or '') + url
    if scheme:
--- a/Misc/NEWS.d/next/Library/2019-08-27-01-16-50.gh-issue-67693.4NIAiy.rst
+++ b/Misc/NEWS.d/next/Library/2019-08-27-01-16-50.gh-issue-67693.4NIAiy.rst
@@ -0,0 +1,2 @@
 Fix :func:`urllib.parse.urlunparse` and :func:`urllib.parse.urlunsplit` for URIs with path starting with multiple slashes and no authority.
 Based on patch by Ashwin Ramaswami.
		`@ -0,0 +1,2 @@`
							Fix :func:`urllib.parse.urlunparse` and :func:`urllib.parse.urlunsplit` for URIs with path starting with multiple slashes and no authority.
							`Based on patch by Ashwin Ramaswami.`