Merge 1eaa949466 into 61dd9fdad7

2025-08-04 17:08:35 +00:00 · 2025-07-10 06:54:30 +03:00 · 2025-07-10 06:54:30 +03:00 · 36af2187da
commit 36af2187da
parent 61dd9fdad7 1eaa949466
4 changed files with 113 additions and 23 deletions
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -352,6 +352,11 @@ class UrlParseTestCase(unittest.TestCase):
                self.assertEqual(urllib.parse.urljoin(base, relurl), expected)
                relurlb = urllib.parse.urlunsplit(urllib.parse.urlsplit(relurlb))
                self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb)
+            else:
+                relurl = urllib.parse.urlunsplit(urllib.parse.urlsplit(relurl))
+                self.assertNotEqual(urllib.parse.urljoin(base, relurl), expected)
+                relurlb = urllib.parse.urlunsplit(urllib.parse.urlsplit(relurlb))
+                self.assertNotEqual(urllib.parse.urljoin(baseb, relurlb), expectedb)

    @support.subTests('bytes', (False, True))
    @support.subTests('u', ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',])
@@ -559,6 +564,9 @@ class UrlParseTestCase(unittest.TestCase):
        # slashes
        self.checkJoin('http://a/b/c/d/e/', '../../f/g/', 'http://a/b/c/f/g/')
        self.checkJoin('http://a/b/c/d/e', '../../f/g/', 'http://a/b/f/g/')
+        self.checkJoin('http://a/b/c/d/e//', '../../f/g/', 'http://a/b/c/d/f/g/')
+        self.checkJoin('http://a/b/c/d/e///', '../../f/g/', 'http://a/b/c/d/e/f/g/')
+        self.checkJoin('http://a/b/c/d/e////', '../../f/g/', 'http://a/b/c/d/e//f/g/')
        self.checkJoin('http://a/b/c/d/e/', '/../../f/g/', 'http://a/f/g/')
        self.checkJoin('http://a/b/c/d/e', '/../../f/g/', 'http://a/f/g/')
        self.checkJoin('http://a/b/c/d/e/', '../../f/g', 'http://a/b/c/f/g')
@@ -636,6 +644,16 @@ class UrlParseTestCase(unittest.TestCase):
        self.checkJoin('//', '/w', '///w')
        self.checkJoin('//', '///w', '///w')
        self.checkJoin('//', 'w', '///w')
+        self.checkJoin('//', '../w', '///w')
+        self.checkJoin('//', './w', '///w')
+        self.checkJoin('//', '..//w', '///w')
+        self.checkJoin('//', './/w', '///w')
+        self.checkJoin('//', '..', '//')
+        self.checkJoin('//', '.', '//')
+        self.checkJoin('//', '../', '//')
+        self.checkJoin('//', './', '//')
+        self.checkJoin('//', '..//', '///')
+        self.checkJoin('//', './/', '///')

        self.checkJoin('//a', '', '//a')
        self.checkJoin('//a', '//', '//a')
@@ -644,6 +662,16 @@ class UrlParseTestCase(unittest.TestCase):
        self.checkJoin('//a', '/w', '//a/w')
        self.checkJoin('//a', '///w', '//a/w')
        self.checkJoin('//a', 'w', '//a/w')
+        self.checkJoin('//a', '../w', '//a/w')
+        self.checkJoin('//a', './w', '//a/w')
+        self.checkJoin('//a', '..//w', '//a/w')
+        self.checkJoin('//a', './/w', '//a/w')
+        self.checkJoin('//a', '..', '//a')
+        self.checkJoin('//a', '.', '//a')
+        self.checkJoin('//a', '../', '//a')
+        self.checkJoin('//a', './', '//a')
+        self.checkJoin('//a', '..//', '//a/')
+        self.checkJoin('//a', './/', '//a/')

        for scheme in '', 'http:':
            self.checkJoin('http:', scheme + '', 'http:')
@@ -652,7 +680,21 @@ class UrlParseTestCase(unittest.TestCase):
            self.checkJoin('http:', scheme + '//v/w', 'http://v/w')
            self.checkJoin('http:', scheme + '/w', 'http:/w')
            self.checkJoin('http:', scheme + '///w', 'http:/w')
-            self.checkJoin('http:', scheme + 'w', 'http:/w')
+            self.checkJoin('http:', scheme + 'w', 'http:w')
+            self.checkJoin('http:', scheme + '../w', 'http:w')
+            self.checkJoin('http:', scheme + './w', 'http:w')
+            self.checkJoin('http:', scheme + '..//w', 'http:/w')
+            self.checkJoin('http:', scheme + './/w', 'http:/w')
+            self.checkJoin('http:', scheme + '..///w', 'http:////w')
+            self.checkJoin('http:', scheme + './//w', 'http:////w')
+            self.checkJoin('http:', scheme + '..', 'http:')
+            self.checkJoin('http:', scheme + '.', 'http:')
+            self.checkJoin('http:', scheme + '../', 'http:')
+            self.checkJoin('http:', scheme + './', 'http:')
+            self.checkJoin('http:', scheme + '..//', 'http:/')
+            self.checkJoin('http:', scheme + './/', 'http:/')
+            self.checkJoin('http:', scheme + '..///', 'http:////')
+            self.checkJoin('http:', scheme + './//', 'http:////')

            self.checkJoin('http://', scheme + '', 'http://')
            self.checkJoin('http://', scheme + '//', 'http://')
@@ -661,6 +703,20 @@ class UrlParseTestCase(unittest.TestCase):
            self.checkJoin('http://', scheme + '/w', 'http:///w')
            self.checkJoin('http://', scheme + '///w', 'http:///w')
            self.checkJoin('http://', scheme + 'w', 'http:///w')
+            self.checkJoin('http://', scheme + '../w', 'http:///w')
+            self.checkJoin('http://', scheme + './w', 'http:///w')
+            self.checkJoin('http://', scheme + '..//w', 'http:///w')
+            self.checkJoin('http://', scheme + './/w', 'http:///w')
+            self.checkJoin('http://', scheme + '..///w', 'http:////w')
+            self.checkJoin('http://', scheme + './//w', 'http:////w')
+            self.checkJoin('http://', scheme + '..', 'http://')
+            self.checkJoin('http://', scheme + '.', 'http://')
+            self.checkJoin('http://', scheme + '../', 'http://')
+            self.checkJoin('http://', scheme + './', 'http://')
+            self.checkJoin('http://', scheme + '..//', 'http:///')
+            self.checkJoin('http://', scheme + './/', 'http:///')
+            self.checkJoin('http://', scheme + '..///', 'http:////')
+            self.checkJoin('http://', scheme + './//', 'http:////')

            self.checkJoin('http://a', scheme + '', 'http://a')
            self.checkJoin('http://a', scheme + '//', 'http://a')
@@ -669,6 +725,38 @@ class UrlParseTestCase(unittest.TestCase):
            self.checkJoin('http://a', scheme + '/w', 'http://a/w')
            self.checkJoin('http://a', scheme + '///w', 'http://a/w')
            self.checkJoin('http://a', scheme + 'w', 'http://a/w')
+            self.checkJoin('http://a', scheme + '../w', 'http://a/w')
+            self.checkJoin('http://a', scheme + './w', 'http://a/w')
+            self.checkJoin('http://a', scheme + '..//w', 'http://a/w')
+            self.checkJoin('http://a', scheme + './/w', 'http://a/w')
+            self.checkJoin('http://a', scheme + '..///w', 'http://a//w')
+            self.checkJoin('http://a', scheme + './//w', 'http://a//w')
+            self.checkJoin('http://a', scheme + '..', 'http://a')
+            self.checkJoin('http://a', scheme + '.', 'http://a')
+            self.checkJoin('http://a', scheme + '../', 'http://a')
+            self.checkJoin('http://a', scheme + './', 'http://a')
+            self.checkJoin('http://a', scheme + '..//', 'http://a/')
+            self.checkJoin('http://a', scheme + './/', 'http://a/')
+            self.checkJoin('http://a', scheme + '..///', 'http://a//')
+            self.checkJoin('http://a', scheme + './//', 'http://a//')
+
+        self.checkJoin('b/c', '', 'b/c')
+        self.checkJoin('b/c', '//', 'b/c')
+        self.checkJoin('b/c', '//v', '//v')
+        self.checkJoin('b/c', '//v/w', '//v/w')
+        self.checkJoin('b/c', '/w', '/w')
+        self.checkJoin('b/c', '///w', '/w')
+        self.checkJoin('b/c', 'w', 'b/w')
+        self.checkJoin('b/c', '../w', 'w')
+        self.checkJoin('b/c', '../../w', 'w')
+        self.checkJoin('b/c', '../../../w', 'w')
+        self.checkJoin('b/c', 'w/.', 'b/w/')
+        self.checkJoin('b/c', '../w/.', 'w/')
+        self.checkJoin('b/c', '../../w/.', 'w/')
+        self.checkJoin('b/c', '../../../w/.', 'w/')
+        self.checkJoin('b/c', '..', '')
+        self.checkJoin('b/c', '../..', '')
+        self.checkJoin('b/c', '../../..', '')

        self.checkJoin('/b/c', '', '/b/c')
        self.checkJoin('/b/c', '//', '/b/c')
@@ -677,6 +765,16 @@ class UrlParseTestCase(unittest.TestCase):
        self.checkJoin('/b/c', '/w', '/w')
        self.checkJoin('/b/c', '///w', '/w')
        self.checkJoin('/b/c', 'w', '/b/w')
+        self.checkJoin('/b/c', '../w', '/w')
+        self.checkJoin('/b/c', '../../w', '/w')
+        self.checkJoin('/b/c', '../../../w', '/w')
+        self.checkJoin('/b/c', 'w/.', '/b/w/')
+        self.checkJoin('/b/c', '../w/.', '/w/')
+        self.checkJoin('/b/c', '../../w/.', '/w/')
+        self.checkJoin('/b/c', '../../../w/.', '/w/')
+        self.checkJoin('/b/c', '..', '/')
+        self.checkJoin('/b/c', '../..', '/')
+        self.checkJoin('/b/c', '../../..', '/')

        self.checkJoin('///b/c', '', '///b/c')
        self.checkJoin('///b/c', '//', '///b/c')
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -610,31 +610,22 @@ def urljoin(base, url, allow_fragments=True):
        return _coerce_result(_urlunsplit(scheme, netloc, path,
                                          query, fragment))

-    base_parts = bpath.split('/')
-    if base_parts[-1] != '':
-        # the last item is not a directory, so will not be taken into account
-        # in resolving the relative path
-        del base_parts[-1]
-
    # for rfc3986, ignore all base path should the first character be root.
-    if path[:1] == '/':
-        segments = path.split('/')
-    else:
-        segments = base_parts + path.split('/')
-        # filter out elements that would cause redundant slashes on re-joining
-        # the resolved_path
-        segments[1:-1] = filter(None, segments[1:-1])
+    if path[:1] != '/' and '/' in bpath:
+        path = bpath.rsplit('/', 1)[0] + '/' + path
+
+    path = _remove_dot_segments(path)
+    return _coerce_result(_urlunsplit(scheme, netloc, path, query, fragment))
+
+def _remove_dot_segments(path):
+    segments = path.split('/')
+    min_len = 0 if segments[0] else 1

    resolved_path = []
-
    for seg in segments:
        if seg == '..':
-            try:
+            if len(resolved_path) > min_len:
                resolved_path.pop()
-            except IndexError:
-                # ignore any .. segments that would otherwise cause an IndexError
-                # when popped from resolved_path if resolving for rfc3986
-                pass
        elif seg == '.':
            continue
        else:
@@ -645,9 +636,7 @@ def urljoin(base, url, allow_fragments=True):
        # then we need to append the trailing '/'
        resolved_path.append('')

-    return _coerce_result(_urlunsplit(scheme, netloc, '/'.join(
-        resolved_path) or '/', query, fragment))
-
+    return '/'.join(resolved_path)

 def urldefrag(url):
    """Removes any existing fragment from URL.
--- a/Misc/NEWS.d/next/Library/2024-11-11-13-05-38.gh-issue-69589.7Un1Ua.rst
+++ b/Misc/NEWS.d/next/Library/2024-11-11-13-05-38.gh-issue-69589.7Un1Ua.rst
@@ -0,0 +1,2 @@
+Fix :func:`urllib.parse.urljoin` for the case when the base path is relative
+and the relative reference path starts with ``..``.
--- a/Misc/NEWS.d/next/Library/2024-11-11-13-33-56.gh-issue-84774.KxbM6e.rst
+++ b/Misc/NEWS.d/next/Library/2024-11-11-13-33-56.gh-issue-84774.KxbM6e.rst
@@ -0,0 +1 @@
+Preserve double slashes in the path in :func:`urllib.parse.urljoin`.
				`@ -0,0 +1 @@`
				Preserve double slashes in the path in :func:`urllib.parse.urljoin`.