From 20043d5cf4815028c4a1a75a109bf97e2feeb207 Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Fri, 8 Nov 2024 18:31:44 +0100 Subject: [PATCH] [3.13] GH-126212: Fix removal of slashes in file URIs on Windows (GH-126214) (#126590) GH-126212: Fix removal of slashes in file URIs on Windows (GH-126214) Adjust `urllib.request.pathname2url()` and `url2pathname()` so that they don't remove slashes from Windows DOS drive paths and URLs. There was no basis for this behaviour, and it conflicts with how UNC and POSIX paths are handled. (cherry picked from commit 54c63a32d06cb5f07a66245c375eac7d7efb964a) Co-authored-by: Barney Gale --- Lib/nturl2path.py | 25 +++++-------------- Lib/test/test_urllib.py | 11 ++++++-- ...-10-30-23-59-36.gh-issue-126212._9uYjT.rst | 3 +++ 3 files changed, 18 insertions(+), 21 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-30-23-59-36.gh-issue-126212._9uYjT.rst diff --git a/Lib/nturl2path.py b/Lib/nturl2path.py index 6453f202c26..2f9fec7893a 100644 --- a/Lib/nturl2path.py +++ b/Lib/nturl2path.py @@ -24,23 +24,15 @@ def url2pathname(url): # convert this to \\host\path\on\remote\host # (notice halving of slashes at the start of the path) url = url[2:] - components = url.split('/') # make sure not to convert quoted slashes :-) - return urllib.parse.unquote('\\'.join(components)) + return urllib.parse.unquote(url.replace('/', '\\')) comp = url.split('|') if len(comp) != 2 or comp[0][-1] not in string.ascii_letters: error = 'Bad URL: ' + url raise OSError(error) drive = comp[0][-1].upper() - components = comp[1].split('/') - path = drive + ':' - for comp in components: - if comp: - path = path + '\\' + urllib.parse.unquote(comp) - # Issue #11474 - handing url such as |c/| - if path.endswith(':') and url.endswith('/'): - path += '\\' - return path + tail = urllib.parse.unquote(comp[1].replace('/', '\\')) + return drive + ':' + tail def pathname2url(p): """OS-specific conversion from a file system path to a relative URL @@ -60,17 +52,12 @@ def pathname2url(p): raise OSError('Bad path: ' + p) if not ':' in p: # No drive specifier, just convert slashes and quote the name - components = p.split('\\') - return urllib.parse.quote('/'.join(components)) + return urllib.parse.quote(p.replace('\\', '/')) comp = p.split(':', maxsplit=2) if len(comp) != 2 or len(comp[0]) > 1: error = 'Bad path: ' + p raise OSError(error) drive = urllib.parse.quote(comp[0].upper()) - components = comp[1].split('\\') - path = '///' + drive + ':' - for comp in components: - if comp: - path = path + '/' + urllib.parse.quote(comp) - return path + tail = urllib.parse.quote(comp[1].replace('\\', '/')) + return '///' + drive + ':' + tail diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index 3ee17f96b81..28369b21db0 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -1526,8 +1526,10 @@ class Pathname_Tests(unittest.TestCase): self.assertEqual(fn('\\\\?\\C:\\dir'), '///C:/dir') self.assertEqual(fn('\\\\?\\unc\\server\\share\\dir'), '//server/share/dir') self.assertEqual(fn("C:"), '///C:') - self.assertEqual(fn("C:\\"), '///C:') + self.assertEqual(fn("C:\\"), '///C:/') self.assertEqual(fn('C:\\a\\b.c'), '///C:/a/b.c') + self.assertEqual(fn('C:\\a\\b.c\\'), '///C:/a/b.c/') + self.assertEqual(fn('C:\\a\\\\b.c'), '///C:/a//b.c') self.assertEqual(fn('C:\\a\\b%#c'), '///C:/a/b%25%23c') self.assertEqual(fn('C:\\a\\b\xe9'), '///C:/a/b%C3%A9') self.assertEqual(fn('C:\\foo\\bar\\spam.foo'), "///C:/foo/bar/spam.foo") @@ -1563,13 +1565,15 @@ class Pathname_Tests(unittest.TestCase): self.assertEqual(fn("///C|"), 'C:') self.assertEqual(fn("///C:"), 'C:') self.assertEqual(fn('///C:/'), 'C:\\') - self.assertEqual(fn('/C|//'), 'C:\\') + self.assertEqual(fn('/C|//'), 'C:\\\\') self.assertEqual(fn('///C|/path'), 'C:\\path') # No DOS drive self.assertEqual(fn("///C/test/"), '\\\\\\C\\test\\') self.assertEqual(fn("////C/test/"), '\\\\C\\test\\') # DOS drive paths self.assertEqual(fn('C:/path/to/file'), 'C:\\path\\to\\file') + self.assertEqual(fn('C:/path/to/file/'), 'C:\\path\\to\\file\\') + self.assertEqual(fn('C:/path/to//file'), 'C:\\path\\to\\\\file') self.assertEqual(fn('C|/path/to/file'), 'C:\\path\\to\\file') self.assertEqual(fn('/C|/path/to/file'), 'C:\\path\\to\\file') self.assertEqual(fn('///C|/path/to/file'), 'C:\\path\\to\\file') @@ -1583,6 +1587,9 @@ class Pathname_Tests(unittest.TestCase): # Localhost paths self.assertEqual(fn('//localhost/C:/path/to/file'), 'C:\\path\\to\\file') self.assertEqual(fn('//localhost/C|/path/to/file'), 'C:\\path\\to\\file') + # Percent-encoded forward slashes are preserved for backwards compatibility + self.assertEqual(fn('C:/foo%2fbar'), 'C:\\foo/bar') + self.assertEqual(fn('//server/share/foo%2fbar'), '\\\\server\\share\\foo/bar') # Round-tripping paths = ['C:', r'\\\C\test\\', diff --git a/Misc/NEWS.d/next/Library/2024-10-30-23-59-36.gh-issue-126212._9uYjT.rst b/Misc/NEWS.d/next/Library/2024-10-30-23-59-36.gh-issue-126212._9uYjT.rst new file mode 100644 index 00000000000..047fe0f6804 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-30-23-59-36.gh-issue-126212._9uYjT.rst @@ -0,0 +1,3 @@ +Fix issue where :func:`urllib.request.pathname2url` and +:func:`~urllib.request.url2pathname` removed slashes from Windows DOS drive +paths and URLs.