Mirror of https://github.com/python/cpython.git (synced 2025-08-25 03:04:55 +00:00)
GH-126367: `url2pathname()`: handle NTFS alternate data streams (#131428)
Adjust `url2pathname()` to decode embedded colon characters in Windows URIs, rather than bailing out with an `OSError`.
This commit is contained in:
parent
01b5abbc53
commit
d783d7b51d
4 changed files with 20 additions and 17 deletions
|
@ -182,7 +182,9 @@ The :mod:`urllib.request` module defines the following functions:
|
||||||
'C:\\Program Files'
|
'C:\\Program Files'
|
||||||
|
|
||||||
.. versionchanged:: 3.14
|
.. versionchanged:: 3.14
|
||||||
Windows drive letters are no longer converted to uppercase.
|
Windows drive letters are no longer converted to uppercase, and ``:``
|
||||||
|
characters not following a drive letter no longer cause an
|
||||||
|
:exc:`OSError` exception to be raised on Windows.
|
||||||
|
|
||||||
|
|
||||||
.. function:: getproxies()
|
.. function:: getproxies()
|
||||||
|
|
|
@ -14,7 +14,7 @@ def url2pathname(url):
|
||||||
# ///C:/foo/bar/spam.foo
|
# ///C:/foo/bar/spam.foo
|
||||||
# become
|
# become
|
||||||
# C:\foo\bar\spam.foo
|
# C:\foo\bar\spam.foo
|
||||||
import string, urllib.parse
|
import urllib.parse
|
||||||
if url[:3] == '///':
|
if url[:3] == '///':
|
||||||
# URL has an empty authority section, so the path begins on the third
|
# URL has an empty authority section, so the path begins on the third
|
||||||
# character.
|
# character.
|
||||||
|
@ -25,19 +25,14 @@ def url2pathname(url):
|
||||||
if url[:3] == '///':
|
if url[:3] == '///':
|
||||||
# Skip past extra slash before UNC drive in URL path.
|
# Skip past extra slash before UNC drive in URL path.
|
||||||
url = url[1:]
|
url = url[1:]
|
||||||
# Windows itself uses ":" even in URLs.
|
else:
|
||||||
url = url.replace(':', '|')
|
if url[:1] == '/' and url[2:3] in (':', '|'):
|
||||||
if not '|' in url:
|
# Skip past extra slash before DOS drive in URL path.
|
||||||
# No drive specifier, just convert slashes
|
url = url[1:]
|
||||||
# make sure not to convert quoted slashes :-)
|
if url[1:2] == '|':
|
||||||
return urllib.parse.unquote(url.replace('/', '\\'))
|
# Older URLs use a pipe after a drive letter
|
||||||
comp = url.split('|')
|
url = url[:1] + ':' + url[2:]
|
||||||
if len(comp) != 2 or comp[0][-1] not in string.ascii_letters:
|
return urllib.parse.unquote(url.replace('/', '\\'))
|
||||||
error = 'Bad URL: ' + url
|
|
||||||
raise OSError(error)
|
|
||||||
drive = comp[0][-1]
|
|
||||||
tail = urllib.parse.unquote(comp[1].replace('/', '\\'))
|
|
||||||
return drive + ':' + tail
|
|
||||||
|
|
||||||
def pathname2url(p):
|
def pathname2url(p):
|
||||||
"""OS-specific conversion from a file system path to a relative URL
|
"""OS-specific conversion from a file system path to a relative URL
|
||||||
|
|
|
@ -1484,6 +1484,7 @@ class Pathname_Tests(unittest.TestCase):
|
||||||
'test specific to Windows pathnames.')
|
'test specific to Windows pathnames.')
|
||||||
def test_url2pathname_win(self):
|
def test_url2pathname_win(self):
|
||||||
fn = urllib.request.url2pathname
|
fn = urllib.request.url2pathname
|
||||||
|
self.assertEqual(fn('/'), '\\')
|
||||||
self.assertEqual(fn('/C:/'), 'C:\\')
|
self.assertEqual(fn('/C:/'), 'C:\\')
|
||||||
self.assertEqual(fn("///C|"), 'C:')
|
self.assertEqual(fn("///C|"), 'C:')
|
||||||
self.assertEqual(fn("///C:"), 'C:')
|
self.assertEqual(fn("///C:"), 'C:')
|
||||||
|
@ -1502,8 +1503,10 @@ class Pathname_Tests(unittest.TestCase):
|
||||||
self.assertEqual(fn('/C|/path/to/file'), 'C:\\path\\to\\file')
|
self.assertEqual(fn('/C|/path/to/file'), 'C:\\path\\to\\file')
|
||||||
self.assertEqual(fn('///C|/path/to/file'), 'C:\\path\\to\\file')
|
self.assertEqual(fn('///C|/path/to/file'), 'C:\\path\\to\\file')
|
||||||
self.assertEqual(fn("///C|/foo/bar/spam.foo"), 'C:\\foo\\bar\\spam.foo')
|
self.assertEqual(fn("///C|/foo/bar/spam.foo"), 'C:\\foo\\bar\\spam.foo')
|
||||||
# Non-ASCII drive letter
|
# Colons in URI
|
||||||
self.assertRaises(IOError, fn, "///\u00e8|/")
|
self.assertEqual(fn('///\u00e8|/'), '\u00e8:\\')
|
||||||
|
self.assertEqual(fn('//host/share/spam.txt:eggs'), '\\\\host\\share\\spam.txt:eggs')
|
||||||
|
self.assertEqual(fn('///c:/spam.txt:eggs'), 'c:\\spam.txt:eggs')
|
||||||
# UNC paths
|
# UNC paths
|
||||||
self.assertEqual(fn('//server/path/to/file'), '\\\\server\\path\\to\\file')
|
self.assertEqual(fn('//server/path/to/file'), '\\\\server\\path\\to\\file')
|
||||||
self.assertEqual(fn('////server/path/to/file'), '\\\\server\\path\\to\\file')
|
self.assertEqual(fn('////server/path/to/file'), '\\\\server\\path\\to\\file')
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
Fix issue where :func:`urllib.request.url2pathname` raised :exc:`OSError`
|
||||||
|
when given a Windows URI containing a colon character not following a drive
|
||||||
|
letter, such as before an NTFS alternate data stream.
|
Loading…
Add table
Add a link
Reference in a new issue