mirror of
https://github.com/python/cpython.git
synced 2025-07-07 19:35:27 +00:00
GH-123599: url2pathname()
: handle authority section in file URL (#126844)
In `urllib.request.url2pathname()`, if the authority resolves to the current host, discard it. If an authority is present but resolves somewhere else, then on Windows we return a UNC path (as before), and on other platforms we raise `URLError`. Affects `pathlib.Path.from_uri()` in the same way. Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
This commit is contained in:
parent
a214db0c54
commit
66cdb2bd8a
8 changed files with 106 additions and 48 deletions
|
@ -871,6 +871,12 @@ conforming to :rfc:`8089`.
|
|||
|
||||
.. versionadded:: 3.13
|
||||
|
||||
.. versionchanged:: next
|
||||
If a URL authority (e.g. a hostname) is present and resolves to a local
|
||||
address, it is discarded. If an authority is present and *doesn't*
|
||||
resolve to a local address, then on Windows a UNC path is returned (as
|
||||
before), and on other platforms a :exc:`ValueError` is raised.
|
||||
|
||||
|
||||
.. method:: Path.as_uri()
|
||||
|
||||
|
|
|
@ -158,16 +158,16 @@ The :mod:`urllib.request` module defines the following functions:
|
|||
>>> 'file:' + pathname2url(path)
|
||||
'file:///C:/Program%20Files'
|
||||
|
||||
.. versionchanged:: 3.14
|
||||
Paths beginning with a slash are converted to URLs with authority
|
||||
sections. For example, the path ``/etc/hosts`` is converted to
|
||||
the URL ``///etc/hosts``.
|
||||
|
||||
.. versionchanged:: 3.14
|
||||
Windows drive letters are no longer converted to uppercase, and ``:``
|
||||
characters not following a drive letter no longer cause an
|
||||
:exc:`OSError` exception to be raised on Windows.
|
||||
|
||||
.. versionchanged:: 3.14
|
||||
Paths beginning with a slash are converted to URLs with authority
|
||||
sections. For example, the path ``/etc/hosts`` is converted to
|
||||
the URL ``///etc/hosts``.
|
||||
|
||||
|
||||
.. function:: url2pathname(url)
|
||||
|
||||
|
@ -186,6 +186,13 @@ The :mod:`urllib.request` module defines the following functions:
|
|||
characters not following a drive letter no longer cause an
|
||||
:exc:`OSError` exception to be raised on Windows.
|
||||
|
||||
.. versionchanged:: next
|
||||
This function calls :func:`socket.gethostbyname` if the URL authority
|
||||
isn't empty or ``localhost``. If the authority resolves to a local IP
|
||||
address then it is discarded; otherwise, on Windows a UNC path is
|
||||
returned (as before), and on other platforms a
|
||||
:exc:`~urllib.error.URLError` is raised.
|
||||
|
||||
|
||||
.. function:: getproxies()
|
||||
|
||||
|
|
|
@ -1197,6 +1197,25 @@ urllib
|
|||
supporting SHA-256 digest authentication as specified in :rfc:`7616`.
|
||||
(Contributed by Calvin Bui in :gh:`128193`.)
|
||||
|
||||
* Improve standards compliance when parsing and emitting ``file:`` URLs.
|
||||
|
||||
In :func:`urllib.request.url2pathname`:
|
||||
|
||||
- Discard URL authorities that resolve to a local IP address.
|
||||
- Raise :exc:`~urllib.error.URLError` if a URL authority doesn't resolve
|
||||
to ``localhost``, except on Windows where we return a UNC path.
|
||||
|
||||
In :func:`urllib.request.pathname2url`:
|
||||
|
||||
- Include an empty URL authority when a path begins with a slash. For
|
||||
example, the path ``/etc/hosts`` is converted to the URL ``///etc/hosts``.
|
||||
|
||||
On Windows, drive letters are no longer converted to uppercase, and ``:``
|
||||
characters not following a drive letter no longer cause an :exc:`OSError`
|
||||
exception to be raised.
|
||||
|
||||
(Contributed by Barney Gale in :gh:`125866`.)
|
||||
|
||||
|
||||
uuid
|
||||
----
|
||||
|
|
|
@ -1278,8 +1278,12 @@ class Path(PurePath):
|
|||
"""Return a new path from the given 'file' URI."""
|
||||
if not uri.startswith('file:'):
|
||||
raise ValueError(f"URI does not start with 'file:': {uri!r}")
|
||||
from urllib.error import URLError
|
||||
from urllib.request import url2pathname
|
||||
path = cls(url2pathname(uri.removeprefix('file:')))
|
||||
try:
|
||||
path = cls(url2pathname(uri.removeprefix('file:')))
|
||||
except URLError as exc:
|
||||
raise ValueError(exc.reason) from None
|
||||
if not path.is_absolute():
|
||||
raise ValueError(f"URI is not absolute: {uri!r}")
|
||||
return path
|
||||
|
|
|
@ -3285,10 +3285,14 @@ class PathTest(PurePathTest):
|
|||
def test_from_uri_posix(self):
|
||||
P = self.cls
|
||||
self.assertEqual(P.from_uri('file:/foo/bar'), P('/foo/bar'))
|
||||
self.assertEqual(P.from_uri('file://foo/bar'), P('//foo/bar'))
|
||||
self.assertRaises(ValueError, P.from_uri, 'file://foo/bar')
|
||||
self.assertEqual(P.from_uri('file:///foo/bar'), P('/foo/bar'))
|
||||
self.assertEqual(P.from_uri('file:////foo/bar'), P('//foo/bar'))
|
||||
self.assertEqual(P.from_uri('file://localhost/foo/bar'), P('/foo/bar'))
|
||||
if not is_wasi:
|
||||
self.assertEqual(P.from_uri('file://127.0.0.1/foo/bar'), P('/foo/bar'))
|
||||
self.assertEqual(P.from_uri(f'file://{socket.gethostname()}/foo/bar'),
|
||||
P('/foo/bar'))
|
||||
self.assertRaises(ValueError, P.from_uri, 'foo/bar')
|
||||
self.assertRaises(ValueError, P.from_uri, '/foo/bar')
|
||||
self.assertRaises(ValueError, P.from_uri, '//foo/bar')
|
||||
|
|
|
@ -11,6 +11,7 @@ from test import support
|
|||
from test.support import os_helper
|
||||
from test.support import socket_helper
|
||||
import os
|
||||
import socket
|
||||
try:
|
||||
import ssl
|
||||
except ImportError:
|
||||
|
@ -1424,6 +1425,17 @@ class Pathname_Tests(unittest.TestCase):
|
|||
"url2pathname() failed; %s != %s" %
|
||||
(expect, result))
|
||||
|
||||
def test_pathname2url(self):
|
||||
# Test cases common to Windows and POSIX.
|
||||
fn = urllib.request.pathname2url
|
||||
sep = os.path.sep
|
||||
self.assertEqual(fn(''), '')
|
||||
self.assertEqual(fn(sep), '///')
|
||||
self.assertEqual(fn('a'), 'a')
|
||||
self.assertEqual(fn(f'a{sep}b.c'), 'a/b.c')
|
||||
self.assertEqual(fn(f'{sep}a{sep}b.c'), '///a/b.c')
|
||||
self.assertEqual(fn(f'{sep}a{sep}b%#c'), '///a/b%25%23c')
|
||||
|
||||
@unittest.skipUnless(sys.platform == 'win32',
|
||||
'test specific to Windows pathnames.')
|
||||
def test_pathname2url_win(self):
|
||||
|
@ -1466,12 +1478,9 @@ class Pathname_Tests(unittest.TestCase):
|
|||
'test specific to POSIX pathnames')
|
||||
def test_pathname2url_posix(self):
|
||||
fn = urllib.request.pathname2url
|
||||
self.assertEqual(fn('/'), '///')
|
||||
self.assertEqual(fn('/a/b.c'), '///a/b.c')
|
||||
self.assertEqual(fn('//a/b.c'), '////a/b.c')
|
||||
self.assertEqual(fn('///a/b.c'), '/////a/b.c')
|
||||
self.assertEqual(fn('////a/b.c'), '//////a/b.c')
|
||||
self.assertEqual(fn('/a/b%#c'), '///a/b%25%23c')
|
||||
|
||||
@unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII')
|
||||
def test_pathname2url_nonascii(self):
|
||||
|
@ -1480,11 +1489,25 @@ class Pathname_Tests(unittest.TestCase):
|
|||
url = urllib.parse.quote(os_helper.FS_NONASCII, encoding=encoding, errors=errors)
|
||||
self.assertEqual(urllib.request.pathname2url(os_helper.FS_NONASCII), url)
|
||||
|
||||
def test_url2pathname(self):
|
||||
# Test cases common to Windows and POSIX.
|
||||
fn = urllib.request.url2pathname
|
||||
sep = os.path.sep
|
||||
self.assertEqual(fn(''), '')
|
||||
self.assertEqual(fn('/'), f'{sep}')
|
||||
self.assertEqual(fn('///'), f'{sep}')
|
||||
self.assertEqual(fn('////'), f'{sep}{sep}')
|
||||
self.assertEqual(fn('foo'), 'foo')
|
||||
self.assertEqual(fn('foo/bar'), f'foo{sep}bar')
|
||||
self.assertEqual(fn('/foo/bar'), f'{sep}foo{sep}bar')
|
||||
self.assertEqual(fn('//localhost/foo/bar'), f'{sep}foo{sep}bar')
|
||||
self.assertEqual(fn('///foo/bar'), f'{sep}foo{sep}bar')
|
||||
self.assertEqual(fn('////foo/bar'), f'{sep}{sep}foo{sep}bar')
|
||||
|
||||
@unittest.skipUnless(sys.platform == 'win32',
|
||||
'test specific to Windows pathnames.')
|
||||
def test_url2pathname_win(self):
|
||||
fn = urllib.request.url2pathname
|
||||
self.assertEqual(fn('/'), '\\')
|
||||
self.assertEqual(fn('/C:/'), 'C:\\')
|
||||
self.assertEqual(fn("///C|"), 'C:')
|
||||
self.assertEqual(fn("///C:"), 'C:')
|
||||
|
@ -1530,11 +1553,13 @@ class Pathname_Tests(unittest.TestCase):
|
|||
'test specific to POSIX pathnames')
|
||||
def test_url2pathname_posix(self):
|
||||
fn = urllib.request.url2pathname
|
||||
self.assertEqual(fn('/foo/bar'), '/foo/bar')
|
||||
self.assertEqual(fn('//foo/bar'), '//foo/bar')
|
||||
self.assertEqual(fn('///foo/bar'), '/foo/bar')
|
||||
self.assertEqual(fn('////foo/bar'), '//foo/bar')
|
||||
self.assertEqual(fn('//localhost/foo/bar'), '/foo/bar')
|
||||
self.assertRaises(urllib.error.URLError, fn, '//foo/bar')
|
||||
self.assertRaises(urllib.error.URLError, fn, '//localhost:/foo/bar')
|
||||
self.assertRaises(urllib.error.URLError, fn, '//:80/foo/bar')
|
||||
self.assertRaises(urllib.error.URLError, fn, '//:/foo/bar')
|
||||
self.assertRaises(urllib.error.URLError, fn, '//c:80/foo/bar')
|
||||
self.assertEqual(fn('//127.0.0.1/foo/bar'), '/foo/bar')
|
||||
self.assertEqual(fn(f'//{socket.gethostname()}/foo/bar'), '/foo/bar')
|
||||
|
||||
@unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII')
|
||||
def test_url2pathname_nonascii(self):
|
||||
|
|
|
@ -1450,16 +1450,6 @@ def parse_http_list(s):
|
|||
return [part.strip() for part in res]
|
||||
|
||||
class FileHandler(BaseHandler):
|
||||
# Use local file or FTP depending on form of URL
|
||||
def file_open(self, req):
|
||||
url = req.selector
|
||||
if url[:2] == '//' and url[2:3] != '/' and (req.host and
|
||||
req.host != 'localhost'):
|
||||
if not req.host in self.get_names():
|
||||
raise URLError("file:// scheme is supported only on localhost")
|
||||
else:
|
||||
return self.open_local_file(req)
|
||||
|
||||
# names for the localhost
|
||||
names = None
|
||||
def get_names(self):
|
||||
|
@ -1476,8 +1466,7 @@ class FileHandler(BaseHandler):
|
|||
def open_local_file(self, req):
|
||||
import email.utils
|
||||
import mimetypes
|
||||
host = req.host
|
||||
filename = req.selector
|
||||
filename = _splittype(req.full_url)[1]
|
||||
localfile = url2pathname(filename)
|
||||
try:
|
||||
stats = os.stat(localfile)
|
||||
|
@ -1487,21 +1476,21 @@ class FileHandler(BaseHandler):
|
|||
headers = email.message_from_string(
|
||||
'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
|
||||
(mtype or 'text/plain', size, modified))
|
||||
if host:
|
||||
host, port = _splitport(host)
|
||||
if not host or \
|
||||
(not port and _safe_gethostbyname(host) in self.get_names()):
|
||||
origurl = 'file:' + pathname2url(localfile)
|
||||
return addinfourl(open(localfile, 'rb'), headers, origurl)
|
||||
origurl = f'file:{pathname2url(localfile)}'
|
||||
return addinfourl(open(localfile, 'rb'), headers, origurl)
|
||||
except OSError as exp:
|
||||
raise URLError(exp, exp.filename)
|
||||
raise URLError('file not on local host')
|
||||
|
||||
def _safe_gethostbyname(host):
|
||||
file_open = open_local_file
|
||||
|
||||
def _is_local_authority(authority):
|
||||
if not authority or authority == 'localhost':
|
||||
return True
|
||||
try:
|
||||
return socket.gethostbyname(host)
|
||||
except socket.gaierror:
|
||||
return None
|
||||
address = socket.gethostbyname(authority)
|
||||
except (socket.gaierror, AttributeError):
|
||||
return False
|
||||
return address in FileHandler().get_names()
|
||||
|
||||
class FTPHandler(BaseHandler):
|
||||
def ftp_open(self, req):
|
||||
|
@ -1649,16 +1638,13 @@ class DataHandler(BaseHandler):
|
|||
def url2pathname(url):
|
||||
"""OS-specific conversion from a relative URL of the 'file' scheme
|
||||
to a file system path; not recommended for general use."""
|
||||
if url[:3] == '///':
|
||||
# Empty authority section, so the path begins on the third character.
|
||||
url = url[2:]
|
||||
elif url[:12] == '//localhost/':
|
||||
# Skip past 'localhost' authority.
|
||||
url = url[11:]
|
||||
|
||||
authority, url = _splithost(url)
|
||||
if os.name == 'nt':
|
||||
if url[:3] == '///':
|
||||
# Skip past extra slash before UNC drive in URL path.
|
||||
if not _is_local_authority(authority):
|
||||
# e.g. file://server/share/file.txt
|
||||
url = '//' + authority + url
|
||||
elif url[:3] == '///':
|
||||
# e.g. file://///server/share/file.txt
|
||||
url = url[1:]
|
||||
else:
|
||||
if url[:1] == '/' and url[2:3] in (':', '|'):
|
||||
|
@ -1668,6 +1654,8 @@ def url2pathname(url):
|
|||
# Older URLs use a pipe after a drive letter
|
||||
url = url[:1] + ':' + url[2:]
|
||||
url = url.replace('/', '\\')
|
||||
elif not _is_local_authority(authority):
|
||||
raise URLError("file:// scheme is supported only on localhost")
|
||||
encoding = sys.getfilesystemencoding()
|
||||
errors = sys.getfilesystemencodeerrors()
|
||||
return unquote(url, encoding=encoding, errors=errors)
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
Fix issue where :func:`urllib.request.url2pathname` mishandled file URLs with
|
||||
authorities. If an authority is present and resolves to ``localhost``, it is
|
||||
now discarded. If an authority is present but *doesn't* resolve to
|
||||
``localhost``, then on Windows a UNC path is returned (as before), and on
|
||||
other platforms a :exc:`urllib.error.URLError` is now raised.
|
Loading…
Add table
Add a link
Reference in a new issue