GH-123599: url2pathname(): handle authority section in file URL (#126844)

In `urllib.request.url2pathname()`, if the authority resolves to the
current host, discard it. If an authority is present but resolves somewhere
else, then on Windows we return a UNC path (as before), and on other
platforms we raise `URLError`.

Affects `pathlib.Path.from_uri()` in the same way.

Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>
Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
This commit is contained in:
Barney Gale 2025-04-10 20:58:04 +01:00 committed by GitHub
parent a214db0c54
commit 66cdb2bd8a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 106 additions and 48 deletions

View file

@ -1450,16 +1450,6 @@ def parse_http_list(s):
return [part.strip() for part in res]
class FileHandler(BaseHandler):
# Use local file or FTP depending on form of URL
def file_open(self, req):
url = req.selector
if url[:2] == '//' and url[2:3] != '/' and (req.host and
req.host != 'localhost'):
if not req.host in self.get_names():
raise URLError("file:// scheme is supported only on localhost")
else:
return self.open_local_file(req)
# names for the localhost
names = None
def get_names(self):
@ -1476,8 +1466,7 @@ class FileHandler(BaseHandler):
def open_local_file(self, req):
import email.utils
import mimetypes
host = req.host
filename = req.selector
filename = _splittype(req.full_url)[1]
localfile = url2pathname(filename)
try:
stats = os.stat(localfile)
@ -1487,21 +1476,21 @@ class FileHandler(BaseHandler):
headers = email.message_from_string(
'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
(mtype or 'text/plain', size, modified))
if host:
host, port = _splitport(host)
if not host or \
(not port and _safe_gethostbyname(host) in self.get_names()):
origurl = 'file:' + pathname2url(localfile)
return addinfourl(open(localfile, 'rb'), headers, origurl)
origurl = f'file:{pathname2url(localfile)}'
return addinfourl(open(localfile, 'rb'), headers, origurl)
except OSError as exp:
raise URLError(exp, exp.filename)
raise URLError('file not on local host')
def _safe_gethostbyname(host):
file_open = open_local_file
def _is_local_authority(authority):
if not authority or authority == 'localhost':
return True
try:
return socket.gethostbyname(host)
except socket.gaierror:
return None
address = socket.gethostbyname(authority)
except (socket.gaierror, AttributeError):
return False
return address in FileHandler().get_names()
class FTPHandler(BaseHandler):
def ftp_open(self, req):
@ -1649,16 +1638,13 @@ class DataHandler(BaseHandler):
def url2pathname(url):
"""OS-specific conversion from a relative URL of the 'file' scheme
to a file system path; not recommended for general use."""
if url[:3] == '///':
# Empty authority section, so the path begins on the third character.
url = url[2:]
elif url[:12] == '//localhost/':
# Skip past 'localhost' authority.
url = url[11:]
authority, url = _splithost(url)
if os.name == 'nt':
if url[:3] == '///':
# Skip past extra slash before UNC drive in URL path.
if not _is_local_authority(authority):
# e.g. file://server/share/file.txt
url = '//' + authority + url
elif url[:3] == '///':
# e.g. file://///server/share/file.txt
url = url[1:]
else:
if url[:1] == '/' and url[2:3] in (':', '|'):
@ -1668,6 +1654,8 @@ def url2pathname(url):
# Older URLs use a pipe after a drive letter
url = url[:1] + ':' + url[2:]
url = url.replace('/', '\\')
elif not _is_local_authority(authority):
raise URLError("file:// scheme is supported only on localhost")
encoding = sys.getfilesystemencoding()
errors = sys.getfilesystemencodeerrors()
return unquote(url, encoding=encoding, errors=errors)