mirror of
https://github.com/python/cpython.git
synced 2025-07-07 19:35:27 +00:00
GH-125866: Support complete "file:" URLs in urllib (#132378)
Add optional *add_scheme* argument to `urllib.request.pathname2url()`; when set to true, a complete URL is returned. Likewise add optional *require_scheme* argument to `url2pathname()`; when set to true, a complete URL is accepted. Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
This commit is contained in:
parent
4d3ad0467e
commit
ccad61e35d
9 changed files with 121 additions and 33 deletions
|
@ -146,16 +146,19 @@ The :mod:`urllib.request` module defines the following functions:
|
|||
attribute to modify its position in the handlers list.
|
||||
|
||||
|
||||
.. function:: pathname2url(path)
|
||||
.. function:: pathname2url(path, *, add_scheme=False)
|
||||
|
||||
Convert the given local path to a ``file:`` URL. This function uses
|
||||
:func:`~urllib.parse.quote` function to encode the path. For historical
|
||||
reasons, the return value omits the ``file:`` scheme prefix. This example
|
||||
shows the function being used on Windows::
|
||||
:func:`~urllib.parse.quote` to encode the path.
|
||||
|
||||
If *add_scheme* is false (the default), the return value omits the
|
||||
``file:`` scheme prefix. Set *add_scheme* to true to return a complete URL.
|
||||
|
||||
This example shows the function being used on Windows::
|
||||
|
||||
>>> from urllib.request import pathname2url
|
||||
>>> path = 'C:\\Program Files'
|
||||
>>> 'file:' + pathname2url(path)
|
||||
>>> pathname2url(path, add_scheme=True)
|
||||
'file:///C:/Program%20Files'
|
||||
|
||||
.. versionchanged:: 3.14
|
||||
|
@ -168,17 +171,25 @@ The :mod:`urllib.request` module defines the following functions:
|
|||
sections. For example, the path ``/etc/hosts`` is converted to
|
||||
the URL ``///etc/hosts``.
|
||||
|
||||
.. versionchanged:: next
|
||||
The *add_scheme* argument was added.
|
||||
|
||||
.. function:: url2pathname(url)
|
||||
|
||||
.. function:: url2pathname(url, *, require_scheme=False)
|
||||
|
||||
Convert the given ``file:`` URL to a local path. This function uses
|
||||
:func:`~urllib.parse.unquote` to decode the URL. For historical reasons,
|
||||
the given value *must* omit the ``file:`` scheme prefix. This example shows
|
||||
the function being used on Windows::
|
||||
:func:`~urllib.parse.unquote` to decode the URL.
|
||||
|
||||
If *require_scheme* is false (the default), the given value should omit a
|
||||
``file:`` scheme prefix. If *require_scheme* is set to true, the given
|
||||
value should include the prefix; a :exc:`~urllib.error.URLError` is raised
|
||||
if it doesn't.
|
||||
|
||||
This example shows the function being used on Windows::
|
||||
|
||||
>>> from urllib.request import url2pathname
|
||||
>>> url = 'file:///C:/Program%20Files'
|
||||
>>> url2pathname(url.removeprefix('file:'))
|
||||
>>> url2pathname(url, require_scheme=True)
|
||||
'C:\\Program Files'
|
||||
|
||||
.. versionchanged:: 3.14
|
||||
|
@ -193,6 +204,9 @@ The :mod:`urllib.request` module defines the following functions:
|
|||
returned (as before), and on other platforms a
|
||||
:exc:`~urllib.error.URLError` is raised.
|
||||
|
||||
.. versionchanged:: next
|
||||
The *require_scheme* argument was added.
|
||||
|
||||
|
||||
.. function:: getproxies()
|
||||
|
||||
|
|
|
@ -1218,16 +1218,20 @@ urllib
|
|||
supporting SHA-256 digest authentication as specified in :rfc:`7616`.
|
||||
(Contributed by Calvin Bui in :gh:`128193`.)
|
||||
|
||||
* Improve standards compliance when parsing and emitting ``file:`` URLs.
|
||||
* Improve ergonomics and standards compliance when parsing and emitting
|
||||
``file:`` URLs.
|
||||
|
||||
In :func:`urllib.request.url2pathname`:
|
||||
|
||||
- Accept a complete URL when the new *require_scheme* argument is set to
|
||||
true.
|
||||
- Discard URL authorities that resolve to a local IP address.
|
||||
- Raise :exc:`~urllib.error.URLError` if a URL authority doesn't resolve
|
||||
to ``localhost``, except on Windows where we return a UNC path.
|
||||
to a local IP address, except on Windows where we return a UNC path.
|
||||
|
||||
In :func:`urllib.request.pathname2url`:
|
||||
|
||||
- Return a complete URL when the new *add_scheme* argument is set to true.
|
||||
- Include an empty URL authority when a path begins with a slash. For
|
||||
example, the path ``/etc/hosts`` is converted to the URL ``///etc/hosts``.
|
||||
|
||||
|
|
|
@ -1271,17 +1271,15 @@ class Path(PurePath):
|
|||
if not self.is_absolute():
|
||||
raise ValueError("relative paths can't be expressed as file URIs")
|
||||
from urllib.request import pathname2url
|
||||
return f'file:{pathname2url(str(self))}'
|
||||
return pathname2url(str(self), add_scheme=True)
|
||||
|
||||
@classmethod
|
||||
def from_uri(cls, uri):
|
||||
"""Return a new path from the given 'file' URI."""
|
||||
if not uri.startswith('file:'):
|
||||
raise ValueError(f"URI does not start with 'file:': {uri!r}")
|
||||
from urllib.error import URLError
|
||||
from urllib.request import url2pathname
|
||||
try:
|
||||
path = cls(url2pathname(uri.removeprefix('file:')))
|
||||
path = cls(url2pathname(uri, require_scheme=True))
|
||||
except URLError as exc:
|
||||
raise ValueError(exc.reason) from None
|
||||
if not path.is_absolute():
|
||||
|
|
|
@ -3302,8 +3302,8 @@ class PathTest(PurePathTest):
|
|||
@needs_posix
|
||||
def test_from_uri_pathname2url_posix(self):
|
||||
P = self.cls
|
||||
self.assertEqual(P.from_uri('file:' + pathname2url('/foo/bar')), P('/foo/bar'))
|
||||
self.assertEqual(P.from_uri('file:' + pathname2url('//foo/bar')), P('//foo/bar'))
|
||||
self.assertEqual(P.from_uri(pathname2url('/foo/bar', add_scheme=True)), P('/foo/bar'))
|
||||
self.assertEqual(P.from_uri(pathname2url('//foo/bar', add_scheme=True)), P('//foo/bar'))
|
||||
|
||||
@needs_windows
|
||||
def test_absolute_windows(self):
|
||||
|
|
|
@ -476,7 +476,7 @@ Connection: close
|
|||
|
||||
def test_file_notexists(self):
|
||||
fd, tmp_file = tempfile.mkstemp()
|
||||
tmp_file_canon_url = 'file:' + urllib.request.pathname2url(tmp_file)
|
||||
tmp_file_canon_url = urllib.request.pathname2url(tmp_file, add_scheme=True)
|
||||
parsed = urllib.parse.urlsplit(tmp_file_canon_url)
|
||||
tmp_fileurl = parsed._replace(netloc='localhost').geturl()
|
||||
try:
|
||||
|
@ -620,7 +620,7 @@ class urlretrieve_FileTests(unittest.TestCase):
|
|||
|
||||
def constructLocalFileUrl(self, filePath):
|
||||
filePath = os.path.abspath(filePath)
|
||||
return "file:" + urllib.request.pathname2url(filePath)
|
||||
return urllib.request.pathname2url(filePath, add_scheme=True)
|
||||
|
||||
def createNewTempFile(self, data=b""):
|
||||
"""Creates a new temporary file containing the specified data,
|
||||
|
@ -1436,6 +1436,21 @@ class Pathname_Tests(unittest.TestCase):
|
|||
self.assertEqual(fn(f'{sep}a{sep}b.c'), '///a/b.c')
|
||||
self.assertEqual(fn(f'{sep}a{sep}b%#c'), '///a/b%25%23c')
|
||||
|
||||
def test_pathname2url_add_scheme(self):
|
||||
sep = os.path.sep
|
||||
subtests = [
|
||||
('', 'file:'),
|
||||
(sep, 'file:///'),
|
||||
('a', 'file:a'),
|
||||
(f'a{sep}b.c', 'file:a/b.c'),
|
||||
(f'{sep}a{sep}b.c', 'file:///a/b.c'),
|
||||
(f'{sep}a{sep}b%#c', 'file:///a/b%25%23c'),
|
||||
]
|
||||
for path, expected_url in subtests:
|
||||
with self.subTest(path=path):
|
||||
self.assertEqual(
|
||||
urllib.request.pathname2url(path, add_scheme=True), expected_url)
|
||||
|
||||
@unittest.skipUnless(sys.platform == 'win32',
|
||||
'test specific to Windows pathnames.')
|
||||
def test_pathname2url_win(self):
|
||||
|
@ -1503,6 +1518,49 @@ class Pathname_Tests(unittest.TestCase):
|
|||
self.assertEqual(fn('//localhost/foo/bar'), f'{sep}foo{sep}bar')
|
||||
self.assertEqual(fn('///foo/bar'), f'{sep}foo{sep}bar')
|
||||
self.assertEqual(fn('////foo/bar'), f'{sep}{sep}foo{sep}bar')
|
||||
self.assertEqual(fn('data:blah'), 'data:blah')
|
||||
self.assertEqual(fn('data://blah'), f'data:{sep}{sep}blah')
|
||||
|
||||
def test_url2pathname_require_scheme(self):
|
||||
sep = os.path.sep
|
||||
subtests = [
|
||||
('file:', ''),
|
||||
('FILE:', ''),
|
||||
('FiLe:', ''),
|
||||
('file:/', f'{sep}'),
|
||||
('file:///', f'{sep}'),
|
||||
('file:////', f'{sep}{sep}'),
|
||||
('file:foo', 'foo'),
|
||||
('file:foo/bar', f'foo{sep}bar'),
|
||||
('file:/foo/bar', f'{sep}foo{sep}bar'),
|
||||
('file://localhost/foo/bar', f'{sep}foo{sep}bar'),
|
||||
('file:///foo/bar', f'{sep}foo{sep}bar'),
|
||||
('file:////foo/bar', f'{sep}{sep}foo{sep}bar'),
|
||||
('file:data:blah', 'data:blah'),
|
||||
('file:data://blah', f'data:{sep}{sep}blah'),
|
||||
]
|
||||
for url, expected_path in subtests:
|
||||
with self.subTest(url=url):
|
||||
self.assertEqual(
|
||||
urllib.request.url2pathname(url, require_scheme=True),
|
||||
expected_path)
|
||||
|
||||
error_subtests = [
|
||||
'',
|
||||
':',
|
||||
'foo',
|
||||
'http:foo',
|
||||
'localfile:foo',
|
||||
'data:foo',
|
||||
'data:file:foo',
|
||||
'data:file://foo',
|
||||
]
|
||||
for url in error_subtests:
|
||||
with self.subTest(url=url):
|
||||
self.assertRaises(
|
||||
urllib.error.URLError,
|
||||
urllib.request.url2pathname,
|
||||
url, require_scheme=True)
|
||||
|
||||
@unittest.skipUnless(sys.platform == 'win32',
|
||||
'test specific to Windows pathnames.')
|
||||
|
|
|
@ -809,7 +809,7 @@ class HandlerTests(unittest.TestCase):
|
|||
|
||||
TESTFN = os_helper.TESTFN
|
||||
towrite = b"hello, world\n"
|
||||
canonurl = 'file:' + urllib.request.pathname2url(os.path.abspath(TESTFN))
|
||||
canonurl = urllib.request.pathname2url(os.path.abspath(TESTFN), add_scheme=True)
|
||||
parsed = urlsplit(canonurl)
|
||||
if parsed.netloc:
|
||||
raise unittest.SkipTest("non-local working directory")
|
||||
|
|
|
@ -150,7 +150,7 @@ class OtherNetworkTests(unittest.TestCase):
|
|||
f.write('hi there\n')
|
||||
f.close()
|
||||
urls = [
|
||||
'file:' + urllib.request.pathname2url(os.path.abspath(TESTFN)),
|
||||
urllib.request.pathname2url(os.path.abspath(TESTFN), add_scheme=True),
|
||||
('file:///nonsensename/etc/passwd', None,
|
||||
urllib.error.URLError),
|
||||
]
|
||||
|
|
|
@ -1466,17 +1466,16 @@ class FileHandler(BaseHandler):
|
|||
def open_local_file(self, req):
|
||||
import email.utils
|
||||
import mimetypes
|
||||
filename = _splittype(req.full_url)[1]
|
||||
localfile = url2pathname(filename)
|
||||
localfile = url2pathname(req.full_url, require_scheme=True)
|
||||
try:
|
||||
stats = os.stat(localfile)
|
||||
size = stats.st_size
|
||||
modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
|
||||
mtype = mimetypes.guess_type(filename)[0]
|
||||
mtype = mimetypes.guess_file_type(localfile)[0]
|
||||
headers = email.message_from_string(
|
||||
'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
|
||||
(mtype or 'text/plain', size, modified))
|
||||
origurl = f'file:{pathname2url(localfile)}'
|
||||
origurl = pathname2url(localfile, add_scheme=True)
|
||||
return addinfourl(open(localfile, 'rb'), headers, origurl)
|
||||
except OSError as exp:
|
||||
raise URLError(exp, exp.filename)
|
||||
|
@ -1635,9 +1634,16 @@ class DataHandler(BaseHandler):
|
|||
|
||||
# Code moved from the old urllib module
|
||||
|
||||
def url2pathname(url):
|
||||
"""OS-specific conversion from a relative URL of the 'file' scheme
|
||||
to a file system path; not recommended for general use."""
|
||||
def url2pathname(url, *, require_scheme=False):
|
||||
"""Convert the given file URL to a local file system path.
|
||||
|
||||
The 'file:' scheme prefix must be omitted unless *require_scheme*
|
||||
is set to true.
|
||||
"""
|
||||
if require_scheme:
|
||||
scheme, url = _splittype(url)
|
||||
if scheme != 'file':
|
||||
raise URLError("URL is missing a 'file:' scheme")
|
||||
authority, url = _splithost(url)
|
||||
if os.name == 'nt':
|
||||
if not _is_local_authority(authority):
|
||||
|
@ -1661,13 +1667,17 @@ def url2pathname(url):
|
|||
return unquote(url, encoding=encoding, errors=errors)
|
||||
|
||||
|
||||
def pathname2url(pathname):
|
||||
"""OS-specific conversion from a file system path to a relative URL
|
||||
of the 'file' scheme; not recommended for general use."""
|
||||
def pathname2url(pathname, *, add_scheme=False):
|
||||
"""Convert the given local file system path to a file URL.
|
||||
|
||||
The 'file:' scheme prefix is omitted unless *add_scheme*
|
||||
is set to true.
|
||||
"""
|
||||
if os.name == 'nt':
|
||||
pathname = pathname.replace('\\', '/')
|
||||
encoding = sys.getfilesystemencoding()
|
||||
errors = sys.getfilesystemencodeerrors()
|
||||
scheme = 'file:' if add_scheme else ''
|
||||
drive, root, tail = os.path.splitroot(pathname)
|
||||
if drive:
|
||||
# First, clean up some special forms. We are going to sacrifice the
|
||||
|
@ -1689,7 +1699,7 @@ def pathname2url(pathname):
|
|||
# avoids interpreting the path as a URL authority.
|
||||
root = '//' + root
|
||||
tail = quote(tail, encoding=encoding, errors=errors)
|
||||
return drive + root + tail
|
||||
return scheme + drive + root + tail
|
||||
|
||||
|
||||
# Utility functions
|
||||
|
|
|
@ -0,0 +1,4 @@
|
|||
Add optional *add_scheme* argument to :func:`urllib.request.pathname2url`; when
|
||||
set to true, a complete URL is returned. Likewise add optional *require_scheme*
|
||||
argument to :func:`~urllib.request.url2pathname`; when set to true, a complete
|
||||
URL is accepted.
|
Loading…
Add table
Add a link
Reference in a new issue