bpo-43757: Make pathlib use os.path.realpath() to resolve symlinks in a path (GH-25264)

Also adds a new "strict" argument to realpath() to avoid changing the default behaviour of pathlib while sharing the implementation.
This commit is contained in:
Barney Gale 2021-04-28 16:50:17 +01:00 committed by GitHub
parent 859577c249
commit baecfbd849
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 184 additions and 109 deletions

View file

@ -344,15 +344,24 @@ the :mod:`glob` module.)
Accepts a :term:`path-like object`. Accepts a :term:`path-like object`.
.. function:: realpath(path) .. function:: realpath(path, *, strict=False)
Return the canonical path of the specified filename, eliminating any symbolic Return the canonical path of the specified filename, eliminating any symbolic
links encountered in the path (if they are supported by the operating links encountered in the path (if they are supported by the operating
system). system).
If a path doesn't exist or a symlink loop is encountered, and *strict* is
``True``, :exc:`OSError` is raised. If *strict* is ``False``, the path is
resolved as far as possible and any remainder is appended without checking
whether it exists.
.. note:: .. note::
When symbolic link cycles occur, the returned path will be one member of the cycle, but no guarantee is made about which member that will be.
This function emulates the operating system's procedure for making a path canonical, which differs slightly between Windows and UNIX with respect to how links and subsequent path components interact.
Operating system APIs make paths canonical as needed, so it's not
normally necessary to call this function.
.. versionchanged:: 3.6 .. versionchanged:: 3.6
Accepts a :term:`path-like object`. Accepts a :term:`path-like object`.
@ -360,6 +369,9 @@ the :mod:`glob` module.)
.. versionchanged:: 3.8 .. versionchanged:: 3.8
Symbolic links and junctions are now resolved on Windows. Symbolic links and junctions are now resolved on Windows.
.. versionchanged:: 3.10
The *strict* parameter was added.
.. function:: relpath(path, start=os.curdir) .. function:: relpath(path, start=os.curdir)

View file

@ -635,7 +635,7 @@ else:
tail = join(name, tail) if tail else name tail = join(name, tail) if tail else name
return tail return tail
def realpath(path): def realpath(path, *, strict=False):
path = normpath(path) path = normpath(path)
if isinstance(path, bytes): if isinstance(path, bytes):
prefix = b'\\\\?\\' prefix = b'\\\\?\\'
@ -660,6 +660,8 @@ else:
path = _getfinalpathname(path) path = _getfinalpathname(path)
initial_winerror = 0 initial_winerror = 0
except OSError as ex: except OSError as ex:
if strict:
raise
initial_winerror = ex.winerror initial_winerror = ex.winerror
path = _getfinalpathname_nonstrict(path) path = _getfinalpathname_nonstrict(path)
# The path returned by _getfinalpathname will always start with \\?\ - # The path returned by _getfinalpathname will always start with \\?\ -

View file

@ -14,12 +14,6 @@ from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO
from urllib.parse import quote_from_bytes as urlquote_from_bytes from urllib.parse import quote_from_bytes as urlquote_from_bytes
if os.name == 'nt':
from nt import _getfinalpathname
else:
_getfinalpathname = None
__all__ = [ __all__ = [
"PurePath", "PurePosixPath", "PureWindowsPath", "PurePath", "PurePosixPath", "PureWindowsPath",
"Path", "PosixPath", "WindowsPath", "Path", "PosixPath", "WindowsPath",
@ -29,14 +23,17 @@ __all__ = [
# Internals # Internals
# #
_WINERROR_NOT_READY = 21 # drive exists but is not accessible
_WINERROR_INVALID_NAME = 123 # fix for bpo-35306
_WINERROR_CANT_RESOLVE_FILENAME = 1921 # broken symlink pointing to itself
# EBADF - guard against macOS `stat` throwing EBADF # EBADF - guard against macOS `stat` throwing EBADF
_IGNORED_ERROS = (ENOENT, ENOTDIR, EBADF, ELOOP) _IGNORED_ERROS = (ENOENT, ENOTDIR, EBADF, ELOOP)
_IGNORED_WINERRORS = ( _IGNORED_WINERRORS = (
21, # ERROR_NOT_READY - drive exists but is not accessible _WINERROR_NOT_READY,
123, # ERROR_INVALID_NAME - fix for bpo-35306 _WINERROR_INVALID_NAME,
1921, # ERROR_CANT_RESOLVE_FILENAME - fix for broken symlink pointing to itself _WINERROR_CANT_RESOLVE_FILENAME)
)
def _ignore_error(exception): def _ignore_error(exception):
return (getattr(exception, 'errno', None) in _IGNORED_ERROS or return (getattr(exception, 'errno', None) in _IGNORED_ERROS or
@ -186,30 +183,6 @@ class _WindowsFlavour(_Flavour):
def compile_pattern(self, pattern): def compile_pattern(self, pattern):
return re.compile(fnmatch.translate(pattern), re.IGNORECASE).fullmatch return re.compile(fnmatch.translate(pattern), re.IGNORECASE).fullmatch
def resolve(self, path, strict=False):
s = str(path)
if not s:
return path._accessor.getcwd()
previous_s = None
if _getfinalpathname is not None:
if strict:
return self._ext_to_normal(_getfinalpathname(s))
else:
tail_parts = [] # End of the path after the first one not found
while True:
try:
s = self._ext_to_normal(_getfinalpathname(s))
except FileNotFoundError:
previous_s = s
s, tail = os.path.split(s)
tail_parts.append(tail)
if previous_s == s:
return path
else:
return os.path.join(s, *reversed(tail_parts))
# Means fallback on absolute
return None
def _split_extended_path(self, s, ext_prefix=ext_namespace_prefix): def _split_extended_path(self, s, ext_prefix=ext_namespace_prefix):
prefix = '' prefix = ''
if s.startswith(ext_prefix): if s.startswith(ext_prefix):
@ -220,10 +193,6 @@ class _WindowsFlavour(_Flavour):
s = '\\' + s[3:] s = '\\' + s[3:]
return prefix, s return prefix, s
def _ext_to_normal(self, s):
# Turn back an extended path into a normal DOS-like path
return self._split_extended_path(s)[1]
def is_reserved(self, parts): def is_reserved(self, parts):
# NOTE: the rules for reserved names seem somewhat complicated # NOTE: the rules for reserved names seem somewhat complicated
# (e.g. r"..\NUL" is reserved but not r"foo\NUL"). # (e.g. r"..\NUL" is reserved but not r"foo\NUL").
@ -281,54 +250,6 @@ class _PosixFlavour(_Flavour):
def compile_pattern(self, pattern): def compile_pattern(self, pattern):
return re.compile(fnmatch.translate(pattern)).fullmatch return re.compile(fnmatch.translate(pattern)).fullmatch
def resolve(self, path, strict=False):
sep = self.sep
accessor = path._accessor
seen = {}
def _resolve(path, rest):
if rest.startswith(sep):
path = ''
for name in rest.split(sep):
if not name or name == '.':
# current dir
continue
if name == '..':
# parent dir
path, _, _ = path.rpartition(sep)
continue
if path.endswith(sep):
newpath = path + name
else:
newpath = path + sep + name
if newpath in seen:
# Already seen this path
path = seen[newpath]
if path is not None:
# use cached value
continue
# The symlink is not resolved, so we must have a symlink loop.
raise RuntimeError("Symlink loop from %r" % newpath)
# Resolve the symbolic link
try:
target = accessor.readlink(newpath)
except OSError as e:
if e.errno != EINVAL and strict:
raise
# Not a symlink, or non-strict mode. We just leave the path
# untouched.
path = newpath
else:
seen[newpath] = None # not resolved symlink
path = _resolve(path, target)
seen[newpath] = path # resolved symlink
return path
# NOTE: according to POSIX, getcwd() cannot contain path components
# which are symlinks.
base = '' if path.is_absolute() else accessor.getcwd()
return _resolve(base, str(path)) or sep
def is_reserved(self, parts): def is_reserved(self, parts):
return False return False
@ -424,6 +345,8 @@ class _NormalAccessor(_Accessor):
expanduser = staticmethod(os.path.expanduser) expanduser = staticmethod(os.path.expanduser)
realpath = staticmethod(os.path.realpath)
_normal_accessor = _NormalAccessor() _normal_accessor = _NormalAccessor()
@ -1132,15 +1055,27 @@ class Path(PurePath):
normalizing it (for example turning slashes into backslashes under normalizing it (for example turning slashes into backslashes under
Windows). Windows).
""" """
s = self._flavour.resolve(self, strict=strict)
if s is None: def check_eloop(e):
# No symlink resolution => for consistency, raise an error if winerror = getattr(e, 'winerror', 0)
# the path doesn't exist or is forbidden if e.errno == ELOOP or winerror == _WINERROR_CANT_RESOLVE_FILENAME:
self.stat() raise RuntimeError("Symlink loop from %r" % e.filename)
s = str(self.absolute())
# Now we have no symlinks in the path, it's safe to normalize it. try:
normed = self._flavour.pathmod.normpath(s) s = self._accessor.realpath(self, strict=strict)
return self._from_parts((normed,)) except OSError as e:
check_eloop(e)
raise
p = self._from_parts((s,))
# In non-strict mode, realpath() doesn't raise on symlink loops.
# Ensure we get an exception by calling stat()
if not strict:
try:
p.stat()
except OSError as e:
check_eloop(e)
return p
def stat(self, *, follow_symlinks=True): def stat(self, *, follow_symlinks=True):
""" """

View file

@ -387,16 +387,16 @@ def abspath(path):
# Return a canonical path (i.e. the absolute location of a file on the # Return a canonical path (i.e. the absolute location of a file on the
# filesystem). # filesystem).
def realpath(filename): def realpath(filename, *, strict=False):
"""Return the canonical path of the specified filename, eliminating any """Return the canonical path of the specified filename, eliminating any
symbolic links encountered in the path.""" symbolic links encountered in the path."""
filename = os.fspath(filename) filename = os.fspath(filename)
path, ok = _joinrealpath(filename[:0], filename, {}) path, ok = _joinrealpath(filename[:0], filename, strict, {})
return abspath(path) return abspath(path)
# Join two paths, normalizing and eliminating any symbolic links # Join two paths, normalizing and eliminating any symbolic links
# encountered in the second path. # encountered in the second path.
def _joinrealpath(path, rest, seen): def _joinrealpath(path, rest, strict, seen):
if isinstance(path, bytes): if isinstance(path, bytes):
sep = b'/' sep = b'/'
curdir = b'.' curdir = b'.'
@ -425,7 +425,15 @@ def _joinrealpath(path, rest, seen):
path = pardir path = pardir
continue continue
newpath = join(path, name) newpath = join(path, name)
if not islink(newpath): try:
st = os.lstat(newpath)
except OSError:
if strict:
raise
is_link = False
else:
is_link = stat.S_ISLNK(st.st_mode)
if not is_link:
path = newpath path = newpath
continue continue
# Resolve the symbolic link # Resolve the symbolic link
@ -436,10 +444,14 @@ def _joinrealpath(path, rest, seen):
# use cached value # use cached value
continue continue
# The symlink is not resolved, so we must have a symlink loop. # The symlink is not resolved, so we must have a symlink loop.
# Return already resolved part + rest of the path unchanged. if strict:
return join(newpath, rest), False # Raise OSError(errno.ELOOP)
os.stat(newpath)
else:
# Return already resolved part + rest of the path unchanged.
return join(newpath, rest), False
seen[newpath] = None # not resolved symlink seen[newpath] = None # not resolved symlink
path, ok = _joinrealpath(path, os.readlink(newpath), seen) path, ok = _joinrealpath(path, os.readlink(newpath), strict, seen)
if not ok: if not ok:
return join(path, rest), False return join(path, rest), False
seen[newpath] = path # resolved symlink seen[newpath] = path # resolved symlink

View file

@ -269,6 +269,17 @@ class TestNtpath(NtpathTestCase):
self.assertPathEqual(ntpath.realpath(os.fsencode(ABSTFN + "1")), self.assertPathEqual(ntpath.realpath(os.fsencode(ABSTFN + "1")),
os.fsencode(ABSTFN)) os.fsencode(ABSTFN))
@os_helper.skip_unless_symlink
@unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
def test_realpath_strict(self):
# Bug #43757: raise FileNotFoundError in strict mode if we encounter
# a path that does not exist.
ABSTFN = ntpath.abspath(os_helper.TESTFN)
os.symlink(ABSTFN + "1", ABSTFN)
self.addCleanup(os_helper.unlink, ABSTFN)
self.assertRaises(FileNotFoundError, ntpath.realpath, ABSTFN, strict=True)
self.assertRaises(FileNotFoundError, ntpath.realpath, ABSTFN + "2", strict=True)
@os_helper.skip_unless_symlink @os_helper.skip_unless_symlink
@unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
def test_realpath_relative(self): def test_realpath_relative(self):
@ -340,8 +351,9 @@ class TestNtpath(NtpathTestCase):
@os_helper.skip_unless_symlink @os_helper.skip_unless_symlink
@unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
def test_realpath_symlink_loops(self): def test_realpath_symlink_loops(self):
# Symlink loops are non-deterministic as to which path is returned, but
# it will always be the fully resolved path of one member of the cycle
# Symlink loops in non-strict mode are non-deterministic as to which
# path is returned, but it will always be the fully resolved path of
# one member of the cycle
ABSTFN = ntpath.abspath(os_helper.TESTFN) ABSTFN = ntpath.abspath(os_helper.TESTFN)
self.addCleanup(os_helper.unlink, ABSTFN) self.addCleanup(os_helper.unlink, ABSTFN)
self.addCleanup(os_helper.unlink, ABSTFN + "1") self.addCleanup(os_helper.unlink, ABSTFN + "1")
@ -383,6 +395,50 @@ class TestNtpath(NtpathTestCase):
# Test using relative path as well. # Test using relative path as well.
self.assertPathEqual(ntpath.realpath(ntpath.basename(ABSTFN)), ABSTFN) self.assertPathEqual(ntpath.realpath(ntpath.basename(ABSTFN)), ABSTFN)
@os_helper.skip_unless_symlink
@unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
def test_realpath_symlink_loops_strict(self):
# Symlink loops raise OSError in strict mode
ABSTFN = ntpath.abspath(os_helper.TESTFN)
self.addCleanup(os_helper.unlink, ABSTFN)
self.addCleanup(os_helper.unlink, ABSTFN + "1")
self.addCleanup(os_helper.unlink, ABSTFN + "2")
self.addCleanup(os_helper.unlink, ABSTFN + "y")
self.addCleanup(os_helper.unlink, ABSTFN + "c")
self.addCleanup(os_helper.unlink, ABSTFN + "a")
os.symlink(ABSTFN, ABSTFN)
self.assertRaises(OSError, ntpath.realpath, ABSTFN, strict=True)
os.symlink(ABSTFN + "1", ABSTFN + "2")
os.symlink(ABSTFN + "2", ABSTFN + "1")
self.assertRaises(OSError, ntpath.realpath, ABSTFN + "1", strict=True)
self.assertRaises(OSError, ntpath.realpath, ABSTFN + "2", strict=True)
self.assertRaises(OSError, ntpath.realpath, ABSTFN + "1\\x", strict=True)
# Windows eliminates '..' components before resolving links, so the
# following call is not expected to raise.
self.assertPathEqual(ntpath.realpath(ABSTFN + "1\\..", strict=True),
ntpath.dirname(ABSTFN))
self.assertRaises(OSError, ntpath.realpath, ABSTFN + "1\\..\\x", strict=True)
os.symlink(ABSTFN + "x", ABSTFN + "y")
self.assertRaises(OSError, ntpath.realpath, ABSTFN + "1\\..\\"
+ ntpath.basename(ABSTFN) + "y",
strict=True)
self.assertRaises(OSError, ntpath.realpath,
ABSTFN + "1\\..\\" + ntpath.basename(ABSTFN) + "1",
strict=True)
os.symlink(ntpath.basename(ABSTFN) + "a\\b", ABSTFN + "a")
self.assertRaises(OSError, ntpath.realpath, ABSTFN + "a", strict=True)
os.symlink("..\\" + ntpath.basename(ntpath.dirname(ABSTFN))
+ "\\" + ntpath.basename(ABSTFN) + "c", ABSTFN + "c")
self.assertRaises(OSError, ntpath.realpath, ABSTFN + "c", strict=True)
# Test using relative path as well.
self.assertRaises(OSError, ntpath.realpath, ntpath.basename(ABSTFN),
strict=True)
@os_helper.skip_unless_symlink @os_helper.skip_unless_symlink
@unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
def test_realpath_symlink_prefix(self): def test_realpath_symlink_prefix(self):

View file

@ -355,6 +355,19 @@ class PosixPathTest(unittest.TestCase):
finally: finally:
os_helper.unlink(ABSTFN) os_helper.unlink(ABSTFN)
@unittest.skipUnless(hasattr(os, "symlink"),
"Missing symlink implementation")
@skip_if_ABSTFN_contains_backslash
def test_realpath_strict(self):
# Bug #43757: raise FileNotFoundError in strict mode if we encounter
# a path that does not exist.
try:
os.symlink(ABSTFN+"1", ABSTFN)
self.assertRaises(FileNotFoundError, realpath, ABSTFN, strict=True)
self.assertRaises(FileNotFoundError, realpath, ABSTFN + "2", strict=True)
finally:
os_helper.unlink(ABSTFN)
@unittest.skipUnless(hasattr(os, "symlink"), @unittest.skipUnless(hasattr(os, "symlink"),
"Missing symlink implementation") "Missing symlink implementation")
@skip_if_ABSTFN_contains_backslash @skip_if_ABSTFN_contains_backslash
@ -370,7 +383,7 @@ class PosixPathTest(unittest.TestCase):
@skip_if_ABSTFN_contains_backslash @skip_if_ABSTFN_contains_backslash
def test_realpath_symlink_loops(self): def test_realpath_symlink_loops(self):
# Bug #930024, return the path unchanged if we get into an infinite # Bug #930024, return the path unchanged if we get into an infinite
# symlink loop. # symlink loop in non-strict mode (default).
try: try:
os.symlink(ABSTFN, ABSTFN) os.symlink(ABSTFN, ABSTFN)
self.assertEqual(realpath(ABSTFN), ABSTFN) self.assertEqual(realpath(ABSTFN), ABSTFN)
@ -407,6 +420,48 @@ class PosixPathTest(unittest.TestCase):
os_helper.unlink(ABSTFN+"c") os_helper.unlink(ABSTFN+"c")
os_helper.unlink(ABSTFN+"a") os_helper.unlink(ABSTFN+"a")
@unittest.skipUnless(hasattr(os, "symlink"),
"Missing symlink implementation")
@skip_if_ABSTFN_contains_backslash
def test_realpath_symlink_loops_strict(self):
# Bug #43757, raise OSError if we get into an infinite symlink loop in
# strict mode.
try:
os.symlink(ABSTFN, ABSTFN)
self.assertRaises(OSError, realpath, ABSTFN, strict=True)
os.symlink(ABSTFN+"1", ABSTFN+"2")
os.symlink(ABSTFN+"2", ABSTFN+"1")
self.assertRaises(OSError, realpath, ABSTFN+"1", strict=True)
self.assertRaises(OSError, realpath, ABSTFN+"2", strict=True)
self.assertRaises(OSError, realpath, ABSTFN+"1/x", strict=True)
self.assertRaises(OSError, realpath, ABSTFN+"1/..", strict=True)
self.assertRaises(OSError, realpath, ABSTFN+"1/../x", strict=True)
os.symlink(ABSTFN+"x", ABSTFN+"y")
self.assertRaises(OSError, realpath,
ABSTFN+"1/../" + basename(ABSTFN) + "y", strict=True)
self.assertRaises(OSError, realpath,
ABSTFN+"1/../" + basename(ABSTFN) + "1", strict=True)
os.symlink(basename(ABSTFN) + "a/b", ABSTFN+"a")
self.assertRaises(OSError, realpath, ABSTFN+"a", strict=True)
os.symlink("../" + basename(dirname(ABSTFN)) + "/" +
basename(ABSTFN) + "c", ABSTFN+"c")
self.assertRaises(OSError, realpath, ABSTFN+"c", strict=True)
# Test using relative path as well.
with os_helper.change_cwd(dirname(ABSTFN)):
self.assertRaises(OSError, realpath, basename(ABSTFN), strict=True)
finally:
os_helper.unlink(ABSTFN)
os_helper.unlink(ABSTFN+"1")
os_helper.unlink(ABSTFN+"2")
os_helper.unlink(ABSTFN+"y")
os_helper.unlink(ABSTFN+"c")
os_helper.unlink(ABSTFN+"a")
@unittest.skipUnless(hasattr(os, "symlink"), @unittest.skipUnless(hasattr(os, "symlink"),
"Missing symlink implementation") "Missing symlink implementation")
@skip_if_ABSTFN_contains_backslash @skip_if_ABSTFN_contains_backslash

View file

@ -0,0 +1,3 @@
:func:`os.path.realpath` now accepts a *strict* keyword-only argument.
When set to ``True``, :exc:`OSError` is raised if a path doesn't exist
or a symlink loop is encountered.