[3.14] gh-135034: Normalize link targets in tarfile, add os.path.realpath(strict='allow_missing') (gh-135037) (gh-135065)

Addresses CVEs 2024-12718, 2025-4138, 2025-4330, and 2025-4517.

(cherry picked from commit 3612d8f517)

Signed-off-by: Łukasz Langa <lukasz@langa.pl>
Co-authored-by: Petr Viktorin <encukou@gmail.com>
Co-authored-by: Seth Michael Larson <seth@python.org>
Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
Łukasz Langa 2025-06-03 14:05:00 +02:00 committed by GitHub
parent 78fd7ce3d2
commit 9e0ac76d96
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 970 additions and 173 deletions

View file

@ -29,7 +29,7 @@ __all__ = ["normcase","isabs","join","splitdrive","splitroot","split","splitext"
"abspath","curdir","pardir","sep","pathsep","defpath","altsep",
"extsep","devnull","realpath","supports_unicode_filenames","relpath",
"samefile", "sameopenfile", "samestat", "commonpath", "isjunction",
"isdevdrive"]
"isdevdrive", "ALLOW_MISSING"]
def _get_bothseps(path):
if isinstance(path, bytes):
@ -601,9 +601,10 @@ try:
from nt import _findfirstfile, _getfinalpathname, readlink as _nt_readlink
except ImportError:
# realpath is a no-op on systems without _getfinalpathname support.
realpath = abspath
def realpath(path, *, strict=False):
return abspath(path)
else:
def _readlink_deep(path):
def _readlink_deep(path, ignored_error=OSError):
# These error codes indicate that we should stop reading links and
# return the path we currently have.
# 1: ERROR_INVALID_FUNCTION
@ -636,7 +637,7 @@ else:
path = old_path
break
path = normpath(join(dirname(old_path), path))
except OSError as ex:
except ignored_error as ex:
if ex.winerror in allowed_winerror:
break
raise
@ -645,7 +646,7 @@ else:
break
return path
def _getfinalpathname_nonstrict(path):
def _getfinalpathname_nonstrict(path, ignored_error=OSError):
# These error codes indicate that we should stop resolving the path
# and return the value we currently have.
# 1: ERROR_INVALID_FUNCTION
@ -673,17 +674,18 @@ else:
try:
path = _getfinalpathname(path)
return join(path, tail) if tail else path
except OSError as ex:
except ignored_error as ex:
if ex.winerror not in allowed_winerror:
raise
try:
# The OS could not resolve this path fully, so we attempt
# to follow the link ourselves. If we succeed, join the tail
# and return.
new_path = _readlink_deep(path)
new_path = _readlink_deep(path,
ignored_error=ignored_error)
if new_path != path:
return join(new_path, tail) if tail else new_path
except OSError:
except ignored_error:
# If we fail to readlink(), let's keep traversing
pass
# If we get these errors, try to get the real name of the file without accessing it.
@ -691,7 +693,7 @@ else:
try:
name = _findfirstfile(path)
path, _ = split(path)
except OSError:
except ignored_error:
path, name = split(path)
else:
path, name = split(path)
@ -721,6 +723,15 @@ else:
if normcase(path) == devnull:
return '\\\\.\\NUL'
had_prefix = path.startswith(prefix)
if strict is ALLOW_MISSING:
ignored_error = FileNotFoundError
strict = True
elif strict:
ignored_error = ()
else:
ignored_error = OSError
if not had_prefix and not isabs(path):
path = join(cwd, path)
try:
@ -728,17 +739,16 @@ else:
initial_winerror = 0
except ValueError as ex:
# gh-106242: Raised for embedded null characters
# In strict mode, we convert into an OSError.
# In strict modes, we convert into an OSError.
# Non-strict mode returns the path as-is, since we've already
# made it absolute.
if strict:
raise OSError(str(ex)) from None
path = normpath(path)
except OSError as ex:
if strict:
raise
except ignored_error as ex:
initial_winerror = ex.winerror
path = _getfinalpathname_nonstrict(path)
path = _getfinalpathname_nonstrict(path,
ignored_error=ignored_error)
# The path returned by _getfinalpathname will always start with \\?\ -
# strip off that prefix unless it was already provided on the original
# path.