[3.12] gh-135034: Normalize link targets in tarfile, add os.path.realpath(strict='allow_missing') (GH-135037) (GH-135066)

Addresses CVEs 2024-12718, 2025-4138, 2025-4330, and 2025-4517.

(cherry picked from commit 3612d8f517)

Co-authored-by: Łukasz Langa <lukasz@langa.pl>
Signed-off-by: Łukasz Langa <lukasz@langa.pl>
Co-authored-by: Petr Viktorin <encukou@gmail.com>
Co-authored-by: Seth Michael Larson <seth@python.org>
Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
T. Wouters 2025-06-03 16:00:21 +02:00 committed by GitHub
parent f3272d8630
commit 19de092deb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 1067 additions and 142 deletions

View file

@ -30,7 +30,8 @@ __all__ = ["normcase","isabs","join","splitdrive","splitroot","split","splitext"
"ismount", "expanduser","expandvars","normpath","abspath",
"curdir","pardir","sep","pathsep","defpath","altsep",
"extsep","devnull","realpath","supports_unicode_filenames","relpath",
"samefile", "sameopenfile", "samestat", "commonpath", "isjunction"]
"samefile", "sameopenfile", "samestat", "commonpath", "isjunction",
"ALLOW_MISSING"]
def _get_bothseps(path):
if isinstance(path, bytes):
@ -609,9 +610,10 @@ try:
from nt import _getfinalpathname, readlink as _nt_readlink
except ImportError:
# realpath is a no-op on systems without _getfinalpathname support.
realpath = abspath
def realpath(path, *, strict=False):
return abspath(path)
else:
def _readlink_deep(path):
def _readlink_deep(path, ignored_error=OSError):
# These error codes indicate that we should stop reading links and
# return the path we currently have.
# 1: ERROR_INVALID_FUNCTION
@ -644,7 +646,7 @@ else:
path = old_path
break
path = normpath(join(dirname(old_path), path))
except OSError as ex:
except ignored_error as ex:
if ex.winerror in allowed_winerror:
break
raise
@ -653,7 +655,7 @@ else:
break
return path
def _getfinalpathname_nonstrict(path):
def _getfinalpathname_nonstrict(path, ignored_error=OSError):
# These error codes indicate that we should stop resolving the path
# and return the value we currently have.
# 1: ERROR_INVALID_FUNCTION
@ -680,17 +682,18 @@ else:
try:
path = _getfinalpathname(path)
return join(path, tail) if tail else path
except OSError as ex:
except ignored_error as ex:
if ex.winerror not in allowed_winerror:
raise
try:
# The OS could not resolve this path fully, so we attempt
# to follow the link ourselves. If we succeed, join the tail
# and return.
new_path = _readlink_deep(path)
new_path = _readlink_deep(path,
ignored_error=ignored_error)
if new_path != path:
return join(new_path, tail) if tail else new_path
except OSError:
except ignored_error:
# If we fail to readlink(), let's keep traversing
pass
path, name = split(path)
@ -721,6 +724,15 @@ else:
if normcase(path) == normcase(devnull):
return '\\\\.\\NUL'
had_prefix = path.startswith(prefix)
if strict is ALLOW_MISSING:
ignored_error = FileNotFoundError
strict = True
elif strict:
ignored_error = ()
else:
ignored_error = OSError
if not had_prefix and not isabs(path):
path = join(cwd, path)
try:
@ -728,17 +740,16 @@ else:
initial_winerror = 0
except ValueError as ex:
# gh-106242: Raised for embedded null characters
# In strict mode, we convert into an OSError.
# In strict modes, we convert into an OSError.
# Non-strict mode returns the path as-is, since we've already
# made it absolute.
if strict:
raise OSError(str(ex)) from None
path = normpath(path)
except OSError as ex:
if strict:
raise
except ignored_error as ex:
initial_winerror = ex.winerror
path = _getfinalpathname_nonstrict(path)
path = _getfinalpathname_nonstrict(path,
ignored_error=ignored_error)
# The path returned by _getfinalpathname will always start with \\?\ -
# strip off that prefix unless it was already provided on the original
# path.