gh-57911: Sanitize symlink targets in tarfile on win32 (GH-138309)

This commit is contained in:
Christoph Walcher 2025-09-05 16:19:47 +02:00 committed by GitHub
parent e76464d161
commit c1a9c23195
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 71 additions and 42 deletions

View file

@ -463,6 +463,10 @@ tarfile
:func:`~tarfile.TarFile.errorlevel` is zero. :func:`~tarfile.TarFile.errorlevel` is zero.
(Contributed by Matt Prodani and Petr Viktorin in :gh:`112887` (Contributed by Matt Prodani and Petr Viktorin in :gh:`112887`
and :cve:`2025-4435`.) and :cve:`2025-4435`.)
* :func:`~tarfile.TarFile.extract` and :func:`~tarfile.TarFile.extractall`
now replace slashes by backslashes in symlink targets on Windows to prevent
creation of corrupted links.
(Contributed by Christoph Walcher in :gh:`57911`.)
types types

View file

@ -2718,7 +2718,13 @@ class TarFile(object):
if os.path.lexists(targetpath): if os.path.lexists(targetpath):
# Avoid FileExistsError on following os.symlink. # Avoid FileExistsError on following os.symlink.
os.unlink(targetpath) os.unlink(targetpath)
os.symlink(tarinfo.linkname, targetpath) link_target = tarinfo.linkname
if os.name == "nt":
# gh-57911: Posix-flavoured forward-slash path separators in
# symlink targets aren't acknowledged by Windows, resulting
# in corrupted links.
link_target = link_target.replace("/", os.path.sep)
os.symlink(link_target, targetpath)
return return
else: else:
if os.path.exists(tarinfo._link_target): if os.path.exists(tarinfo._link_target):

View file

@ -2777,7 +2777,7 @@ class MiscTest(unittest.TestCase):
str(excinfo.exception), str(excinfo.exception),
) )
@unittest.skipUnless(os_helper.can_symlink(), 'requires symlink support') @os_helper.skip_unless_symlink
@unittest.skipUnless(hasattr(os, 'chmod'), "missing os.chmod") @unittest.skipUnless(hasattr(os, 'chmod'), "missing os.chmod")
@unittest.mock.patch('os.chmod') @unittest.mock.patch('os.chmod')
def test_deferred_directory_attributes_update(self, mock_chmod): def test_deferred_directory_attributes_update(self, mock_chmod):
@ -3663,6 +3663,39 @@ class TestExtractionFilters(unittest.TestCase):
# The destination for the extraction, within `outerdir` # The destination for the extraction, within `outerdir`
destdir = outerdir / 'dest' destdir = outerdir / 'dest'
@classmethod
def setUpClass(cls):
# Posix and Windows have different pathname resolution:
# either symlink or a '..' component resolve first.
# Let's see which we are on.
if os_helper.can_symlink():
testpath = os.path.join(TEMPDIR, 'resolution_test')
os.mkdir(testpath)
# testpath/current links to `.` which is all of:
# - `testpath`
# - `testpath/current`
# - `testpath/current/current`
# - etc.
os.symlink('.', os.path.join(testpath, 'current'))
# we'll test where `testpath/current/../file` ends up
with open(os.path.join(testpath, 'current', '..', 'file'), 'w'):
pass
if os.path.exists(os.path.join(testpath, 'file')):
# Windows collapses 'current\..' to '.' first, leaving
# 'testpath\file'
cls.dotdot_resolves_early = True
elif os.path.exists(os.path.join(testpath, '..', 'file')):
# Posix resolves 'current' to '.' first, leaving
# 'testpath/../file'
cls.dotdot_resolves_early = False
else:
raise AssertionError('Could not determine link resolution')
else:
cls.dotdot_resolves_early = True
@contextmanager @contextmanager
def check_context(self, tar, filter, *, check_flag=True): def check_context(self, tar, filter, *, check_flag=True):
"""Extracts `tar` to `self.destdir` and allows checking the result """Extracts `tar` to `self.destdir` and allows checking the result
@ -3809,23 +3842,21 @@ class TestExtractionFilters(unittest.TestCase):
arc.add('current', symlink_to='.') arc.add('current', symlink_to='.')
# effectively points to ./../ # effectively points to ./../
arc.add('parent', symlink_to='current/..') if self.dotdot_resolves_early:
arc.add('parent', symlink_to='current/../..')
else:
arc.add('parent', symlink_to='current/..')
arc.add('parent/evil') arc.add('parent/evil')
if os_helper.can_symlink(): if os_helper.can_symlink():
with self.check_context(arc.open(), 'fully_trusted'): with self.check_context(arc.open(), 'fully_trusted'):
if self.raised_exception is not None: self.expect_file('current', symlink_to='.')
# Windows will refuse to create a file that's a symlink to itself if self.dotdot_resolves_early:
# (and tarfile doesn't swallow that exception) self.expect_file('parent', symlink_to='current/../..')
self.expect_exception(FileExistsError)
# The other cases will fail with this error too.
# Skip the rest of this test.
return
else: else:
self.expect_file('current', symlink_to='.')
self.expect_file('parent', symlink_to='current/..') self.expect_file('parent', symlink_to='current/..')
self.expect_file('../evil') self.expect_file('../evil')
with self.check_context(arc.open(), 'tar'): with self.check_context(arc.open(), 'tar'):
self.expect_exception( self.expect_exception(
@ -3927,35 +3958,6 @@ class TestExtractionFilters(unittest.TestCase):
# Test interplaying symlinks # Test interplaying symlinks
# Inspired by 'dirsymlink2b' in jwilk/traversal-archives # Inspired by 'dirsymlink2b' in jwilk/traversal-archives
# Posix and Windows have different pathname resolution:
# either symlink or a '..' component resolve first.
# Let's see which we are on.
if os_helper.can_symlink():
testpath = os.path.join(TEMPDIR, 'resolution_test')
os.mkdir(testpath)
# testpath/current links to `.` which is all of:
# - `testpath`
# - `testpath/current`
# - `testpath/current/current`
# - etc.
os.symlink('.', os.path.join(testpath, 'current'))
# we'll test where `testpath/current/../file` ends up
with open(os.path.join(testpath, 'current', '..', 'file'), 'w'):
pass
if os.path.exists(os.path.join(testpath, 'file')):
# Windows collapses 'current\..' to '.' first, leaving
# 'testpath\file'
dotdot_resolves_early = True
elif os.path.exists(os.path.join(testpath, '..', 'file')):
# Posix resolves 'current' to '.' first, leaving
# 'testpath/../file'
dotdot_resolves_early = False
else:
raise AssertionError('Could not determine link resolution')
with ArchiveMaker() as arc: with ArchiveMaker() as arc:
# `current` links to `.` which is both the destination directory # `current` links to `.` which is both the destination directory
@ -3991,7 +3993,7 @@ class TestExtractionFilters(unittest.TestCase):
with self.check_context(arc.open(), 'data'): with self.check_context(arc.open(), 'data'):
if os_helper.can_symlink(): if os_helper.can_symlink():
if dotdot_resolves_early: if self.dotdot_resolves_early:
# Fail when extracting a file outside destination # Fail when extracting a file outside destination
self.expect_exception( self.expect_exception(
tarfile.OutsideDestinationError, tarfile.OutsideDestinationError,
@ -4039,6 +4041,21 @@ class TestExtractionFilters(unittest.TestCase):
tarfile.AbsoluteLinkError, tarfile.AbsoluteLinkError,
"'parent' is a link to an absolute path") "'parent' is a link to an absolute path")
@symlink_test
@os_helper.skip_unless_symlink
def test_symlink_target_seperator_rewrite_on_windows(self):
with ArchiveMaker() as arc:
arc.add('link', symlink_to="relative/test/path")
with self.check_context(arc.open(), 'fully_trusted'):
self.expect_file('link', type=tarfile.SYMTYPE)
link_path = os.path.normpath(self.destdir / "link")
link_target = os.readlink(link_path)
if os.name == "nt":
self.assertEqual(link_target, "relative\\test\\path")
else:
self.assertEqual(link_target, "relative/test/path")
def test_absolute_hardlink(self): def test_absolute_hardlink(self):
# Test hardlink to an absolute path # Test hardlink to an absolute path
# Inspired by 'dirsymlink' in https://github.com/jwilk/traversal-archives # Inspired by 'dirsymlink' in https://github.com/jwilk/traversal-archives

View file

@ -0,0 +1,2 @@
When extracting tar files on Windows, slashes in symlink targets will be
replaced by backslashes to prevent corrupted links.