This commit is contained in:
Maxim Martynov 2025-12-23 00:36:28 -08:00 committed by GitHub
commit 2f5dcf3e6b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 361 additions and 76 deletions

View file

@ -833,6 +833,18 @@ xml.parsers.expat
.. _billion laughs: https://en.wikipedia.org/wiki/Billion_laughs_attack
abc
---
* Reduce memory usage of :func:`issubclass` checks for classes inheriting abstract classes.
:class:`abc.ABCMeta` hook ``__subclasscheck__`` now includes
a guard which is triggered when the hook is called from a parent class
(``issubclass(cls, RootClass)`` -> ``issubclass(cls, NestedClass)`` -> ...).
This guard prevents adding ``cls`` to ``NestedClass`` positive and negative caches,
preventing memory bloat in some cases (thousands of classes inherited from ABC).
(Contributed by Maxim Martynov in :gh:`92810`.)
zlib
----

View file

@ -49,6 +49,8 @@ extern "C" {
_Py_atomic_load_uint16_relaxed(&value)
#define FT_ATOMIC_LOAD_UINT32_RELAXED(value) \
_Py_atomic_load_uint32_relaxed(&value)
#define FT_ATOMIC_LOAD_UINT64_RELAXED(value) \
_Py_atomic_load_uint64_relaxed(&value)
#define FT_ATOMIC_LOAD_ULONG_RELAXED(value) \
_Py_atomic_load_ulong_relaxed(&value)
#define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) \
@ -71,6 +73,8 @@ extern "C" {
_Py_atomic_store_uint16_relaxed(&value, new_value)
#define FT_ATOMIC_STORE_UINT32_RELAXED(value, new_value) \
_Py_atomic_store_uint32_relaxed(&value, new_value)
#define FT_ATOMIC_STORE_UINT64_RELAXED(value, new_value) \
_Py_atomic_store_uint64_relaxed(&value, new_value)
#define FT_ATOMIC_STORE_CHAR_RELAXED(value, new_value) \
_Py_atomic_store_char_relaxed(&value, new_value)
#define FT_ATOMIC_LOAD_CHAR_RELAXED(value) \
@ -125,6 +129,8 @@ extern "C" {
_Py_atomic_load_ullong_relaxed(&value)
#define FT_ATOMIC_ADD_SSIZE(value, new_value) \
(void)_Py_atomic_add_ssize(&value, new_value)
#define FT_ATOMIC_ADD_UINT64(value, new_value) \
(void)_Py_atomic_add_uint64(&value, new_value)
#define FT_MUTEX_LOCK(lock) PyMutex_Lock(lock)
#define FT_MUTEX_UNLOCK(lock) PyMutex_Unlock(lock)
@ -144,6 +150,7 @@ extern "C" {
#define FT_ATOMIC_LOAD_UINT8_RELAXED(value) value
#define FT_ATOMIC_LOAD_UINT16_RELAXED(value) value
#define FT_ATOMIC_LOAD_UINT32_RELAXED(value) value
#define FT_ATOMIC_LOAD_UINT64_RELAXED(value) value
#define FT_ATOMIC_LOAD_ULONG_RELAXED(value) value
#define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) value = new_value
#define FT_ATOMIC_STORE_PTR_RELEASE(value, new_value) value = new_value
@ -155,6 +162,7 @@ extern "C" {
#define FT_ATOMIC_STORE_UINT8_RELAXED(value, new_value) value = new_value
#define FT_ATOMIC_STORE_UINT16_RELAXED(value, new_value) value = new_value
#define FT_ATOMIC_STORE_UINT32_RELAXED(value, new_value) value = new_value
#define FT_ATOMIC_STORE_UINT64_RELAXED(value, new_value) value = new_value
#define FT_ATOMIC_LOAD_CHAR_RELAXED(value) value
#define FT_ATOMIC_STORE_CHAR_RELAXED(value, new_value) value = new_value
#define FT_ATOMIC_LOAD_UCHAR_RELAXED(value) value
@ -182,6 +190,7 @@ extern "C" {
#define FT_ATOMIC_LOAD_ULLONG_RELAXED(value) value
#define FT_ATOMIC_STORE_ULLONG_RELAXED(value, new_value) value = new_value
#define FT_ATOMIC_ADD_SSIZE(value, new_value) (void)(value += new_value)
#define FT_ATOMIC_ADD_UINT64(value, new_value) (void)(value += new_value)
#define FT_MUTEX_LOCK(lock) do {} while (0)
#define FT_MUTEX_UNLOCK(lock) do {} while (0)

View file

@ -49,6 +49,7 @@ class ABCMeta(type):
cls._abc_cache = WeakSet()
cls._abc_negative_cache = WeakSet()
cls._abc_negative_cache_version = ABCMeta._abc_invalidation_counter
cls._abc_issubclasscheck_recursive = False
return cls
def register(cls, subclass):
@ -65,7 +66,10 @@ class ABCMeta(type):
if issubclass(cls, subclass):
# This would create a cycle, which is bad for the algorithm below
raise RuntimeError("Refusing to create an inheritance cycle")
# Add registry entry
cls._abc_registry.add(subclass)
# Automatically include cache entry
cls._abc_cache.add(subclass)
ABCMeta._abc_invalidation_counter += 1 # Invalidate negative cache
return subclass
@ -139,9 +143,25 @@ class ABCMeta(type):
return True
# Check if it's a subclass of a subclass (recursive)
for scls in cls.__subclasses__():
if issubclass(subclass, scls):
cls._abc_cache.add(subclass)
# If inside recursive issubclass check, avoid adding classes
# to any cache because this may drastically increase memory usage.
# Unfortunately, issubclass/__subclasscheck__ don't accept third
# argument with context, so using global context within ABCMeta.
# This is done only on first method call, next will use cache anyway.
scls_is_abc = hasattr(scls, "_abc_issubclasscheck_recursive")
if scls_is_abc:
scls._abc_issubclasscheck_recursive = True
try:
# Perform recursive check
result = issubclass(subclass, scls)
finally:
if scls_is_abc:
scls._abc_issubclasscheck_recursive = False
if result:
if not cls._abc_issubclasscheck_recursive:
cls._abc_cache.add(subclass)
return True
# No dice; update negative cache
cls._abc_negative_cache.add(subclass)
if not cls._abc_issubclasscheck_recursive:
cls._abc_negative_cache.add(subclass)
return False

View file

@ -12,6 +12,7 @@ import abc
import _py_abc
from inspect import isabstract
def test_factory(abc_ABCMeta, abc_get_cache_token):
class TestLegacyAPI(unittest.TestCase):
@ -70,6 +71,25 @@ def test_factory(abc_ABCMeta, abc_get_cache_token):
class TestABC(unittest.TestCase):
def check_isinstance(self, obj, target_class):
self.assertIsInstance(obj, target_class)
self.assertIsInstance(obj, (target_class,))
self.assertIsInstance(obj, target_class | int)
def check_not_isinstance(self, obj, target_class):
self.assertNotIsInstance(obj, target_class)
self.assertNotIsInstance(obj, (target_class,))
self.assertNotIsInstance(obj, target_class | int)
def check_issubclass(self, klass, target_class):
self.assertIsSubclass(klass, target_class)
self.assertIsSubclass(klass, (target_class,))
self.assertIsSubclass(klass, target_class | int)
def check_not_issubclass(self, klass, target_class):
self.assertNotIsSubclass(klass, target_class)
self.assertNotIsSubclass(klass, (target_class,))
self.assertNotIsSubclass(klass, target_class | int)
def test_ABC_helper(self):
# create an ABC using the helper class and perform basic checks
@ -270,29 +290,75 @@ def test_factory(abc_ABCMeta, abc_get_cache_token):
class C(metaclass=meta):
pass
def test_isinstance_direct_inheritance(self):
class A(metaclass=abc_ABCMeta):
pass
class B(A):
pass
class C(A):
pass
a = A()
b = B()
c = C()
# trigger caching
for _ in range(2):
self.check_isinstance(a, A)
self.check_not_isinstance(a, B)
self.check_not_isinstance(a, C)
self.check_isinstance(b, B)
self.check_isinstance(b, A)
self.check_not_isinstance(b, C)
self.check_isinstance(c, C)
self.check_isinstance(c, A)
self.check_not_isinstance(c, B)
self.check_issubclass(B, A)
self.check_issubclass(C, A)
self.check_not_issubclass(B, C)
self.check_not_issubclass(C, B)
self.check_not_issubclass(A, B)
self.check_not_issubclass(A, C)
def test_registration_basics(self):
class A(metaclass=abc_ABCMeta):
pass
class B(object):
pass
a = A()
b = B()
self.assertNotIsSubclass(B, A)
self.assertNotIsSubclass(B, (A,))
self.assertNotIsInstance(b, A)
self.assertNotIsInstance(b, (A,))
# trigger caching
for _ in range(2):
self.check_not_issubclass(B, A)
self.check_not_isinstance(b, A)
self.check_not_issubclass(A, B)
self.check_not_isinstance(a, B)
B1 = A.register(B)
self.assertIsSubclass(B, A)
self.assertIsSubclass(B, (A,))
self.assertIsInstance(b, A)
self.assertIsInstance(b, (A,))
self.assertIs(B1, B)
# trigger caching
for _ in range(2):
self.check_issubclass(B, A)
self.check_isinstance(b, A)
self.assertIs(B1, B)
self.check_not_issubclass(A, B)
self.check_not_isinstance(a, B)
class C(B):
pass
c = C()
self.assertIsSubclass(C, A)
self.assertIsSubclass(C, (A,))
self.assertIsInstance(c, A)
self.assertIsInstance(c, (A,))
# trigger caching
for _ in range(2):
self.check_issubclass(C, A)
self.check_isinstance(c, A)
self.check_not_issubclass(A, C)
self.check_not_isinstance(a, C)
def test_register_as_class_deco(self):
class A(metaclass=abc_ABCMeta):
@ -377,39 +443,95 @@ def test_factory(abc_ABCMeta, abc_get_cache_token):
pass
self.assertIsSubclass(A, A)
self.assertIsSubclass(A, (A,))
class B(metaclass=abc_ABCMeta):
pass
self.assertNotIsSubclass(A, B)
self.assertNotIsSubclass(A, (B,))
self.assertNotIsSubclass(B, A)
self.assertNotIsSubclass(B, (A,))
class C(metaclass=abc_ABCMeta):
pass
A.register(B)
class B1(B):
pass
self.assertIsSubclass(B1, A)
self.assertIsSubclass(B1, (A,))
# trigger caching
for _ in range(2):
self.assertIsSubclass(B1, A)
self.assertIsSubclass(B1, (A,))
class C1(C):
pass
B1.register(C1)
self.assertNotIsSubclass(C, B)
self.assertNotIsSubclass(C, (B,))
self.assertNotIsSubclass(C, B1)
self.assertNotIsSubclass(C, (B1,))
self.assertIsSubclass(C1, A)
self.assertIsSubclass(C1, (A,))
self.assertIsSubclass(C1, B)
self.assertIsSubclass(C1, (B,))
self.assertIsSubclass(C1, B1)
self.assertIsSubclass(C1, (B1,))
# trigger caching
for _ in range(2):
self.assertNotIsSubclass(C, B)
self.assertNotIsSubclass(C, (B,))
self.assertNotIsSubclass(C, B1)
self.assertNotIsSubclass(C, (B1,))
self.assertIsSubclass(C1, A)
self.assertIsSubclass(C1, (A,))
self.assertIsSubclass(C1, B)
self.assertIsSubclass(C1, (B,))
self.assertIsSubclass(C1, B1)
self.assertIsSubclass(C1, (B1,))
C1.register(int)
class MyInt(int):
pass
self.assertIsSubclass(MyInt, A)
self.assertIsSubclass(MyInt, (A,))
self.assertIsInstance(42, A)
self.assertIsInstance(42, (A,))
# trigger caching
for _ in range(2):
self.assertIsSubclass(MyInt, A)
self.assertIsSubclass(MyInt, (A,))
self.assertIsInstance(42, A)
self.assertIsInstance(42, (A,))
def test_custom_subclasses(self):
class A: pass
class B(A): pass
class C: pass
class D(C): pass
class Root(metaclass=abc_ABCMeta): pass
class Parent1(Root):
@classmethod
def __subclasses__(cls):
return [A]
class Parent2(Root):
__subclasses__ = lambda: [A]
# trigger caching
for _ in range(2):
self.check_isinstance(A(), Parent1)
self.check_isinstance(B(), Parent1)
self.check_issubclass(A, Parent1)
self.check_issubclass(B, Parent1)
self.check_not_isinstance(C(), Parent1)
self.check_not_isinstance(D(), Parent1)
self.check_not_issubclass(C, Parent1)
self.check_not_issubclass(D, Parent1)
self.check_isinstance(A(), Parent2)
self.check_isinstance(B(), Parent2)
self.check_issubclass(A, Parent2)
self.check_issubclass(B, Parent2)
self.check_not_isinstance(C(), Parent2)
self.check_not_isinstance(D(), Parent2)
self.check_not_issubclass(C, Parent2)
self.check_not_issubclass(D, Parent2)
self.check_isinstance(A(), Root)
self.check_isinstance(B(), Root)
self.check_issubclass(A, Root)
self.check_issubclass(B, Root)
self.check_not_isinstance(C(), Root)
self.check_not_isinstance(D(), Root)
self.check_not_issubclass(C, Root)
self.check_not_issubclass(D, Root)
def test_issubclass_bad_arguments(self):
class A(metaclass=abc_ABCMeta):
@ -460,8 +582,32 @@ def test_factory(abc_ABCMeta, abc_get_cache_token):
with self.assertRaisesRegex(CustomError, exc_msg):
issubclass(int, S)
def test_subclasshook(self):
def test_issubclass_bad_class(self):
class A(metaclass=abc.ABCMeta):
pass
A._abc_impl = 1
error_msg = "_abc_impl is set to a wrong type"
with self.assertRaisesRegex(TypeError, error_msg):
issubclass(A, A)
class B(metaclass=_py_abc.ABCMeta):
pass
B._abc_cache = 1
error_msg = "argument of type 'int' is not a container or iterable"
with self.assertRaisesRegex(TypeError, error_msg):
issubclass(B, B)
class C(metaclass=_py_abc.ABCMeta):
pass
C._abc_negative_cache = 1
with self.assertRaisesRegex(TypeError, error_msg):
issubclass(C, C)
def test_subclasshook(self):
class A(metaclass=abc_ABCMeta):
@classmethod
def __subclasshook__(cls, C):
if cls is A:
@ -478,6 +624,26 @@ def test_factory(abc_ABCMeta, abc_get_cache_token):
self.assertNotIsSubclass(C, A)
self.assertNotIsSubclass(C, (A,))
def test_subclasshook_exception(self):
# Check that issubclass() propagates exceptions raised by
# __subclasshook__.
class CustomError(Exception): ...
exc_msg = "exception from __subclasshook__"
class A(metaclass=abc_ABCMeta):
@classmethod
def __subclasshook__(cls, C):
raise CustomError(exc_msg)
with self.assertRaisesRegex(CustomError, exc_msg):
issubclass(A, A)
class B(A):
pass
with self.assertRaisesRegex(CustomError, exc_msg):
issubclass(B, A)
class C:
pass
with self.assertRaisesRegex(CustomError, exc_msg):
issubclass(C, A)
def test_all_new_methods_are_called(self):
class A(metaclass=abc_ABCMeta):
pass
@ -522,7 +688,6 @@ def test_factory(abc_ABCMeta, abc_get_cache_token):
self.assertEqual(A.__abstractmethods__, set())
A()
def test_update_new_abstractmethods(self):
class A(metaclass=abc_ABCMeta):
@abc.abstractmethod

View file

@ -353,6 +353,28 @@ class TestIsInstanceIsSubclass(unittest.TestCase):
with support.infinite_recursion(25):
self.assertRaises(RecursionError, issubclass, X(), int)
def test_custom_subclasses_are_ignored(self):
class A: pass
class B: pass
class Parent1:
@classmethod
def __subclasses__(cls):
return [A, B]
class Parent2:
__subclasses__ = lambda: [A, B]
self.assertNotIsInstance(A(), Parent1)
self.assertNotIsInstance(B(), Parent1)
self.assertNotIsSubclass(A, Parent1)
self.assertNotIsSubclass(B, Parent1)
self.assertNotIsInstance(A(), Parent2)
self.assertNotIsInstance(B(), Parent2)
self.assertNotIsSubclass(A, Parent2)
self.assertNotIsSubclass(B, Parent2)
def blowstack(fxn, arg, compare_to):
# Make sure that calling isinstance with a deeply nested tuple for its

View file

@ -0,0 +1,2 @@
Reduce memory usage by :meth:`~type.__subclasscheck__`
for :class:`abc.ABCMeta` and large class trees.

View file

@ -35,21 +35,13 @@ get_abc_state(PyObject *module)
static inline uint64_t
get_invalidation_counter(_abcmodule_state *state)
{
#ifdef Py_GIL_DISABLED
return _Py_atomic_load_uint64(&state->abc_invalidation_counter);
#else
return state->abc_invalidation_counter;
#endif
return FT_ATOMIC_LOAD_UINT64_RELAXED(state->abc_invalidation_counter);
}
static inline void
increment_invalidation_counter(_abcmodule_state *state)
{
#ifdef Py_GIL_DISABLED
_Py_atomic_add_uint64(&state->abc_invalidation_counter, 1);
#else
state->abc_invalidation_counter++;
#endif
FT_ATOMIC_ADD_UINT64(state->abc_invalidation_counter, 1);
}
/* This object stores internal state for ABCs.
@ -65,6 +57,7 @@ typedef struct {
PyObject *_abc_cache;
PyObject *_abc_negative_cache;
uint64_t _abc_negative_cache_version;
uint8_t _abc_issubclasscheck_recursive;
} _abc_data;
#define _abc_data_CAST(op) ((_abc_data *)(op))
@ -72,21 +65,31 @@ typedef struct {
static inline uint64_t
get_cache_version(_abc_data *impl)
{
#ifdef Py_GIL_DISABLED
return _Py_atomic_load_uint64(&impl->_abc_negative_cache_version);
#else
return impl->_abc_negative_cache_version;
#endif
return FT_ATOMIC_LOAD_UINT64_RELAXED(impl->_abc_negative_cache_version);
}
static inline void
set_cache_version(_abc_data *impl, uint64_t version)
{
#ifdef Py_GIL_DISABLED
_Py_atomic_store_uint64(&impl->_abc_negative_cache_version, version);
#else
impl->_abc_negative_cache_version = version;
#endif
FT_ATOMIC_STORE_UINT64_RELAXED(impl->_abc_negative_cache_version, version);
}
static inline uint8_t
is_issubclasscheck_recursive(_abc_data *impl)
{
return FT_ATOMIC_LOAD_UINT8_RELAXED(impl->_abc_issubclasscheck_recursive);
}
static inline void
set_issubclasscheck_recursive(_abc_data *impl)
{
FT_ATOMIC_STORE_UINT8_RELAXED(impl->_abc_issubclasscheck_recursive, 1);
}
static inline void
unset_issubclasscheck_recursive(_abc_data *impl)
{
FT_ATOMIC_STORE_UINT8_RELAXED(impl->_abc_issubclasscheck_recursive, 0);
}
static int
@ -139,6 +142,7 @@ abc_data_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
self->_abc_cache = NULL;
self->_abc_negative_cache = NULL;
self->_abc_negative_cache_version = get_invalidation_counter(state);
self->_abc_issubclasscheck_recursive = 0;
return (PyObject *) self;
}
@ -177,6 +181,30 @@ _get_impl(PyObject *module, PyObject *self)
return (_abc_data *)impl;
}
/* If class is inherited from ABC, set data to point to internal ABC state of class, and return 1.
If object is not inherited from ABC, return 0.
If error is encountered, return -1.
*/
static int
_get_optional_impl(_abcmodule_state *state, PyObject *self, _abc_data **data)
{
assert(data != NULL);
PyObject *impl = NULL;
int res = PyObject_GetOptionalAttr(self, &_Py_ID(_abc_impl), &impl);
if (res <= 0) {
*data = NULL;
return res;
}
if (!Py_IS_TYPE(impl, state->_abc_data_type)) {
PyErr_SetString(PyExc_TypeError, "_abc_impl is set to a wrong type");
Py_DECREF(impl);
*data = NULL;
return -1;
}
*data = (_abc_data *)impl;
return 1;
}
static int
_in_weak_set(_abc_data *impl, PyObject **pset, PyObject *obj)
{
@ -347,11 +375,12 @@ _abc__get_dump(PyObject *module, PyObject *self)
}
PyObject *res;
Py_BEGIN_CRITICAL_SECTION(impl);
res = Py_BuildValue("NNNK",
res = Py_BuildValue("NNNKK",
PySet_New(impl->_abc_registry),
PySet_New(impl->_abc_cache),
PySet_New(impl->_abc_negative_cache),
get_cache_version(impl));
get_cache_version(impl),
is_issubclasscheck_recursive(impl));
Py_END_CRITICAL_SECTION();
Py_DECREF(impl);
return res;
@ -584,10 +613,16 @@ _abc__abc_register_impl(PyObject *module, PyObject *self, PyObject *subclass)
if (impl == NULL) {
return NULL;
}
// Add registry entry
if (_add_to_weak_set(impl, &impl->_abc_registry, subclass) < 0) {
Py_DECREF(impl);
return NULL;
}
// Automatically include cache entry
if (_add_to_weak_set(impl, &impl->_abc_cache, subclass) < 0) {
Py_DECREF(impl);
return NULL;
}
Py_DECREF(impl);
/* Invalidate negative cache */
@ -818,23 +853,53 @@ _abc__abc_subclasscheck_impl(PyObject *module, PyObject *self,
if (scls == NULL) {
goto end;
}
int r = PyObject_IsSubclass(subclass, scls);
Py_DECREF(scls);
if (r > 0) {
if (_add_to_weak_set(impl, &impl->_abc_cache, subclass) < 0) {
goto end;
}
result = Py_True;
_abc_data *scls_impl;
int scls_is_abc = _get_optional_impl(state, scls, &scls_impl);
if (scls_is_abc < 0) {
Py_DECREF(scls);
goto end;
}
if (scls_is_abc > 0) {
/*
If inside recursive issubclass check, avoid adding classes
to any cache because this may drastically increase memory usage.
Unfortunately, issubclass/__subclasscheck__ don't accept third
argument with context, so using global context within ABCMeta.
This is done only on first method call, next will use cache.
*/
set_issubclasscheck_recursive(scls_impl);
}
// Perform recursive check
int r = PyObject_IsSubclass(subclass, scls);
Py_DECREF(scls);
if (scls_is_abc > 0) {
// reset recursion guard even if exception was raised in __subclasscheck__
unset_issubclasscheck_recursive(scls_impl);
}
Py_XDECREF(scls_impl);
if (r < 0) {
goto end;
}
if (r > 0) {
if (!is_issubclasscheck_recursive(impl)) {
if (_add_to_weak_set(impl, &impl->_abc_cache, subclass) < 0) {
goto end;
}
}
result = Py_True;
goto end;
}
}
/* No dice; update negative cache. */
if (_add_to_weak_set(impl, &impl->_abc_negative_cache, subclass) < 0) {
goto end;
if (!is_issubclasscheck_recursive(impl)) {
if (_add_to_weak_set(impl, &impl->_abc_negative_cache, subclass) < 0) {
goto end;
}
}
result = Py_False;
@ -849,17 +914,6 @@ static int
subclasscheck_check_registry(_abc_data *impl, PyObject *subclass,
PyObject **result)
{
// Fast path: check subclass is in weakref directly.
int ret = _in_weak_set(impl, &impl->_abc_registry, subclass);
if (ret < 0) {
*result = NULL;
return -1;
}
if (ret > 0) {
*result = Py_True;
return 1;
}
PyObject *registry_shared;
Py_BEGIN_CRITICAL_SECTION(impl);
registry_shared = impl->_abc_registry;
@ -874,6 +928,7 @@ subclasscheck_check_registry(_abc_data *impl, PyObject *subclass,
if (registry == NULL) {
return -1;
}
int ret = 0;
PyObject *key;
Py_ssize_t pos = 0;
Py_hash_t hash;