gh-115999: Add partial free-thread specialization for BINARY_SUBSCR (gh-127227)

This commit is contained in:
Donghee Na 2024-12-02 10:38:17 +09:00 committed by GitHub
parent 7ea523f47c
commit e2713409cf
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 128 additions and 71 deletions

View file

@ -10,6 +10,9 @@ extern "C" {
PyAPI_FUNC(PyObject*) _PyList_Extend(PyListObject *, PyObject *);
extern void _PyList_DebugMallocStats(FILE *out);
// _PyList_GetItemRef should be used only when the object is known to be a list,
// because, unlike PyList_GetItemRef, it does not raise TypeError when the object is not a list.
extern PyObject* _PyList_GetItemRef(PyListObject *, Py_ssize_t i);
#define _PyList_ITEMS(op) _Py_RVALUE(_PyList_CAST(op)->ob_item)

View file

@ -1952,7 +1952,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = {
[BINARY_SUBSCR] = { true, INSTR_FMT_IXC, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[BINARY_SUBSCR_DICT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[BINARY_SUBSCR_GETITEM] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG },
[BINARY_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG },
[BINARY_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG },
[BINARY_SUBSCR_STR_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG },
[BINARY_SUBSCR_TUPLE_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG },
[BUILD_LIST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG },

View file

@ -84,7 +84,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
[_BINARY_SUBSCR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_BINARY_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_STORE_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_BINARY_SUBSCR_LIST_INT] = HAS_DEOPT_FLAG,
[_BINARY_SUBSCR_LIST_INT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG,
[_BINARY_SUBSCR_STR_INT] = HAS_DEOPT_FLAG,
[_BINARY_SUBSCR_TUPLE_INT] = HAS_DEOPT_FLAG,
[_BINARY_SUBSCR_DICT] = HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,

View file

@ -1260,27 +1260,6 @@ class DisTests(DisTestBase):
got = self.get_disassembly(load_test, adaptive=True)
self.do_disassembly_compare(got, dis_load_test_quickened_code)
@cpython_only
@requires_specialization
def test_binary_subscr_specialize(self):
binary_subscr_quicken = """\
0 RESUME_CHECK 0
1 LOAD_NAME 0 (a)
LOAD_SMALL_INT 0
%s
RETURN_VALUE
"""
co_list = compile('a[0]', "<list>", "eval")
self.code_quicken(lambda: exec(co_list, {}, {'a': [0]}))
got = self.get_disassembly(co_list, adaptive=True)
self.do_disassembly_compare(got, binary_subscr_quicken % "BINARY_SUBSCR_LIST_INT")
co_dict = compile('a[0]', "<dict>", "eval")
self.code_quicken(lambda: exec(co_dict, {}, {'a': {0: '1'}}))
got = self.get_disassembly(co_dict, adaptive=True)
self.do_disassembly_compare(got, binary_subscr_quicken % "BINARY_SUBSCR_DICT")
@cpython_only
@requires_specialization
def test_load_attr_specialize(self):

View file

@ -617,7 +617,7 @@ class TestRacesDoNotCrash(TestBase):
opname = "BINARY_SUBSCR_GETITEM"
self.assert_races_do_not_crash(opname, get_items, read, write)
@requires_specialization
@requires_specialization_ft
def test_binary_subscr_list_int(self):
def get_items():
items = []
@ -1023,7 +1023,7 @@ class TestRacesDoNotCrash(TestBase):
opname = "STORE_ATTR_WITH_HINT"
self.assert_races_do_not_crash(opname, get_items, read, write)
@requires_specialization
@requires_specialization_ft
def test_store_subscr_list_int(self):
def get_items():
items = []
@ -1229,48 +1229,48 @@ class TestSpecializer(TestBase):
@cpython_only
@requires_specialization_ft
def test_binary_op(self):
def f():
def binary_op_add_int():
for _ in range(100):
a, b = 1, 2
c = a + b
self.assertEqual(c, 3)
f()
self.assert_specialized(f, "BINARY_OP_ADD_INT")
self.assert_no_opcode(f, "BINARY_OP")
binary_op_add_int()
self.assert_specialized(binary_op_add_int, "BINARY_OP_ADD_INT")
self.assert_no_opcode(binary_op_add_int, "BINARY_OP")
def g():
def binary_op_add_unicode():
for _ in range(100):
a, b = "foo", "bar"
c = a + b
self.assertEqual(c, "foobar")
g()
self.assert_specialized(g, "BINARY_OP_ADD_UNICODE")
self.assert_no_opcode(g, "BINARY_OP")
binary_op_add_unicode()
self.assert_specialized(binary_op_add_unicode, "BINARY_OP_ADD_UNICODE")
self.assert_no_opcode(binary_op_add_unicode, "BINARY_OP")
@cpython_only
@requires_specialization_ft
def test_contain_op(self):
def f():
def contains_op_dict():
for _ in range(100):
a, b = 1, {1: 2, 2: 5}
self.assertTrue(a in b)
self.assertFalse(3 in b)
f()
self.assert_specialized(f, "CONTAINS_OP_DICT")
self.assert_no_opcode(f, "CONTAINS_OP")
contains_op_dict()
self.assert_specialized(contains_op_dict, "CONTAINS_OP_DICT")
self.assert_no_opcode(contains_op_dict, "CONTAINS_OP")
def g():
def contains_op_set():
for _ in range(100):
a, b = 1, {1, 2}
self.assertTrue(a in b)
self.assertFalse(3 in b)
g()
self.assert_specialized(g, "CONTAINS_OP_SET")
self.assert_no_opcode(g, "CONTAINS_OP")
contains_op_set()
self.assert_specialized(contains_op_set, "CONTAINS_OP_SET")
self.assert_no_opcode(contains_op_set, "CONTAINS_OP")
@cpython_only
@requires_specialization_ft
@ -1342,34 +1342,81 @@ class TestSpecializer(TestBase):
@cpython_only
@requires_specialization_ft
def test_unpack_sequence(self):
def f():
def unpack_sequence_two_tuple():
for _ in range(100):
a, b = 1, 2
self.assertEqual(a, 1)
self.assertEqual(b, 2)
f()
self.assert_specialized(f, "UNPACK_SEQUENCE_TWO_TUPLE")
self.assert_no_opcode(f, "UNPACK_SEQUENCE")
unpack_sequence_two_tuple()
self.assert_specialized(unpack_sequence_two_tuple,
"UNPACK_SEQUENCE_TWO_TUPLE")
self.assert_no_opcode(unpack_sequence_two_tuple, "UNPACK_SEQUENCE")
def g():
def unpack_sequence_tuple():
for _ in range(100):
a, = 1,
self.assertEqual(a, 1)
g()
self.assert_specialized(g, "UNPACK_SEQUENCE_TUPLE")
self.assert_no_opcode(g, "UNPACK_SEQUENCE")
unpack_sequence_tuple()
self.assert_specialized(unpack_sequence_tuple, "UNPACK_SEQUENCE_TUPLE")
self.assert_no_opcode(unpack_sequence_tuple, "UNPACK_SEQUENCE")
def x():
def unpack_sequence_list():
for _ in range(100):
a, b = [1, 2]
self.assertEqual(a, 1)
self.assertEqual(b, 2)
x()
self.assert_specialized(x, "UNPACK_SEQUENCE_LIST")
self.assert_no_opcode(x, "UNPACK_SEQUENCE")
unpack_sequence_list()
self.assert_specialized(unpack_sequence_list, "UNPACK_SEQUENCE_LIST")
self.assert_no_opcode(unpack_sequence_list, "UNPACK_SEQUENCE")
# Checks that subscripting (the BINARY_SUBSCR instruction) is replaced by a
# container-specific opcode for list, tuple, dict and str operands.
# Each case defines a small helper, calls it once, and then inspects the
# helper with assert_specialized / assert_no_opcode.
# NOTE(review): this view has lost leading indentation; the nesting described
# in these comments is inferred from statement order -- confirm against the file.
@cpython_only
@requires_specialization_ft
def test_binary_subscr(self):
# Case 1: list indexed by the ints 0, 1, 2 produced by enumerate().
def binary_subscr_list_int():
# 100 iterations; presumably enough for the adaptive counter to trigger
# specialization -- TODO confirm the warm-up threshold.
for _ in range(100):
a = [1, 2, 3]
for idx, expected in enumerate(a):
self.assertEqual(a[idx], expected)
binary_subscr_list_int()
# After running, the helper is expected to contain BINARY_SUBSCR_LIST_INT
# and (judging by the helper name assert_no_opcode, defined elsewhere) no
# remaining generic BINARY_SUBSCR.
self.assert_specialized(binary_subscr_list_int,
"BINARY_SUBSCR_LIST_INT")
self.assert_no_opcode(binary_subscr_list_int, "BINARY_SUBSCR")
# Case 2: same shape as case 1, but the container is a tuple.
def binary_subscr_tuple_int():
for _ in range(100):
a = (1, 2, 3)
for idx, expected in enumerate(a):
self.assertEqual(a[idx], expected)
binary_subscr_tuple_int()
self.assert_specialized(binary_subscr_tuple_int,
"BINARY_SUBSCR_TUPLE_INT")
self.assert_no_opcode(binary_subscr_tuple_int, "BINARY_SUBSCR")
# Case 3: dict lookup by key; both subscripts in the helper use a dict operand.
def binary_subscr_dict():
for _ in range(100):
a = {1: 2, 2: 3}
self.assertEqual(a[1], 2)
self.assertEqual(a[2], 3)
binary_subscr_dict()
self.assert_specialized(binary_subscr_dict, "BINARY_SUBSCR_DICT")
self.assert_no_opcode(binary_subscr_dict, "BINARY_SUBSCR")
# Case 4: str indexed by int; enumerate() yields each character as the
# expected value for its own index.
def binary_subscr_str_int():
for _ in range(100):
a = "foobar"
for idx, expected in enumerate(a):
self.assertEqual(a[idx], expected)
binary_subscr_str_int()
self.assert_specialized(binary_subscr_str_int, "BINARY_SUBSCR_STR_INT")
self.assert_no_opcode(binary_subscr_str_int, "BINARY_SUBSCR")
if __name__ == "__main__":
unittest.main()

View file

@ -391,6 +391,12 @@ PyList_GetItemRef(PyObject *op, Py_ssize_t i)
return item;
}
/* Internal counterpart of PyList_GetItemRef() for callers that already hold a
 * PyListObject pointer.
 *
 * This simply forwards to list_get_item_ref(); no type check is performed
 * here, so the caller must already know that `list` really is a list.
 * The return value is whatever list_get_item_ref() yields for (list, i).
 */
PyObject *
_PyList_GetItemRef(PyListObject *list, Py_ssize_t i)
{
    PyObject *item = list_get_item_ref(list, i);
    return item;
}
int
PyList_SetItem(PyObject *op, Py_ssize_t i,
PyObject *newitem)

View file

@ -704,7 +704,7 @@ dummy_func(
};
specializing op(_SPECIALIZE_BINARY_SUBSCR, (counter/1, container, sub -- container, sub)) {
#if ENABLE_SPECIALIZATION
#if ENABLE_SPECIALIZATION_FT
assert(frame->stackpointer == NULL);
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
@ -713,7 +713,7 @@ dummy_func(
}
OPCODE_DEFERRED_INC(BINARY_SUBSCR);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
#endif /* ENABLE_SPECIALIZATION_FT */
}
op(_BINARY_SUBSCR, (container, sub -- res)) {
@ -790,11 +790,17 @@ dummy_func(
// Deopt unless 0 <= sub < PyList_Size(list)
DEOPT_IF(!_PyLong_IsNonNegativeCompact((PyLongObject *)sub));
Py_ssize_t index = ((PyLongObject*)sub)->long_value.ob_digit[0];
#ifdef Py_GIL_DISABLED
PyObject *res_o = _PyList_GetItemRef((PyListObject*)list, index);
DEOPT_IF(res_o == NULL);
STAT_INC(BINARY_SUBSCR, hit);
#else
DEOPT_IF(index >= PyList_GET_SIZE(list));
STAT_INC(BINARY_SUBSCR, hit);
PyObject *res_o = PyList_GET_ITEM(list, index);
assert(res_o != NULL);
Py_INCREF(res_o);
#endif
PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free);
DEAD(sub_st);
PyStackRef_CLOSE(list_st);

View file

@ -981,6 +981,16 @@
JUMP_TO_JUMP_TARGET();
}
Py_ssize_t index = ((PyLongObject*)sub)->long_value.ob_digit[0];
#ifdef Py_GIL_DISABLED
_PyFrame_SetStackPointer(frame, stack_pointer);
PyObject *res_o = _PyList_GetItemRef((PyListObject*)list, index);
stack_pointer = _PyFrame_GetStackPointer(frame);
if (res_o == NULL) {
UOP_STAT_INC(uopcode, miss);
JUMP_TO_JUMP_TARGET();
}
STAT_INC(BINARY_SUBSCR, hit);
#else
if (index >= PyList_GET_SIZE(list)) {
UOP_STAT_INC(uopcode, miss);
JUMP_TO_JUMP_TARGET();
@ -989,6 +999,7 @@
PyObject *res_o = PyList_GET_ITEM(list, index);
assert(res_o != NULL);
Py_INCREF(res_o);
#endif
PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free);
PyStackRef_CLOSE(list_st);
res = PyStackRef_FromPyObjectSteal(res_o);

View file

@ -433,7 +433,7 @@
container = stack_pointer[-2];
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
#if ENABLE_SPECIALIZATION_FT
assert(frame->stackpointer == NULL);
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
@ -444,7 +444,7 @@
}
OPCODE_DEFERRED_INC(BINARY_SUBSCR);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
#endif /* ENABLE_SPECIALIZATION_FT */
}
// _BINARY_SUBSCR
{
@ -577,11 +577,19 @@
// Deopt unless 0 <= sub < PyList_Size(list)
DEOPT_IF(!_PyLong_IsNonNegativeCompact((PyLongObject *)sub), BINARY_SUBSCR);
Py_ssize_t index = ((PyLongObject*)sub)->long_value.ob_digit[0];
#ifdef Py_GIL_DISABLED
_PyFrame_SetStackPointer(frame, stack_pointer);
PyObject *res_o = _PyList_GetItemRef((PyListObject*)list, index);
stack_pointer = _PyFrame_GetStackPointer(frame);
DEOPT_IF(res_o == NULL, BINARY_SUBSCR);
STAT_INC(BINARY_SUBSCR, hit);
#else
DEOPT_IF(index >= PyList_GET_SIZE(list), BINARY_SUBSCR);
STAT_INC(BINARY_SUBSCR, hit);
PyObject *res_o = PyList_GET_ITEM(list, index);
assert(res_o != NULL);
Py_INCREF(res_o);
#endif
PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free);
PyStackRef_CLOSE(list_st);
res = PyStackRef_FromPyObjectSteal(res_o);

View file

@ -1717,15 +1717,15 @@ _Py_Specialize_BinarySubscr(
PyObject *container = PyStackRef_AsPyObjectBorrow(container_st);
PyObject *sub = PyStackRef_AsPyObjectBorrow(sub_st);
assert(ENABLE_SPECIALIZATION);
assert(ENABLE_SPECIALIZATION_FT);
assert(_PyOpcode_Caches[BINARY_SUBSCR] ==
INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
_PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)(instr + 1);
PyTypeObject *container_type = Py_TYPE(container);
uint8_t specialized_op;
if (container_type == &PyList_Type) {
if (PyLong_CheckExact(sub)) {
if (_PyLong_IsNonNegativeCompact((PyLongObject *)sub)) {
instr->op.code = BINARY_SUBSCR_LIST_INT;
specialized_op = BINARY_SUBSCR_LIST_INT;
goto success;
}
SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_RANGE);
@ -1738,7 +1738,7 @@ _Py_Specialize_BinarySubscr(
if (container_type == &PyTuple_Type) {
if (PyLong_CheckExact(sub)) {
if (_PyLong_IsNonNegativeCompact((PyLongObject *)sub)) {
instr->op.code = BINARY_SUBSCR_TUPLE_INT;
specialized_op = BINARY_SUBSCR_TUPLE_INT;
goto success;
}
SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_RANGE);
@ -1751,7 +1751,7 @@ _Py_Specialize_BinarySubscr(
if (container_type == &PyUnicode_Type) {
if (PyLong_CheckExact(sub)) {
if (_PyLong_IsNonNegativeCompact((PyLongObject *)sub)) {
instr->op.code = BINARY_SUBSCR_STR_INT;
specialized_op = BINARY_SUBSCR_STR_INT;
goto success;
}
SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_RANGE);
@ -1762,9 +1762,10 @@ _Py_Specialize_BinarySubscr(
goto fail;
}
if (container_type == &PyDict_Type) {
instr->op.code = BINARY_SUBSCR_DICT;
specialized_op = BINARY_SUBSCR_DICT;
goto success;
}
#ifndef Py_GIL_DISABLED
PyTypeObject *cls = Py_TYPE(container);
PyObject *descriptor = _PyType_Lookup(cls, &_Py_ID(__getitem__));
if (descriptor && Py_TYPE(descriptor) == &PyFunction_Type) {
@ -1797,21 +1798,17 @@ _Py_Specialize_BinarySubscr(
// struct _specialization_cache):
ht->_spec_cache.getitem = descriptor;
ht->_spec_cache.getitem_version = version;
instr->op.code = BINARY_SUBSCR_GETITEM;
specialized_op = BINARY_SUBSCR_GETITEM;
goto success;
}
#endif // Py_GIL_DISABLED
SPECIALIZATION_FAIL(BINARY_SUBSCR,
binary_subscr_fail_kind(container_type, sub));
fail:
STAT_INC(BINARY_SUBSCR, failure);
assert(!PyErr_Occurred());
instr->op.code = BINARY_SUBSCR;
cache->counter = adaptive_counter_backoff(cache->counter);
unspecialize(instr);
return;
success:
STAT_INC(BINARY_SUBSCR, success);
assert(!PyErr_Occurred());
cache->counter = adaptive_counter_cooldown();
specialize(instr, specialized_op);
}