GH-115457: Support splitting and replication of micro ops. (GH-115558)

This commit is contained in:
Mark Shannon 2024-02-20 10:50:59 +00:00 committed by GitHub
parent 7b21403ccd
commit 626c414995
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
20 changed files with 918 additions and 319 deletions

View file

@ -54,6 +54,8 @@
#define guard
#define override
#define specializing
#define split
#define replicate(TIMES)
// Dummy variables for stack effects.
static PyObject *value, *value1, *value2, *left, *right, *res, *sum, *prod, *sub;
@ -208,7 +210,7 @@ dummy_func(
Py_INCREF(value);
}
pure inst(LOAD_FAST, (-- value)) {
replicate(8) pure inst(LOAD_FAST, (-- value)) {
value = GETLOCAL(oparg);
assert(value != NULL);
Py_INCREF(value);
@ -234,7 +236,7 @@ dummy_func(
Py_INCREF(value);
}
inst(STORE_FAST, (value --)) {
replicate(8) inst(STORE_FAST, (value --)) {
SETLOCAL(oparg, value);
}
@ -1914,7 +1916,7 @@ dummy_func(
DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) && !_PyObject_MakeInstanceAttributesFromDict(owner, dorv));
}
op(_LOAD_ATTR_INSTANCE_VALUE, (index/1, owner -- attr, null if (oparg & 1))) {
split op(_LOAD_ATTR_INSTANCE_VALUE, (index/1, owner -- attr, null if (oparg & 1))) {
PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner);
attr = _PyDictOrValues_GetValues(dorv)->values[index];
DEOPT_IF(attr == NULL);
@ -1995,7 +1997,7 @@ dummy_func(
_LOAD_ATTR_WITH_HINT +
unused/5;
op(_LOAD_ATTR_SLOT, (index/1, owner -- attr, null if (oparg & 1))) {
split op(_LOAD_ATTR_SLOT, (index/1, owner -- attr, null if (oparg & 1))) {
char *addr = (char *)owner + index;
attr = *(PyObject **)addr;
DEOPT_IF(attr == NULL);
@ -2018,7 +2020,7 @@ dummy_func(
}
op(_LOAD_ATTR_CLASS, (descr/4, owner -- attr, null if (oparg & 1))) {
split op(_LOAD_ATTR_CLASS, (descr/4, owner -- attr, null if (oparg & 1))) {
STAT_INC(LOAD_ATTR, hit);
assert(descr != NULL);
attr = Py_NewRef(descr);
@ -2888,7 +2890,7 @@ dummy_func(
DEOPT_IF(owner_heap_type->ht_cached_keys->dk_version != keys_version);
}
op(_LOAD_ATTR_METHOD_WITH_VALUES, (descr/4, owner -- attr, self if (1))) {
split op(_LOAD_ATTR_METHOD_WITH_VALUES, (descr/4, owner -- attr, self if (1))) {
assert(oparg & 1);
/* Cached method object */
STAT_INC(LOAD_ATTR, hit);
@ -3130,7 +3132,7 @@ dummy_func(
DEOPT_IF(tstate->py_recursion_remaining <= 1);
}
pure op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null, args[oparg] -- new_frame: _PyInterpreterFrame*)) {
replicate(5) pure op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null, args[oparg] -- new_frame: _PyInterpreterFrame*)) {
int argcount = oparg;
if (self_or_null != NULL) {
args--;

View file

@ -1029,7 +1029,7 @@ enter_tier_two:
#ifdef Py_DEBUG
{
fprintf(stderr, "Unknown uop %d, oparg %d, operand %" PRIu64 " @ %d\n",
opcode, next_uop[-1].oparg, next_uop[-1].operand,
next_uop[-1].opcode, next_uop[-1].oparg, next_uop[-1].operand,
(int)(next_uop - current_executor->trace - 1));
Py_FatalError("Unknown uop");
}

View file

@ -37,6 +37,102 @@
break;
}
case _LOAD_FAST_0: {
/* Replicated copy of _LOAD_FAST with oparg fixed at 0. */
PyObject *value;
/* oparg is a constant here; the assert checks it agrees with the
   oparg stored in the current instruction. */
oparg = 0;
assert(oparg == CURRENT_OPARG());
value = GETLOCAL(oparg);
assert(value != NULL);
/* Take a new reference before placing the value on the stack. */
Py_INCREF(value);
/* Push: write at the current top, then grow the stack by one slot. */
stack_pointer[0] = value;
stack_pointer += 1;
break;
}
case _LOAD_FAST_1: {
/* Replicated copy of _LOAD_FAST with oparg fixed at 1. */
PyObject *value;
/* oparg is a constant here; the assert checks it agrees with the
   oparg stored in the current instruction. */
oparg = 1;
assert(oparg == CURRENT_OPARG());
value = GETLOCAL(oparg);
assert(value != NULL);
/* Take a new reference before placing the value on the stack. */
Py_INCREF(value);
/* Push: write at the current top, then grow the stack by one slot. */
stack_pointer[0] = value;
stack_pointer += 1;
break;
}
case _LOAD_FAST_2: {
/* Replicated copy of _LOAD_FAST with oparg fixed at 2. */
PyObject *value;
/* oparg is a constant here; the assert checks it agrees with the
   oparg stored in the current instruction. */
oparg = 2;
assert(oparg == CURRENT_OPARG());
value = GETLOCAL(oparg);
assert(value != NULL);
/* Take a new reference before placing the value on the stack. */
Py_INCREF(value);
/* Push: write at the current top, then grow the stack by one slot. */
stack_pointer[0] = value;
stack_pointer += 1;
break;
}
case _LOAD_FAST_3: {
/* Replicated copy of _LOAD_FAST with oparg fixed at 3. */
PyObject *value;
/* oparg is a constant here; the assert checks it agrees with the
   oparg stored in the current instruction. */
oparg = 3;
assert(oparg == CURRENT_OPARG());
value = GETLOCAL(oparg);
assert(value != NULL);
/* Take a new reference before placing the value on the stack. */
Py_INCREF(value);
/* Push: write at the current top, then grow the stack by one slot. */
stack_pointer[0] = value;
stack_pointer += 1;
break;
}
case _LOAD_FAST_4: {
/* Replicated copy of _LOAD_FAST with oparg fixed at 4. */
PyObject *value;
/* oparg is a constant here; the assert checks it agrees with the
   oparg stored in the current instruction. */
oparg = 4;
assert(oparg == CURRENT_OPARG());
value = GETLOCAL(oparg);
assert(value != NULL);
/* Take a new reference before placing the value on the stack. */
Py_INCREF(value);
/* Push: write at the current top, then grow the stack by one slot. */
stack_pointer[0] = value;
stack_pointer += 1;
break;
}
case _LOAD_FAST_5: {
/* Replicated copy of _LOAD_FAST with oparg fixed at 5. */
PyObject *value;
/* oparg is a constant here; the assert checks it agrees with the
   oparg stored in the current instruction. */
oparg = 5;
assert(oparg == CURRENT_OPARG());
value = GETLOCAL(oparg);
assert(value != NULL);
/* Take a new reference before placing the value on the stack. */
Py_INCREF(value);
/* Push: write at the current top, then grow the stack by one slot. */
stack_pointer[0] = value;
stack_pointer += 1;
break;
}
case _LOAD_FAST_6: {
/* Replicated copy of _LOAD_FAST with oparg fixed at 6. */
PyObject *value;
/* oparg is a constant here; the assert checks it agrees with the
   oparg stored in the current instruction. */
oparg = 6;
assert(oparg == CURRENT_OPARG());
value = GETLOCAL(oparg);
assert(value != NULL);
/* Take a new reference before placing the value on the stack. */
Py_INCREF(value);
/* Push: write at the current top, then grow the stack by one slot. */
stack_pointer[0] = value;
stack_pointer += 1;
break;
}
case _LOAD_FAST_7: {
/* Replicated copy of _LOAD_FAST with oparg fixed at 7. */
PyObject *value;
/* oparg is a constant here; the assert checks it agrees with the
   oparg stored in the current instruction. */
oparg = 7;
assert(oparg == CURRENT_OPARG());
value = GETLOCAL(oparg);
assert(value != NULL);
/* Take a new reference before placing the value on the stack. */
Py_INCREF(value);
/* Push: write at the current top, then grow the stack by one slot. */
stack_pointer[0] = value;
stack_pointer += 1;
break;
}
case _LOAD_FAST: {
PyObject *value;
oparg = CURRENT_OPARG();
@ -69,6 +165,86 @@
break;
}
case _STORE_FAST_0: {
/* Replicated copy of _STORE_FAST with oparg fixed at 0. */
PyObject *value;
/* oparg is a constant here; the assert checks it agrees with the
   oparg stored in the current instruction. */
oparg = 0;
assert(oparg == CURRENT_OPARG());
/* Read the top-of-stack value, hand it to SETLOCAL, then pop it. */
value = stack_pointer[-1];
SETLOCAL(oparg, value);
stack_pointer += -1;
break;
}
case _STORE_FAST_1: {
/* Replicated copy of _STORE_FAST with oparg fixed at 1. */
PyObject *value;
/* oparg is a constant here; the assert checks it agrees with the
   oparg stored in the current instruction. */
oparg = 1;
assert(oparg == CURRENT_OPARG());
/* Read the top-of-stack value, hand it to SETLOCAL, then pop it. */
value = stack_pointer[-1];
SETLOCAL(oparg, value);
stack_pointer += -1;
break;
}
case _STORE_FAST_2: {
/* Replicated copy of _STORE_FAST with oparg fixed at 2. */
PyObject *value;
/* oparg is a constant here; the assert checks it agrees with the
   oparg stored in the current instruction. */
oparg = 2;
assert(oparg == CURRENT_OPARG());
/* Read the top-of-stack value, hand it to SETLOCAL, then pop it. */
value = stack_pointer[-1];
SETLOCAL(oparg, value);
stack_pointer += -1;
break;
}
case _STORE_FAST_3: {
/* Replicated copy of _STORE_FAST with oparg fixed at 3. */
PyObject *value;
/* oparg is a constant here; the assert checks it agrees with the
   oparg stored in the current instruction. */
oparg = 3;
assert(oparg == CURRENT_OPARG());
/* Read the top-of-stack value, hand it to SETLOCAL, then pop it. */
value = stack_pointer[-1];
SETLOCAL(oparg, value);
stack_pointer += -1;
break;
}
case _STORE_FAST_4: {
/* Replicated copy of _STORE_FAST with oparg fixed at 4. */
PyObject *value;
/* oparg is a constant here; the assert checks it agrees with the
   oparg stored in the current instruction. */
oparg = 4;
assert(oparg == CURRENT_OPARG());
/* Read the top-of-stack value, hand it to SETLOCAL, then pop it. */
value = stack_pointer[-1];
SETLOCAL(oparg, value);
stack_pointer += -1;
break;
}
case _STORE_FAST_5: {
/* Replicated copy of _STORE_FAST with oparg fixed at 5. */
PyObject *value;
/* oparg is a constant here; the assert checks it agrees with the
   oparg stored in the current instruction. */
oparg = 5;
assert(oparg == CURRENT_OPARG());
/* Read the top-of-stack value, hand it to SETLOCAL, then pop it. */
value = stack_pointer[-1];
SETLOCAL(oparg, value);
stack_pointer += -1;
break;
}
case _STORE_FAST_6: {
/* Replicated copy of _STORE_FAST with oparg fixed at 6. */
PyObject *value;
/* oparg is a constant here; the assert checks it agrees with the
   oparg stored in the current instruction. */
oparg = 6;
assert(oparg == CURRENT_OPARG());
/* Read the top-of-stack value, hand it to SETLOCAL, then pop it. */
value = stack_pointer[-1];
SETLOCAL(oparg, value);
stack_pointer += -1;
break;
}
case _STORE_FAST_7: {
/* Replicated copy of _STORE_FAST with oparg fixed at 7. */
PyObject *value;
/* oparg is a constant here; the assert checks it agrees with the
   oparg stored in the current instruction. */
oparg = 7;
assert(oparg == CURRENT_OPARG());
/* Read the top-of-stack value, hand it to SETLOCAL, then pop it. */
value = stack_pointer[-1];
SETLOCAL(oparg, value);
stack_pointer += -1;
break;
}
case _STORE_FAST: {
PyObject *value;
oparg = CURRENT_OPARG();
@ -1534,7 +1710,7 @@
Py_DECREF(self);
if (attr == NULL) goto pop_3_error_tier_two;
stack_pointer[-3] = attr;
stack_pointer += -2 + ((0) ? 1 : 0);
stack_pointer += -2;
break;
}
@ -1637,11 +1813,11 @@
break;
}
case _LOAD_ATTR_INSTANCE_VALUE: {
case _LOAD_ATTR_INSTANCE_VALUE_0: {
PyObject *owner;
PyObject *attr;
PyObject *null = NULL;
oparg = CURRENT_OPARG();
(void)null;
owner = stack_pointer[-1];
uint16_t index = (uint16_t)CURRENT_OPERAND();
PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner);
@ -1652,11 +1828,31 @@
null = NULL;
Py_DECREF(owner);
stack_pointer[-1] = attr;
if (oparg & 1) stack_pointer[0] = null;
stack_pointer += (oparg & 1);
break;
}
case _LOAD_ATTR_INSTANCE_VALUE_1: {
/* Split variant of _LOAD_ATTR_INSTANCE_VALUE for (oparg & 1) == 1:
   the extra null slot is always pushed, so oparg is never read. */
PyObject *owner;
PyObject *attr;
PyObject *null = NULL;
(void)null;
owner = stack_pointer[-1];
/* The operand carries the index into the owner's values array. */
uint16_t index = (uint16_t)CURRENT_OPERAND();
PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner);
attr = _PyDictOrValues_GetValues(dorv)->values[index];
/* An empty slot cannot be handled here; leave via the deoptimize label. */
if (attr == NULL) goto deoptimize;
STAT_INC(LOAD_ATTR, hit);
/* Take a new reference to attr before the reference to owner is released. */
Py_INCREF(attr);
null = NULL;
Py_DECREF(owner);
/* attr overwrites owner's slot; null is pushed above it. */
stack_pointer[-1] = attr;
stack_pointer[0] = null;
stack_pointer += 1;
break;
}
/* _LOAD_ATTR_INSTANCE_VALUE is split on (oparg & 1) */
case _CHECK_ATTR_MODULE: {
PyObject *owner;
owner = stack_pointer[-1];
@ -1735,11 +1931,11 @@
break;
}
case _LOAD_ATTR_SLOT: {
case _LOAD_ATTR_SLOT_0: {
PyObject *owner;
PyObject *attr;
PyObject *null = NULL;
oparg = CURRENT_OPARG();
(void)null;
owner = stack_pointer[-1];
uint16_t index = (uint16_t)CURRENT_OPERAND();
char *addr = (char *)owner + index;
@ -1750,11 +1946,31 @@
null = NULL;
Py_DECREF(owner);
stack_pointer[-1] = attr;
if (oparg & 1) stack_pointer[0] = null;
stack_pointer += (oparg & 1);
break;
}
case _LOAD_ATTR_SLOT_1: {
/* Split variant of _LOAD_ATTR_SLOT for (oparg & 1) == 1:
   the extra null slot is always pushed, so oparg is never read. */
PyObject *owner;
PyObject *attr;
PyObject *null = NULL;
(void)null;
owner = stack_pointer[-1];
/* The operand is used as a byte offset from the start of owner. */
uint16_t index = (uint16_t)CURRENT_OPERAND();
char *addr = (char *)owner + index;
attr = *(PyObject **)addr;
/* An empty slot cannot be handled here; leave via the deoptimize label. */
if (attr == NULL) goto deoptimize;
STAT_INC(LOAD_ATTR, hit);
/* Take a new reference to attr before the reference to owner is released. */
Py_INCREF(attr);
null = NULL;
Py_DECREF(owner);
/* attr overwrites owner's slot; null is pushed above it. */
stack_pointer[-1] = attr;
stack_pointer[0] = null;
stack_pointer += 1;
break;
}
/* _LOAD_ATTR_SLOT is split on (oparg & 1) */
case _CHECK_ATTR_CLASS: {
PyObject *owner;
owner = stack_pointer[-1];
@ -1765,11 +1981,11 @@
break;
}
case _LOAD_ATTR_CLASS: {
case _LOAD_ATTR_CLASS_0: {
PyObject *owner;
PyObject *attr;
PyObject *null = NULL;
oparg = CURRENT_OPARG();
(void)null;
owner = stack_pointer[-1];
PyObject *descr = (PyObject *)CURRENT_OPERAND();
STAT_INC(LOAD_ATTR, hit);
@ -1778,11 +1994,29 @@
null = NULL;
Py_DECREF(owner);
stack_pointer[-1] = attr;
if (oparg & 1) stack_pointer[0] = null;
stack_pointer += (oparg & 1);
break;
}
case _LOAD_ATTR_CLASS_1: {
/* Split variant of _LOAD_ATTR_CLASS for (oparg & 1) == 1:
   the extra null slot is always pushed, so oparg is never read. */
PyObject *owner;
PyObject *attr;
PyObject *null = NULL;
(void)null;
owner = stack_pointer[-1];
/* The attribute object itself is carried in the operand. */
PyObject *descr = (PyObject *)CURRENT_OPERAND();
STAT_INC(LOAD_ATTR, hit);
assert(descr != NULL);
/* attr is a new reference to descr; the reference to owner is released. */
attr = Py_NewRef(descr);
null = NULL;
Py_DECREF(owner);
/* attr overwrites owner's slot; null is pushed above it. */
stack_pointer[-1] = attr;
stack_pointer[0] = null;
stack_pointer += 1;
break;
}
/* _LOAD_ATTR_CLASS is split on (oparg & 1) */
/* _LOAD_ATTR_PROPERTY is not a viable micro-op for tier 2 */
/* _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN is not a viable micro-op for tier 2 */
@ -2464,8 +2698,8 @@
assert(_PyType_HasFeature(Py_TYPE(attr), Py_TPFLAGS_METHOD_DESCRIPTOR));
self = owner;
stack_pointer[-1] = attr;
if (1) stack_pointer[0] = self;
stack_pointer += ((1) ? 1 : 0);
stack_pointer[0] = self;
stack_pointer += 1;
break;
}
@ -2484,8 +2718,8 @@
attr = Py_NewRef(descr);
self = owner;
stack_pointer[-1] = attr;
if (1) stack_pointer[0] = self;
stack_pointer += ((1) ? 1 : 0);
stack_pointer[0] = self;
stack_pointer += 1;
break;
}
@ -2501,7 +2735,6 @@
Py_DECREF(owner);
attr = Py_NewRef(descr);
stack_pointer[-1] = attr;
stack_pointer += ((0) ? 1 : 0);
break;
}
@ -2518,7 +2751,6 @@
Py_DECREF(owner);
attr = Py_NewRef(descr);
stack_pointer[-1] = attr;
stack_pointer += ((0) ? 1 : 0);
break;
}
@ -2547,8 +2779,8 @@
attr = Py_NewRef(descr);
self = owner;
stack_pointer[-1] = attr;
if (1) stack_pointer[0] = self;
stack_pointer += ((1) ? 1 : 0);
stack_pointer[0] = self;
stack_pointer += 1;
break;
}
@ -2615,6 +2847,136 @@
break;
}
case _INIT_CALL_PY_EXACT_ARGS_0: {
/* Replicated copy of _INIT_CALL_PY_EXACT_ARGS with oparg fixed at 0. */
PyObject **args;
PyObject *self_or_null;
PyObject *callable;
_PyInterpreterFrame *new_frame;
/* oparg is a constant here; the assert checks it agrees with the
   oparg stored in the current instruction. */
oparg = 0;
assert(oparg == CURRENT_OPARG());
/* Stack layout from the top down: oparg arguments, self_or_null, callable. */
args = &stack_pointer[-oparg];
self_or_null = stack_pointer[-1 - oparg];
callable = stack_pointer[-2 - oparg];
int argcount = oparg;
/* self_or_null sits directly below the arguments, so stepping args back
   one slot makes a non-NULL self the first argument. */
if (self_or_null != NULL) {
args--;
argcount++;
}
STAT_INC(CALL, hit);
PyFunctionObject *func = (PyFunctionObject *)callable;
new_frame = _PyFrame_PushUnchecked(tstate, func, argcount);
/* Copy the arguments into the first argcount localsplus slots of the new frame. */
for (int i = 0; i < argcount; i++) {
new_frame->localsplus[i] = args[i];
}
/* new_frame overwrites callable's slot; self_or_null and the arguments are popped. */
stack_pointer[-2 - oparg] = (PyObject *)new_frame;
stack_pointer += -1 - oparg;
break;
}
case _INIT_CALL_PY_EXACT_ARGS_1: {
/* Replicated copy of _INIT_CALL_PY_EXACT_ARGS with oparg fixed at 1. */
PyObject **args;
PyObject *self_or_null;
PyObject *callable;
_PyInterpreterFrame *new_frame;
/* oparg is a constant here; the assert checks it agrees with the
   oparg stored in the current instruction. */
oparg = 1;
assert(oparg == CURRENT_OPARG());
/* Stack layout from the top down: oparg arguments, self_or_null, callable. */
args = &stack_pointer[-oparg];
self_or_null = stack_pointer[-1 - oparg];
callable = stack_pointer[-2 - oparg];
int argcount = oparg;
/* self_or_null sits directly below the arguments, so stepping args back
   one slot makes a non-NULL self the first argument. */
if (self_or_null != NULL) {
args--;
argcount++;
}
STAT_INC(CALL, hit);
PyFunctionObject *func = (PyFunctionObject *)callable;
new_frame = _PyFrame_PushUnchecked(tstate, func, argcount);
/* Copy the arguments into the first argcount localsplus slots of the new frame. */
for (int i = 0; i < argcount; i++) {
new_frame->localsplus[i] = args[i];
}
/* new_frame overwrites callable's slot; self_or_null and the arguments are popped. */
stack_pointer[-2 - oparg] = (PyObject *)new_frame;
stack_pointer += -1 - oparg;
break;
}
case _INIT_CALL_PY_EXACT_ARGS_2: {
/* Replicated copy of _INIT_CALL_PY_EXACT_ARGS with oparg fixed at 2. */
PyObject **args;
PyObject *self_or_null;
PyObject *callable;
_PyInterpreterFrame *new_frame;
/* oparg is a constant here; the assert checks it agrees with the
   oparg stored in the current instruction. */
oparg = 2;
assert(oparg == CURRENT_OPARG());
/* Stack layout from the top down: oparg arguments, self_or_null, callable. */
args = &stack_pointer[-oparg];
self_or_null = stack_pointer[-1 - oparg];
callable = stack_pointer[-2 - oparg];
int argcount = oparg;
/* self_or_null sits directly below the arguments, so stepping args back
   one slot makes a non-NULL self the first argument. */
if (self_or_null != NULL) {
args--;
argcount++;
}
STAT_INC(CALL, hit);
PyFunctionObject *func = (PyFunctionObject *)callable;
new_frame = _PyFrame_PushUnchecked(tstate, func, argcount);
/* Copy the arguments into the first argcount localsplus slots of the new frame. */
for (int i = 0; i < argcount; i++) {
new_frame->localsplus[i] = args[i];
}
/* new_frame overwrites callable's slot; self_or_null and the arguments are popped. */
stack_pointer[-2 - oparg] = (PyObject *)new_frame;
stack_pointer += -1 - oparg;
break;
}
case _INIT_CALL_PY_EXACT_ARGS_3: {
/* Replicated copy of _INIT_CALL_PY_EXACT_ARGS with oparg fixed at 3. */
PyObject **args;
PyObject *self_or_null;
PyObject *callable;
_PyInterpreterFrame *new_frame;
/* oparg is a constant here; the assert checks it agrees with the
   oparg stored in the current instruction. */
oparg = 3;
assert(oparg == CURRENT_OPARG());
/* Stack layout from the top down: oparg arguments, self_or_null, callable. */
args = &stack_pointer[-oparg];
self_or_null = stack_pointer[-1 - oparg];
callable = stack_pointer[-2 - oparg];
int argcount = oparg;
/* self_or_null sits directly below the arguments, so stepping args back
   one slot makes a non-NULL self the first argument. */
if (self_or_null != NULL) {
args--;
argcount++;
}
STAT_INC(CALL, hit);
PyFunctionObject *func = (PyFunctionObject *)callable;
new_frame = _PyFrame_PushUnchecked(tstate, func, argcount);
/* Copy the arguments into the first argcount localsplus slots of the new frame. */
for (int i = 0; i < argcount; i++) {
new_frame->localsplus[i] = args[i];
}
/* new_frame overwrites callable's slot; self_or_null and the arguments are popped. */
stack_pointer[-2 - oparg] = (PyObject *)new_frame;
stack_pointer += -1 - oparg;
break;
}
case _INIT_CALL_PY_EXACT_ARGS_4: {
/* Replicated copy of _INIT_CALL_PY_EXACT_ARGS with oparg fixed at 4. */
PyObject **args;
PyObject *self_or_null;
PyObject *callable;
_PyInterpreterFrame *new_frame;
/* oparg is a constant here; the assert checks it agrees with the
   oparg stored in the current instruction. */
oparg = 4;
assert(oparg == CURRENT_OPARG());
/* Stack layout from the top down: oparg arguments, self_or_null, callable. */
args = &stack_pointer[-oparg];
self_or_null = stack_pointer[-1 - oparg];
callable = stack_pointer[-2 - oparg];
int argcount = oparg;
/* self_or_null sits directly below the arguments, so stepping args back
   one slot makes a non-NULL self the first argument. */
if (self_or_null != NULL) {
args--;
argcount++;
}
STAT_INC(CALL, hit);
PyFunctionObject *func = (PyFunctionObject *)callable;
new_frame = _PyFrame_PushUnchecked(tstate, func, argcount);
/* Copy the arguments into the first argcount localsplus slots of the new frame. */
for (int i = 0; i < argcount; i++) {
new_frame->localsplus[i] = args[i];
}
/* new_frame overwrites callable's slot; self_or_null and the arguments are popped. */
stack_pointer[-2 - oparg] = (PyObject *)new_frame;
stack_pointer += -1 - oparg;
break;
}
case _INIT_CALL_PY_EXACT_ARGS: {
PyObject **args;
PyObject *self_or_null;
@ -2660,7 +3022,6 @@
goto exit_unwind;
}
#endif
stack_pointer += ((0) ? 1 : 0);
break;
}

View file

@ -1005,7 +1005,6 @@
}
#endif
}
stack_pointer += ((0) ? 1 : 0);
DISPATCH();
}
@ -1755,7 +1754,6 @@
}
#endif
}
stack_pointer += ((0) ? 1 : 0);
DISPATCH();
}
@ -3597,8 +3595,8 @@
self = owner;
}
stack_pointer[-1] = attr;
if (1) stack_pointer[0] = self;
stack_pointer += ((1) ? 1 : 0);
stack_pointer[0] = self;
stack_pointer += 1;
DISPATCH();
}
@ -3632,8 +3630,8 @@
self = owner;
}
stack_pointer[-1] = attr;
if (1) stack_pointer[0] = self;
stack_pointer += ((1) ? 1 : 0);
stack_pointer[0] = self;
stack_pointer += 1;
DISPATCH();
}
@ -3679,8 +3677,8 @@
self = owner;
}
stack_pointer[-1] = attr;
if (1) stack_pointer[0] = self;
stack_pointer += ((1) ? 1 : 0);
stack_pointer[0] = self;
stack_pointer += 1;
DISPATCH();
}
@ -3751,7 +3749,6 @@
attr = Py_NewRef(descr);
}
stack_pointer[-1] = attr;
stack_pointer += ((0) ? 1 : 0);
DISPATCH();
}
@ -3794,7 +3791,6 @@
attr = Py_NewRef(descr);
}
stack_pointer[-1] = attr;
stack_pointer += ((0) ? 1 : 0);
DISPATCH();
}
@ -4380,7 +4376,7 @@
Py_DECREF(self);
if (attr == NULL) goto pop_3_error;
stack_pointer[-3] = attr;
stack_pointer += -2 + ((0) ? 1 : 0);
stack_pointer += -2;
DISPATCH();
}

View file

@ -963,6 +963,21 @@ uop_optimize(
}
}
assert(err == 1);
/* Fix up: walk the trace buffer and rewrite each generic uop to its
   split or replicated variant, stopping at the uop that ends the trace.
   NOTE(review): the arithmetic below assumes the variants are numbered
   immediately after their base uop -- confirm against the generated
   uop id table. */
for (int pc = 0; pc < UOP_MAX_TRACE_LENGTH; pc++) {
int opcode = buffer[pc].opcode;
int oparg = buffer[pc].oparg;
/* Split uops: choose between the two variants using the low bit of oparg. */
if (_PyUop_Flags[opcode] & HAS_OPARG_AND_1_FLAG) {
buffer[pc].opcode = opcode + 1 + (oparg & 1);
}
/* Replicated uops: an oparg below the replication count selects the
   copy with that oparg baked in; larger opargs keep the generic uop. */
else if (oparg < _PyUop_Replication[opcode]) {
buffer[pc].opcode = opcode + oparg + 1;
}
/* These two uops mark the end of the trace, so nothing after them is scanned. */
else if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE) {
break;
}
/* Check that the (possibly rewritten) opcode has an entry in the name table. */
assert(_PyOpcode_uop_name[buffer[pc].opcode]);
}
_PyExecutorObject *executor = make_executor_from_uops(buffer, &dependencies);
if (executor == NULL) {
return -1;

View file

@ -834,7 +834,7 @@
attr = sym_new_unknown(ctx);
if (attr == NULL) goto out_of_space;
stack_pointer[-3] = attr;
stack_pointer += -2 + ((0) ? 1 : 0);
stack_pointer += -2;
break;
}
@ -1264,8 +1264,8 @@
self = sym_new_unknown(ctx);
if (self == NULL) goto out_of_space;
stack_pointer[-1] = attr;
if (1) stack_pointer[0] = self;
stack_pointer += ((1) ? 1 : 0);
stack_pointer[0] = self;
stack_pointer += 1;
break;
}
@ -1277,8 +1277,8 @@
self = sym_new_unknown(ctx);
if (self == NULL) goto out_of_space;
stack_pointer[-1] = attr;
if (1) stack_pointer[0] = self;
stack_pointer += ((1) ? 1 : 0);
stack_pointer[0] = self;
stack_pointer += 1;
break;
}
@ -1287,7 +1287,6 @@
attr = sym_new_unknown(ctx);
if (attr == NULL) goto out_of_space;
stack_pointer[-1] = attr;
stack_pointer += ((0) ? 1 : 0);
break;
}
@ -1296,7 +1295,6 @@
attr = sym_new_unknown(ctx);
if (attr == NULL) goto out_of_space;
stack_pointer[-1] = attr;
stack_pointer += ((0) ? 1 : 0);
break;
}
@ -1312,8 +1310,8 @@
self = sym_new_unknown(ctx);
if (self == NULL) goto out_of_space;
stack_pointer[-1] = attr;
if (1) stack_pointer[0] = self;
stack_pointer += ((1) ? 1 : 0);
stack_pointer[0] = self;
stack_pointer += 1;
break;
}
@ -1409,7 +1407,6 @@
ctx->frame = new_frame;
ctx->curr_frame_depth++;
stack_pointer = new_frame->stack_pointer;
stack_pointer += ((0) ? 1 : 0);
break;
}