mirror of
https://github.com/python/cpython.git
synced 2025-07-19 09:15:34 +00:00
gh-119258: Eliminate Type Guards in Tier 2 Optimizer with Watcher (GH-119365)
Co-authored-by: parmeggiani <parmeggiani@spaziodati.eu> Co-authored-by: dpdani <git@danieleparmeggiani.me> Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> Co-authored-by: Brandt Bucher <brandtbucher@microsoft.com> Co-authored-by: Ken Jin <kenjin@python.org>
This commit is contained in:
parent
2080425154
commit
55402d3232
13 changed files with 366 additions and 59 deletions
|
@ -79,6 +79,7 @@ increment_mutations(PyObject* dict) {
|
|||
* so we don't need to check that they haven't been used */
|
||||
#define BUILTINS_WATCHER_ID 0
|
||||
#define GLOBALS_WATCHER_ID 1
|
||||
#define TYPE_WATCHER_ID 0
|
||||
|
||||
static int
|
||||
globals_watcher_callback(PyDict_WatchEvent event, PyObject* dict,
|
||||
|
@ -92,6 +93,14 @@ globals_watcher_callback(PyDict_WatchEvent event, PyObject* dict,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
type_watcher_callback(PyTypeObject* type)
|
||||
{
|
||||
_Py_Executors_InvalidateDependency(_PyInterpreterState_GET(), type, 1);
|
||||
PyType_Unwatch(TYPE_WATCHER_ID, (PyObject *)type);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
convert_global_to_const(_PyUOpInstruction *inst, PyObject *obj)
|
||||
{
|
||||
|
@ -167,6 +176,9 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer,
|
|||
if (interp->dict_state.watchers[GLOBALS_WATCHER_ID] == NULL) {
|
||||
interp->dict_state.watchers[GLOBALS_WATCHER_ID] = globals_watcher_callback;
|
||||
}
|
||||
if (interp->type_watchers[TYPE_WATCHER_ID] == NULL) {
|
||||
interp->type_watchers[TYPE_WATCHER_ID] = type_watcher_callback;
|
||||
}
|
||||
for (int pc = 0; pc < buffer_size; pc++) {
|
||||
_PyUOpInstruction *inst = &buffer[pc];
|
||||
int opcode = inst->opcode;
|
||||
|
@ -310,9 +322,11 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer,
|
|||
#define sym_has_type _Py_uop_sym_has_type
|
||||
#define sym_get_type _Py_uop_sym_get_type
|
||||
#define sym_matches_type _Py_uop_sym_matches_type
|
||||
#define sym_matches_type_version _Py_uop_sym_matches_type_version
|
||||
#define sym_set_null(SYM) _Py_uop_sym_set_null(ctx, SYM)
|
||||
#define sym_set_non_null(SYM) _Py_uop_sym_set_non_null(ctx, SYM)
|
||||
#define sym_set_type(SYM, TYPE) _Py_uop_sym_set_type(ctx, SYM, TYPE)
|
||||
#define sym_set_type_version(SYM, VERSION) _Py_uop_sym_set_type_version(ctx, SYM, VERSION)
|
||||
#define sym_set_const(SYM, CNST) _Py_uop_sym_set_const(ctx, SYM, CNST)
|
||||
#define sym_is_bottom _Py_uop_sym_is_bottom
|
||||
#define sym_truthiness _Py_uop_sym_truthiness
|
||||
|
@ -395,7 +409,7 @@ optimize_uops(
|
|||
_PyUOpInstruction *corresponding_check_stack = NULL;
|
||||
|
||||
_Py_uop_abstractcontext_init(ctx);
|
||||
_Py_UOpsAbstractFrame *frame = _Py_uop_frame_new(ctx, co, ctx->n_consumed, 0, curr_stacklen);
|
||||
_Py_UOpsAbstractFrame *frame = _Py_uop_frame_new(ctx, co, curr_stacklen, NULL, 0);
|
||||
if (frame == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
|
|
@ -21,11 +21,13 @@ typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame;
|
|||
#define sym_new_const _Py_uop_sym_new_const
|
||||
#define sym_new_null _Py_uop_sym_new_null
|
||||
#define sym_matches_type _Py_uop_sym_matches_type
|
||||
#define sym_matches_type_version _Py_uop_sym_matches_type_version
|
||||
#define sym_get_type _Py_uop_sym_get_type
|
||||
#define sym_has_type _Py_uop_sym_has_type
|
||||
#define sym_set_null(SYM) _Py_uop_sym_set_null(ctx, SYM)
|
||||
#define sym_set_non_null(SYM) _Py_uop_sym_set_non_null(ctx, SYM)
|
||||
#define sym_set_type(SYM, TYPE) _Py_uop_sym_set_type(ctx, SYM, TYPE)
|
||||
#define sym_set_type_version(SYM, VERSION) _Py_uop_sym_set_type_version(ctx, SYM, VERSION)
|
||||
#define sym_set_const(SYM, CNST) _Py_uop_sym_set_const(ctx, SYM, CNST)
|
||||
#define sym_is_bottom _Py_uop_sym_is_bottom
|
||||
#define frame_new _Py_uop_frame_new
|
||||
|
@ -113,6 +115,29 @@ dummy_func(void) {
|
|||
sym_set_type(right, &PyLong_Type);
|
||||
}
|
||||
|
||||
op(_GUARD_TYPE_VERSION, (type_version/2, owner -- owner)) {
    // Abstract interpretation of a type-version guard on `owner`.
    assert(type_version);
    if (!sym_matches_type_version(owner, type_version)) {
        // First time this version is seen for `owner`: look the type up so
        // that it can be watched and the executor invalidated when the type
        // changes.
        PyTypeObject *type = _PyType_LookupByVersion(type_version);
        // A NULL type means the version was not found in the cache (there
        // was a conflict with the key), so the version cannot be trusted and
        // nothing is recorded.
        //
        // sym_set_type_version() returns false when `owner` already carried
        // a (different) version; it sets `owner` to bottom in that case, and
        // a watcher must already have been added earlier, so none is added
        // here.
        if (type && sym_set_type_version(owner, type_version)) {
            PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type);
            _Py_BloomFilter_Add(dependencies, type);
        }
    }
    else {
        // `owner` is already known to have this version: the guard is
        // redundant and is replaced by a no-op.
        REPLACE_OP(this_instr, _NOP, 0, 0);
    }
}
|
||||
|
||||
op(_GUARD_BOTH_FLOAT, (left, right -- left, right)) {
|
||||
if (sym_matches_type(left, &PyFloat_Type)) {
|
||||
if (sym_matches_type(right, &PyFloat_Type)) {
|
||||
|
@ -563,16 +588,12 @@ dummy_func(void) {
|
|||
argcount++;
|
||||
}
|
||||
|
||||
_Py_UopsSymbol **localsplus_start = ctx->n_consumed;
|
||||
int n_locals_already_filled = 0;
|
||||
// Can determine statically, so we interleave the new locals
|
||||
// and make the current stack the new locals.
|
||||
// This also sets up for true call inlining.
|
||||
if (sym_is_null(self_or_null) || sym_is_not_null(self_or_null)) {
|
||||
localsplus_start = args;
|
||||
n_locals_already_filled = argcount;
|
||||
new_frame = frame_new(ctx, co, 0, args, argcount);
|
||||
} else {
|
||||
new_frame = frame_new(ctx, co, 0, NULL, 0);
|
||||
|
||||
}
|
||||
new_frame = frame_new(ctx, co, localsplus_start, n_locals_already_filled, 0);
|
||||
}
|
||||
|
||||
op(_PY_FRAME_GENERAL, (callable, self_or_null, args[oparg] -- new_frame: _Py_UOpsAbstractFrame *)) {
|
||||
|
|
33
Python/optimizer_cases.c.h
generated
33
Python/optimizer_cases.c.h
generated
|
@ -930,6 +930,28 @@
|
|||
}
|
||||
|
||||
case _GUARD_TYPE_VERSION: {
|
||||
_Py_UopsSymbol *owner;
|
||||
owner = stack_pointer[-1];
|
||||
uint32_t type_version = (uint32_t)this_instr->operand;
|
||||
assert(type_version);
|
||||
if (sym_matches_type_version(owner, type_version)) {
|
||||
REPLACE_OP(this_instr, _NOP, 0, 0);
|
||||
} else {
|
||||
// add watcher so that whenever the type changes we invalidate this
|
||||
PyTypeObject *type = _PyType_LookupByVersion(type_version);
|
||||
// if the type is null, it was not found in the cache (there was a conflict)
|
||||
// with the key, in which case we can't trust the version
|
||||
if (type) {
|
||||
// if the type version was set properly, then add a watcher
|
||||
// if it wasn't this means that the type version was previously set to something else
|
||||
// and we set the owner to bottom, so we don't need to add a watcher because we must have
|
||||
// already added one earlier.
|
||||
if (sym_set_type_version(owner, type_version)) {
|
||||
PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type);
|
||||
_Py_BloomFilter_Add(dependencies, type);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1583,16 +1605,11 @@
|
|||
args--;
|
||||
argcount++;
|
||||
}
|
||||
_Py_UopsSymbol **localsplus_start = ctx->n_consumed;
|
||||
int n_locals_already_filled = 0;
|
||||
// Can determine statically, so we interleave the new locals
|
||||
// and make the current stack the new locals.
|
||||
// This also sets up for true call inlining.
|
||||
if (sym_is_null(self_or_null) || sym_is_not_null(self_or_null)) {
|
||||
localsplus_start = args;
|
||||
n_locals_already_filled = argcount;
|
||||
new_frame = frame_new(ctx, co, 0, args, argcount);
|
||||
} else {
|
||||
new_frame = frame_new(ctx, co, 0, NULL, 0);
|
||||
}
|
||||
new_frame = frame_new(ctx, co, localsplus_start, n_locals_already_filled, 0);
|
||||
stack_pointer[-2 - oparg] = (_Py_UopsSymbol *)new_frame;
|
||||
stack_pointer += -1 - oparg;
|
||||
break;
|
||||
|
|
|
@ -52,7 +52,8 @@ static inline int get_lltrace(void) {
|
|||
static _Py_UopsSymbol NO_SPACE_SYMBOL = {
|
||||
.flags = IS_NULL | NOT_NULL | NO_SPACE,
|
||||
.typ = NULL,
|
||||
.const_val = NULL
|
||||
.const_val = NULL,
|
||||
.type_version = 0,
|
||||
};
|
||||
|
||||
_Py_UopsSymbol *
|
||||
|
@ -76,6 +77,7 @@ sym_new(_Py_UOpsContext *ctx)
|
|||
self->flags = 0;
|
||||
self->typ = NULL;
|
||||
self->const_val = NULL;
|
||||
self->type_version = 0;
|
||||
|
||||
return self;
|
||||
}
|
||||
|
@ -152,6 +154,18 @@ _Py_uop_sym_set_type(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyTypeObject *ty
|
|||
}
|
||||
}
|
||||
|
||||
bool
|
||||
_Py_uop_sym_set_type_version(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, unsigned int version)
|
||||
{
|
||||
// if the type version was already set, then it must be different and we should set it to bottom
|
||||
if (sym->type_version) {
|
||||
sym_set_bottom(ctx, sym);
|
||||
return false;
|
||||
}
|
||||
sym->type_version = version;
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
_Py_uop_sym_set_const(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyObject *const_val)
|
||||
{
|
||||
|
@ -256,6 +270,12 @@ _Py_uop_sym_get_type(_Py_UopsSymbol *sym)
|
|||
return sym->typ;
|
||||
}
|
||||
|
||||
unsigned int
|
||||
_Py_uop_sym_get_type_version(_Py_UopsSymbol *sym)
|
||||
{
|
||||
return sym->type_version;
|
||||
}
|
||||
|
||||
bool
|
||||
_Py_uop_sym_has_type(_Py_UopsSymbol *sym)
|
||||
{
|
||||
|
@ -272,6 +292,13 @@ _Py_uop_sym_matches_type(_Py_UopsSymbol *sym, PyTypeObject *typ)
|
|||
return _Py_uop_sym_get_type(sym) == typ;
|
||||
}
|
||||
|
||||
bool
|
||||
_Py_uop_sym_matches_type_version(_Py_UopsSymbol *sym, unsigned int version)
|
||||
{
|
||||
return _Py_uop_sym_get_type_version(sym) == version;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
_Py_uop_sym_truthiness(_Py_UopsSymbol *sym)
|
||||
{
|
||||
|
@ -311,9 +338,9 @@ _Py_UOpsAbstractFrame *
|
|||
_Py_uop_frame_new(
|
||||
_Py_UOpsContext *ctx,
|
||||
PyCodeObject *co,
|
||||
_Py_UopsSymbol **localsplus_start,
|
||||
int n_locals_already_filled,
|
||||
int curr_stackentries)
|
||||
int curr_stackentries,
|
||||
_Py_UopsSymbol **args,
|
||||
int arg_len)
|
||||
{
|
||||
assert(ctx->curr_frame_depth < MAX_ABSTRACT_FRAME_DEPTH);
|
||||
_Py_UOpsAbstractFrame *frame = &ctx->frames[ctx->curr_frame_depth];
|
||||
|
@ -321,19 +348,22 @@ _Py_uop_frame_new(
|
|||
frame->stack_len = co->co_stacksize;
|
||||
frame->locals_len = co->co_nlocalsplus;
|
||||
|
||||
frame->locals = localsplus_start;
|
||||
frame->locals = ctx->n_consumed;
|
||||
frame->stack = frame->locals + co->co_nlocalsplus;
|
||||
frame->stack_pointer = frame->stack + curr_stackentries;
|
||||
ctx->n_consumed = localsplus_start + (co->co_nlocalsplus + co->co_stacksize);
|
||||
ctx->n_consumed = ctx->n_consumed + (co->co_nlocalsplus + co->co_stacksize);
|
||||
if (ctx->n_consumed >= ctx->limit) {
|
||||
ctx->done = true;
|
||||
ctx->out_of_space = true;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
// Initialize with the initial state of all local variables
|
||||
for (int i = n_locals_already_filled; i < co->co_nlocalsplus; i++) {
|
||||
for (int i = 0; i < arg_len; i++) {
|
||||
frame->locals[i] = args[i];
|
||||
}
|
||||
|
||||
for (int i = arg_len; i < co->co_nlocalsplus; i++) {
|
||||
_Py_UopsSymbol *local = _Py_uop_sym_new_unknown(ctx);
|
||||
frame->locals[i] = local;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue