Mirror of https://github.com/python/cpython.git
gh-114058: Foundations of the Tier2 redundancy eliminator (GH-115085)

Co-authored-by: Mark Shannon <9448417+markshannon@users.noreply.github.com>
Co-authored-by: Jules <57632293+JuliaPoo@users.noreply.github.com>
Co-authored-by: Guido van Rossum <gvanrossum@users.noreply.github.com>

Parent: ccc76c3e88
Commit: 7cce857622
25 changed files with 3137 additions and 140 deletions
@@ -1,3 +1,14 @@
/*
 * This file contains the support code for CPython's uops redundancy eliminator.
 * It also performs some simple optimizations.
 * It performs a traditional data-flow analysis[1] over the trace of uops.
 * Using the information gained, it chooses to emit, or skip certain instructions
 * if possible.
 *
 * [1] For information on data-flow analysis, please see
 * https://clang.llvm.org/docs/DataFlowAnalysisIntro.html
 *
 * */
#include "Python.h"
#include "opcode.h"
#include "pycore_dict.h"
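The header comment above describes the pass as a forward data-flow analysis over a straight-line trace of uops. As a rough, standalone illustration of that idea (not part of this commit; names such as ToyOpcode and AbsVal are invented for the example), the sketch below walks a toy trace once, records which abstract stack slots hold known constants, and learns the result of an addition whose inputs are both known:

#include <stdbool.h>
#include <stdio.h>

/* Toy straight-line trace: push a constant or add the two top values. */
typedef enum { OP_PUSH_CONST, OP_ADD, OP_END } ToyOpcode;
typedef struct { ToyOpcode op; int operand; } ToyInst;

/* Abstract value: either "unknown" or a known integer constant. */
typedef struct { bool known; int value; } AbsVal;

int main(void)
{
    ToyInst trace[] = {
        {OP_PUSH_CONST, 2}, {OP_PUSH_CONST, 3}, {OP_ADD, 0}, {OP_END, 0}
    };
    AbsVal stack[16];
    int sp = 0;

    /* One forward pass; the analysis mirrors each op's stack effect. */
    for (int pc = 0; trace[pc].op != OP_END; pc++) {
        switch (trace[pc].op) {
        case OP_PUSH_CONST:
            stack[sp].known = true;
            stack[sp].value = trace[pc].operand;
            sp++;
            break;
        case OP_ADD:
            if (stack[sp - 2].known && stack[sp - 1].known) {
                /* Both inputs known: the result is known too, so a real
                 * pass could emit a constant load here instead. */
                stack[sp - 2].value += stack[sp - 1].value;
            }
            else {
                stack[sp - 2].known = false;
            }
            sp--;
            break;
        default:
            break;
        }
    }
    if (sp > 0 && stack[sp - 1].known) {
        printf("top of stack is known to be %d\n", stack[sp - 1].value);
    }
    return 0;
}

The real pass works in roughly the same way, except that its abstract values are the _Py_UOpsSymType symbols introduced below and the per-opcode rules come from a generated header.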
@@ -9,10 +20,355 @@
#include "pycore_dict.h"
#include "pycore_long.h"
#include "cpython/optimizer.h"
#include "pycore_optimizer.h"
#include "pycore_object.h"
#include "pycore_dict.h"
#include "pycore_function.h"
#include "pycore_uop_metadata.h"
#include "pycore_uop_ids.h"
#include "pycore_range.h"

#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>
#include "pycore_optimizer.h"

// Holds locals, stack, locals, stack ... co_consts (in that order)
#define MAX_ABSTRACT_INTERP_SIZE 4096

#define OVERALLOCATE_FACTOR 5

#define TY_ARENA_SIZE (UOP_MAX_TRACE_LENGTH * OVERALLOCATE_FACTOR)

// Need extras for root frame and for overflow frame (see TRACE_STACK_PUSH())
#define MAX_ABSTRACT_FRAME_DEPTH (TRACE_STACK_SIZE + 2)

#ifdef Py_DEBUG
    static const char *const DEBUG_ENV = "PYTHON_OPT_DEBUG";
    static inline int get_lltrace(void) {
        char *uop_debug = Py_GETENV(DEBUG_ENV);
        int lltrace = 0;
        if (uop_debug != NULL && *uop_debug >= '0') {
            lltrace = *uop_debug - '0';  // TODO: Parse an int and all that
        }
        return lltrace;
    }
    #define DPRINTF(level, ...) \
    if (get_lltrace() >= (level)) { printf(__VA_ARGS__); }
#else
    #define DPRINTF(level, ...)
#endif


// Flags for below.
#define KNOWN      1 << 0
#define TRUE_CONST 1 << 1
#define IS_NULL    1 << 2
#define NOT_NULL   1 << 3

typedef struct {
    int flags;
    PyTypeObject *typ;
    // constant propagated value (might be NULL)
    PyObject *const_val;
} _Py_UOpsSymType;
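A fresh symbol starts with no flag bits set, and the analysis only ever adds facts, so the flags act as a tiny lattice. In particular, the test (flags & (IS_NULL | NOT_NULL)) == IS_NULL used by the helpers further down reads as "NULL-ness has been established and it is NULL". A minimal standalone sketch of that encoding (plain C, not the CPython helpers):

#include <stdbool.h>
#include <stdio.h>

#define KNOWN      (1 << 0)
#define TRUE_CONST (1 << 1)
#define IS_NULL    (1 << 2)
#define NOT_NULL   (1 << 3)

/* "Definitely NULL": NULL-ness is known and it resolves to IS_NULL. */
static bool is_null(int flags)     { return (flags & (IS_NULL | NOT_NULL)) == IS_NULL; }
static bool is_not_null(int flags) { return (flags & (IS_NULL | NOT_NULL)) == NOT_NULL; }

int main(void)
{
    int unknown = 0;                    /* nothing known yet: both tests fail */
    int null_sym = KNOWN | IS_NULL;     /* proven to be NULL */
    int object_sym = KNOWN | NOT_NULL;  /* proven to be a real object */

    printf("unknown:    is_null=%d is_not_null=%d\n", is_null(unknown), is_not_null(unknown));
    printf("null_sym:   is_null=%d is_not_null=%d\n", is_null(null_sym), is_not_null(null_sym));
    printf("object_sym: is_null=%d is_not_null=%d\n", is_null(object_sym), is_not_null(object_sym));
    return 0;
}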
typedef struct _Py_UOpsAbstractFrame {
    // Max stacklen
    int stack_len;
    int locals_len;

    _Py_UOpsSymType **stack_pointer;
    _Py_UOpsSymType **stack;
    _Py_UOpsSymType **locals;
} _Py_UOpsAbstractFrame;


typedef struct ty_arena {
    int ty_curr_number;
    int ty_max_number;
    _Py_UOpsSymType arena[TY_ARENA_SIZE];
} ty_arena;

// Tier 2 types meta interpreter
typedef struct _Py_UOpsAbstractInterpContext {
    PyObject_HEAD
    // The current "executing" frame.
    _Py_UOpsAbstractFrame *frame;
    _Py_UOpsAbstractFrame frames[MAX_ABSTRACT_FRAME_DEPTH];
    int curr_frame_depth;

    // Arena for the symbolic types.
    ty_arena t_arena;

    _Py_UOpsSymType **n_consumed;
    _Py_UOpsSymType **limit;
    _Py_UOpsSymType *locals_and_stack[MAX_ABSTRACT_INTERP_SIZE];
} _Py_UOpsAbstractInterpContext;
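ty_arena is a bump allocator: symbols are handed out from a fixed array, never freed individually, and the whole arena is reset (after clearing any owned const_val references) when the context is torn down. A standalone sketch of that allocation pattern, using made-up names (SymArena, arena_new):

#include <stdio.h>

typedef struct { int flags; } Sym;

#define ARENA_SIZE 8

typedef struct {
    int used;
    Sym slots[ARENA_SIZE];
} SymArena;

/* Bump-allocate one symbol; NULL means "out of space" and the caller bails out. */
static Sym *arena_new(SymArena *arena)
{
    if (arena->used >= ARENA_SIZE) {
        return NULL;
    }
    Sym *s = &arena->slots[arena->used++];
    s->flags = 0;
    return s;
}

int main(void)
{
    SymArena arena = { .used = 0 };
    int allocated = 0;
    while (arena_new(&arena) != NULL) {
        allocated++;
    }
    printf("allocated %d symbols before running out\n", allocated);
    return 0;
}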
static inline _Py_UOpsSymType* sym_new_unknown(_Py_UOpsAbstractInterpContext *ctx);

// 0 on success, -1 on error.
static _Py_UOpsAbstractFrame *
ctx_frame_new(
    _Py_UOpsAbstractInterpContext *ctx,
    PyCodeObject *co,
    _Py_UOpsSymType **localsplus_start,
    int n_locals_already_filled,
    int curr_stackentries
)
{
    assert(ctx->curr_frame_depth < MAX_ABSTRACT_FRAME_DEPTH);
    _Py_UOpsAbstractFrame *frame = &ctx->frames[ctx->curr_frame_depth];

    frame->stack_len = co->co_stacksize;
    frame->locals_len = co->co_nlocalsplus;

    frame->locals = localsplus_start;
    frame->stack = frame->locals + co->co_nlocalsplus;
    frame->stack_pointer = frame->stack + curr_stackentries;
    ctx->n_consumed = localsplus_start + (co->co_nlocalsplus + co->co_stacksize);
    if (ctx->n_consumed >= ctx->limit) {
        return NULL;
    }

    // Initialize with the initial state of all local variables
    for (int i = n_locals_already_filled; i < co->co_nlocalsplus; i++) {
        _Py_UOpsSymType *local = sym_new_unknown(ctx);
        if (local == NULL) {
            return NULL;
        }
        frame->locals[i] = local;
    }

    // Initialize the stack as well
    for (int i = 0; i < curr_stackentries; i++) {
        _Py_UOpsSymType *stackvar = sym_new_unknown(ctx);
        if (stackvar == NULL) {
            return NULL;
        }
        frame->stack[i] = stackvar;
    }

    return frame;
}
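ctx_frame_new carves each abstract frame's locals and stack out of the single flat locals_and_stack array and bumps n_consumed past them, failing when the next frame would cross limit. A simplified standalone model of that layout (hypothetical ToyCtx/ToyFrame names, fixed sizes):

#include <stdio.h>

#define REGION_SIZE 16

typedef struct {
    int *locals;   /* nlocals entries */
    int *stack;    /* stacksize entries, directly after the locals */
} ToyFrame;

typedef struct {
    int *next;     /* first unused slot, like n_consumed */
    int *limit;
    int storage[REGION_SIZE];
} ToyCtx;

/* Reserve nlocals + stacksize slots for a new frame, or report overflow. */
static int frame_new(ToyCtx *ctx, ToyFrame *frame, int nlocals, int stacksize)
{
    if (ctx->next + nlocals + stacksize > ctx->limit) {
        return -1;   /* out of abstract-interpreter space */
    }
    frame->locals = ctx->next;
    frame->stack = frame->locals + nlocals;
    ctx->next += nlocals + stacksize;
    return 0;
}

int main(void)
{
    ToyCtx ctx;
    ctx.next = ctx.storage;
    ctx.limit = ctx.storage + REGION_SIZE;

    ToyFrame outer, inner;
    printf("outer: %d\n", frame_new(&ctx, &outer, 4, 4));  /* fits */
    printf("inner: %d\n", frame_new(&ctx, &inner, 4, 4));  /* fits */
    printf("third: %d\n", frame_new(&ctx, &inner, 4, 8));  /* overflows */
    return 0;
}

Popping a frame then only has to rewind the bump pointer, which is what ctx_frame_pop does below by resetting n_consumed to the popped frame's locals.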
static void
abstractcontext_fini(_Py_UOpsAbstractInterpContext *ctx)
{
    if (ctx == NULL) {
        return;
    }
    ctx->curr_frame_depth = 0;
    int tys = ctx->t_arena.ty_curr_number;
    for (int i = 0; i < tys; i++) {
        Py_CLEAR(ctx->t_arena.arena[i].const_val);
    }
}

static int
abstractcontext_init(
    _Py_UOpsAbstractInterpContext *ctx,
    PyCodeObject *co,
    int curr_stacklen,
    int ir_entries
)
{
    ctx->limit = ctx->locals_and_stack + MAX_ABSTRACT_INTERP_SIZE;
    ctx->n_consumed = ctx->locals_and_stack;
#ifdef Py_DEBUG // Aids debugging a little. There should never be NULL in the abstract interpreter.
    for (int i = 0 ; i < MAX_ABSTRACT_INTERP_SIZE; i++) {
        ctx->locals_and_stack[i] = NULL;
    }
#endif

    // Setup the arena for sym expressions.
    ctx->t_arena.ty_curr_number = 0;
    ctx->t_arena.ty_max_number = TY_ARENA_SIZE;

    // Frame setup
    ctx->curr_frame_depth = 0;
    _Py_UOpsAbstractFrame *frame = ctx_frame_new(ctx, co, ctx->n_consumed, 0, curr_stacklen);
    if (frame == NULL) {
        return -1;
    }
    ctx->curr_frame_depth++;
    ctx->frame = frame;
    return 0;
}


static int
ctx_frame_pop(
    _Py_UOpsAbstractInterpContext *ctx
)
{
    _Py_UOpsAbstractFrame *frame = ctx->frame;

    ctx->n_consumed = frame->locals;
    ctx->curr_frame_depth--;
    assert(ctx->curr_frame_depth >= 1);
    ctx->frame = &ctx->frames[ctx->curr_frame_depth - 1];

    return 0;
}


// Takes a borrowed reference to const_val, turns that into a strong reference.
static _Py_UOpsSymType*
sym_new(_Py_UOpsAbstractInterpContext *ctx,
        PyObject *const_val)
{
    _Py_UOpsSymType *self = &ctx->t_arena.arena[ctx->t_arena.ty_curr_number];
    if (ctx->t_arena.ty_curr_number >= ctx->t_arena.ty_max_number) {
        OPT_STAT_INC(optimizer_failure_reason_no_memory);
        DPRINTF(1, "out of space for symbolic expression type\n");
        return NULL;
    }
    ctx->t_arena.ty_curr_number++;
    self->const_val = NULL;
    self->typ = NULL;
    self->flags = 0;

    if (const_val != NULL) {
        self->const_val = Py_NewRef(const_val);
    }

    return self;
}

static inline void
sym_set_flag(_Py_UOpsSymType *sym, int flag)
{
    sym->flags |= flag;
}

static inline void
sym_clear_flag(_Py_UOpsSymType *sym, int flag)
{
    sym->flags &= (~flag);
}

static inline bool
sym_has_flag(_Py_UOpsSymType *sym, int flag)
{
    return (sym->flags & flag) != 0;
}

static inline bool
sym_is_known(_Py_UOpsSymType *sym)
{
    return sym_has_flag(sym, KNOWN);
}

static inline bool
sym_is_not_null(_Py_UOpsSymType *sym)
{
    return (sym->flags & (IS_NULL | NOT_NULL)) == NOT_NULL;
}

static inline bool
sym_is_null(_Py_UOpsSymType *sym)
{
    return (sym->flags & (IS_NULL | NOT_NULL)) == IS_NULL;
}

static inline void
sym_set_type(_Py_UOpsSymType *sym, PyTypeObject *tp)
{
    assert(PyType_Check(tp));
    sym->typ = tp;
    sym_set_flag(sym, KNOWN);
    sym_set_flag(sym, NOT_NULL);
}

static inline void
sym_set_null(_Py_UOpsSymType *sym)
{
    sym_set_flag(sym, IS_NULL);
    sym_set_flag(sym, KNOWN);
}


static inline _Py_UOpsSymType*
sym_new_unknown(_Py_UOpsAbstractInterpContext *ctx)
{
    return sym_new(ctx, NULL);
}

static inline _Py_UOpsSymType*
sym_new_known_notnull(_Py_UOpsAbstractInterpContext *ctx)
{
    _Py_UOpsSymType *res = sym_new_unknown(ctx);
    if (res == NULL) {
        return NULL;
    }
    sym_set_flag(res, NOT_NULL);
    return res;
}

static inline _Py_UOpsSymType*
sym_new_known_type(_Py_UOpsAbstractInterpContext *ctx,
                   PyTypeObject *typ)
{
    _Py_UOpsSymType *res = sym_new(ctx, NULL);
    if (res == NULL) {
        return NULL;
    }
    sym_set_type(res, typ);
    return res;
}

// Takes a borrowed reference to const_val.
static inline _Py_UOpsSymType*
sym_new_const(_Py_UOpsAbstractInterpContext *ctx, PyObject *const_val)
{
    assert(const_val != NULL);
    _Py_UOpsSymType *temp = sym_new(
        ctx,
        const_val
    );
    if (temp == NULL) {
        return NULL;
    }
    sym_set_type(temp, Py_TYPE(const_val));
    sym_set_flag(temp, TRUE_CONST);
    sym_set_flag(temp, KNOWN);
    sym_set_flag(temp, NOT_NULL);
    return temp;
}

static _Py_UOpsSymType*
sym_new_null(_Py_UOpsAbstractInterpContext *ctx)
{
    _Py_UOpsSymType *null_sym = sym_new_unknown(ctx);
    if (null_sym == NULL) {
        return NULL;
    }
    sym_set_null(null_sym);
    return null_sym;
}


static inline bool
sym_matches_type(_Py_UOpsSymType *sym, PyTypeObject *typ)
{
    assert(typ == NULL || PyType_Check(typ));
    if (!sym_has_flag(sym, KNOWN)) {
        return false;
    }
    return sym->typ == typ;
}
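The sym_new_* constructors form a ladder of increasing knowledge: unknown, then known-not-NULL, then known type, then known constant, and each step only ever sets more flags. The toy below mirrors that ladder with a string standing in for PyTypeObject* and an int for the constant payload (illustrative only, not the CPython API):

#include <stdbool.h>
#include <stdio.h>

#define KNOWN    (1 << 0)
#define NOT_NULL (1 << 3)

typedef struct {
    int flags;
    const char *typ;     /* stand-in for PyTypeObject* */
    bool has_const;
    int const_val;       /* stand-in for PyObject *const_val */
} ToySym;

static void set_type(ToySym *s, const char *typ)
{
    s->typ = typ;
    s->flags |= KNOWN | NOT_NULL;   /* knowing the type implies a real object */
}

static bool matches_type(const ToySym *s, const char *typ)
{
    /* Without the KNOWN bit we cannot claim anything about the type. */
    return (s->flags & KNOWN) && s->typ == typ;
}

int main(void)
{
    static const char *INT_TYPE = "int";

    ToySym unknown = {0};
    ToySym constant = {0};
    set_type(&constant, INT_TYPE);
    constant.has_const = true;
    constant.const_val = 42;

    printf("unknown matches int:  %d\n", matches_type(&unknown, INT_TYPE));
    printf("constant matches int: %d\n", matches_type(&constant, INT_TYPE));
    return 0;
}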
static inline bool
op_is_end(uint32_t opcode)
{
    return opcode == _EXIT_TRACE || opcode == _JUMP_TO_TOP;
}

static int
get_mutations(PyObject* dict) {
@@ -199,14 +555,138 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer,
                builtins = func->func_builtins;
                break;
            }
            case _JUMP_TO_TOP:
            case _EXIT_TRACE:
                return 1;
            default:
                if (op_is_end(opcode)) {
                    return 1;
                }
                break;
        }
    }
    return 0;
}


#define STACK_LEVEL() ((int)(stack_pointer - ctx->frame->stack))

#define GETLOCAL(idx) ((ctx->frame->locals[idx]))

#define REPLACE_OP(INST, OP, ARG, OPERAND) \
    INST->opcode = OP;                     \
    INST->oparg = ARG;                     \
    INST->operand = OPERAND;

#define _LOAD_ATTR_NOT_NULL \
    do {                                   \
        attr = sym_new_known_notnull(ctx); \
        if (attr == NULL) {                \
            goto error;                    \
        }                                  \
        null = sym_new_null(ctx);          \
        if (null == NULL) {                \
            goto error;                    \
        }                                  \
    } while (0);
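REPLACE_OP rewrites an instruction in place in the trace buffer; this is how a pass turns an instruction the analysis has proven unnecessary into something cheaper. A toy standalone version of the same in-place rewrite (TOY_* names invented for the example):

#include <stdio.h>

typedef enum { TOY_GUARD_INT, TOY_ADD_INT, TOY_NOP } ToyOpcode;
typedef struct { ToyOpcode opcode; int oparg; long operand; } ToyInst;

/* Same shape as REPLACE_OP: overwrite opcode/oparg/operand in place. */
#define TOY_REPLACE_OP(INST, OP, ARG, OPERAND) \
    do {                                       \
        (INST)->opcode = (OP);                 \
        (INST)->oparg = (ARG);                 \
        (INST)->operand = (OPERAND);           \
    } while (0)

int main(void)
{
    ToyInst trace[] = { {TOY_GUARD_INT, 0, 0}, {TOY_ADD_INT, 0, 0} };
    int operand_is_known_int = 1;   /* pretend the analysis proved this */

    if (operand_is_known_int) {
        /* The guard can never fail, so it is rewritten to a NOP. */
        TOY_REPLACE_OP(&trace[0], TOY_NOP, 0, 0);
    }
    printf("first opcode is now %d (TOY_NOP=%d)\n", trace[0].opcode, TOY_NOP);
    return 0;
}

The do { } while (0) wrapper is only a hygiene choice for the toy macro; REPLACE_OP in the diff expands to three bare statements.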
/* 1 for success, 0 for not ready, cannot error at the moment. */
static int
uop_redundancy_eliminator(
    PyCodeObject *co,
    _PyUOpInstruction *trace,
    int trace_len,
    int curr_stacklen
)
{

    _Py_UOpsAbstractInterpContext context;
    _Py_UOpsAbstractInterpContext *ctx = &context;

    if (abstractcontext_init(
        ctx,
        co, curr_stacklen,
        trace_len) < 0) {
        goto out_of_space;
    }

    for (_PyUOpInstruction *this_instr = trace;
         this_instr < trace + trace_len && !op_is_end(this_instr->opcode);
         this_instr++) {

        int oparg = this_instr->oparg;
        uint32_t opcode = this_instr->opcode;

        _Py_UOpsSymType **stack_pointer = ctx->frame->stack_pointer;

        DPRINTF(3, "Abstract interpreting %s:%d ",
                _PyOpcode_uop_name[opcode],
                oparg);
        switch (opcode) {
#include "tier2_redundancy_eliminator_cases.c.h"

            default:
                DPRINTF(1, "Unknown opcode in abstract interpreter\n");
                Py_UNREACHABLE();
        }
        assert(ctx->frame != NULL);
        DPRINTF(3, " stack_level %d\n", STACK_LEVEL());
        ctx->frame->stack_pointer = stack_pointer;
        assert(STACK_LEVEL() >= 0);
    }

    abstractcontext_fini(ctx);
    return 1;

out_of_space:
    DPRINTF(1, "Out of space in abstract interpreter\n");
    abstractcontext_fini(ctx);
    return 0;

error:
    DPRINTF(1, "Encountered error in abstract interpreter\n");
    abstractcontext_fini(ctx);
    return 0;
}
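The switch inside uop_redundancy_eliminator takes its cases from a generated header (tier2_redundancy_eliminator_cases.c.h), so the driver loop stays small while the per-opcode transfer functions live elsewhere. The single-file sketch below approximates that "cases come from a table" shape with an X-macro instead of a generated include; every name in it is made up:

#include <stdio.h>

/* The real driver #includes a generated file of switch cases; an X-macro
 * gives the same shape in a single file. */
#define TOY_OPCODES(X) \
    X(TOY_LOAD, 1)     \
    X(TOY_STORE, -1)   \
    X(TOY_NOP, 0)

typedef enum {
#define AS_ENUM(name, effect) name,
    TOY_OPCODES(AS_ENUM)
#undef AS_ENUM
} ToyOpcode;

/* Each generated case adjusts the abstract stack, much as the generated
 * cases in the diff adjust stack_pointer. */
static int stack_effect(ToyOpcode op)
{
    switch (op) {
#define AS_CASE(name, effect) case name: return effect;
        TOY_OPCODES(AS_CASE)
#undef AS_CASE
    }
    return 0;
}

int main(void)
{
    ToyOpcode trace[] = { TOY_LOAD, TOY_LOAD, TOY_STORE, TOY_NOP };
    int depth = 0;
    for (int i = 0; i < 4; i++) {
        depth += stack_effect(trace[i]);
    }
    printf("final abstract stack depth: %d\n", depth);
    return 0;
}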
static void
remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
{
    int last_set_ip = -1;
    bool maybe_invalid = false;
    for (int pc = 0; pc < buffer_size; pc++) {
        int opcode = buffer[pc].opcode;
        if (opcode == _SET_IP) {
            buffer[pc].opcode = NOP;
            last_set_ip = pc;
        }
        else if (opcode == _CHECK_VALIDITY) {
            if (maybe_invalid) {
                maybe_invalid = false;
            }
            else {
                buffer[pc].opcode = NOP;
            }
        }
        else if (op_is_end(opcode)) {
            break;
        }
        else {
            if (_PyUop_Flags[opcode] & HAS_ESCAPES_FLAG) {
                maybe_invalid = true;
                if (last_set_ip >= 0) {
                    buffer[last_set_ip].opcode = _SET_IP;
                }
            }
            if ((_PyUop_Flags[opcode] & HAS_ERROR_FLAG) || opcode == _PUSH_FRAME) {
                if (last_set_ip >= 0) {
                    buffer[last_set_ip].opcode = _SET_IP;
                }
            }
        }
    }
}
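remove_unneeded_uops speculatively turns every _SET_IP into NOP and drops _CHECK_VALIDITY checks that nothing could have invalidated, then re-materializes the most recent _SET_IP whenever a later uop can escape, raise, or push a frame and therefore needs an accurate instruction pointer. A compact standalone model of that resurrection trick, using toy opcodes instead of the real flag tables:

#include <stdbool.h>
#include <stdio.h>

typedef enum { T_SET_IP, T_CHECK_VALIDITY, T_PURE, T_ESCAPING, T_NOP, T_EXIT } ToyOp;

int main(void)
{
    ToyOp buf[] = { T_SET_IP, T_CHECK_VALIDITY, T_PURE,
                    T_SET_IP, T_CHECK_VALIDITY, T_ESCAPING, T_EXIT };
    int n = (int)(sizeof(buf) / sizeof(buf[0]));

    int last_set_ip = -1;
    bool maybe_invalid = false;
    for (int pc = 0; pc < n; pc++) {
        ToyOp op = buf[pc];
        if (op == T_SET_IP) {
            buf[pc] = T_NOP;           /* assume it is not needed... */
            last_set_ip = pc;
        }
        else if (op == T_CHECK_VALIDITY) {
            if (maybe_invalid) {
                maybe_invalid = false;  /* keep one check after an escape */
            }
            else {
                buf[pc] = T_NOP;
            }
        }
        else if (op == T_EXIT) {
            break;
        }
        else if (op == T_ESCAPING) {
            maybe_invalid = true;
            if (last_set_ip >= 0) {
                buf[last_set_ip] = T_SET_IP;  /* ...unless something escapes */
            }
        }
    }
    for (int pc = 0; pc < n; pc++) {
        printf("%d ", buf[pc]);
    }
    printf("\n");
    return 0;
}

In this toy run the first _SET_IP and both redundant checks stay NOPed, while the second _SET_IP is restored because the escaping uop follows it.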
static void
peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_size)
{
@@ -250,44 +730,9 @@ peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_size)
    }
}

static void
remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
{
    int last_set_ip = -1;
    bool maybe_invalid = false;
    for (int pc = 0; pc < buffer_size; pc++) {
        int opcode = buffer[pc].opcode;
        if (opcode == _SET_IP) {
            buffer[pc].opcode = NOP;
            last_set_ip = pc;
        }
        else if (opcode == _CHECK_VALIDITY) {
            if (maybe_invalid) {
                maybe_invalid = false;
            }
            else {
                buffer[pc].opcode = NOP;
            }
        }
        else if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE) {
            break;
        }
        else {
            if (_PyUop_Flags[opcode] & HAS_ESCAPES_FLAG) {
                maybe_invalid = true;
                if (last_set_ip >= 0) {
                    buffer[last_set_ip].opcode = _SET_IP;
                }
            }
            if ((_PyUop_Flags[opcode] & HAS_ERROR_FLAG) || opcode == _PUSH_FRAME) {
                if (last_set_ip >= 0) {
                    buffer[last_set_ip].opcode = _SET_IP;
                }
            }
        }
    }
}

// 0 - failure, no error raised, just fall back to Tier 1
// -1 - failure, and raise error
// 1 - optimizer success
int
_Py_uop_analyze_and_optimize(
    _PyInterpreterFrame *frame,
@@ -297,11 +742,33 @@ _Py_uop_analyze_and_optimize(
    _PyBloomFilter *dependencies
)
{
    OPT_STAT_INC(optimizer_attempts);

    int err = remove_globals(frame, buffer, buffer_size, dependencies);
    if (err <= 0) {
        return err;
    if (err == 0) {
        goto not_ready;
    }
    if (err < 0) {
        goto error;
    }

    peephole_opt(frame, buffer, buffer_size);

    err = uop_redundancy_eliminator(
        (PyCodeObject *)frame->f_executable, buffer,
        buffer_size, curr_stacklen);

    if (err == 0) {
        goto not_ready;
    }
    assert(err == 1);

    remove_unneeded_uops(buffer, buffer_size);

    OPT_STAT_INC(optimizer_successes);
    return 1;
not_ready:
    return 0;
error:
    return -1;
}
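The return-code comments introduced above (1 optimizer success, 0 fall back to Tier 1, -1 error raised) describe how callers must treat the result. A hedged sketch of a caller branching on that tri-state (hypothetical caller, not code from this commit):

#include <stdio.h>

/* Stand-in for _Py_uop_analyze_and_optimize's return convention. */
static int analyze_and_optimize(int simulate)
{
    return simulate;   /* 1 = success, 0 = not ready, -1 = error raised */
}

static int handle(int simulate)
{
    int err = analyze_and_optimize(simulate);
    if (err < 0) {
        return -1;                 /* propagate the already-raised error */
    }
    if (err == 0) {
        printf("fall back to Tier 1\n");
        return 0;
    }
    printf("use the optimized trace\n");
    return 1;
}

int main(void)
{
    handle(1);
    handle(0);
    handle(-1);
    return 0;
}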