Mirror of https://github.com/python/cpython.git (synced 2025-12-23 09:19:18 +00:00)
Uses three registers to cache values at the top of the evaluation stack. This significantly reduces memory traffic for smaller, more common uops.
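
The change summarized above concerns the bytecode interpreter's evaluation stack; the mirrored file that follows is only the statistics collection and reporting side (Py_STATS). As a rough illustration of the idea, with invented names rather than CPython's actual interpreter code, caching the top-of-stack slots in C locals lets small, common uops run without loads and stores through the in-memory stack:

/* Illustrative sketch only -- invented names, not CPython's interpreter code. */
#include <stdint.h>

typedef intptr_t StackRef;

/* Conventional form: every uop loads and stores through the in-memory stack. */
static StackRef
binary_add_via_memory(StackRef *stack_pointer)
{
    StackRef right = stack_pointer[-1];
    StackRef left = stack_pointer[-2];
    StackRef res = left + right;
    stack_pointer[-2] = res;   /* write-back: memory traffic on every uop */
    return res;
}

/* Register-cached form: the hottest stack slots arrive in (and the result
 * stays in) C locals that the compiler can keep in machine registers, so
 * spills to the real stack only happen at the uops that need them. */
static StackRef
binary_add_cached(StackRef tos0, StackRef tos1)
{
    return tos1 + tos0;
}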
819 lines · 29 KiB · C
#include "Python.h"
|
|
|
|
#include "pycore_opcode_metadata.h" // _PyOpcode_Caches
|
|
#include "pycore_pyatomic_ft_wrappers.h"
|
|
#include "pycore_pylifecycle.h" // _PyOS_URandomNonblock()
|
|
#include "pycore_tstate.h"
|
|
#include "pycore_initconfig.h" // _PyStatus_OK()
|
|
#include "pycore_uop_metadata.h" // _PyOpcode_uop_name
|
|
#include "pycore_uop_ids.h" // MAX_UOP_REGS_ID
|
|
#include "pycore_pystate.h" // _PyThreadState_GET()
|
|
#include "pycore_runtime.h" // NUM_GENERATIONS
|
|
|
|
#include <stdlib.h> // rand()
|
|
|
|
#ifdef Py_STATS
|
|
|
|
PyStats *
|
|
_PyStats_GetLocal(void)
|
|
{
|
|
PyThreadState *tstate = _PyThreadState_GET();
|
|
if (tstate) {
|
|
return tstate->pystats;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
#ifdef Py_GIL_DISABLED
|
|
#define STATS_LOCK(interp) PyMutex_Lock(&interp->pystats_mutex)
|
|
#define STATS_UNLOCK(interp) PyMutex_Unlock(&interp->pystats_mutex)
|
|
#else
|
|
#define STATS_LOCK(interp)
|
|
#define STATS_UNLOCK(interp)
|
|
#endif
|
|
|
|
|
|
#if PYSTATS_MAX_UOP_ID < MAX_UOP_REGS_ID
|
|
#error "Not enough space allocated for pystats. Increase PYSTATS_MAX_UOP_ID to at least MAX_UOP_REGS_ID"
|
|
#endif
|
|
|
|
#define ADD_STAT_TO_DICT(res, field) \
|
|
do { \
|
|
PyObject *val = PyLong_FromUnsignedLongLong(stats->field); \
|
|
if (val == NULL) { \
|
|
Py_DECREF(res); \
|
|
return NULL; \
|
|
} \
|
|
if (PyDict_SetItemString(res, #field, val) == -1) { \
|
|
Py_DECREF(res); \
|
|
Py_DECREF(val); \
|
|
return NULL; \
|
|
} \
|
|
Py_DECREF(val); \
|
|
} while(0);
|
|
|
|
static PyObject*
|
|
stats_to_dict(SpecializationStats *stats)
|
|
{
|
|
PyObject *res = PyDict_New();
|
|
if (res == NULL) {
|
|
return NULL;
|
|
}
|
|
ADD_STAT_TO_DICT(res, success);
|
|
ADD_STAT_TO_DICT(res, failure);
|
|
ADD_STAT_TO_DICT(res, hit);
|
|
ADD_STAT_TO_DICT(res, deferred);
|
|
ADD_STAT_TO_DICT(res, miss);
|
|
ADD_STAT_TO_DICT(res, deopt);
|
|
PyObject *failure_kinds = PyTuple_New(SPECIALIZATION_FAILURE_KINDS);
|
|
if (failure_kinds == NULL) {
|
|
Py_DECREF(res);
|
|
return NULL;
|
|
}
|
|
for (int i = 0; i < SPECIALIZATION_FAILURE_KINDS; i++) {
|
|
PyObject *stat = PyLong_FromUnsignedLongLong(stats->failure_kinds[i]);
|
|
if (stat == NULL) {
|
|
Py_DECREF(res);
|
|
Py_DECREF(failure_kinds);
|
|
return NULL;
|
|
}
|
|
PyTuple_SET_ITEM(failure_kinds, i, stat);
|
|
}
|
|
if (PyDict_SetItemString(res, "failure_kinds", failure_kinds)) {
|
|
Py_DECREF(res);
|
|
Py_DECREF(failure_kinds);
|
|
return NULL;
|
|
}
|
|
Py_DECREF(failure_kinds);
|
|
return res;
|
|
}
|
|
#undef ADD_STAT_TO_DICT
|
|
|
|
static int
|
|
add_stat_dict(
|
|
PyStats *src,
|
|
PyObject *res,
|
|
int opcode,
|
|
const char *name) {
|
|
|
|
SpecializationStats *stats = &src->opcode_stats[opcode].specialization;
|
|
PyObject *d = stats_to_dict(stats);
|
|
if (d == NULL) {
|
|
return -1;
|
|
}
|
|
int err = PyDict_SetItemString(res, name, d);
|
|
Py_DECREF(d);
|
|
return err;
|
|
}
|
|
|
|
PyObject*
|
|
_Py_GetSpecializationStats(void) {
|
|
PyThreadState *tstate = _PyThreadState_GET();
|
|
PyStats *src = FT_ATOMIC_LOAD_PTR_RELAXED(tstate->interp->pystats_struct);
|
|
if (src == NULL) {
|
|
Py_RETURN_NONE;
|
|
}
|
|
PyObject *stats = PyDict_New();
|
|
if (stats == NULL) {
|
|
return NULL;
|
|
}
|
|
int err = 0;
|
|
err += add_stat_dict(src, stats, CONTAINS_OP, "contains_op");
|
|
err += add_stat_dict(src, stats, LOAD_SUPER_ATTR, "load_super_attr");
|
|
err += add_stat_dict(src, stats, LOAD_ATTR, "load_attr");
|
|
err += add_stat_dict(src, stats, LOAD_GLOBAL, "load_global");
|
|
err += add_stat_dict(src, stats, STORE_SUBSCR, "store_subscr");
|
|
err += add_stat_dict(src, stats, STORE_ATTR, "store_attr");
|
|
err += add_stat_dict(src, stats, JUMP_BACKWARD, "jump_backward");
|
|
err += add_stat_dict(src, stats, CALL, "call");
|
|
err += add_stat_dict(src, stats, CALL_KW, "call_kw");
|
|
err += add_stat_dict(src, stats, BINARY_OP, "binary_op");
|
|
err += add_stat_dict(src, stats, COMPARE_OP, "compare_op");
|
|
err += add_stat_dict(src, stats, UNPACK_SEQUENCE, "unpack_sequence");
|
|
err += add_stat_dict(src, stats, FOR_ITER, "for_iter");
|
|
err += add_stat_dict(src, stats, TO_BOOL, "to_bool");
|
|
err += add_stat_dict(src, stats, SEND, "send");
|
|
if (err < 0) {
|
|
Py_DECREF(stats);
|
|
return NULL;
|
|
}
|
|
return stats;
|
|
}
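
/* Note: the mapping returned by _Py_GetSpecializationStats() above is keyed
 * by instruction name ("load_attr", "call", ...); each value is a dict of the
 * SpecializationStats counters (success, failure, hit, deferred, miss, deopt)
 * plus a "failure_kinds" tuple of per-kind failure counts.  None is returned
 * when stats collection has never been enabled for this interpreter. */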

#define PRINT_STAT(i, field) \
    if (stats[i].field) { \
        fprintf(out, " opcode[%s]." #field " : %" PRIu64 "\n", _PyOpcode_OpName[i], stats[i].field); \
    }

static void
print_spec_stats(FILE *out, OpcodeStats *stats)
{
    /* Mark some opcodes as specializable for stats,
     * even though we don't specialize them yet. */
    fprintf(out, "opcode[BINARY_SLICE].specializable : 1\n");
    fprintf(out, "opcode[STORE_SLICE].specializable : 1\n");
    fprintf(out, "opcode[GET_ITER].specializable : 1\n");
    for (int i = 0; i < 256; i++) {
        if (_PyOpcode_Caches[i]) {
            /* Ignore jumps as they cannot be specialized */
            switch (i) {
                case POP_JUMP_IF_FALSE:
                case POP_JUMP_IF_TRUE:
                case POP_JUMP_IF_NONE:
                case POP_JUMP_IF_NOT_NONE:
                case JUMP_BACKWARD:
                    break;
                default:
                    fprintf(out, "opcode[%s].specializable : 1\n", _PyOpcode_OpName[i]);
            }
        }
        PRINT_STAT(i, specialization.success);
        PRINT_STAT(i, specialization.failure);
        PRINT_STAT(i, specialization.hit);
        PRINT_STAT(i, specialization.deferred);
        PRINT_STAT(i, specialization.miss);
        PRINT_STAT(i, specialization.deopt);
        PRINT_STAT(i, execution_count);
        for (int j = 0; j < SPECIALIZATION_FAILURE_KINDS; j++) {
            uint64_t val = stats[i].specialization.failure_kinds[j];
            if (val) {
                fprintf(out, " opcode[%s].specialization.failure_kinds[%d] : %"
                        PRIu64 "\n", _PyOpcode_OpName[i], j, val);
            }
        }
        for (int j = 0; j < 256; j++) {
            if (stats[i].pair_count[j]) {
                fprintf(out, "opcode[%s].pair_count[%s] : %" PRIu64 "\n",
                        _PyOpcode_OpName[i], _PyOpcode_OpName[j], stats[i].pair_count[j]);
            }
        }
    }
}
#undef PRINT_STAT


static void
print_call_stats(FILE *out, CallStats *stats)
{
    fprintf(out, "Calls to PyEval_EvalDefault: %" PRIu64 "\n", stats->pyeval_calls);
    fprintf(out, "Calls to Python functions inlined: %" PRIu64 "\n", stats->inlined_py_calls);
    fprintf(out, "Frames pushed: %" PRIu64 "\n", stats->frames_pushed);
    fprintf(out, "Frame objects created: %" PRIu64 "\n", stats->frame_objects_created);
    for (int i = 0; i < EVAL_CALL_KINDS; i++) {
        fprintf(out, "Calls via PyEval_EvalFrame[%d] : %" PRIu64 "\n", i, stats->eval_calls[i]);
    }
}

static void
print_object_stats(FILE *out, ObjectStats *stats)
{
    fprintf(out, "Object allocations from freelist: %" PRIu64 "\n", stats->from_freelist);
    fprintf(out, "Object frees to freelist: %" PRIu64 "\n", stats->to_freelist);
    fprintf(out, "Object allocations: %" PRIu64 "\n", stats->allocations);
    fprintf(out, "Object allocations to 512 bytes: %" PRIu64 "\n", stats->allocations512);
    fprintf(out, "Object allocations to 4 kbytes: %" PRIu64 "\n", stats->allocations4k);
    fprintf(out, "Object allocations over 4 kbytes: %" PRIu64 "\n", stats->allocations_big);
    fprintf(out, "Object frees: %" PRIu64 "\n", stats->frees);
    fprintf(out, "Object inline values: %" PRIu64 "\n", stats->inline_values);
    fprintf(out, "Object interpreter mortal increfs: %" PRIu64 "\n", stats->interpreter_increfs);
    fprintf(out, "Object interpreter mortal decrefs: %" PRIu64 "\n", stats->interpreter_decrefs);
    fprintf(out, "Object mortal increfs: %" PRIu64 "\n", stats->increfs);
    fprintf(out, "Object mortal decrefs: %" PRIu64 "\n", stats->decrefs);
    fprintf(out, "Object interpreter immortal increfs: %" PRIu64 "\n", stats->interpreter_immortal_increfs);
    fprintf(out, "Object interpreter immortal decrefs: %" PRIu64 "\n", stats->interpreter_immortal_decrefs);
    fprintf(out, "Object immortal increfs: %" PRIu64 "\n", stats->immortal_increfs);
    fprintf(out, "Object immortal decrefs: %" PRIu64 "\n", stats->immortal_decrefs);
    fprintf(out, "Object materialize dict (on request): %" PRIu64 "\n", stats->dict_materialized_on_request);
    fprintf(out, "Object materialize dict (new key): %" PRIu64 "\n", stats->dict_materialized_new_key);
    fprintf(out, "Object materialize dict (too big): %" PRIu64 "\n", stats->dict_materialized_too_big);
    fprintf(out, "Object materialize dict (str subclass): %" PRIu64 "\n", stats->dict_materialized_str_subclass);
    fprintf(out, "Object method cache hits: %" PRIu64 "\n", stats->type_cache_hits);
    fprintf(out, "Object method cache misses: %" PRIu64 "\n", stats->type_cache_misses);
    fprintf(out, "Object method cache collisions: %" PRIu64 "\n", stats->type_cache_collisions);
    fprintf(out, "Object method cache dunder hits: %" PRIu64 "\n", stats->type_cache_dunder_hits);
    fprintf(out, "Object method cache dunder misses: %" PRIu64 "\n", stats->type_cache_dunder_misses);
}

static void
print_gc_stats(FILE *out, GCStats *stats)
{
    for (int i = 0; i < NUM_GENERATIONS; i++) {
        fprintf(out, "GC[%d] collections: %" PRIu64 "\n", i, stats[i].collections);
        fprintf(out, "GC[%d] object visits: %" PRIu64 "\n", i, stats[i].object_visits);
        fprintf(out, "GC[%d] objects collected: %" PRIu64 "\n", i, stats[i].objects_collected);
        fprintf(out, "GC[%d] objects reachable from roots: %" PRIu64 "\n", i, stats[i].objects_transitively_reachable);
        fprintf(out, "GC[%d] objects not reachable from roots: %" PRIu64 "\n", i, stats[i].objects_not_transitively_reachable);
    }
}

#ifdef _Py_TIER2
static void
print_histogram(FILE *out, const char *name, uint64_t hist[_Py_UOP_HIST_SIZE])
{
    for (int i = 0; i < _Py_UOP_HIST_SIZE; i++) {
        fprintf(out, "%s[%" PRIu64"]: %" PRIu64 "\n", name, (uint64_t)1 << i, hist[i]);
    }
}

extern const char *_PyUOpName(int index);

static void
print_optimization_stats(FILE *out, OptimizationStats *stats)
{
    fprintf(out, "Optimization attempts: %" PRIu64 "\n", stats->attempts);
    fprintf(out, "Optimization traces created: %" PRIu64 "\n", stats->traces_created);
    fprintf(out, "Optimization traces executed: %" PRIu64 "\n", stats->traces_executed);
    fprintf(out, "Optimization uops executed: %" PRIu64 "\n", stats->uops_executed);
    fprintf(out, "Optimization trace stack overflow: %" PRIu64 "\n", stats->trace_stack_overflow);
    fprintf(out, "Optimization trace stack underflow: %" PRIu64 "\n", stats->trace_stack_underflow);
    fprintf(out, "Optimization trace too long: %" PRIu64 "\n", stats->trace_too_long);
    fprintf(out, "Optimization trace too short: %" PRIu64 "\n", stats->trace_too_short);
    fprintf(out, "Optimization inner loop: %" PRIu64 "\n", stats->inner_loop);
    fprintf(out, "Optimization recursive call: %" PRIu64 "\n", stats->recursive_call);
    fprintf(out, "Optimization low confidence: %" PRIu64 "\n", stats->low_confidence);
    fprintf(out, "Optimization unknown callee: %" PRIu64 "\n", stats->unknown_callee);
    fprintf(out, "Executors invalidated: %" PRIu64 "\n", stats->executors_invalidated);

    print_histogram(out, "Trace length", stats->trace_length_hist);
    print_histogram(out, "Trace run length", stats->trace_run_length_hist);
    print_histogram(out, "Optimized trace length", stats->optimized_trace_length_hist);

    fprintf(out, "Optimization optimizer attempts: %" PRIu64 "\n", stats->optimizer_attempts);
    fprintf(out, "Optimization optimizer successes: %" PRIu64 "\n", stats->optimizer_successes);
    fprintf(out, "Optimization optimizer failure no memory: %" PRIu64 "\n",
            stats->optimizer_failure_reason_no_memory);
    fprintf(out, "Optimizer remove globals builtins changed: %" PRIu64 "\n", stats->remove_globals_builtins_changed);
    fprintf(out, "Optimizer remove globals incorrect keys: %" PRIu64 "\n", stats->remove_globals_incorrect_keys);
    for (int i = 0; i <= MAX_UOP_REGS_ID; i++) {
        if (stats->opcode[i].execution_count) {
            fprintf(out, "uops[%s].execution_count : %" PRIu64 "\n", _PyUOpName(i), stats->opcode[i].execution_count);
        }
        if (stats->opcode[i].miss) {
            fprintf(out, "uops[%s].specialization.miss : %" PRIu64 "\n", _PyUOpName(i), stats->opcode[i].miss);
        }
    }
    for (int i = 0; i < 256; i++) {
        if (stats->unsupported_opcode[i]) {
            fprintf(
                out,
                "unsupported_opcode[%s].count : %" PRIu64 "\n",
                _PyOpcode_OpName[i],
                stats->unsupported_opcode[i]
            );
        }
    }

    for (int i = 1; i <= MAX_UOP_REGS_ID; i++) {
        for (int j = 1; j <= MAX_UOP_REGS_ID; j++) {
            if (stats->opcode[i].pair_count[j]) {
                fprintf(out, "uop[%s].pair_count[%s] : %" PRIu64 "\n",
                        _PyOpcode_uop_name[i], _PyOpcode_uop_name[j], stats->opcode[i].pair_count[j]);
            }
        }
    }
    for (int i = 0; i < MAX_UOP_REGS_ID; i++) {
        if (stats->error_in_opcode[i]) {
            fprintf(
                out,
                "error_in_opcode[%s].count : %" PRIu64 "\n",
                _PyUOpName(i),
                stats->error_in_opcode[i]
            );
        }
    }
    fprintf(out, "JIT total memory size: %" PRIu64 "\n", stats->jit_total_memory_size);
    fprintf(out, "JIT code size: %" PRIu64 "\n", stats->jit_code_size);
    fprintf(out, "JIT trampoline size: %" PRIu64 "\n", stats->jit_trampoline_size);
    fprintf(out, "JIT data size: %" PRIu64 "\n", stats->jit_data_size);
    fprintf(out, "JIT padding size: %" PRIu64 "\n", stats->jit_padding_size);
    fprintf(out, "JIT freed memory size: %" PRIu64 "\n", stats->jit_freed_memory_size);

    print_histogram(out, "Trace total memory size", stats->trace_total_memory_hist);
}
#endif

#ifdef Py_GIL_DISABLED
static void
print_ft_stats(FILE *out, FTStats *stats)
{
    fprintf(out, "Mutex sleeps (mutex_sleeps): %" PRIu64 "\n", stats->mutex_sleeps);
    fprintf(out, "QSBR polls (qsbr_polls): %" PRIu64 "\n", stats->qsbr_polls);
    fprintf(out, "World stops (world_stops): %" PRIu64 "\n", stats->world_stops);
}
#endif

static void
print_rare_event_stats(FILE *out, RareEventStats *stats)
{
    fprintf(out, "Rare event (set_class): %" PRIu64 "\n", stats->set_class);
    fprintf(out, "Rare event (set_bases): %" PRIu64 "\n", stats->set_bases);
    fprintf(out, "Rare event (set_eval_frame_func): %" PRIu64 "\n", stats->set_eval_frame_func);
    fprintf(out, "Rare event (builtin_dict): %" PRIu64 "\n", stats->builtin_dict);
    fprintf(out, "Rare event (func_modification): %" PRIu64 "\n", stats->func_modification);
    fprintf(out, "Rare event (watched_dict_modification): %" PRIu64 "\n", stats->watched_dict_modification);
    fprintf(out, "Rare event (watched_globals_modification): %" PRIu64 "\n", stats->watched_globals_modification);
}

static void
print_stats(FILE *out, PyStats *stats)
{
    print_spec_stats(out, stats->opcode_stats);
    print_call_stats(out, &stats->call_stats);
    print_object_stats(out, &stats->object_stats);
    print_gc_stats(out, stats->gc_stats);
#ifdef _Py_TIER2
    print_optimization_stats(out, &stats->optimization_stats);
#endif
#ifdef Py_GIL_DISABLED
    print_ft_stats(out, &stats->ft_stats);
#endif
    print_rare_event_stats(out, &stats->rare_event_stats);
}

#ifdef Py_GIL_DISABLED

static void
merge_specialization_stats(SpecializationStats *dest, const SpecializationStats *src)
{
    dest->success += src->success;
    dest->failure += src->failure;
    dest->hit += src->hit;
    dest->deferred += src->deferred;
    dest->miss += src->miss;
    dest->deopt += src->deopt;
    for (int i = 0; i < SPECIALIZATION_FAILURE_KINDS; i++) {
        dest->failure_kinds[i] += src->failure_kinds[i];
    }
}

static void
merge_opcode_stats_array(OpcodeStats *dest, const OpcodeStats *src)
{
    for (int i = 0; i < 256; i++) {
        merge_specialization_stats(&dest[i].specialization, &src[i].specialization);
        dest[i].execution_count += src[i].execution_count;
        for (int j = 0; j < 256; j++) {
            dest[i].pair_count[j] += src[i].pair_count[j];
        }
    }
}

static void
merge_call_stats(CallStats *dest, const CallStats *src)
{
    dest->inlined_py_calls += src->inlined_py_calls;
    dest->pyeval_calls += src->pyeval_calls;
    dest->frames_pushed += src->frames_pushed;
    dest->frame_objects_created += src->frame_objects_created;
    for (int i = 0; i < EVAL_CALL_KINDS; i++) {
        dest->eval_calls[i] += src->eval_calls[i];
    }
}

static void
merge_object_stats(ObjectStats *dest, const ObjectStats *src)
{
    dest->increfs += src->increfs;
    dest->decrefs += src->decrefs;
    dest->interpreter_increfs += src->interpreter_increfs;
    dest->interpreter_decrefs += src->interpreter_decrefs;
    dest->immortal_increfs += src->immortal_increfs;
    dest->immortal_decrefs += src->immortal_decrefs;
    dest->interpreter_immortal_increfs += src->interpreter_immortal_increfs;
    dest->interpreter_immortal_decrefs += src->interpreter_immortal_decrefs;
    dest->allocations += src->allocations;
    dest->allocations512 += src->allocations512;
    dest->allocations4k += src->allocations4k;
    dest->allocations_big += src->allocations_big;
    dest->frees += src->frees;
    dest->to_freelist += src->to_freelist;
    dest->from_freelist += src->from_freelist;
    dest->inline_values += src->inline_values;
    dest->dict_materialized_on_request += src->dict_materialized_on_request;
    dest->dict_materialized_new_key += src->dict_materialized_new_key;
    dest->dict_materialized_too_big += src->dict_materialized_too_big;
    dest->dict_materialized_str_subclass += src->dict_materialized_str_subclass;
    dest->type_cache_hits += src->type_cache_hits;
    dest->type_cache_misses += src->type_cache_misses;
    dest->type_cache_dunder_hits += src->type_cache_dunder_hits;
    dest->type_cache_dunder_misses += src->type_cache_dunder_misses;
    dest->type_cache_collisions += src->type_cache_collisions;
    dest->object_visits += src->object_visits;
}

static void
merge_uop_stats_array(UOpStats *dest, const UOpStats *src)
{
    for (int i = 0; i <= PYSTATS_MAX_UOP_ID; i++) {
        dest[i].execution_count += src[i].execution_count;
        dest[i].miss += src[i].miss;
        for (int j = 0; j <= PYSTATS_MAX_UOP_ID; j++) {
            dest[i].pair_count[j] += src[i].pair_count[j];
        }
    }
}

static void
merge_optimization_stats(OptimizationStats *dest, const OptimizationStats *src)
{
    dest->attempts += src->attempts;
    dest->traces_created += src->traces_created;
    dest->traces_executed += src->traces_executed;
    dest->uops_executed += src->uops_executed;
    dest->trace_stack_overflow += src->trace_stack_overflow;
    dest->trace_stack_underflow += src->trace_stack_underflow;
    dest->trace_too_long += src->trace_too_long;
    dest->trace_too_short += src->trace_too_short;
    dest->inner_loop += src->inner_loop;
    dest->recursive_call += src->recursive_call;
    dest->low_confidence += src->low_confidence;
    dest->unknown_callee += src->unknown_callee;
    dest->executors_invalidated += src->executors_invalidated;
    dest->optimizer_attempts += src->optimizer_attempts;
    dest->optimizer_successes += src->optimizer_successes;
    dest->optimizer_failure_reason_no_memory += src->optimizer_failure_reason_no_memory;
    dest->remove_globals_builtins_changed += src->remove_globals_builtins_changed;
    dest->remove_globals_incorrect_keys += src->remove_globals_incorrect_keys;
    dest->jit_total_memory_size += src->jit_total_memory_size;
    dest->jit_code_size += src->jit_code_size;
    dest->jit_trampoline_size += src->jit_trampoline_size;
    dest->jit_data_size += src->jit_data_size;
    dest->jit_padding_size += src->jit_padding_size;
    dest->jit_freed_memory_size += src->jit_freed_memory_size;

    merge_uop_stats_array(dest->opcode, src->opcode);

    for (int i = 0; i < 256; i++) {
        dest->unsupported_opcode[i] += src->unsupported_opcode[i];
    }
    for (int i = 0; i < _Py_UOP_HIST_SIZE; i++) {
        dest->trace_length_hist[i] += src->trace_length_hist[i];
        dest->trace_run_length_hist[i] += src->trace_run_length_hist[i];
        dest->optimized_trace_length_hist[i] += src->optimized_trace_length_hist[i];
        dest->trace_total_memory_hist[i] += src->trace_total_memory_hist[i];
    }
    for (int i = 0; i <= PYSTATS_MAX_UOP_ID; i++) {
        dest->error_in_opcode[i] += src->error_in_opcode[i];
    }
}

static void
merge_ft_stats(FTStats *dest, const FTStats *src)
{
    // Accumulate like the other merge helpers; plain assignment would
    // discard counts already merged from other threads.
    dest->mutex_sleeps += src->mutex_sleeps;
    dest->qsbr_polls += src->qsbr_polls;
    dest->world_stops += src->world_stops;
}

static void
merge_rare_event_stats(RareEventStats *dest, const RareEventStats *src)
{
    dest->set_class += src->set_class;
    dest->set_bases += src->set_bases;
    dest->set_eval_frame_func += src->set_eval_frame_func;
    dest->builtin_dict += src->builtin_dict;
    dest->func_modification += src->func_modification;
    dest->watched_dict_modification += src->watched_dict_modification;
    dest->watched_globals_modification += src->watched_globals_modification;
}

static void
merge_gc_stats_array(GCStats *dest, const GCStats *src)
{
    for (int i = 0; i < NUM_GENERATIONS; i++) {
        dest[i].collections += src[i].collections;
        dest[i].object_visits += src[i].object_visits;
        dest[i].objects_collected += src[i].objects_collected;
        dest[i].objects_transitively_reachable += src[i].objects_transitively_reachable;
        dest[i].objects_not_transitively_reachable += src[i].objects_not_transitively_reachable;
    }
}

void
stats_zero_thread(_PyThreadStateImpl *tstate)
{
    // Zero the thread local stat counters
    if (tstate->pystats_struct) {
        memset(tstate->pystats_struct, 0, sizeof(PyStats));
    }
}

// merge stats for a single thread into the global structure
void
stats_merge_thread(_PyThreadStateImpl *tstate)
{
    PyStats *src = tstate->pystats_struct;
    PyStats *dest = ((PyThreadState *)tstate)->interp->pystats_struct;

    if (src == NULL || dest == NULL) {
        return;
    }

    // Merge each category of stats using the helper functions.
    merge_opcode_stats_array(dest->opcode_stats, src->opcode_stats);
    merge_call_stats(&dest->call_stats, &src->call_stats);
    merge_object_stats(&dest->object_stats, &src->object_stats);
    merge_optimization_stats(&dest->optimization_stats, &src->optimization_stats);
    merge_ft_stats(&dest->ft_stats, &src->ft_stats);
    merge_rare_event_stats(&dest->rare_event_stats, &src->rare_event_stats);
    merge_gc_stats_array(dest->gc_stats, src->gc_stats);
}
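
/* On the free-threaded build each thread accumulates into its own
 * tstate->pystats_struct; the helpers above fold those per-thread counters
 * into the interpreter-wide interp->pystats_struct.  Callers either stop the
 * world (stats_merge_all()) or hold the stats lock (_PyStats_ThreadFini())
 * while merging, and zero or free the per-thread struct afterwards. */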
#endif // Py_GIL_DISABLED

// toggle stats collection on or off for all threads
static int
stats_toggle_on_off(PyThreadState *tstate, int on)
{
    bool changed = false;
    PyInterpreterState *interp = tstate->interp;
    STATS_LOCK(interp);
    if (on && interp->pystats_struct == NULL) {
        PyStats *s = PyMem_RawCalloc(1, sizeof(PyStats));
        if (s == NULL) {
            STATS_UNLOCK(interp);
            return -1;
        }
        FT_ATOMIC_STORE_PTR_RELAXED(interp->pystats_struct, s);
    }
    if (tstate->interp->pystats_enabled != on) {
        FT_ATOMIC_STORE_INT_RELAXED(interp->pystats_enabled, on);
        changed = true;
    }
    STATS_UNLOCK(interp);
    if (!changed) {
        return 0;
    }
    _PyEval_StopTheWorld(interp);
    _Py_FOR_EACH_TSTATE_UNLOCKED(interp, ts) {
        PyStats *s = NULL;
        if (interp->pystats_enabled) {
#ifdef Py_GIL_DISABLED
            _PyThreadStateImpl *ts_impl = (_PyThreadStateImpl *)ts;
            if (ts_impl->pystats_struct == NULL) {
                // first activation for this thread, allocate structure
                ts_impl->pystats_struct = PyMem_RawCalloc(1, sizeof(PyStats));
            }
            s = ts_impl->pystats_struct;
#else
            s = ts->interp->pystats_struct;
#endif
        }
        ts->pystats = s;
    }
    _PyEval_StartTheWorld(interp);
    return 0;
}

// zero stats for all threads and for the interpreter
static void
stats_zero_all(void)
{
    PyThreadState *tstate = _PyThreadState_GET();
    if (tstate == NULL) {
        return;
    }
    if (FT_ATOMIC_LOAD_PTR_RELAXED(tstate->interp->pystats_struct) == NULL) {
        return;
    }
    PyInterpreterState *interp = tstate->interp;
    _PyEval_StopTheWorld(interp);
#ifdef Py_GIL_DISABLED
    _Py_FOR_EACH_TSTATE_UNLOCKED(interp, ts) {
        stats_zero_thread((_PyThreadStateImpl *)ts);
    }
#endif
    if (interp->pystats_struct) {
        memset(interp->pystats_struct, 0, sizeof(PyStats));
    }
    _PyEval_StartTheWorld(interp);
}

// merge stats for all threads into the per-interpreter structure
static void
stats_merge_all(void)
{
    PyThreadState *tstate = _PyThreadState_GET();
    if (tstate == NULL) {
        return;
    }
    if (FT_ATOMIC_LOAD_PTR_RELAXED(tstate->interp->pystats_struct) == NULL) {
        return;
    }
    PyInterpreterState *interp = tstate->interp;
    _PyEval_StopTheWorld(interp);
#ifdef Py_GIL_DISABLED
    _Py_FOR_EACH_TSTATE_UNLOCKED(interp, ts) {
        stats_merge_thread((_PyThreadStateImpl *)ts);
        stats_zero_thread((_PyThreadStateImpl *)ts);
    }
#endif
    _PyEval_StartTheWorld(interp);
}

int
_Py_StatsOn(void)
{
    PyThreadState *tstate = _PyThreadState_GET();
    return stats_toggle_on_off(tstate, 1);
}

void
_Py_StatsOff(void)
{
    PyThreadState *tstate = _PyThreadState_GET();
    stats_toggle_on_off(tstate, 0);
}

void
_Py_StatsClear(void)
{
    stats_zero_all();
}
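
/* _Py_StatsOn(), _Py_StatsOff() and _Py_StatsClear() are the C entry points
 * for toggling and resetting collection at runtime.  In a Py_STATS build
 * these are typically surfaced to Python code as sys._stats_on(),
 * sys._stats_off() and sys._stats_clear() (the bindings live outside this
 * file). */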

static int
mem_is_zero(unsigned char *ptr, size_t size)
{
    for (size_t i=0; i < size; i++) {
        if (*ptr != 0) {
            return 0;
        }
        ptr++;
    }
    return 1;
}

int
_Py_PrintSpecializationStats(int to_file)
{
    assert(to_file);
    stats_merge_all();
    PyThreadState *tstate = _PyThreadState_GET();
    STATS_LOCK(tstate->interp);
    PyStats *stats = tstate->interp->pystats_struct;
    if (stats == NULL) {
        STATS_UNLOCK(tstate->interp);
        return 0;
    }
#define MEM_IS_ZERO(DATA) mem_is_zero((unsigned char*)DATA, sizeof(*(DATA)))
    int is_zero = (
        MEM_IS_ZERO(stats->gc_stats)  // is a pointer
        && MEM_IS_ZERO(&stats->opcode_stats)
        && MEM_IS_ZERO(&stats->call_stats)
        && MEM_IS_ZERO(&stats->object_stats)
    );
#undef MEM_IS_ZERO
    STATS_UNLOCK(tstate->interp);
    if (is_zero) {
        // gh-108753: -X pystats command line was used, but then _stats_off()
        // and _stats_clear() have been called: in this case, avoid printing
        // useless "all zeros" statistics.
        return 0;
    }

    FILE *out = stderr;
    if (to_file) {
        /* Write to a file instead of stderr. */
# ifdef MS_WINDOWS
        const char *dirname = "c:\\temp\\py_stats\\";
# else
        const char *dirname = "/tmp/py_stats/";
# endif
        /* Use random 160 bit number as file name,
         * to avoid both accidental collisions and
         * symlink attacks. */
        unsigned char rand[20];
        char hex_name[41];
        _PyOS_URandomNonblock(rand, 20);
        for (int i = 0; i < 20; i++) {
            hex_name[2*i] = Py_hexdigits[rand[i]&15];
            hex_name[2*i+1] = Py_hexdigits[(rand[i]>>4)&15];
        }
        hex_name[40] = '\0';
        char buf[64];
        assert(strlen(dirname) + 40 + strlen(".txt") < 64);
        sprintf(buf, "%s%s.txt", dirname, hex_name);
        FILE *fout = fopen(buf, "w");
        if (fout) {
            out = fout;
        }
    }
    else {
        fprintf(out, "Specialization stats:\n");
    }
    STATS_LOCK(tstate->interp);
    print_stats(out, stats);
    STATS_UNLOCK(tstate->interp);
    if (out != stderr) {
        fclose(out);
    }
    return 1;
}
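
/* The report written above is a flat "key : value" text dump, one counter per
 * line, sent either to stderr or to a randomly named .txt file under
 * /tmp/py_stats/ (c:\temp\py_stats\ on Windows).  These per-run files are
 * normally aggregated by the summarize_stats.py tool shipped in the CPython
 * repository (exact path not shown in this file). */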

PyStatus
_PyStats_InterpInit(PyInterpreterState *interp)
{
    if (interp->config._pystats) {
        // start with pystats enabled, can be disabled via sys._stats_off()
        // this needs to be set before the first tstate is created
        interp->pystats_enabled = 1;
        interp->pystats_struct = PyMem_RawCalloc(1, sizeof(PyStats));
        if (interp->pystats_struct == NULL) {
            return _PyStatus_ERR("out-of-memory while initializing interpreter");
        }
    }
    return _PyStatus_OK();
}

bool
_PyStats_ThreadInit(PyInterpreterState *interp, _PyThreadStateImpl *tstate)
{
#ifdef Py_GIL_DISABLED
    if (FT_ATOMIC_LOAD_INT_RELAXED(interp->pystats_enabled)) {
        assert(interp->pystats_struct != NULL);
        tstate->pystats_struct = PyMem_RawCalloc(1, sizeof(PyStats));
        if (tstate->pystats_struct == NULL) {
            return false;
        }
    }
#endif
    return true;
}

void
_PyStats_ThreadFini(_PyThreadStateImpl *tstate)
{
#ifdef Py_GIL_DISABLED
    STATS_LOCK(((PyThreadState *)tstate)->interp);
    stats_merge_thread(tstate);
    STATS_UNLOCK(((PyThreadState *)tstate)->interp);
    PyMem_RawFree(tstate->pystats_struct);
#endif
}

void
_PyStats_Attach(_PyThreadStateImpl *tstate_impl)
{
    PyStats *s;
    PyThreadState *tstate = (PyThreadState *)tstate_impl;
    PyInterpreterState *interp = tstate->interp;
    if (FT_ATOMIC_LOAD_INT_RELAXED(interp->pystats_enabled)) {
#ifdef Py_GIL_DISABLED
        s = ((_PyThreadStateImpl *)tstate)->pystats_struct;
#else
        s = tstate->interp->pystats_struct;
#endif
    }
    else {
        s = NULL;
    }
    tstate->pystats = s;
}

void
_PyStats_Detach(_PyThreadStateImpl *tstate_impl)
{
    ((PyThreadState *)tstate_impl)->pystats = NULL;
}

#endif // Py_STATS