gh-89279: In ceval.c, redefine some macros for speed (#32387)

The Py_DECREF, Py_XDECREF, Py_IS_TYPE, _Py_atomic_load_32bit_impl
and _Py_DECREF_SPECIALIZED inline functions are redefined as macros
that completely replace them.
These were among the top four of the functions that (in MSVC)
somehow weren't inlined.

Co-authored-by: Victor Stinner <vstinner@python.org>
Co-authored-by: Dennis Sweeney <36520290+sweeneyde@users.noreply.github.com>
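
As a rough illustration of the pattern described above (the obj_t type and the
obj_decref_inline/OBJ_DECREF names below are invented for this sketch and are not
CPython APIs): a static inline function may still not be inlined by the compiler,
while a do { ... } while (0) macro is always expanded in place by the preprocessor.

/* Hypothetical sketch, not CPython code. */
#include <stdio.h>

typedef struct { int refcnt; } obj_t;

/* A static inline helper: under MSVC PGO the compiler may still decline
   to inline this into a very large caller such as the eval loop. */
static inline void obj_decref_inline(obj_t *op) {
    if (--op->refcnt == 0) {
        printf("dealloc %p\n", (void *)op);
    }
}

/* The macro version is expanded textually, so it is always "inlined";
   do { ... } while (0) keeps it a single statement, safe in an unbraced
   if/else. */
#define OBJ_DECREF(arg) \
    do { \
        obj_t *op_ = (arg); \
        if (--op_->refcnt == 0) { \
            printf("dealloc %p\n", (void *)op_); \
        } \
    } while (0)

int main(void) {
    obj_t a = { .refcnt = 1 };
    obj_t b = { .refcnt = 1 };
    obj_decref_inline(&a);  /* ordinary (possibly inlined) call */
    OBJ_DECREF(&b);         /* expanded in place at the call site */
    return 0;
}
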
Guido van Rossum 2022-04-22 07:46:26 -07:00 committed by GitHub
parent 9fe82d0b70
commit 2f233fceae
2 changed files with 61 additions and 13 deletions

Python/ceval.c

@@ -44,6 +44,55 @@
# error "ceval.c must be build with Py_BUILD_CORE define for best performance"
#endif
#ifndef Py_DEBUG
// GH-89279: The MSVC compiler does not inline these static inline functions
// in PGO build in _PyEval_EvalFrameDefault(), because this function is over
// the limit of PGO, and that limit cannot be configured.
// Define them as macros to make sure that they are always inlined by the
// preprocessor.

#undef Py_DECREF
#define Py_DECREF(arg) \
    do { \
        PyObject *op = _PyObject_CAST(arg); \
        if (--op->ob_refcnt == 0) { \
            destructor dealloc = Py_TYPE(op)->tp_dealloc; \
            (*dealloc)(op); \
        } \
    } while (0)

#undef Py_XDECREF
#define Py_XDECREF(arg) \
    do { \
        PyObject *xop = _PyObject_CAST(arg); \
        if (xop != NULL) { \
            Py_DECREF(xop); \
        } \
    } while (0)

#undef Py_IS_TYPE
#define Py_IS_TYPE(ob, type) \
    (_PyObject_CAST(ob)->ob_type == (type))

#undef _Py_DECREF_SPECIALIZED
#define _Py_DECREF_SPECIALIZED(arg, dealloc) \
    do { \
        PyObject *op = _PyObject_CAST(arg); \
        if (--op->ob_refcnt == 0) { \
            destructor d = (destructor)(dealloc); \
            d(op); \
        } \
    } while (0)
#endif
// GH-89279: Similar to above, force inlining by using a macro.
#if defined(_MSC_VER) && SIZEOF_INT == 4
#define _Py_atomic_load_relaxed_int32(ATOMIC_VAL) (assert(sizeof((ATOMIC_VAL)->_value) == 4), *((volatile int*)&((ATOMIC_VAL)->_value)))
#else
#define _Py_atomic_load_relaxed_int32(ATOMIC_VAL) _Py_atomic_load_relaxed(ATOMIC_VAL)
#endif
/* Forward declarations */
static PyObject *trace_call_function(
PyThreadState *tstate, PyObject *callable, PyObject **stack,
@@ -192,10 +241,10 @@ COMPUTE_EVAL_BREAKER(PyInterpreterState *interp,
struct _ceval_state *ceval2)
{
_Py_atomic_store_relaxed(&ceval2->eval_breaker,
-_Py_atomic_load_relaxed(&ceval2->gil_drop_request)
-| (_Py_atomic_load_relaxed(&ceval->signals_pending)
+_Py_atomic_load_relaxed_int32(&ceval2->gil_drop_request)
+| (_Py_atomic_load_relaxed_int32(&ceval->signals_pending)
&& _Py_ThreadCanHandleSignals(interp))
-| (_Py_atomic_load_relaxed(&ceval2->pending.calls_to_do)
+| (_Py_atomic_load_relaxed_int32(&ceval2->pending.calls_to_do)
&& _Py_ThreadCanHandlePendingCalls())
| ceval2->pending.async_exc);
}
@@ -740,7 +789,7 @@ _Py_FinishPendingCalls(PyThreadState *tstate)
struct _pending_calls *pending = &tstate->interp->ceval.pending;
-if (!_Py_atomic_load_relaxed(&(pending->calls_to_do))) {
+if (!_Py_atomic_load_relaxed_int32(&(pending->calls_to_do))) {
return;
}
@@ -1187,7 +1236,7 @@ eval_frame_handle_pending(PyThreadState *tstate)
struct _ceval_runtime_state *ceval = &runtime->ceval;
/* Pending signals */
-if (_Py_atomic_load_relaxed(&ceval->signals_pending)) {
+if (_Py_atomic_load_relaxed_int32(&ceval->signals_pending)) {
if (handle_signals(tstate) != 0) {
return -1;
}
@@ -1195,14 +1244,14 @@ eval_frame_handle_pending(PyThreadState *tstate)
/* Pending calls */
struct _ceval_state *ceval2 = &tstate->interp->ceval;
-if (_Py_atomic_load_relaxed(&ceval2->pending.calls_to_do)) {
+if (_Py_atomic_load_relaxed_int32(&ceval2->pending.calls_to_do)) {
if (make_pending_calls(tstate->interp) != 0) {
return -1;
}
}
/* GIL drop request */
-if (_Py_atomic_load_relaxed(&ceval2->gil_drop_request)) {
+if (_Py_atomic_load_relaxed_int32(&ceval2->gil_drop_request)) {
/* Give another thread a chance */
if (_PyThreadState_Swap(&runtime->gilstate, NULL) != tstate) {
Py_FatalError("tstate mix-up");
@@ -1360,7 +1409,7 @@ eval_frame_handle_pending(PyThreadState *tstate)
#define CHECK_EVAL_BREAKER() \
_Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY(); \
-if (_Py_atomic_load_relaxed(eval_breaker)) { \
+if (_Py_atomic_load_relaxed_int32(eval_breaker)) { \
goto handle_eval_breaker; \
}
@@ -1640,10 +1689,8 @@ typedef struct {
PyObject *kwnames;
} CallShape;
-static inline bool
-is_method(PyObject **stack_pointer, int args) {
-    return PEEK(args+2) != NULL;
-}
+// GH-89279: Must be a macro to be sure it's inlined by MSVC.
+#define is_method(stack_pointer, args) (PEEK((args)+2) != NULL)
#define KWNAMES_LEN() \
(call_shape.kwnames == NULL ? 0 : ((int)PyTuple_GET_SIZE(call_shape.kwnames)))
@@ -1796,7 +1843,7 @@ handle_eval_breaker:
PREDICTED(RESUME_QUICK);
assert(tstate->cframe == &cframe);
assert(frame == cframe.current_frame);
-if (_Py_atomic_load_relaxed(eval_breaker) && oparg < 2) {
+if (_Py_atomic_load_relaxed_int32(eval_breaker) && oparg < 2) {
goto handle_eval_breaker;
}
DISPATCH();
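
For reference, the MSVC branch of the new _Py_atomic_load_relaxed_int32 macro boils
down to reading the 32-bit value through a volatile pointer. A minimal self-contained
sketch of that expansion (the atomic_int32_t type and LOAD_RELAXED_INT32 name are
invented here; they are not the CPython definitions):

/* Hypothetical sketch, not CPython code. */
#include <assert.h>
#include <stdio.h>

typedef struct { int _value; } atomic_int32_t;

/* Mirrors the shape of the MSVC-only branch above: assert the field is
   4 bytes wide, then read it through a volatile pointer so the compiler
   emits a plain load that it cannot cache or optimize away. */
#define LOAD_RELAXED_INT32(ATOMIC_VAL) \
    (assert(sizeof((ATOMIC_VAL)->_value) == 4), \
     *((volatile int *)&((ATOMIC_VAL)->_value)))

int main(void) {
    atomic_int32_t eval_breaker = { ._value = 1 };
    if (LOAD_RELAXED_INT32(&eval_breaker)) {
        printf("eval breaker set\n");
    }
    return 0;
}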