GH-91432: Specialize FOR_ITER (GH-91713)

* Adds FOR_ITER_LIST and FOR_ITER_RANGE specializations.

* Adds _PyLong_AssignValue() internal function to avoid temporary boxing of ints.
This commit is contained in:
Dennis Sweeney 2022-06-21 06:19:26 -04:00 committed by GitHub
parent c735d54534
commit 5fcfdd87c9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
22 changed files with 447 additions and 282 deletions

View file

@ -22,6 +22,7 @@
#include "pycore_pylifecycle.h" // _PyErr_Print()
#include "pycore_pymem.h" // _PyMem_IsPtrFreed()
#include "pycore_pystate.h" // _PyInterpreterState_GET()
#include "pycore_range.h" // _PyRangeIterObject
#include "pycore_sysmodule.h" // _PySys_Audit()
#include "pycore_tuple.h" // _PyTuple_ITEMS()
#include "pycore_emscripten_signal.h" // _Py_CHECK_EMSCRIPTEN_SIGNALS
@ -1823,7 +1824,6 @@ handle_eval_breaker:
}
TARGET(STORE_FAST) {
PREDICTED(STORE_FAST);
PyObject *value = POP();
SETLOCAL(oparg, value);
DISPATCH();
@ -4393,7 +4393,6 @@ handle_eval_breaker:
SET_TOP(iter);
if (iter == NULL)
goto error;
PREDICT(FOR_ITER);
DISPATCH();
}
@ -4430,16 +4429,10 @@ handle_eval_breaker:
PREDICTED(FOR_ITER);
/* before: [iter]; after: [iter, iter()] *or* [] */
PyObject *iter = TOP();
#ifdef Py_STATS
extern int _PySpecialization_ClassifyIterator(PyObject *);
_py_stats.opcode_stats[FOR_ITER].specialization.failure++;
_py_stats.opcode_stats[FOR_ITER].specialization.failure_kinds[_PySpecialization_ClassifyIterator(iter)]++;
#endif
PyObject *next = (*Py_TYPE(iter)->tp_iternext)(iter);
if (next != NULL) {
PUSH(next);
PREDICT(STORE_FAST);
PREDICT(UNPACK_SEQUENCE);
JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER);
DISPATCH();
}
if (_PyErr_Occurred(tstate)) {
@ -4451,13 +4444,70 @@ handle_eval_breaker:
}
_PyErr_Clear(tstate);
}
iterator_exhausted_no_error:
/* iterator ended normally */
STACK_SHRINK(1);
Py_DECREF(iter);
JUMPBY(oparg);
assert(!_PyErr_Occurred(tstate));
Py_DECREF(POP());
JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER + oparg);
DISPATCH();
}
TARGET(FOR_ITER_ADAPTIVE) {
assert(cframe.use_tracing == 0);
_PyForIterCache *cache = (_PyForIterCache *)next_instr;
if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
next_instr--;
_Py_Specialize_ForIter(TOP(), next_instr);
NOTRACE_DISPATCH_SAME_OPARG();
}
else {
STAT_INC(FOR_ITER, deferred);
DECREMENT_ADAPTIVE_COUNTER(cache);
JUMP_TO_INSTRUCTION(FOR_ITER);
}
}
TARGET(FOR_ITER_LIST) {
assert(cframe.use_tracing == 0);
_PyListIterObject *it = (_PyListIterObject *)TOP();
DEOPT_IF(Py_TYPE(it) != &PyListIter_Type, FOR_ITER);
STAT_INC(FOR_ITER, hit);
PyListObject *seq = it->it_seq;
if (seq == NULL) {
goto iterator_exhausted_no_error;
}
if (it->it_index < PyList_GET_SIZE(seq)) {
PyObject *next = PyList_GET_ITEM(seq, it->it_index++);
Py_INCREF(next);
PUSH(next);
JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER);
NOTRACE_DISPATCH();
}
it->it_seq = NULL;
Py_DECREF(seq);
goto iterator_exhausted_no_error;
}
TARGET(FOR_ITER_RANGE) {
assert(cframe.use_tracing == 0);
_PyRangeIterObject *r = (_PyRangeIterObject *)TOP();
DEOPT_IF(Py_TYPE(r) != &PyRangeIter_Type, FOR_ITER);
STAT_INC(FOR_ITER, hit);
_Py_CODEUNIT next = next_instr[INLINE_CACHE_ENTRIES_FOR_ITER];
assert(_PyOpcode_Deopt[_Py_OPCODE(next)] == STORE_FAST);
if (r->index >= r->len) {
goto iterator_exhausted_no_error;
}
long value = (long)(r->start +
(unsigned long)(r->index++) * r->step);
if (_PyLong_AssignValue(&GETLOCAL(_Py_OPARG(next)), value) < 0) {
goto error;
}
// The STORE_FAST is already done.
JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER + 1);
NOTRACE_DISPATCH();
}
TARGET(BEFORE_ASYNC_WITH) {
PyObject *mgr = TOP();
PyObject *res;

View file

@ -58,34 +58,34 @@ static void *opcode_targets[256] = {
&&TARGET_COMPARE_OP_INT_JUMP,
&&TARGET_COMPARE_OP_STR_JUMP,
&&TARGET_EXTENDED_ARG_QUICK,
&&TARGET_JUMP_BACKWARD_QUICK,
&&TARGET_FOR_ITER_ADAPTIVE,
&&TARGET_STORE_SUBSCR,
&&TARGET_DELETE_SUBSCR,
&&TARGET_FOR_ITER_LIST,
&&TARGET_FOR_ITER_RANGE,
&&TARGET_JUMP_BACKWARD_QUICK,
&&TARGET_LOAD_ATTR_ADAPTIVE,
&&TARGET_LOAD_ATTR_CLASS,
&&TARGET_LOAD_ATTR_INSTANCE_VALUE,
&&TARGET_LOAD_ATTR_MODULE,
&&TARGET_LOAD_ATTR_PROPERTY,
&&TARGET_LOAD_ATTR_SLOT,
&&TARGET_GET_ITER,
&&TARGET_GET_YIELD_FROM_ITER,
&&TARGET_PRINT_EXPR,
&&TARGET_LOAD_BUILD_CLASS,
&&TARGET_LOAD_ATTR_WITH_HINT,
&&TARGET_LOAD_ATTR_METHOD_LAZY_DICT,
&&TARGET_LOAD_ATTR_MODULE,
&&TARGET_LOAD_ATTR_PROPERTY,
&&TARGET_LOAD_ASSERTION_ERROR,
&&TARGET_RETURN_GENERATOR,
&&TARGET_LOAD_ATTR_SLOT,
&&TARGET_LOAD_ATTR_WITH_HINT,
&&TARGET_LOAD_ATTR_METHOD_LAZY_DICT,
&&TARGET_LOAD_ATTR_METHOD_NO_DICT,
&&TARGET_LOAD_ATTR_METHOD_WITH_DICT,
&&TARGET_LOAD_ATTR_METHOD_WITH_VALUES,
&&TARGET_LOAD_CONST__LOAD_FAST,
&&TARGET_LOAD_FAST__LOAD_CONST,
&&TARGET_LOAD_FAST__LOAD_FAST,
&&TARGET_LIST_TO_TUPLE,
&&TARGET_RETURN_VALUE,
&&TARGET_IMPORT_STAR,
&&TARGET_SETUP_ANNOTATIONS,
&&TARGET_LOAD_GLOBAL_ADAPTIVE,
&&TARGET_LOAD_CONST__LOAD_FAST,
&&TARGET_ASYNC_GEN_WRAP,
&&TARGET_PREP_RERAISE_STAR,
&&TARGET_POP_EXCEPT,
@ -112,7 +112,7 @@ static void *opcode_targets[256] = {
&&TARGET_JUMP_FORWARD,
&&TARGET_JUMP_IF_FALSE_OR_POP,
&&TARGET_JUMP_IF_TRUE_OR_POP,
&&TARGET_LOAD_GLOBAL_BUILTIN,
&&TARGET_LOAD_FAST__LOAD_CONST,
&&TARGET_POP_JUMP_FORWARD_IF_FALSE,
&&TARGET_POP_JUMP_FORWARD_IF_TRUE,
&&TARGET_LOAD_GLOBAL,
@ -120,7 +120,7 @@ static void *opcode_targets[256] = {
&&TARGET_CONTAINS_OP,
&&TARGET_RERAISE,
&&TARGET_COPY,
&&TARGET_LOAD_GLOBAL_MODULE,
&&TARGET_LOAD_FAST__LOAD_FAST,
&&TARGET_BINARY_OP,
&&TARGET_SEND,
&&TARGET_LOAD_FAST,
@ -140,9 +140,9 @@ static void *opcode_targets[256] = {
&&TARGET_STORE_DEREF,
&&TARGET_DELETE_DEREF,
&&TARGET_JUMP_BACKWARD,
&&TARGET_RESUME_QUICK,
&&TARGET_LOAD_GLOBAL_ADAPTIVE,
&&TARGET_CALL_FUNCTION_EX,
&&TARGET_STORE_ATTR_ADAPTIVE,
&&TARGET_LOAD_GLOBAL_BUILTIN,
&&TARGET_EXTENDED_ARG,
&&TARGET_LIST_APPEND,
&&TARGET_SET_ADD,
@ -152,30 +152,33 @@ static void *opcode_targets[256] = {
&&TARGET_YIELD_VALUE,
&&TARGET_RESUME,
&&TARGET_MATCH_CLASS,
&&TARGET_STORE_ATTR_INSTANCE_VALUE,
&&TARGET_STORE_ATTR_SLOT,
&&TARGET_LOAD_GLOBAL_MODULE,
&&TARGET_RESUME_QUICK,
&&TARGET_FORMAT_VALUE,
&&TARGET_BUILD_CONST_KEY_MAP,
&&TARGET_BUILD_STRING,
&&TARGET_STORE_ATTR_ADAPTIVE,
&&TARGET_STORE_ATTR_INSTANCE_VALUE,
&&TARGET_STORE_ATTR_SLOT,
&&TARGET_STORE_ATTR_WITH_HINT,
&&TARGET_STORE_FAST__LOAD_FAST,
&&TARGET_STORE_FAST__STORE_FAST,
&&TARGET_STORE_SUBSCR_ADAPTIVE,
&&TARGET_LIST_EXTEND,
&&TARGET_SET_UPDATE,
&&TARGET_DICT_MERGE,
&&TARGET_DICT_UPDATE,
&&TARGET_STORE_FAST__LOAD_FAST,
&&TARGET_STORE_FAST__STORE_FAST,
&&TARGET_STORE_SUBSCR_ADAPTIVE,
&&TARGET_STORE_SUBSCR_DICT,
&&TARGET_STORE_SUBSCR_LIST_INT,
&&TARGET_UNPACK_SEQUENCE_ADAPTIVE,
&&TARGET_UNPACK_SEQUENCE_LIST,
&&TARGET_UNPACK_SEQUENCE_TUPLE,
&&TARGET_CALL,
&&TARGET_KW_NAMES,
&&TARGET_POP_JUMP_BACKWARD_IF_NOT_NONE,
&&TARGET_POP_JUMP_BACKWARD_IF_NONE,
&&TARGET_POP_JUMP_BACKWARD_IF_FALSE,
&&TARGET_POP_JUMP_BACKWARD_IF_TRUE,
&&TARGET_UNPACK_SEQUENCE_ADAPTIVE,
&&TARGET_UNPACK_SEQUENCE_LIST,
&&TARGET_UNPACK_SEQUENCE_TUPLE,
&&TARGET_UNPACK_SEQUENCE_TWO_TUPLE,
&&_unknown_opcode,
&&_unknown_opcode,
@ -251,8 +254,5 @@ static void *opcode_targets[256] = {
&&_unknown_opcode,
&&_unknown_opcode,
&&_unknown_opcode,
&&_unknown_opcode,
&&_unknown_opcode,
&&_unknown_opcode,
&&TARGET_DO_TRACING
};

View file

@ -28,6 +28,7 @@ uint8_t _PyOpcode_Adaptive[256] = {
[BINARY_OP] = BINARY_OP_ADAPTIVE,
[COMPARE_OP] = COMPARE_OP_ADAPTIVE,
[UNPACK_SEQUENCE] = UNPACK_SEQUENCE_ADAPTIVE,
[FOR_ITER] = FOR_ITER_ADAPTIVE,
};
Py_ssize_t _Py_QuickenedCount = 0;
@ -2092,3 +2093,33 @@ int
}
#endif
void
_Py_Specialize_ForIter(PyObject *iter, _Py_CODEUNIT *instr)
{
assert(_PyOpcode_Caches[FOR_ITER] == INLINE_CACHE_ENTRIES_FOR_ITER);
_PyForIterCache *cache = (_PyForIterCache *)(instr + 1);
PyTypeObject *tp = Py_TYPE(iter);
_Py_CODEUNIT next = instr[1+INLINE_CACHE_ENTRIES_FOR_ITER];
int next_op = _PyOpcode_Deopt[_Py_OPCODE(next)];
if (tp == &PyListIter_Type) {
_Py_SET_OPCODE(*instr, FOR_ITER_LIST);
goto success;
}
else if (tp == &PyRangeIter_Type && next_op == STORE_FAST) {
_Py_SET_OPCODE(*instr, FOR_ITER_RANGE);
goto success;
}
else {
SPECIALIZATION_FAIL(FOR_ITER,
_PySpecialization_ClassifyIterator(iter));
goto failure;
}
failure:
STAT_INC(FOR_ITER, failure);
cache->counter = adaptive_counter_backoff(cache->counter);
return;
success:
STAT_INC(FOR_ITER, success);
cache->counter = miss_counter_start();
}