GH-135379: Support limited scalar replacement for replicated uops in the JIT code generator. (GH-135563)

* Use it to support efficient specializations of COPY and SWAP in the JIT.
This commit is contained in:
Mark Shannon 2025-06-17 13:43:09 +01:00 committed by GitHub
parent a9e66a7c50
commit 8dd8b5c2f0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 313 additions and 198 deletions

View file

@ -86,86 +86,89 @@ extern "C" {
#define _CONTAINS_OP_DICT 362
#define _CONTAINS_OP_SET 363
#define _CONVERT_VALUE CONVERT_VALUE
#define _COPY COPY
#define _COPY 364
#define _COPY_1 365
#define _COPY_2 366
#define _COPY_3 367
#define _COPY_FREE_VARS COPY_FREE_VARS
#define _CREATE_INIT_FRAME 364
#define _CREATE_INIT_FRAME 368
#define _DELETE_ATTR DELETE_ATTR
#define _DELETE_DEREF DELETE_DEREF
#define _DELETE_FAST DELETE_FAST
#define _DELETE_GLOBAL DELETE_GLOBAL
#define _DELETE_NAME DELETE_NAME
#define _DELETE_SUBSCR DELETE_SUBSCR
#define _DEOPT 365
#define _DEOPT 369
#define _DICT_MERGE DICT_MERGE
#define _DICT_UPDATE DICT_UPDATE
#define _DO_CALL 366
#define _DO_CALL_FUNCTION_EX 367
#define _DO_CALL_KW 368
#define _DO_CALL 370
#define _DO_CALL_FUNCTION_EX 371
#define _DO_CALL_KW 372
#define _END_FOR END_FOR
#define _END_SEND END_SEND
#define _ERROR_POP_N 369
#define _ERROR_POP_N 373
#define _EXIT_INIT_CHECK EXIT_INIT_CHECK
#define _EXPAND_METHOD 370
#define _EXPAND_METHOD_KW 371
#define _FATAL_ERROR 372
#define _EXPAND_METHOD 374
#define _EXPAND_METHOD_KW 375
#define _FATAL_ERROR 376
#define _FORMAT_SIMPLE FORMAT_SIMPLE
#define _FORMAT_WITH_SPEC FORMAT_WITH_SPEC
#define _FOR_ITER 373
#define _FOR_ITER_GEN_FRAME 374
#define _FOR_ITER_TIER_TWO 375
#define _FOR_ITER 377
#define _FOR_ITER_GEN_FRAME 378
#define _FOR_ITER_TIER_TWO 379
#define _GET_AITER GET_AITER
#define _GET_ANEXT GET_ANEXT
#define _GET_AWAITABLE GET_AWAITABLE
#define _GET_ITER GET_ITER
#define _GET_LEN GET_LEN
#define _GET_YIELD_FROM_ITER GET_YIELD_FROM_ITER
#define _GUARD_BINARY_OP_EXTEND 376
#define _GUARD_CALLABLE_ISINSTANCE 377
#define _GUARD_CALLABLE_LEN 378
#define _GUARD_CALLABLE_LIST_APPEND 379
#define _GUARD_CALLABLE_STR_1 380
#define _GUARD_CALLABLE_TUPLE_1 381
#define _GUARD_CALLABLE_TYPE_1 382
#define _GUARD_DORV_NO_DICT 383
#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 384
#define _GUARD_GLOBALS_VERSION 385
#define _GUARD_IS_FALSE_POP 386
#define _GUARD_IS_NONE_POP 387
#define _GUARD_IS_NOT_NONE_POP 388
#define _GUARD_IS_TRUE_POP 389
#define _GUARD_KEYS_VERSION 390
#define _GUARD_NOS_DICT 391
#define _GUARD_NOS_FLOAT 392
#define _GUARD_NOS_INT 393
#define _GUARD_NOS_LIST 394
#define _GUARD_NOS_NOT_NULL 395
#define _GUARD_NOS_NULL 396
#define _GUARD_NOS_TUPLE 397
#define _GUARD_NOS_UNICODE 398
#define _GUARD_NOT_EXHAUSTED_LIST 399
#define _GUARD_NOT_EXHAUSTED_RANGE 400
#define _GUARD_NOT_EXHAUSTED_TUPLE 401
#define _GUARD_THIRD_NULL 402
#define _GUARD_TOS_ANY_SET 403
#define _GUARD_TOS_DICT 404
#define _GUARD_TOS_FLOAT 405
#define _GUARD_TOS_INT 406
#define _GUARD_TOS_LIST 407
#define _GUARD_TOS_SLICE 408
#define _GUARD_TOS_TUPLE 409
#define _GUARD_TOS_UNICODE 410
#define _GUARD_TYPE_VERSION 411
#define _GUARD_TYPE_VERSION_AND_LOCK 412
#define _GUARD_BINARY_OP_EXTEND 380
#define _GUARD_CALLABLE_ISINSTANCE 381
#define _GUARD_CALLABLE_LEN 382
#define _GUARD_CALLABLE_LIST_APPEND 383
#define _GUARD_CALLABLE_STR_1 384
#define _GUARD_CALLABLE_TUPLE_1 385
#define _GUARD_CALLABLE_TYPE_1 386
#define _GUARD_DORV_NO_DICT 387
#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 388
#define _GUARD_GLOBALS_VERSION 389
#define _GUARD_IS_FALSE_POP 390
#define _GUARD_IS_NONE_POP 391
#define _GUARD_IS_NOT_NONE_POP 392
#define _GUARD_IS_TRUE_POP 393
#define _GUARD_KEYS_VERSION 394
#define _GUARD_NOS_DICT 395
#define _GUARD_NOS_FLOAT 396
#define _GUARD_NOS_INT 397
#define _GUARD_NOS_LIST 398
#define _GUARD_NOS_NOT_NULL 399
#define _GUARD_NOS_NULL 400
#define _GUARD_NOS_TUPLE 401
#define _GUARD_NOS_UNICODE 402
#define _GUARD_NOT_EXHAUSTED_LIST 403
#define _GUARD_NOT_EXHAUSTED_RANGE 404
#define _GUARD_NOT_EXHAUSTED_TUPLE 405
#define _GUARD_THIRD_NULL 406
#define _GUARD_TOS_ANY_SET 407
#define _GUARD_TOS_DICT 408
#define _GUARD_TOS_FLOAT 409
#define _GUARD_TOS_INT 410
#define _GUARD_TOS_LIST 411
#define _GUARD_TOS_SLICE 412
#define _GUARD_TOS_TUPLE 413
#define _GUARD_TOS_UNICODE 414
#define _GUARD_TYPE_VERSION 415
#define _GUARD_TYPE_VERSION_AND_LOCK 416
#define _IMPORT_FROM IMPORT_FROM
#define _IMPORT_NAME IMPORT_NAME
#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 413
#define _INIT_CALL_PY_EXACT_ARGS 414
#define _INIT_CALL_PY_EXACT_ARGS_0 415
#define _INIT_CALL_PY_EXACT_ARGS_1 416
#define _INIT_CALL_PY_EXACT_ARGS_2 417
#define _INIT_CALL_PY_EXACT_ARGS_3 418
#define _INIT_CALL_PY_EXACT_ARGS_4 419
#define _INSERT_NULL 420
#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 417
#define _INIT_CALL_PY_EXACT_ARGS 418
#define _INIT_CALL_PY_EXACT_ARGS_0 419
#define _INIT_CALL_PY_EXACT_ARGS_1 420
#define _INIT_CALL_PY_EXACT_ARGS_2 421
#define _INIT_CALL_PY_EXACT_ARGS_3 422
#define _INIT_CALL_PY_EXACT_ARGS_4 423
#define _INSERT_NULL 424
#define _INSTRUMENTED_FOR_ITER INSTRUMENTED_FOR_ITER
#define _INSTRUMENTED_INSTRUCTION INSTRUMENTED_INSTRUCTION
#define _INSTRUMENTED_JUMP_FORWARD INSTRUMENTED_JUMP_FORWARD
@ -175,171 +178,173 @@ extern "C" {
#define _INSTRUMENTED_POP_JUMP_IF_NONE INSTRUMENTED_POP_JUMP_IF_NONE
#define _INSTRUMENTED_POP_JUMP_IF_NOT_NONE INSTRUMENTED_POP_JUMP_IF_NOT_NONE
#define _INSTRUMENTED_POP_JUMP_IF_TRUE INSTRUMENTED_POP_JUMP_IF_TRUE
#define _IS_NONE 421
#define _IS_NONE 425
#define _IS_OP IS_OP
#define _ITER_CHECK_LIST 422
#define _ITER_CHECK_RANGE 423
#define _ITER_CHECK_TUPLE 424
#define _ITER_JUMP_LIST 425
#define _ITER_JUMP_RANGE 426
#define _ITER_JUMP_TUPLE 427
#define _ITER_NEXT_LIST 428
#define _ITER_NEXT_LIST_TIER_TWO 429
#define _ITER_NEXT_RANGE 430
#define _ITER_NEXT_TUPLE 431
#define _JUMP_TO_TOP 432
#define _ITER_CHECK_LIST 426
#define _ITER_CHECK_RANGE 427
#define _ITER_CHECK_TUPLE 428
#define _ITER_JUMP_LIST 429
#define _ITER_JUMP_RANGE 430
#define _ITER_JUMP_TUPLE 431
#define _ITER_NEXT_LIST 432
#define _ITER_NEXT_LIST_TIER_TWO 433
#define _ITER_NEXT_RANGE 434
#define _ITER_NEXT_TUPLE 435
#define _JUMP_TO_TOP 436
#define _LIST_APPEND LIST_APPEND
#define _LIST_EXTEND LIST_EXTEND
#define _LOAD_ATTR 433
#define _LOAD_ATTR_CLASS 434
#define _LOAD_ATTR 437
#define _LOAD_ATTR_CLASS 438
#define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN
#define _LOAD_ATTR_INSTANCE_VALUE 435
#define _LOAD_ATTR_METHOD_LAZY_DICT 436
#define _LOAD_ATTR_METHOD_NO_DICT 437
#define _LOAD_ATTR_METHOD_WITH_VALUES 438
#define _LOAD_ATTR_MODULE 439
#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 440
#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 441
#define _LOAD_ATTR_PROPERTY_FRAME 442
#define _LOAD_ATTR_SLOT 443
#define _LOAD_ATTR_WITH_HINT 444
#define _LOAD_ATTR_INSTANCE_VALUE 439
#define _LOAD_ATTR_METHOD_LAZY_DICT 440
#define _LOAD_ATTR_METHOD_NO_DICT 441
#define _LOAD_ATTR_METHOD_WITH_VALUES 442
#define _LOAD_ATTR_MODULE 443
#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 444
#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 445
#define _LOAD_ATTR_PROPERTY_FRAME 446
#define _LOAD_ATTR_SLOT 447
#define _LOAD_ATTR_WITH_HINT 448
#define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS
#define _LOAD_BYTECODE 445
#define _LOAD_BYTECODE 449
#define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT
#define _LOAD_CONST LOAD_CONST
#define _LOAD_CONST_INLINE 446
#define _LOAD_CONST_INLINE_BORROW 447
#define _LOAD_CONST_UNDER_INLINE 448
#define _LOAD_CONST_UNDER_INLINE_BORROW 449
#define _LOAD_CONST_INLINE 450
#define _LOAD_CONST_INLINE_BORROW 451
#define _LOAD_CONST_UNDER_INLINE 452
#define _LOAD_CONST_UNDER_INLINE_BORROW 453
#define _LOAD_DEREF LOAD_DEREF
#define _LOAD_FAST 450
#define _LOAD_FAST_0 451
#define _LOAD_FAST_1 452
#define _LOAD_FAST_2 453
#define _LOAD_FAST_3 454
#define _LOAD_FAST_4 455
#define _LOAD_FAST_5 456
#define _LOAD_FAST_6 457
#define _LOAD_FAST_7 458
#define _LOAD_FAST 454
#define _LOAD_FAST_0 455
#define _LOAD_FAST_1 456
#define _LOAD_FAST_2 457
#define _LOAD_FAST_3 458
#define _LOAD_FAST_4 459
#define _LOAD_FAST_5 460
#define _LOAD_FAST_6 461
#define _LOAD_FAST_7 462
#define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR
#define _LOAD_FAST_BORROW 459
#define _LOAD_FAST_BORROW_0 460
#define _LOAD_FAST_BORROW_1 461
#define _LOAD_FAST_BORROW_2 462
#define _LOAD_FAST_BORROW_3 463
#define _LOAD_FAST_BORROW_4 464
#define _LOAD_FAST_BORROW_5 465
#define _LOAD_FAST_BORROW_6 466
#define _LOAD_FAST_BORROW_7 467
#define _LOAD_FAST_BORROW 463
#define _LOAD_FAST_BORROW_0 464
#define _LOAD_FAST_BORROW_1 465
#define _LOAD_FAST_BORROW_2 466
#define _LOAD_FAST_BORROW_3 467
#define _LOAD_FAST_BORROW_4 468
#define _LOAD_FAST_BORROW_5 469
#define _LOAD_FAST_BORROW_6 470
#define _LOAD_FAST_BORROW_7 471
#define _LOAD_FAST_BORROW_LOAD_FAST_BORROW LOAD_FAST_BORROW_LOAD_FAST_BORROW
#define _LOAD_FAST_CHECK LOAD_FAST_CHECK
#define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST
#define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF
#define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS
#define _LOAD_GLOBAL 468
#define _LOAD_GLOBAL_BUILTINS 469
#define _LOAD_GLOBAL_MODULE 470
#define _LOAD_GLOBAL 472
#define _LOAD_GLOBAL_BUILTINS 473
#define _LOAD_GLOBAL_MODULE 474
#define _LOAD_LOCALS LOAD_LOCALS
#define _LOAD_NAME LOAD_NAME
#define _LOAD_SMALL_INT 471
#define _LOAD_SMALL_INT_0 472
#define _LOAD_SMALL_INT_1 473
#define _LOAD_SMALL_INT_2 474
#define _LOAD_SMALL_INT_3 475
#define _LOAD_SPECIAL 476
#define _LOAD_SMALL_INT 475
#define _LOAD_SMALL_INT_0 476
#define _LOAD_SMALL_INT_1 477
#define _LOAD_SMALL_INT_2 478
#define _LOAD_SMALL_INT_3 479
#define _LOAD_SPECIAL 480
#define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR
#define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD
#define _MAKE_CALLARGS_A_TUPLE 477
#define _MAKE_CALLARGS_A_TUPLE 481
#define _MAKE_CELL MAKE_CELL
#define _MAKE_FUNCTION MAKE_FUNCTION
#define _MAKE_WARM 478
#define _MAKE_WARM 482
#define _MAP_ADD MAP_ADD
#define _MATCH_CLASS MATCH_CLASS
#define _MATCH_KEYS MATCH_KEYS
#define _MATCH_MAPPING MATCH_MAPPING
#define _MATCH_SEQUENCE MATCH_SEQUENCE
#define _MAYBE_EXPAND_METHOD 479
#define _MAYBE_EXPAND_METHOD_KW 480
#define _MONITOR_CALL 481
#define _MONITOR_CALL_KW 482
#define _MONITOR_JUMP_BACKWARD 483
#define _MONITOR_RESUME 484
#define _MAYBE_EXPAND_METHOD 483
#define _MAYBE_EXPAND_METHOD_KW 484
#define _MONITOR_CALL 485
#define _MONITOR_CALL_KW 486
#define _MONITOR_JUMP_BACKWARD 487
#define _MONITOR_RESUME 488
#define _NOP NOP
#define _POP_CALL 485
#define _POP_CALL_LOAD_CONST_INLINE_BORROW 486
#define _POP_CALL_ONE 487
#define _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW 488
#define _POP_CALL_TWO 489
#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 490
#define _POP_CALL 489
#define _POP_CALL_LOAD_CONST_INLINE_BORROW 490
#define _POP_CALL_ONE 491
#define _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW 492
#define _POP_CALL_TWO 493
#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 494
#define _POP_EXCEPT POP_EXCEPT
#define _POP_ITER POP_ITER
#define _POP_JUMP_IF_FALSE 491
#define _POP_JUMP_IF_TRUE 492
#define _POP_JUMP_IF_FALSE 495
#define _POP_JUMP_IF_TRUE 496
#define _POP_TOP POP_TOP
#define _POP_TOP_LOAD_CONST_INLINE 493
#define _POP_TOP_LOAD_CONST_INLINE_BORROW 494
#define _POP_TWO 495
#define _POP_TWO_LOAD_CONST_INLINE_BORROW 496
#define _POP_TOP_LOAD_CONST_INLINE 497
#define _POP_TOP_LOAD_CONST_INLINE_BORROW 498
#define _POP_TWO 499
#define _POP_TWO_LOAD_CONST_INLINE_BORROW 500
#define _PUSH_EXC_INFO PUSH_EXC_INFO
#define _PUSH_FRAME 497
#define _PUSH_FRAME 501
#define _PUSH_NULL PUSH_NULL
#define _PUSH_NULL_CONDITIONAL 498
#define _PY_FRAME_GENERAL 499
#define _PY_FRAME_KW 500
#define _QUICKEN_RESUME 501
#define _REPLACE_WITH_TRUE 502
#define _PUSH_NULL_CONDITIONAL 502
#define _PY_FRAME_GENERAL 503
#define _PY_FRAME_KW 504
#define _QUICKEN_RESUME 505
#define _REPLACE_WITH_TRUE 506
#define _RESUME_CHECK RESUME_CHECK
#define _RETURN_GENERATOR RETURN_GENERATOR
#define _RETURN_VALUE RETURN_VALUE
#define _SAVE_RETURN_OFFSET 503
#define _SEND 504
#define _SEND_GEN_FRAME 505
#define _SAVE_RETURN_OFFSET 507
#define _SEND 508
#define _SEND_GEN_FRAME 509
#define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS
#define _SET_ADD SET_ADD
#define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE
#define _SET_UPDATE SET_UPDATE
#define _START_EXECUTOR 506
#define _STORE_ATTR 507
#define _STORE_ATTR_INSTANCE_VALUE 508
#define _STORE_ATTR_SLOT 509
#define _STORE_ATTR_WITH_HINT 510
#define _START_EXECUTOR 510
#define _STORE_ATTR 511
#define _STORE_ATTR_INSTANCE_VALUE 512
#define _STORE_ATTR_SLOT 513
#define _STORE_ATTR_WITH_HINT 514
#define _STORE_DEREF STORE_DEREF
#define _STORE_FAST 511
#define _STORE_FAST_0 512
#define _STORE_FAST_1 513
#define _STORE_FAST_2 514
#define _STORE_FAST_3 515
#define _STORE_FAST_4 516
#define _STORE_FAST_5 517
#define _STORE_FAST_6 518
#define _STORE_FAST_7 519
#define _STORE_FAST 515
#define _STORE_FAST_0 516
#define _STORE_FAST_1 517
#define _STORE_FAST_2 518
#define _STORE_FAST_3 519
#define _STORE_FAST_4 520
#define _STORE_FAST_5 521
#define _STORE_FAST_6 522
#define _STORE_FAST_7 523
#define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST
#define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST
#define _STORE_GLOBAL STORE_GLOBAL
#define _STORE_NAME STORE_NAME
#define _STORE_SLICE 520
#define _STORE_SUBSCR 521
#define _STORE_SUBSCR_DICT 522
#define _STORE_SUBSCR_LIST_INT 523
#define _SWAP SWAP
#define _TIER2_RESUME_CHECK 524
#define _TO_BOOL 525
#define _STORE_SLICE 524
#define _STORE_SUBSCR 525
#define _STORE_SUBSCR_DICT 526
#define _STORE_SUBSCR_LIST_INT 527
#define _SWAP 528
#define _SWAP_2 529
#define _SWAP_3 530
#define _TIER2_RESUME_CHECK 531
#define _TO_BOOL 532
#define _TO_BOOL_BOOL TO_BOOL_BOOL
#define _TO_BOOL_INT TO_BOOL_INT
#define _TO_BOOL_LIST 526
#define _TO_BOOL_LIST 533
#define _TO_BOOL_NONE TO_BOOL_NONE
#define _TO_BOOL_STR 527
#define _TO_BOOL_STR 534
#define _UNARY_INVERT UNARY_INVERT
#define _UNARY_NEGATIVE UNARY_NEGATIVE
#define _UNARY_NOT UNARY_NOT
#define _UNPACK_EX UNPACK_EX
#define _UNPACK_SEQUENCE 528
#define _UNPACK_SEQUENCE_LIST 529
#define _UNPACK_SEQUENCE_TUPLE 530
#define _UNPACK_SEQUENCE_TWO_TUPLE 531
#define _UNPACK_SEQUENCE 535
#define _UNPACK_SEQUENCE_LIST 536
#define _UNPACK_SEQUENCE_TUPLE 537
#define _UNPACK_SEQUENCE_TWO_TUPLE 538
#define _WITH_EXCEPT_START WITH_EXCEPT_START
#define _YIELD_VALUE YIELD_VALUE
#define MAX_UOP_ID 531
#define MAX_UOP_ID 538
#ifdef __cplusplus
}

View file

@ -12,7 +12,8 @@ extern "C" {
#include <stdint.h>
#include "pycore_uop_ids.h"
extern const uint16_t _PyUop_Flags[MAX_UOP_ID+1];
extern const uint8_t _PyUop_Replication[MAX_UOP_ID+1];
typedef struct _rep_range { uint8_t start; uint8_t stop; } ReplicationRange;
extern const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1];
extern const char * const _PyOpcode_uop_name[MAX_UOP_ID+1];
extern int _PyUop_num_popped(int opcode, int oparg);
@ -288,8 +289,13 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
[_CONVERT_VALUE] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_FORMAT_SIMPLE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_FORMAT_WITH_SPEC] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_COPY_1] = HAS_PURE_FLAG,
[_COPY_2] = HAS_PURE_FLAG,
[_COPY_3] = HAS_PURE_FLAG,
[_COPY] = HAS_ARG_FLAG | HAS_PURE_FLAG,
[_BINARY_OP] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG,
[_SWAP_2] = HAS_PURE_FLAG,
[_SWAP_3] = HAS_PURE_FLAG,
[_SWAP] = HAS_ARG_FLAG | HAS_PURE_FLAG,
[_GUARD_IS_TRUE_POP] = HAS_EXIT_FLAG,
[_GUARD_IS_FALSE_POP] = HAS_EXIT_FLAG,
@ -323,12 +329,14 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
[_TIER2_RESUME_CHECK] = HAS_DEOPT_FLAG,
};
const uint8_t _PyUop_Replication[MAX_UOP_ID+1] = {
[_LOAD_FAST] = 8,
[_LOAD_FAST_BORROW] = 8,
[_LOAD_SMALL_INT] = 4,
[_STORE_FAST] = 8,
[_INIT_CALL_PY_EXACT_ARGS] = 5,
const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1] = {
[_LOAD_FAST] = { 0, 8 },
[_LOAD_FAST_BORROW] = { 0, 8 },
[_LOAD_SMALL_INT] = { 0, 4 },
[_STORE_FAST] = { 0, 8 },
[_INIT_CALL_PY_EXACT_ARGS] = { 0, 5 },
[_COPY] = { 1, 4 },
[_SWAP] = { 2, 4 },
};
const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
@ -408,6 +416,9 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
[_CONTAINS_OP_SET] = "_CONTAINS_OP_SET",
[_CONVERT_VALUE] = "_CONVERT_VALUE",
[_COPY] = "_COPY",
[_COPY_1] = "_COPY_1",
[_COPY_2] = "_COPY_2",
[_COPY_3] = "_COPY_3",
[_COPY_FREE_VARS] = "_COPY_FREE_VARS",
[_CREATE_INIT_FRAME] = "_CREATE_INIT_FRAME",
[_DELETE_ATTR] = "_DELETE_ATTR",
@ -617,6 +628,8 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
[_STORE_SUBSCR_DICT] = "_STORE_SUBSCR_DICT",
[_STORE_SUBSCR_LIST_INT] = "_STORE_SUBSCR_LIST_INT",
[_SWAP] = "_SWAP",
[_SWAP_2] = "_SWAP_2",
[_SWAP_3] = "_SWAP_3",
[_TIER2_RESUME_CHECK] = "_TIER2_RESUME_CHECK",
[_TO_BOOL] = "_TO_BOOL",
[_TO_BOOL_BOOL] = "_TO_BOOL_BOOL",
@ -1176,10 +1189,20 @@ int _PyUop_num_popped(int opcode, int oparg)
return 1;
case _FORMAT_WITH_SPEC:
return 2;
case _COPY_1:
return 0;
case _COPY_2:
return 0;
case _COPY_3:
return 0;
case _COPY:
return 0;
case _BINARY_OP:
return 2;
case _SWAP_2:
return 0;
case _SWAP_3:
return 0;
case _SWAP:
return 0;
case _GUARD_IS_TRUE_POP:

View file

@ -4946,8 +4946,7 @@ dummy_func(
res = PyStackRef_FromPyObjectSteal(res_o);
}
pure inst(COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) {
assert(oparg > 0);
pure replicate(1:4) inst(COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) {
top = PyStackRef_DUP(bottom);
}
@ -4980,12 +4979,11 @@ dummy_func(
macro(BINARY_OP) = _SPECIALIZE_BINARY_OP + unused/4 + _BINARY_OP;
pure inst(SWAP, (bottom, unused[oparg-2], top --
pure replicate(2:4) inst(SWAP, (bottom, unused[oparg-2], top --
bottom, unused[oparg-2], top)) {
_PyStackRef temp = bottom;
bottom = top;
top = temp;
assert(oparg >= 2);
}
inst(INSTRUMENTED_LINE, ( -- )) {

View file

@ -6763,12 +6763,44 @@
break;
}
case _COPY_1: {
_PyStackRef bottom;
_PyStackRef top;
bottom = stack_pointer[-1];
top = PyStackRef_DUP(bottom);
stack_pointer[0] = top;
stack_pointer += 1;
assert(WITHIN_STACK_BOUNDS());
break;
}
case _COPY_2: {
_PyStackRef bottom;
_PyStackRef top;
bottom = stack_pointer[-2];
top = PyStackRef_DUP(bottom);
stack_pointer[0] = top;
stack_pointer += 1;
assert(WITHIN_STACK_BOUNDS());
break;
}
case _COPY_3: {
_PyStackRef bottom;
_PyStackRef top;
bottom = stack_pointer[-3];
top = PyStackRef_DUP(bottom);
stack_pointer[0] = top;
stack_pointer += 1;
assert(WITHIN_STACK_BOUNDS());
break;
}
case _COPY: {
_PyStackRef bottom;
_PyStackRef top;
oparg = CURRENT_OPARG();
bottom = stack_pointer[-1 - (oparg-1)];
assert(oparg > 0);
top = PyStackRef_DUP(bottom);
stack_pointer[0] = top;
stack_pointer += 1;
@ -6808,6 +6840,32 @@
break;
}
case _SWAP_2: {
_PyStackRef top;
_PyStackRef bottom;
top = stack_pointer[-1];
bottom = stack_pointer[-2];
_PyStackRef temp = bottom;
bottom = top;
top = temp;
stack_pointer[-2] = bottom;
stack_pointer[-1] = top;
break;
}
case _SWAP_3: {
_PyStackRef top;
_PyStackRef bottom;
top = stack_pointer[-1];
bottom = stack_pointer[-3];
_PyStackRef temp = bottom;
bottom = top;
top = temp;
stack_pointer[-3] = bottom;
stack_pointer[-1] = top;
break;
}
case _SWAP: {
_PyStackRef top;
_PyStackRef bottom;
@ -6817,7 +6875,6 @@
_PyStackRef temp = bottom;
bottom = top;
top = temp;
assert(oparg >= 2);
stack_pointer[-2 - (oparg-2)] = bottom;
stack_pointer[-1] = top;
break;

View file

@ -5228,7 +5228,6 @@
_PyStackRef bottom;
_PyStackRef top;
bottom = stack_pointer[-1 - (oparg-1)];
assert(oparg > 0);
top = PyStackRef_DUP(bottom);
stack_pointer[0] = top;
stack_pointer += 1;
@ -11568,7 +11567,6 @@
_PyStackRef temp = bottom;
bottom = top;
top = temp;
assert(oparg >= 2);
stack_pointer[-2 - (oparg-2)] = bottom;
stack_pointer[-1] = top;
DISPATCH();

View file

@ -1292,8 +1292,8 @@ uop_optimize(
for (int pc = 0; pc < length; pc++) {
int opcode = buffer[pc].opcode;
int oparg = buffer[pc].oparg;
if (oparg < _PyUop_Replication[opcode]) {
buffer[pc].opcode = opcode + oparg + 1;
if (oparg < _PyUop_Replication[opcode].stop && oparg >= _PyUop_Replication[opcode].start) {
buffer[pc].opcode = opcode + oparg + 1 - _PyUop_Replication[opcode].start;
assert(strncmp(_PyOpcode_uop_name[buffer[pc].opcode], _PyOpcode_uop_name[opcode], strlen(_PyOpcode_uop_name[opcode])) == 0);
}
else if (is_terminator(&buffer[pc])) {

View file

@ -180,7 +180,7 @@ class Uop:
properties: Properties
_size: int = -1
implicitly_created: bool = False
replicated = 0
replicated = range(0)
replicates: "Uop | None" = None
# Size of the instruction(s), only set for uops containing the INSTRUCTION_SIZE macro
instruction_size: int | None = None
@ -868,6 +868,28 @@ def compute_properties(op: parser.CodeDef) -> Properties:
needs_prev=variable_used(op, "prev_instr"),
)
def expand(items: list[StackItem], oparg: int) -> list[StackItem]:
    """Replace a single oparg-sized array stack item with scalar items.

    If exactly one entry of *items* has a ``size`` expression mentioning
    ``oparg``, that expression is evaluated with ``oparg`` bound to the
    given constant, and the entry is replaced by that many new items named
    ``<name>_0``, ``<name>_1``, ...  Each new item is built with an empty
    string as its second constructor argument (presumably the size -- confirm
    against the ``StackItem`` definition) and copies the array item's
    ``peek`` and ``used`` values.

    *items* itself is returned, unchanged, when no entry depends on
    ``oparg``, when more than one entry does, or when the size expression
    cannot be evaluated to an integer.  The input list is never mutated.
    """
    # Only replace array item with scalar if no more than one item is an array
    array_indices = [i for i, item in enumerate(items) if "oparg" in item.size]
    if len(array_indices) != 1:
        return items
    index = array_indices[0]
    array_item = items[index]
    try:
        # Bind oparg as a real variable rather than splicing its digits into
        # the expression text, so identifiers that merely contain "oparg"
        # are not silently rewritten into something else.
        count = int(eval(array_item.size, {}, {"oparg": oparg}))
    except (NameError, SyntaxError, TypeError, ValueError, ArithmeticError):
        # eval() reports an unevaluable expression with NameError,
        # SyntaxError or TypeError (not just ValueError); in every such case
        # fall back to leaving the stack description untouched.
        return items
    scalars = [
        StackItem(f"{array_item.name}_{i}", "", array_item.peek, array_item.used)
        for i in range(count)
    ]
    return items[:index] + scalars + items[index + 1:]
def scalarize_stack(stack: StackEffect, oparg: int) -> StackEffect:
    """Scalarize both sides of *stack* in place for a constant *oparg*.

    The ``inputs`` list and then the ``outputs`` list are each passed
    through ``expand`` and the results stored back on *stack*.  The same
    *stack* object is returned so the call can be used in an assignment.
    """
    # Process inputs first, then outputs, storing each result back on stack.
    for side in ("inputs", "outputs"):
        setattr(stack, side, expand(getattr(stack, side), oparg))
    return stack
def make_uop(
name: str,
@ -887,20 +909,26 @@ def make_uop(
)
for anno in op.annotations:
if anno.startswith("replicate"):
result.replicated = int(anno[10:-1])
text = anno[10:-1]
start, stop = text.split(":")
result.replicated = range(int(start), int(stop))
break
else:
return result
for oparg in range(result.replicated):
for oparg in result.replicated:
name_x = name + "_" + str(oparg)
properties = compute_properties(op)
properties.oparg = False
properties.const_oparg = oparg
stack = analyze_stack(op)
if not variable_used(op, "oparg"):
stack = scalarize_stack(stack, oparg)
else:
properties.const_oparg = oparg
rep = Uop(
name=name_x,
context=op.context,
annotations=op.annotations,
stack=analyze_stack(op),
stack=stack,
caches=analyze_caches(inputs),
local_stores=find_variable_stores(op),
body=op.block,

View file

@ -379,9 +379,13 @@ class Parser(PLexer):
while anno := self.expect(lx.ANNOTATION):
if anno.text == "replicate":
self.require(lx.LPAREN)
times = self.require(lx.NUMBER)
stop = self.require(lx.NUMBER)
start_text = "0"
if self.expect(lx.COLON):
start_text = stop.text
stop = self.require(lx.NUMBER)
self.require(lx.RPAREN)
annotations.append(f"replicate({times.text})")
annotations.append(f"replicate({start_text}:{stop.text})")
else:
annotations.append(anno.text)
tkn = self.expect(lx.INST)

View file

@ -24,7 +24,8 @@ DEFAULT_OUTPUT = ROOT / "Include/internal/pycore_uop_metadata.h"
def generate_names_and_flags(analysis: Analysis, out: CWriter) -> None:
out.emit("extern const uint16_t _PyUop_Flags[MAX_UOP_ID+1];\n")
out.emit("extern const uint8_t _PyUop_Replication[MAX_UOP_ID+1];\n")
out.emit("typedef struct _rep_range { uint8_t start; uint8_t stop; } ReplicationRange;\n")
out.emit("extern const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1];\n")
out.emit("extern const char * const _PyOpcode_uop_name[MAX_UOP_ID+1];\n\n")
out.emit("extern int _PyUop_num_popped(int opcode, int oparg);\n\n")
out.emit("#ifdef NEED_OPCODE_METADATA\n")
@ -34,10 +35,11 @@ def generate_names_and_flags(analysis: Analysis, out: CWriter) -> None:
out.emit(f"[{uop.name}] = {cflags(uop.properties)},\n")
out.emit("};\n\n")
out.emit("const uint8_t _PyUop_Replication[MAX_UOP_ID+1] = {\n")
out.emit("const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1] = {\n")
for uop in analysis.uops.values():
if uop.replicated:
out.emit(f"[{uop.name}] = {uop.replicated},\n")
assert(uop.replicated.step == 1)
out.emit(f"[{uop.name}] = {{ {uop.replicated.start}, {uop.replicated.stop} }},\n")
out.emit("};\n\n")
out.emit("const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {\n")