Mirror of https://github.com/python/cpython.git, synced 2025-08-04 17:08:35 +00:00
gh-106149: move jump target resolution from optimizer to assembler (#106150)
parent eaa1eae55e
commit 5290881009

5 changed files with 113 additions and 105 deletions
Include/internal/pycore_compile.h

@@ -28,7 +28,7 @@ extern int _PyAST_Optimize(
     int ff_features);
 
 typedef struct {
-    int h_offset;
+    int h_label;
     int h_startdepth;
     int h_preserve_lasti;
 } _PyCompile_ExceptHandlerInfo;
@@ -38,6 +38,10 @@ typedef struct {
     int i_oparg;
     _PyCompilerSrcLocation i_loc;
     _PyCompile_ExceptHandlerInfo i_except_handler_info;
+
+    /* Used by the assembler */
+    int i_target;
+    int i_offset;
 } _PyCompile_Instruction;
 
 typedef struct {
@@ -85,8 +89,6 @@ int _PyCompile_EnsureArrayLargeEnough(
 
 int _PyCompile_ConstCacheMergeOne(PyObject *const_cache, PyObject **obj);
 
-int _PyCompile_InstrSize(int opcode, int oparg);
-
 /* Access compiler internals for unit testing */
 
 PyAPI_FUNC(PyObject*) _PyCompile_CodeGen(
Include/internal/pycore_flowgraph.h

@@ -55,8 +55,6 @@ typedef struct _PyCfgBasicblock_ {
     int b_predecessors;
     /* depth of stack upon entry of block, computed by stackdepth() */
     int b_startdepth;
-    /* instruction offset for block, computed by assemble_jump_offsets() */
-    int b_offset;
     /* Basic block is an exception handler that preserves lasti */
     unsigned b_preserve_lasti : 1;
     /* Used by compiler passes to mark whether they have visited a basic block. */
Python/assemble.c

@@ -4,6 +4,7 @@
 #include "pycore_code.h"          // write_location_entry_start()
 #include "pycore_compile.h"
 #include "pycore_opcode.h"        // _PyOpcode_Caches[] and opcode category macros
+#include "pycore_opcode_utils.h"  // IS_BACKWARDS_JUMP_OPCODE
 #include "opcode_metadata.h"      // IS_PSEUDO_INSTR
 
 
@@ -34,6 +35,18 @@ same_location(location a, location b)
            a.end_col_offset == b.end_col_offset;
 }
 
+static int
+instr_size(instruction *instr)
+{
+    int opcode = instr->i_opcode;
+    int oparg = instr->i_oparg;
+    assert(!IS_PSEUDO_INSTR(opcode));
+    assert(OPCODE_HAS_ARG(opcode) || oparg == 0);
+    int extended_args = (0xFFFFFF < oparg) + (0xFFFF < oparg) + (0xFF < oparg);
+    int caches = _PyOpcode_Caches[opcode];
+    return extended_args + 1 + caches;
+}
+
 struct assembler {
     PyObject *a_bytecode;  /* bytes containing bytecode */
     int a_offset;          /* offset into bytecode */
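Aside: instr_size() counts one code unit for the instruction itself, one per inline cache entry, and one per EXTENDED_ARG prefix the oparg needs (each prefix carries 8 more bits). A standalone sanity check of that threshold arithmetic (extended_args_for is an illustrative helper, not a CPython function):

    #include <assert.h>

    /* Each EXTENDED_ARG contributes 8 more oparg bits, so the cutoffs
       are 0xFF, 0xFFFF and 0xFFFFFF, as in instr_size() above. */
    static int extended_args_for(int oparg) {
        return (0xFFFFFF < oparg) + (0xFFFF < oparg) + (0xFF < oparg);
    }

    int main(void) {
        assert(extended_args_for(0xFF) == 0);       /* fits in one byte */
        assert(extended_args_for(0x100) == 1);      /* one prefix */
        assert(extended_args_for(0x12345) == 2);    /* two prefixes */
        assert(extended_args_for(0x1000000) == 3);  /* three, the max */
        return 0;
    }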
@@ -118,6 +131,7 @@ assemble_emit_exception_table_item(struct assembler *a, int value, int msb)
 
 static int
 assemble_emit_exception_table_entry(struct assembler *a, int start, int end,
+                                    int handler_offset,
                                     _PyCompile_ExceptHandlerInfo *handler)
 {
     Py_ssize_t len = PyBytes_GET_SIZE(a->a_except_table);
@@ -126,7 +140,7 @@ assemble_emit_exception_table_entry(struct assembler *a, int start, int end,
     }
     int size = end-start;
     assert(end > start);
-    int target = handler->h_offset;
+    int target = handler_offset;
     int depth = handler->h_startdepth - 1;
     if (handler->h_preserve_lasti > 0) {
         depth -= 1;
@@ -145,24 +159,30 @@ assemble_exception_table(struct assembler *a, instr_sequence *instrs)
 {
     int ioffset = 0;
     _PyCompile_ExceptHandlerInfo handler;
-    handler.h_offset = -1;
+    handler.h_label = -1;
     handler.h_startdepth = -1;
     handler.h_preserve_lasti = -1;
     int start = -1;
     for (int i = 0; i < instrs->s_used; i++) {
         instruction *instr = &instrs->s_instrs[i];
-        if (instr->i_except_handler_info.h_offset != handler.h_offset) {
-            if (handler.h_offset >= 0) {
+        if (instr->i_except_handler_info.h_label != handler.h_label) {
+            if (handler.h_label >= 0) {
+                int handler_offset = instrs->s_instrs[handler.h_label].i_offset;
                 RETURN_IF_ERROR(
-                    assemble_emit_exception_table_entry(a, start, ioffset, &handler));
+                    assemble_emit_exception_table_entry(a, start, ioffset,
+                                                        handler_offset,
+                                                        &handler));
             }
             start = ioffset;
             handler = instr->i_except_handler_info;
         }
-        ioffset += _PyCompile_InstrSize(instr->i_opcode, instr->i_oparg);
+        ioffset += instr_size(instr);
     }
-    if (handler.h_offset >= 0) {
-        RETURN_IF_ERROR(assemble_emit_exception_table_entry(a, start, ioffset, &handler));
+    if (handler.h_label >= 0) {
+        int handler_offset = instrs->s_instrs[handler.h_label].i_offset;
+        RETURN_IF_ERROR(assemble_emit_exception_table_entry(a, start, ioffset,
+                                                            handler_offset,
+                                                            &handler));
     }
     return SUCCESS;
 }
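Aside: the table builder now tracks the pending handler by label instead of byte offset, merging runs of instructions that share a handler into one entry and converting the label to an offset (via the target instruction's i_offset) only when the entry is written out. A toy model of the run-merging logic, using plain indices in place of byte offsets (the data is invented):

    #include <stdio.h>

    int main(void) {
        /* handler label attached to each instruction; -1 = no handler */
        int h_label[] = {-1, 7, 7, 7, -1, 9, 9};
        int n = 7, cur = -1, start = -1;
        for (int i = 0; i < n; i++) {
            if (h_label[i] != cur) {
                if (cur >= 0) {
                    printf("entry: [%d, %d) -> handler label %d\n", start, i, cur);
                }
                start = i;
                cur = h_label[i];
            }
        }
        if (cur >= 0) {
            printf("entry: [%d, %d) -> handler label %d\n", start, n, cur);
        }
        return 0;
    }

This prints two entries, [1, 4) -> label 7 and [5, 7) -> label 9, mirroring how assemble_exception_table() emits one entry per run.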
@@ -329,7 +349,7 @@ assemble_location_info(struct assembler *a, instr_sequence *instrs,
             loc = instr->i_loc;
             size = 0;
         }
-        size += _PyCompile_InstrSize(instr->i_opcode, instr->i_oparg);
+        size += instr_size(instr);
     }
     RETURN_IF_ERROR(assemble_emit_location(a, loc, size));
     return SUCCESS;
@@ -385,7 +405,7 @@ assemble_emit_instr(struct assembler *a, instruction *instr)
     Py_ssize_t len = PyBytes_GET_SIZE(a->a_bytecode);
     _Py_CODEUNIT *code;
 
-    int size = _PyCompile_InstrSize(instr->i_opcode, instr->i_oparg);
+    int size = instr_size(instr);
     if (a->a_offset + size >= len / (int)sizeof(_Py_CODEUNIT)) {
         if (len > PY_SSIZE_T_MAX / 2) {
             return ERROR;
@@ -585,12 +605,83 @@ error:
     return co;
 }
 
+static int
+resolve_jump_offsets(instr_sequence *instrs)
+{
+    /* Compute the size of each instruction and fixup jump args.
+     * Replace instruction index with position in bytecode.
+     */
+
+    for (int i = 0; i < instrs->s_used; i++) {
+        instruction *instr = &instrs->s_instrs[i];
+        if (OPCODE_HAS_JUMP(instr->i_opcode)) {
+            instr->i_target = instr->i_oparg;
+        }
+    }
+
+    int extended_arg_recompile;
+
+    do {
+        int totsize = 0;
+        for (int i = 0; i < instrs->s_used; i++) {
+            instruction *instr = &instrs->s_instrs[i];
+            instr->i_offset = totsize;
+            int isize = instr_size(instr);
+            totsize += isize;
+        }
+        extended_arg_recompile = 0;
+
+        int offset = 0;
+        for (int i = 0; i < instrs->s_used; i++) {
+            instruction *instr = &instrs->s_instrs[i];
+            int isize = instr_size(instr);
+            /* jump offsets are computed relative to
+             * the instruction pointer after fetching
+             * the jump instruction.
+             */
+            offset += isize;
+            if (OPCODE_HAS_JUMP(instr->i_opcode)) {
+                instruction *target = &instrs->s_instrs[instr->i_target];
+                instr->i_oparg = target->i_offset;
+                if (instr->i_oparg < offset) {
+                    assert(IS_BACKWARDS_JUMP_OPCODE(instr->i_opcode));
+                    instr->i_oparg = offset - instr->i_oparg;
+                }
+                else {
+                    assert(!IS_BACKWARDS_JUMP_OPCODE(instr->i_opcode));
+                    instr->i_oparg = instr->i_oparg - offset;
+                }
+                if (instr_size(instr) != isize) {
+                    extended_arg_recompile = 1;
+                }
+            }
+        }
+        /* XXX: This is an awful hack that could hurt performance, but
+           on the bright side it should work until we come up
+           with a better solution.
+
+           The issue is that in the first loop instr_size() is
+           called, and it requires i_oparg be set appropriately.
+           There is a bootstrap problem because i_oparg is
+           calculated in the second loop above.
+
+           So we loop until we stop seeing new EXTENDED_ARGs.
+           The only EXTENDED_ARGs that could be popping up are
+           ones in jump instructions.  So this should converge
+           fairly quickly.
+        */
+    } while (extended_arg_recompile);
+    return SUCCESS;
+}
+
 PyCodeObject *
 _PyAssemble_MakeCodeObject(_PyCompile_CodeUnitMetadata *umd, PyObject *const_cache,
                            PyObject *consts, int maxdepth, instr_sequence *instrs,
                            int nlocalsplus, int code_flags, PyObject *filename)
 {
+    if (resolve_jump_offsets(instrs) < 0) {
+        return NULL;
+    }
     PyCodeObject *co = NULL;
 
     struct assembler a;
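Aside: the XXX comment describes a fixed-point computation. Widening a jump's oparg can add an EXTENDED_ARG, which shifts every later offset, which can in turn widen other jumps, so the two passes repeat until no size changes. A self-contained toy model of that convergence (all arrays, sizes and thresholds here are illustrative; the real loop is resolve_jump_offsets() above):

    #include <stdio.h>

    enum { N = 3 };
    /* instr 0 jumps forward to instr 2; instr 1 stands in for ~300
       code units of straight-line code, so the jump distance will
       cross the 0xFF threshold and need an EXTENDED_ARG. */
    static int base_size[N] = {1, 300, 1};
    static int is_jump[N]   = {1, 0, 0};
    static int target[N]    = {2, 0, 0};
    static int oparg[N], offset[N];

    static int size_of(int i) {
        int extra = (0xFF < oparg[i]) + (0xFFFF < oparg[i]) + (0xFFFFFF < oparg[i]);
        return base_size[i] + (is_jump[i] ? extra : 0);
    }

    int main(void) {
        int recompile, passes = 0;
        do {
            int tot = 0;
            for (int i = 0; i < N; i++) {   /* pass 1: assign offsets */
                offset[i] = tot;
                tot += size_of(i);
            }
            recompile = 0;
            int pos = 0;
            for (int i = 0; i < N; i++) {   /* pass 2: fix up opargs */
                int isize = size_of(i);
                pos += isize;               /* offsets are post-fetch */
                if (is_jump[i]) {
                    oparg[i] = offset[target[i]] - pos;  /* forward jump */
                    if (size_of(i) != isize) {
                        recompile = 1;      /* grew: offsets are stale */
                    }
                }
            }
            passes++;
        } while (recompile);
        printf("converged after %d passes, oparg = %d\n", passes, oparg[0]);
        return 0;
    }

On this input the first pass produces oparg 300, which no longer fits in one byte, so sizes are recomputed and the second pass converges.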
Python/compile.c

@@ -131,16 +131,6 @@ enum {
 };
 
 
-int
-_PyCompile_InstrSize(int opcode, int oparg)
-{
-    assert(!IS_PSEUDO_INSTR(opcode));
-    assert(OPCODE_HAS_ARG(opcode) || oparg == 0);
-    int extended_args = (0xFFFFFF < oparg) + (0xFFFF < oparg) + (0xFF < oparg);
-    int caches = _PyOpcode_Caches[opcode];
-    return extended_args + 1 + caches;
-}
-
 typedef _PyCompile_Instruction instruction;
 typedef _PyCompile_InstructionSequence instr_sequence;
 
@@ -7717,17 +7707,20 @@ cfg_to_instr_sequence(cfg_builder *g, instr_sequence *seq)
         RETURN_IF_ERROR(instr_sequence_use_label(seq, b->b_label.id));
         for (int i = 0; i < b->b_iused; i++) {
             cfg_instr *instr = &b->b_instr[i];
+            if (OPCODE_HAS_JUMP(instr->i_opcode)) {
+                instr->i_oparg = instr->i_target->b_label.id;
+            }
             RETURN_IF_ERROR(
                 instr_sequence_addop(seq, instr->i_opcode, instr->i_oparg, instr->i_loc));
 
             _PyCompile_ExceptHandlerInfo *hi = &seq->s_instrs[seq->s_used-1].i_except_handler_info;
             if (instr->i_except != NULL) {
-                hi->h_offset = instr->i_except->b_offset;
+                hi->h_label = instr->i_except->b_label.id;
                 hi->h_startdepth = instr->i_except->b_startdepth;
                 hi->h_preserve_lasti = instr->i_except->b_preserve_lasti;
             }
             else {
-                hi->h_offset = -1;
+                hi->h_label = -1;
             }
         }
     }
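Aside: the effect of this hunk is that a jump instruction now leaves the CFG stage with its oparg holding the target's label id; resolve_jump_offsets() in the assembler later rewrites that id into a byte offset. A minimal sketch of the handoff, with invented toy_instr/TOY_* names standing in for CPython's types:

    #include <assert.h>

    typedef struct { int i_opcode, i_oparg, i_offset; } toy_instr;
    enum { TOY_NOP = 1, TOY_JUMP = 2 };

    int main(void) {
        /* stage 1 output: instr 0 is a jump whose oparg names instr 2 */
        toy_instr seq[3] = {
            {TOY_JUMP, 2, 0}, {TOY_NOP, 0, 0}, {TOY_NOP, 0, 0},
        };
        /* stage 2: once offsets are known (one unit each here), the
           label id becomes a distance past the fetched jump */
        for (int i = 0; i < 3; i++) {
            seq[i].i_offset = i;
        }
        int after_fetch = seq[0].i_offset + 1;
        seq[0].i_oparg = seq[seq[0].i_oparg].i_offset - after_fetch;
        assert(seq[0].i_oparg == 1);   /* skip one instruction forward */
        return 0;
    }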
Python/flowgraph.c

@@ -166,22 +166,6 @@ _PyBasicblock_InsertInstruction(basicblock *block, int pos, cfg_instr *instr) {
     return SUCCESS;
 }
 
-static int
-instr_size(cfg_instr *instruction)
-{
-    return _PyCompile_InstrSize(instruction->i_opcode, instruction->i_oparg);
-}
-
-static int
-blocksize(basicblock *b)
-{
-    int size = 0;
-    for (int i = 0; i < b->b_iused; i++) {
-        size += instr_size(&b->b_instr[i]);
-    }
-    return size;
-}
-
 /* For debugging purposes only */
 #if 0
 static void
@@ -212,9 +196,9 @@ static void
 dump_basicblock(const basicblock *b)
 {
     const char *b_return = basicblock_returns(b) ? "return " : "";
-    fprintf(stderr, "%d: [EH=%d CLD=%d WRM=%d NO_FT=%d %p] used: %d, depth: %d, offset: %d %s\n",
+    fprintf(stderr, "%d: [EH=%d CLD=%d WRM=%d NO_FT=%d %p] used: %d, depth: %d, %s\n",
            b->b_label.id, b->b_except_handler, b->b_cold, b->b_warm, BB_NO_FALLTHROUGH(b), b, b->b_iused,
-           b->b_startdepth, b->b_offset, b_return);
+           b->b_startdepth, b_return);
     if (b->b_instr) {
         int i;
         for (i = 0; i < b->b_iused; i++) {
@@ -480,71 +464,11 @@ normalize_jumps(_PyCfgBuilder *g)
     return SUCCESS;
 }
 
-static void
-resolve_jump_offsets(basicblock *entryblock)
-{
-    int bsize, totsize, extended_arg_recompile;
-
-    /* Compute the size of each block and fixup jump args.
-       Replace block pointer with position in bytecode. */
-    do {
-        totsize = 0;
-        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
-            bsize = blocksize(b);
-            b->b_offset = totsize;
-            totsize += bsize;
-        }
-        extended_arg_recompile = 0;
-        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
-            bsize = b->b_offset;
-            for (int i = 0; i < b->b_iused; i++) {
-                cfg_instr *instr = &b->b_instr[i];
-                int isize = instr_size(instr);
-                /* jump offsets are computed relative to
-                 * the instruction pointer after fetching
-                 * the jump instruction.
-                 */
-                bsize += isize;
-                if (is_jump(instr)) {
-                    instr->i_oparg = instr->i_target->b_offset;
-                    if (instr->i_oparg < bsize) {
-                        assert(IS_BACKWARDS_JUMP_OPCODE(instr->i_opcode));
-                        instr->i_oparg = bsize - instr->i_oparg;
-                    }
-                    else {
-                        assert(!IS_BACKWARDS_JUMP_OPCODE(instr->i_opcode));
-                        instr->i_oparg -= bsize;
-                    }
-                    if (instr_size(instr) != isize) {
-                        extended_arg_recompile = 1;
-                    }
-                }
-            }
-        }
-
-        /* XXX: This is an awful hack that could hurt performance, but
-           on the bright side it should work until we come up
-           with a better solution.
-
-           The issue is that in the first loop blocksize() is called
-           which calls instr_size() which requires i_oparg be set
-           appropriately.  There is a bootstrap problem because
-           i_oparg is calculated in the second loop above.
-
-           So we loop until we stop seeing new EXTENDED_ARGs.
-           The only EXTENDED_ARGs that could be popping up are
-           ones in jump instructions.  So this should converge
-           fairly quickly.
-        */
-    } while (extended_arg_recompile);
-}
-
 int
 _PyCfg_ResolveJumps(_PyCfgBuilder *g)
 {
     RETURN_IF_ERROR(normalize_jumps(g));
     assert(no_redundant_jumps(g));
-    resolve_jump_offsets(g->g_entryblock);
     return SUCCESS;
 }