GH-88116: Use a compact format to represent end line and column offsets. (GH-91666)

* Stores all location info in linetable to conform to PEP 626.

* Remove column table from code objects.

* Remove end-line table from code objects.

* Document new location table format
This commit is contained in:
Mark Shannon 2022-04-21 16:10:37 +01:00 committed by GitHub
parent 2a5f171759
commit 944fffee89
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
20 changed files with 859 additions and 539 deletions

View file

@ -7053,25 +7053,23 @@ compiler_match(struct compiler *c, stmt_ty s)
XXX must handle implicit jumps from one block to next
*/
struct assembler {
PyObject *a_bytecode; /* bytes containing bytecode */
PyObject *a_lnotab; /* bytes containing lnotab */
PyObject *a_enotab; /* bytes containing enotab */
PyObject *a_cnotab; /* bytes containing cnotab */
PyObject *a_except_table; /* bytes containing exception table */
basicblock *a_entry;
int a_offset; /* offset into bytecode */
int a_nblocks; /* number of reachable blocks */
int a_except_table_off; /* offset into exception table */
int a_lnotab_off; /* offset into lnotab */
int a_enotab_off; /* offset into enotab */
int a_cnotab_off; /* offset into cnotab */
int a_prevlineno; /* lineno of last emitted line in line table */
int a_prev_end_lineno; /* end_lineno of last emitted line in line table */
int a_lineno; /* lineno of last emitted instruction */
int a_end_lineno; /* end_lineno of last emitted instruction */
int a_lineno_start; /* bytecode start offset of current lineno */
int a_end_lineno_start; /* bytecode start offset of current end_lineno */
/* Location Info */
PyObject* a_linetable; /* bytes containing location info */
int a_location_off; /* offset of last written location info frame */
};
Py_LOCAL_INLINE(void)
@ -7169,25 +7167,15 @@ assemble_init(struct assembler *a, int nblocks, int firstlineno)
memset(a, 0, sizeof(struct assembler));
a->a_prevlineno = a->a_lineno = firstlineno;
a->a_prev_end_lineno = a->a_end_lineno = firstlineno;
a->a_lnotab = NULL;
a->a_enotab = NULL;
a->a_cnotab = NULL;
a->a_cnotab_off = 0;
a->a_linetable = NULL;
a->a_location_off = 0;
a->a_except_table = NULL;
a->a_bytecode = PyBytes_FromStringAndSize(NULL, DEFAULT_CODE_SIZE);
if (a->a_bytecode == NULL) {
goto error;
}
a->a_lnotab = PyBytes_FromStringAndSize(NULL, DEFAULT_LNOTAB_SIZE);
if (a->a_lnotab == NULL) {
goto error;
}
a->a_enotab = PyBytes_FromStringAndSize(NULL, DEFAULT_LNOTAB_SIZE);
if (a->a_enotab == NULL) {
goto error;
}
a->a_cnotab = PyBytes_FromStringAndSize(NULL, DEFAULT_CNOTAB_SIZE);
if (a->a_cnotab == NULL) {
a->a_linetable = PyBytes_FromStringAndSize(NULL, DEFAULT_CNOTAB_SIZE);
if (a->a_linetable == NULL) {
goto error;
}
a->a_except_table = PyBytes_FromStringAndSize(NULL, DEFAULT_LNOTAB_SIZE);
@ -7201,9 +7189,7 @@ assemble_init(struct assembler *a, int nblocks, int firstlineno)
return 1;
error:
Py_XDECREF(a->a_bytecode);
Py_XDECREF(a->a_lnotab);
Py_XDECREF(a->a_enotab);
Py_XDECREF(a->a_cnotab);
Py_XDECREF(a->a_linetable);
Py_XDECREF(a->a_except_table);
return 0;
}
@ -7212,9 +7198,7 @@ static void
assemble_free(struct assembler *a)
{
Py_XDECREF(a->a_bytecode);
Py_XDECREF(a->a_lnotab);
Py_XDECREF(a->a_enotab);
Py_XDECREF(a->a_cnotab);
Py_XDECREF(a->a_linetable);
Py_XDECREF(a->a_except_table);
}
@ -7230,25 +7214,6 @@ blocksize(basicblock *b)
return size;
}
static int
assemble_emit_table_pair(struct assembler* a, PyObject** table, int* offset,
int left, int right)
{
Py_ssize_t len = PyBytes_GET_SIZE(*table);
if (*offset + 2 >= len) {
if (_PyBytes_Resize(table, len * 2) < 0)
return 0;
}
unsigned char* table_entry = (unsigned char*)PyBytes_AS_STRING(*table);
table_entry += *offset;
*offset += 2;
*table_entry++ = left;
*table_entry++ = right;
return 1;
}
static basicblock *
push_except_block(ExceptStack *stack, struct instr *setup) {
assert(is_block_push(setup));
@ -7492,118 +7457,153 @@ assemble_exception_table(struct assembler *a)
return 1;
}
/* Appends a range to the end of the line number table. See
* Objects/lnotab_notes.txt for the description of the line number table. */
/* Code location emitting code. See locations.md for a description of the format. */
#define MSB 0x80
static void
write_location_byte(struct assembler* a, int val)
{
PyBytes_AS_STRING(a->a_linetable)[a->a_location_off] = val&255;
a->a_location_off++;
}
static uint8_t *
location_pointer(struct assembler* a)
{
return (uint8_t *)PyBytes_AS_STRING(a->a_linetable) +
a->a_location_off;
}
static void
write_location_first_byte(struct assembler* a, int code, int length)
{
a->a_location_off += write_location_entry_start(
location_pointer(a), code, length);
}
static void
write_location_varint(struct assembler* a, unsigned int val)
{
uint8_t *ptr = location_pointer(a);
a->a_location_off += write_varint(ptr, val);
}
static void
write_location_signed_varint(struct assembler* a, int val)
{
uint8_t *ptr = location_pointer(a);
a->a_location_off += write_signed_varint(ptr, val);
}
static void
write_location_info_short_form(struct assembler* a, int length, int column, int end_column)
{
assert(length > 0 && length <= 8);
int column_low_bits = column & 7;
int column_group = column >> 3;
assert(column < 80);
assert(end_column - column < 16);
write_location_first_byte(a, PY_CODE_LOCATION_INFO_SHORT0 + column_group, length);
write_location_byte(a, (column_low_bits << 4) | (end_column - column));
}
static void
write_location_info_oneline_form(struct assembler* a, int length, int line_delta, int column, int end_column)
{
assert(length > 0 && length <= 8);
assert(line_delta >= 0 && line_delta < 3);
assert(column < 128);
assert(end_column < 128);
write_location_first_byte(a, PY_CODE_LOCATION_INFO_ONE_LINE0 + line_delta, length);
write_location_byte(a, column);
write_location_byte(a, end_column);
}
static void
write_location_info_long_form(struct assembler* a, struct instr* i, int length)
{
assert(length > 0 && length <= 8);
write_location_first_byte(a, PY_CODE_LOCATION_INFO_LONG, length);
write_location_signed_varint(a, i->i_lineno - a->a_lineno);
assert(i->i_end_lineno >= i->i_lineno);
write_location_varint(a, i->i_end_lineno - i->i_lineno);
write_location_varint(a, i->i_col_offset+1);
write_location_varint(a, i->i_end_col_offset+1);
}
static void
write_location_info_none(struct assembler* a, int length)
{
write_location_first_byte(a, PY_CODE_LOCATION_INFO_NONE, length);
}
static void
write_location_info_no_column(struct assembler* a, int length, int line_delta)
{
write_location_first_byte(a, PY_CODE_LOCATION_INFO_NO_COLUMNS, length);
write_location_signed_varint(a, line_delta);
}
#define THEORETICAL_MAX_ENTRY_SIZE 25 /* 1 + 6 + 6 + 6 + 6 */
static int
assemble_line_range(struct assembler* a, int current, PyObject** table,
int* prev, int* start, int* offset)
write_location_info_entry(struct assembler* a, struct instr* i, int isize)
{
int ldelta, bdelta;
bdelta = (a->a_offset - *start) * sizeof(_Py_CODEUNIT);
if (bdelta == 0) {
return 1;
}
if (current < 0) {
ldelta = -128;
}
else {
ldelta = current - *prev;
*prev = current;
while (ldelta > 127) {
if (!assemble_emit_table_pair(a, table, offset, 0, 127)) {
return 0;
}
ldelta -= 127;
}
while (ldelta < -127) {
if (!assemble_emit_table_pair(a, table, offset, 0, -127)) {
return 0;
}
ldelta += 127;
}
}
assert(-128 <= ldelta && ldelta < 128);
while (bdelta > 254) {
if (!assemble_emit_table_pair(a, table, offset, 254, ldelta)) {
Py_ssize_t len = PyBytes_GET_SIZE(a->a_linetable);
if (a->a_location_off + THEORETICAL_MAX_ENTRY_SIZE >= len) {
assert(len > THEORETICAL_MAX_ENTRY_SIZE);
if (_PyBytes_Resize(&a->a_linetable, len*2) < 0) {
return 0;
}
ldelta = current < 0 ? -128 : 0;
bdelta -= 254;
}
if (!assemble_emit_table_pair(a, table, offset, bdelta, ldelta)) {
return 0;
}
*start = a->a_offset;
return 1;
}
static int
assemble_start_line_range(struct assembler* a) {
return assemble_line_range(a, a->a_lineno, &a->a_lnotab,
&a->a_prevlineno, &a->a_lineno_start, &a->a_lnotab_off);
}
static int
assemble_end_line_range(struct assembler* a) {
return assemble_line_range(a, a->a_end_lineno, &a->a_enotab,
&a->a_prev_end_lineno, &a->a_end_lineno_start, &a->a_enotab_off);
}
static int
assemble_lnotab(struct assembler* a, struct instr* i)
{
if (i->i_lineno == a->a_lineno) {
if (i->i_lineno < 0) {
write_location_info_none(a, isize);
return 1;
}
if (!assemble_start_line_range(a)) {
return 0;
int line_delta = i->i_lineno - a->a_lineno;
int column = i->i_col_offset;
int end_column = i->i_end_col_offset;
assert(column >= -1);
assert(end_column >= -1);
if (column < 0 || end_column < 0) {
if (i->i_end_lineno == i->i_lineno || i->i_end_lineno == -1) {
write_location_info_no_column(a, isize, line_delta);
a->a_lineno = i->i_lineno;
return 1;
}
}
else if (i->i_end_lineno == i->i_lineno) {
if (line_delta == 0 && column < 80 && end_column - column < 16) {
write_location_info_short_form(a, isize, column, end_column);
return 1;
}
if (line_delta >= 0 && line_delta < 3 && column < 128 && end_column < 128) {
write_location_info_oneline_form(a, isize, line_delta, column, end_column);
a->a_lineno = i->i_lineno;
return 1;
}
}
write_location_info_long_form(a, i, isize);
a->a_lineno = i->i_lineno;
return 1;
}
static int
assemble_enotab(struct assembler* a, struct instr* i)
assemble_emit_location(struct assembler* a, struct instr* i)
{
if (i->i_end_lineno == a->a_end_lineno) {
return 1;
}
if (!assemble_end_line_range(a)) {
return 0;
}
a->a_end_lineno = i->i_end_lineno;
return 1;
}
static int
assemble_cnotab(struct assembler* a, struct instr* i, int instr_size)
{
Py_ssize_t len = PyBytes_GET_SIZE(a->a_cnotab);
int difference = instr_size * 2;
if (a->a_cnotab_off + difference >= len) {
if (_PyBytes_Resize(&a->a_cnotab, difference + (len * 2)) < 0) {
int isize = instr_size(i);
while (isize > 8) {
if (!write_location_info_entry(a, i, 8)) {
return 0;
}
isize -= 8;
}
unsigned char* cnotab = (unsigned char*)PyBytes_AS_STRING(a->a_cnotab);
cnotab += a->a_cnotab_off;
a->a_cnotab_off += difference;
for (int j = 0; j < instr_size; j++) {
if (i->i_col_offset > 255 || i->i_end_col_offset > 255) {
*cnotab++ = 0;
*cnotab++ = 0;
continue;
}
*cnotab++ = i->i_col_offset + 1;
*cnotab++ = i->i_end_col_offset + 1;
}
return 1;
return write_location_info_entry(a, i, isize);
}
/* assemble_emit()
Extend the bytecode with a new instruction.
Update lnotab if necessary.
@ -7616,15 +7616,6 @@ assemble_emit(struct assembler *a, struct instr *i)
_Py_CODEUNIT *code;
int size = instr_size(i);
if (i->i_lineno && !assemble_lnotab(a, i)) {
return 0;
}
if (!assemble_enotab(a, i)) {
return 0;
}
if (!assemble_cnotab(a, i, size)) {
return 0;
}
if (a->a_offset + size >= len / (int)sizeof(_Py_CODEUNIT)) {
if (len > PY_SSIZE_T_MAX / 2)
return 0;
@ -7976,9 +7967,7 @@ makecode(struct compiler *c, struct assembler *a, PyObject *constslist,
.code = a->a_bytecode,
.firstlineno = c->u->u_firstlineno,
.linetable = a->a_lnotab,
.endlinetable = a->a_enotab,
.columntable = a->a_cnotab,
.linetable = a->a_linetable,
.consts = consts,
.names = names,
@ -8413,6 +8402,14 @@ assemble(struct compiler *c, int addNone)
goto error;
}
/* Emit location info */
a.a_lineno = c->u->u_firstlineno;
for(b = entryblock; b != NULL; b = b->b_next) {
for (j = 0; j < b->b_iused; j++)
if (!assemble_emit_location(&a, &b->b_instr[j]))
goto error;
}
if (!assemble_exception_table(&a)) {
goto error;
}
@ -8422,30 +8419,14 @@ assemble(struct compiler *c, int addNone)
if (!merge_const_one(c, &a.a_except_table)) {
goto error;
}
if (!assemble_start_line_range(&a)) {
return 0;
}
if (_PyBytes_Resize(&a.a_lnotab, a.a_lnotab_off) < 0) {
if (_PyBytes_Resize(&a.a_linetable, a.a_location_off) < 0) {
goto error;
}
if (!merge_const_one(c, &a.a_lnotab)) {
goto error;
}
if (!assemble_end_line_range(&a)) {
return 0;
}
if (_PyBytes_Resize(&a.a_enotab, a.a_enotab_off) < 0) {
goto error;
}
if (!merge_const_one(c, &a.a_enotab)) {
goto error;
}
if (_PyBytes_Resize(&a.a_cnotab, a.a_cnotab_off) < 0) {
goto error;
}
if (!merge_const_one(c, &a.a_cnotab)) {
if (!merge_const_one(c, &a.a_linetable)) {
goto error;
}
if (_PyBytes_Resize(&a.a_bytecode, a.a_offset * sizeof(_Py_CODEUNIT)) < 0) {
goto error;
}