gh-136396: Include instrumentation when creating new copies of the bytecode (#136525)

Previously, we assumed that instrumentation would happen for all copies of
the bytecode if the instrumentation version on the code object didn't match
the per-interpreter instrumentation version. That assumption was incorrect:
instrumentation will exit early if there are no new "events," even if there
is an instrumentation version mismatch.

To fix this, include the instrumented opcodes when creating new copies of
the bytecode, rather than replacing them with their uninstrumented variants.
I don't think we have to worry about races between instrumentation and creating
new copies of the bytecode: the two cannot happen concurrently. Instrumentation
requires that either the world is stopped or the code object's per-object lock
is held, and creating a new copy of the bytecode requires holding the code
object's per-object lock.
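
To make the failure mode concrete, here is a minimal sketch (it mirrors the new
test_toggle_setprofile_no_new_events regression test added below and assumes a
free-threaded build, where each thread gets its own copy of the bytecode):

import sys
import threading

events = []

def profiler(frame, event, arg):
    events.append((frame.f_code.co_name, event))

def work():
    return max(1, 2)

# Instrument once on the main thread, then uninstall; the set of monitoring
# events is now already known to the interpreter.
sys.setprofile(profiler)
work()
sys.setprofile(None)
events.clear()

def thread_main():
    # Re-installing the same profiler enables no new events. Before this fix,
    # the new thread's copy of the bytecode could be created without
    # instrumentation, so no profile events were reported for work().
    sys.setprofile(profiler)
    try:
        work()
    finally:
        sys.setprofile(None)

t = threading.Thread(target=thread_main)
t.start()
t.join()
assert any(name == "work" for name, _ in events)
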
mpage 2025-07-14 10:48:10 -07:00 committed by GitHub
parent 3d8c38f6db
commit d995922198
3 changed files with 161 additions and 1 deletion


@@ -2,10 +2,12 @@
environment to verify things are thread-safe in a free-threaded build"""
import sys
import threading
import time
import unittest
import weakref
from contextlib import contextmanager
from sys import monitoring
from test.support import threading_helper
from threading import Thread, _PyRLock, Barrier
@@ -192,6 +194,16 @@ class SetProfileMultiThreaded(InstrumentationMultiThreadedMixin, TestCase):
        self.set = not self.set


class TraceBuf:
    def __init__(self):
        self.traces = []
        self.traces_lock = threading.Lock()

    def append(self, trace):
        with self.traces_lock:
            self.traces.append(trace)


@threading_helper.requires_working_threading()
class MonitoringMisc(MonitoringTestMixin, TestCase):
    def register_callback(self, barrier):
@@ -246,6 +258,135 @@ class MonitoringMisc(MonitoringTestMixin, TestCase):
        finally:
            sys.settrace(None)

    def test_toggle_setprofile_no_new_events(self):
        # gh-136396: Make sure that profile functions are called for newly
        # created threads when profiling is toggled but the set of monitoring
        # events doesn't change
        traces = []

        def profiler(frame, event, arg):
            traces.append((frame.f_code.co_name, event, arg))

        def a(x, y):
            return b(x, y)

        def b(x, y):
            return max(x, y)

        sys.setprofile(profiler)
        try:
            a(1, 2)
        finally:
            sys.setprofile(None)

        traces.clear()

        def thread_main(x, y):
            sys.setprofile(profiler)
            try:
                a(x, y)
            finally:
                sys.setprofile(None)

        t = Thread(target=thread_main, args=(100, 200))
        t.start()
        t.join()

        expected = [
            ("a", "call", None),
            ("b", "call", None),
            ("b", "c_call", max),
            ("b", "c_return", max),
            ("b", "return", 200),
            ("a", "return", 200),
            ("thread_main", "c_call", sys.setprofile),
        ]
        self.assertEqual(traces, expected)

    def observe_threads(self, observer, buf):
        def in_child(ident):
            return ident

        def child(ident):
            with observer():
                in_child(ident)

        def in_parent(ident):
            return ident

        def parent(barrier, ident):
            barrier.wait()
            with observer():
                t = Thread(target=child, args=(ident,))
                t.start()
                t.join()
                in_parent(ident)

        num_threads = 5
        barrier = Barrier(num_threads)
        threads = []
        for i in range(num_threads):
            t = Thread(target=parent, args=(barrier, i))
            t.start()
            threads.append(t)

        for t in threads:
            t.join()

        for i in range(num_threads):
            self.assertIn(("in_parent", "return", i), buf.traces)
            self.assertIn(("in_child", "return", i), buf.traces)

    def test_profile_threads(self):
        buf = TraceBuf()

        def profiler(frame, event, arg):
            buf.append((frame.f_code.co_name, event, arg))

        @contextmanager
        def profile():
            sys.setprofile(profiler)
            try:
                yield
            finally:
                sys.setprofile(None)

        self.observe_threads(profile, buf)

    def test_trace_threads(self):
        buf = TraceBuf()

        def tracer(frame, event, arg):
            buf.append((frame.f_code.co_name, event, arg))
            return tracer

        @contextmanager
        def trace():
            sys.settrace(tracer)
            try:
                yield
            finally:
                sys.settrace(None)

        self.observe_threads(trace, buf)

    def test_monitor_threads(self):
        buf = TraceBuf()

        def monitor_py_return(code, off, retval):
            buf.append((code.co_name, "return", retval))

        monitoring.register_callback(
            self.tool_id, monitoring.events.PY_RETURN, monitor_py_return
        )
        monitoring.set_events(
            self.tool_id, monitoring.events.PY_RETURN
        )

        @contextmanager
        def noop():
            yield

        self.observe_threads(noop, buf)


if __name__ == "__main__":
    unittest.main()


@@ -0,0 +1,2 @@
Fix issue where per-thread bytecode was not instrumented for newly created
threads.

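As a rough conceptual model of the C change that follows (the opcode numbers and
the deopt table below are made up purely for illustration; the authoritative
logic is the deopt_code_unit()/copy_code() diff):

# Illustrative sketch only -- values are invented, not CPython's real tables.
MIN_INSTRUMENTED_OPCODE = 236        # hypothetical cutoff for instrumented opcodes
DEOPT = {171: 122}                   # hypothetical specialized -> base opcode map

def copy_unit(opcode: int) -> int:
    if opcode < MIN_INSTRUMENTED_OPCODE:
        # Specialized/quickened forms fall back to their base opcode.
        return DEOPT.get(opcode, opcode)
    # Instrumented opcodes are copied as-is, so the new per-thread copy of the
    # bytecode stays instrumented even when no new events are enabled.
    return opcode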

@@ -3330,12 +3330,29 @@ _PyCodeArray_New(Py_ssize_t size)
    return arr;
}

// Get the underlying code unit, leaving instrumentation intact
static _Py_CODEUNIT
deopt_code_unit(PyCodeObject *code, int i)
{
    _Py_CODEUNIT *src_instr = _PyCode_CODE(code) + i;
    _Py_CODEUNIT inst = {
        .cache = FT_ATOMIC_LOAD_UINT16_RELAXED(*(uint16_t *)src_instr)};
    int opcode = inst.op.code;
    if (opcode < MIN_INSTRUMENTED_OPCODE) {
        inst.op.code = _PyOpcode_Deopt[opcode];
        assert(inst.op.code < MIN_SPECIALIZED_OPCODE);
    }
    // JIT should not be enabled with free-threading
    assert(inst.op.code != ENTER_EXECUTOR);
    return inst;
}

static void
copy_code(_Py_CODEUNIT *dst, PyCodeObject *co)
{
    int code_len = (int) Py_SIZE(co);
    for (int i = 0; i < code_len; i += _PyInstruction_GetLength(co, i)) {
-        dst[i] = _Py_GetBaseCodeUnit(co, i);
+        dst[i] = deopt_code_unit(co, i);
    }
    _PyCode_Quicken(dst, code_len, 1);
}
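
As a side note on what "deopt to the base opcode" means here: on CPython 3.11+
the dis module can show both views of the same bytecode (output varies by build
and by whether the adaptive interpreter has specialized the function; this
snippet is illustrative only, not part of the commit):

import dis

def add(x, y):
    return x + y

# Warm the function up so the specializing interpreter may quicken it.
for _ in range(1000):
    add(1, 2)

dis.dis(add)                 # base opcodes, e.g. BINARY_OP
dis.dis(add, adaptive=True)  # may show specialized forms, e.g. BINARY_OP_ADD_INT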