From 991fede562aaef383ffe2f1fe0514aa93e63c74b Mon Sep 17 00:00:00 2001 From: Rich Chiodo false Date: Tue, 9 Dec 2025 14:36:15 -0800 Subject: [PATCH] Try fixing gw worker failures --- pytest.ini | 2 +- tests/debug/session.py | 20 ++++++++++++++++++-- tests/net.py | 28 ++++++++++++++++++++++++++-- tests/pytest_fixtures.py | 14 +++++++++++--- 4 files changed, 56 insertions(+), 8 deletions(-) diff --git a/pytest.ini b/pytest.ini index 7f3d7d6b..3439ac33 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,5 +1,5 @@ [pytest] testpaths=tests -timeout=60 +timeout=120 timeout_method=thread addopts=-n8 diff --git a/tests/debug/session.py b/tests/debug/session.py index d43ee0c7..0b2bea26 100644 --- a/tests/debug/session.py +++ b/tests/debug/session.py @@ -281,7 +281,11 @@ class Session(object): if self.adapter_endpoints is not None and self.expected_exit_code is not None: log.info("Waiting for {0} to close listener ports ...", self.adapter_id) + timeout_start = time.time() while self.adapter_endpoints.check(): + if time.time() - timeout_start > 10: + log.warning("{0} listener ports did not close within 10 seconds", self.adapter_id) + break time.sleep(0.1) if self.adapter is not None: @@ -290,8 +294,20 @@ class Session(object): self.adapter_id, self.adapter.pid, ) - self.adapter.wait() - watchdog.unregister_spawn(self.adapter.pid, self.adapter_id) + try: + self.adapter.wait(timeout=10) + except Exception: + log.warning("{0} did not exit gracefully within 10 seconds, force-killing", self.adapter_id) + try: + self.adapter.kill() + self.adapter.wait(timeout=5) + except Exception as e: + log.error("Failed to force-kill {0}: {1}", self.adapter_id, e) + + try: + watchdog.unregister_spawn(self.adapter.pid, self.adapter_id) + except Exception as e: + log.warning("Failed to unregister adapter spawn: {0}", e) self.adapter = None if self.backchannel is not None: diff --git a/tests/net.py b/tests/net.py index f248c1a4..2f468050 100644 --- a/tests/net.py +++ b/tests/net.py @@ -17,7 +17,7 @@ 
from tests.patterns import some used_ports = set() -def get_test_server_port(): +def get_test_server_port(max_retries=10): """Returns a server port number that can be safely used for listening without clashing with another test worker process, when running with pytest-xdist. @@ -27,6 +27,9 @@ def get_test_server_port(): Note that if multiple test workers invoke this function with different ranges that overlap, conflicts are possible! + + Args: + max_retries: Number of times to retry finding an available port """ try: @@ -39,11 +42,32 @@ def get_test_server_port(): ), "Unrecognized PYTEST_XDIST_WORKER format" n = int(worker_id[2:]) + # Try multiple times to find an available port, with retry logic + for attempt in range(max_retries): + port = 5678 + (n * 300) + attempt + while port in used_ports: + port += 1 + + # Verify the port is actually available by trying to bind to it + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + try: + sock.bind(("127.0.0.1", port)) + sock.close() + used_ports.add(port) + log.info("Allocated port {0} for worker {1}", port, n) + return port + except OSError as e: + log.warning("Port {0} unavailable (attempt {1}/{2}): {3}", port, attempt + 1, max_retries, e) + sock.close() + time.sleep(0.1 * (attempt + 1)) # Linear backoff: delay grows by 0.1s per attempt + + # Fall back to original behavior if all retries fail port = 5678 + (n * 300) while port in used_ports: port += 1 used_ports.add(port) - + log.warning("Using fallback port {0} after {1} retries", port, max_retries) return port diff --git a/tests/pytest_fixtures.py b/tests/pytest_fixtures.py index 27d21f1a..6c99b760 100644 --- a/tests/pytest_fixtures.py +++ b/tests/pytest_fixtures.py @@ -46,19 +46,27 @@ def test_wrapper(request, long_tmpdir): session.Session.reset_counter() - session.Session.tmpdir = long_tmpdir + # Add worker-specific isolation for tmpdir and log directory + try: + worker_id = os.environ.get("PYTEST_XDIST_WORKER", "gw0") + 
worker_suffix = f"_{worker_id}" + except Exception: + worker_suffix = "" + + session.Session.tmpdir = long_tmpdir / f"session{worker_suffix}" + session.Session.tmpdir.ensure(dir=True) original_log_dir = log.log_dir failed = True try: if log.log_dir is None: - log.log_dir = (long_tmpdir / "debugpy_logs").strpath + log.log_dir = (long_tmpdir / f"debugpy_logs{worker_suffix}").strpath else: log_subdir = request.node.nodeid log_subdir = log_subdir.replace("::", "/") for ch in r":?*|<>": log_subdir = log_subdir.replace(ch, f"&#{ord(ch)};") - log.log_dir += "/" + log_subdir + log.log_dir += "/" + log_subdir + worker_suffix try: py.path.local(log.log_dir).remove()