[3.6] bpo-30703: Improve signal delivery (GH-2415) (#2527)
* [3.6] bpo-30703: Improve signal delivery (GH-2415)
* Improve signal delivery
Avoid using Py_AddPendingCall from the signal handler, so as not to call signal-unsafe functions.
* Remove unused function
* Improve comments
* Add stress test
* Adapt for --without-threads
* Add second stress test
* Add NEWS blurb
* Address review comments from @haypo.
(cherry picked from commit c08177a1cc)
* bpo-30796: Fix failures in signal delivery stress test (#2488)
* bpo-30796: Fix failures in signal delivery stress test
setitimer() can have a poor minimum resolution on some machines, which would make the test reach its deadline (and a stray signal could then kill a subsequent test).
* Make sure to clear the itimer after the test
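As background to the change above: a C-level signal handler must restrict itself to async-signal-safe work, so the safest pattern is to record that a signal arrived and let normal code act on it later. The following standalone sketch (plain POSIX C, not part of the diff and not CPython's code; the flag name and signal choice are invented for the example) shows that pattern:

#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static volatile sig_atomic_t signal_pending = 0;

static void handler(int signum)
{
    (void)signum;
    /* Only an async-signal-safe store here: no locks, no malloc,
       no callback registration. */
    signal_pending = 1;
}

int main(void)
{
    struct sigaction sa;
    memset(&sa, 0, sizeof(sa));
    sa.sa_handler = handler;
    sigemptyset(&sa.sa_mask);
    sigaction(SIGINT, &sa, NULL);

    printf("press Ctrl-C once...\n");
    while (!signal_pending)
        pause();                      /* returns after the handler has run */

    /* The "pending" work happens here, outside the handler. */
    signal_pending = 0;
    printf("signal handled in the main loop\n");
    return 0;
}

CPython's counterpart is the _PyEval_SignalReceived() helper added in the diff below, which only flips atomic flags that the eval loop already polls.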
This commit is contained in:
parent 48290c1c30
commit 3024c05290
5 changed files with 207 additions and 38 deletions
@@ -46,6 +46,7 @@ PyAPI_FUNC(int) PyEval_MergeCompilerFlags(PyCompilerFlags *cf);
 #endif

 PyAPI_FUNC(int) Py_AddPendingCall(int (*func)(void *), void *arg);
+PyAPI_FUNC(void) _PyEval_SignalReceived(void);
 PyAPI_FUNC(int) Py_MakePendingCalls(void);

 /* Protection against deeply nested recursive calls
@@ -3,11 +3,13 @@ from test import support
 from contextlib import closing
 import enum
 import gc
+import os
 import pickle
+import random
 import select
 import signal
 import socket
-import struct
+import statistics
 import subprocess
 import traceback
 import sys, os, time, errno
@@ -955,6 +957,135 @@ class PendingSignalsTests(unittest.TestCase):
                          (exitcode, stdout))


+class StressTest(unittest.TestCase):
+    """
+    Stress signal delivery, especially when a signal arrives in
+    the middle of recomputing the signal state or executing
+    previously tripped signal handlers.
+    """
+
+    def setsig(self, signum, handler):
+        old_handler = signal.signal(signum, handler)
+        self.addCleanup(signal.signal, signum, old_handler)
+
+    def measure_itimer_resolution(self):
+        N = 20
+        times = []
+
+        def handler(signum=None, frame=None):
+            if len(times) < N:
+                times.append(time.perf_counter())
+                # 1 µs is the smallest possible timer interval,
+                # we want to measure what the concrete duration
+                # will be on this platform
+                signal.setitimer(signal.ITIMER_REAL, 1e-6)
+
+        self.addCleanup(signal.setitimer, signal.ITIMER_REAL, 0)
+        self.setsig(signal.SIGALRM, handler)
+        handler()
+        while len(times) < N:
+            time.sleep(1e-3)
+
+        durations = [times[i+1] - times[i] for i in range(len(times) - 1)]
+        med = statistics.median(durations)
+        if support.verbose:
+            print("detected median itimer() resolution: %.6f s." % (med,))
+        return med
+
+    def decide_itimer_count(self):
+        # Some systems have poor setitimer() resolution (for example
+        # measured around 20 ms. on FreeBSD 9), so decide on a reasonable
+        # number of sequential timers based on that.
+        reso = self.measure_itimer_resolution()
+        if reso <= 1e-4:
+            return 10000
+        elif reso <= 1e-2:
+            return 100
+        else:
+            self.skipTest("detected itimer resolution (%.3f s.) too high "
+                          "(> 10 ms.) on this platform (or system too busy)"
+                          % (reso,))
+
+    @unittest.skipUnless(hasattr(signal, "setitimer"),
+                         "test needs setitimer()")
+    def test_stress_delivery_dependent(self):
+        """
+        This test uses dependent signal handlers.
+        """
+        N = self.decide_itimer_count()
+        sigs = []
+
+        def first_handler(signum, frame):
+            # 1e-6 is the minimum non-zero value for `setitimer()`.
+            # Choose a random delay so as to improve chances of
+            # triggering a race condition.  Ideally the signal is received
+            # when inside critical signal-handling routines such as
+            # Py_MakePendingCalls().
+            signal.setitimer(signal.ITIMER_REAL, 1e-6 + random.random() * 1e-5)
+
+        def second_handler(signum=None, frame=None):
+            sigs.append(signum)
+
+        # Here on Linux, SIGPROF > SIGALRM > SIGUSR1.  By using both
+        # ascending and descending sequences (SIGUSR1 then SIGALRM,
+        # SIGPROF then SIGALRM), we maximize chances of hitting a bug.
+        self.setsig(signal.SIGPROF, first_handler)
+        self.setsig(signal.SIGUSR1, first_handler)
+        self.setsig(signal.SIGALRM, second_handler)  # for ITIMER_REAL
+
+        expected_sigs = 0
+        deadline = time.time() + 15.0
+
+        while expected_sigs < N:
+            os.kill(os.getpid(), signal.SIGPROF)
+            expected_sigs += 1
+            # Wait for handlers to run to avoid signal coalescing
+            while len(sigs) < expected_sigs and time.time() < deadline:
+                time.sleep(1e-5)
+
+            os.kill(os.getpid(), signal.SIGUSR1)
+            expected_sigs += 1
+            while len(sigs) < expected_sigs and time.time() < deadline:
+                time.sleep(1e-5)
+
+        # All ITIMER_REAL signals should have been delivered to the
+        # Python handler
+        self.assertEqual(len(sigs), N, "Some signals were lost")
+
+    @unittest.skipUnless(hasattr(signal, "setitimer"),
+                         "test needs setitimer()")
+    def test_stress_delivery_simultaneous(self):
+        """
+        This test uses simultaneous signal handlers.
+        """
+        N = self.decide_itimer_count()
+        sigs = []
+
+        def handler(signum, frame):
+            sigs.append(signum)
+
+        self.setsig(signal.SIGUSR1, handler)
+        self.setsig(signal.SIGALRM, handler)  # for ITIMER_REAL
+
+        expected_sigs = 0
+        deadline = time.time() + 15.0
+
+        while expected_sigs < N:
+            # Hopefully the SIGALRM will be received somewhere during
+            # initial processing of SIGUSR1.
+            signal.setitimer(signal.ITIMER_REAL, 1e-6 + random.random() * 1e-5)
+            os.kill(os.getpid(), signal.SIGUSR1)
+
+            expected_sigs += 2
+            # Wait for handlers to run to avoid signal coalescing
+            while len(sigs) < expected_sigs and time.time() < deadline:
+                time.sleep(1e-5)
+
+        # All ITIMER_REAL signals should have been delivered to the
+        # Python handler
+        self.assertEqual(len(sigs), N, "Some signals were lost")
+
+
 def tearDownModule():
     support.reap_children()
@@ -0,0 +1,6 @@
+Improve signal delivery.
+
+Avoid using Py_AddPendingCall from signal handler, to avoid calling signal-
+unsafe functions. The tests I'm adding here fail without the rest of the
+patch, on Linux and OS X. This means our signal delivery logic had defects
+(some signals could be lost).
@@ -192,12 +192,6 @@ The default handler for SIGINT installed by Python.\n\
 It raises KeyboardInterrupt.");


-static int
-checksignals_witharg(void * unused)
-{
-    return PyErr_CheckSignals();
-}
-
 static int
 report_wakeup_write_error(void *data)
 {
@@ -248,17 +242,15 @@ trip_signal(int sig_num)

     Handlers[sig_num].tripped = 1;

-    if (!is_tripped) {
-        /* Set is_tripped after setting .tripped, as it gets
-           cleared in PyErr_CheckSignals() before .tripped. */
-        is_tripped = 1;
-        Py_AddPendingCall(checksignals_witharg, NULL);
-    }
+    /* Set is_tripped after setting .tripped, as it gets
+       cleared in PyErr_CheckSignals() before .tripped. */
+    is_tripped = 1;
+    _PyEval_SignalReceived();

     /* And then write to the wakeup fd *after* setting all the globals and
-       doing the Py_AddPendingCall. We used to write to the wakeup fd and then
-       set the flag, but this allowed the following sequence of events
-       (especially on windows, where trip_signal runs in a new thread):
+       doing the _PyEval_SignalReceived. We used to write to the wakeup fd
+       and then set the flag, but this allowed the following sequence of events
+       (especially on windows, where trip_signal may run in a new thread):

        - main thread blocks on select([wakeup_fd], ...)
        - signal arrives
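The comment kept in the hunk above is the reason for the ordering inside trip_signal(): the interpreter-visible flags are set first, and the wakeup fd is written only afterwards, so a reader woken through the fd is guaranteed to see the flags. A minimal self-pipe sketch of that ordering (invented names, not part of the diff and not the CPython implementation):

#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static volatile sig_atomic_t tripped = 0;
static int wakeup_fd = -1;              /* write end of the self-pipe */

static void on_signal(int signum)
{
    unsigned char byte = 0;
    (void)signum;

    /* 1) Publish the state the main loop will inspect... */
    tripped = 1;

    /* 2) ...then wake it through the pipe.  Writing first could wake the
       reader before the flag is visible, and the notification would be
       missed.  write() is async-signal-safe. */
    (void)write(wakeup_fd, &byte, 1);
}

int main(void)
{
    int fds[2];
    unsigned char byte;
    struct sigaction sa;

    if (pipe(fds) != 0)
        return 1;
    wakeup_fd = fds[1];

    memset(&sa, 0, sizeof(sa));
    sa.sa_handler = on_signal;
    sigemptyset(&sa.sa_mask);
    sigaction(SIGUSR1, &sa, NULL);

    raise(SIGUSR1);                     /* trigger the handler once */

    if (read(fds[0], &byte, 1) == 1 && tripped)
        printf("flag was already set when the wakeup byte arrived\n");
    return 0;
}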
@@ -293,6 +285,8 @@ trip_signal(int sig_num)
             wakeup.send_err_set = 1;
             wakeup.send_errno = errno;
             wakeup.send_win_error = GetLastError();
+            /* Py_AddPendingCall() isn't signal-safe, but we
+               still use it for this exceptional case. */
             Py_AddPendingCall(report_wakeup_send_error, NULL);
         }
     }
@@ -306,6 +300,8 @@ trip_signal(int sig_num)
         rc = _Py_write_noraise(fd, &byte, 1);

         if (rc < 0) {
+            /* Py_AddPendingCall() isn't signal-safe, but we
+               still use it for this exceptional case. */
             Py_AddPendingCall(report_wakeup_write_error,
                               (void *)(intptr_t)errno);
         }
@@ -1555,8 +1551,10 @@ PyErr_CheckSignals(void)
                                            arglist);
                 Py_DECREF(arglist);
             }
-            if (!result)
+            if (!result) {
+                is_tripped = 1;
                 return -1;
+            }

             Py_DECREF(result);
         }
@@ -195,6 +195,15 @@ PyEval_GetCallStats(PyObject *self)
     do { pending_async_exc = 0; COMPUTE_EVAL_BREAKER(); } while (0)


+/* This single variable consolidates all requests to break out of the fast path
+   in the eval loop. */
+static _Py_atomic_int eval_breaker = {0};
+/* Request for running pending calls. */
+static _Py_atomic_int pendingcalls_to_do = {0};
+/* Request for looking at the `async_exc` field of the current thread state.
+   Guarded by the GIL. */
+static int pending_async_exc = 0;
+
 #ifdef WITH_THREAD

 #ifdef HAVE_ERRNO_H
@@ -204,16 +213,8 @@ PyEval_GetCallStats(PyObject *self)

 static PyThread_type_lock pending_lock = 0; /* for pending calls */
 static long main_thread = 0;
-/* This single variable consolidates all requests to break out of the fast path
-   in the eval loop. */
-static _Py_atomic_int eval_breaker = {0};
 /* Request for dropping the GIL */
 static _Py_atomic_int gil_drop_request = {0};
-/* Request for running pending calls. */
-static _Py_atomic_int pendingcalls_to_do = {0};
-/* Request for looking at the `async_exc` field of the current thread state.
-   Guarded by the GIL. */
-static int pending_async_exc = 0;

 #include "ceval_gil.h"

@@ -326,9 +327,6 @@ PyEval_ReInitThreads(void)
     _PyThreadState_DeleteExcept(current_tstate);
 }

-#else
-static _Py_atomic_int eval_breaker = {0};
-static int pending_async_exc = 0;
 #endif /* WITH_THREAD */

 /* This function is used to signal that async exceptions are waiting to be
@@ -403,6 +401,15 @@ PyEval_RestoreThread(PyThreadState *tstate)
 #endif
 */

+void
+_PyEval_SignalReceived(void)
+{
+    /* bpo-30703: Function called when the C signal handler of Python gets a
+       signal. We cannot queue a callback using Py_AddPendingCall() since
+       that function is not async-signal-safe. */
+    SIGNAL_PENDING_CALLS();
+}
+
 #ifdef WITH_THREAD

 /* The WITH_THREAD implementation is thread-safe. It allows
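The new helper above does no work itself: it only raises notification flags that the eval loop already polls, which is what makes it safe to call from a signal handler. A hedged sketch (not part of the diff) of what such a SIGNAL_PENDING_CALLS-style notification amounts to, written with C11 atomics and made-up names rather than CPython's _Py_atomic API:

#include <stdatomic.h>
#include <stdio.h>

/* Made-up counterparts of the flags the eval loop polls. */
static atomic_int eval_breaker = 0;        /* checked every few bytecodes */
static atomic_int pendingcalls_to_do = 0;  /* "run the pending-call machinery" */

/* Loosely modeled on _PyEval_SignalReceived(): callable from a C signal
   handler because it is nothing more than lock-free stores. */
static void signal_received(void)
{
    atomic_store_explicit(&pendingcalls_to_do, 1, memory_order_relaxed);
    atomic_store_explicit(&eval_breaker, 1, memory_order_relaxed);
}

int main(void)
{
    signal_received();
    if (atomic_load_explicit(&eval_breaker, memory_order_relaxed))
        printf("the eval loop would now run its pending-calls path\n");
    return 0;
}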
@@ -467,6 +474,8 @@ Py_MakePendingCalls(void)
     int i;
     int r = 0;

+    assert(PyGILState_Check());
+
     if (!pending_lock) {
         /* initial allocation of the lock */
         pending_lock = PyThread_allocate_lock();
@@ -481,6 +490,16 @@ Py_MakePendingCalls(void)
     if (busy)
         return 0;
     busy = 1;
+    /* unsignal before starting to call callbacks, so that any callback
+       added in-between re-signals */
+    UNSIGNAL_PENDING_CALLS();
+
+    /* Python signal handler doesn't really queue a callback: it only signals
+       that a signal was received, see _PyEval_SignalReceived(). */
+    if (PyErr_CheckSignals() < 0) {
+        goto error;
+    }
+
     /* perform a bounded number of calls, in case of recursion */
     for (i=0; i<NPENDINGCALLS; i++) {
         int j;
@@ -497,20 +516,23 @@ Py_MakePendingCalls(void)
             arg = pendingcalls[j].arg;
             pendingfirst = (j + 1) % NPENDINGCALLS;
         }
-        if (pendingfirst != pendinglast)
-            SIGNAL_PENDING_CALLS();
-        else
-            UNSIGNAL_PENDING_CALLS();
         PyThread_release_lock(pending_lock);
         /* having released the lock, perform the callback */
         if (func == NULL)
             break;
         r = func(arg);
-        if (r)
-            break;
+        if (r) {
+            goto error;
+        }
     }
+
     busy = 0;
     return r;
+
+error:
+    busy = 0;
+    SIGNAL_PENDING_CALLS(); /* We're not done yet */
+    return -1;
 }

 #else /* if ! defined WITH_THREAD */
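The reworked threaded Py_MakePendingCalls() above clears the pending-calls notification before draining the queue, so that anything queued during the drain re-raises it, and re-signals on error so the remaining work is retried later; the --without-threads variant further below gets the same treatment. A simplified single-threaded sketch of that shape (illustrative code only, not part of the diff and not CPython's; it omits the locking and re-entrancy handling the real function needs):

#include <stdatomic.h>
#include <stdio.h>

#define NPENDING 32

typedef int (*callback)(void *);

/* Illustrative ring buffer of queued calls. */
static struct { callback func; void *arg; } pending[NPENDING];
static int first = 0, last = 0;
static atomic_int calls_to_do = 0;

static int make_pending_calls(void)
{
    /* Unsignal before running callbacks: anything queued while we drain
       sets the flag again and is picked up on a later pass. */
    atomic_store(&calls_to_do, 0);

    while (first != last) {
        callback func = pending[first].func;
        void *arg = pending[first].arg;
        first = (first + 1) % NPENDING;
        if (func(arg) < 0) {
            /* We're not done yet: re-signal so the rest is retried. */
            atomic_store(&calls_to_do, 1);
            return -1;
        }
    }
    return 0;
}

static int say_hello(void *arg)
{
    (void)arg;
    printf("pending call ran\n");
    return 0;
}

int main(void)
{
    pending[last].func = say_hello;
    pending[last].arg = NULL;
    last = (last + 1) % NPENDING;
    atomic_store(&calls_to_do, 1);

    return make_pending_calls();
}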
@@ -545,7 +567,6 @@ static struct {
 } pendingcalls[NPENDINGCALLS];
 static volatile int pendingfirst = 0;
 static volatile int pendinglast = 0;
-static _Py_atomic_int pendingcalls_to_do = {0};

 int
 Py_AddPendingCall(int (*func)(void *), void *arg)
@@ -579,7 +600,16 @@ Py_MakePendingCalls(void)
     if (busy)
         return 0;
     busy = 1;
+
+    /* unsignal before starting to call callbacks, so that any callback
+       added in-between re-signals */
     UNSIGNAL_PENDING_CALLS();
+    /* Python signal handler doesn't really queue a callback: it only signals
+       that a signal was received, see _PyEval_SignalReceived(). */
+    if (PyErr_CheckSignals() < 0) {
+        goto error;
+    }
+
     for (;;) {
         int i;
         int (*func)(void *);
@@ -591,13 +621,16 @@ Py_MakePendingCalls(void)
         arg = pendingcalls[i].arg;
         pendingfirst = (i + 1) % NPENDINGCALLS;
         if (func(arg) < 0) {
-            busy = 0;
-            SIGNAL_PENDING_CALLS(); /* We're not done yet */
-            return -1;
+            goto error;
         }
     }
     busy = 0;
     return 0;
+
+error:
+    busy = 0;
+    SIGNAL_PENDING_CALLS(); /* We're not done yet */
+    return -1;
 }

 #endif /* WITH_THREAD */