mirror of
https://github.com/python/cpython.git
synced 2025-10-17 20:28:43 +00:00
Use the faulthandler module's infrastructure to write a GIL-less
memory watchdog for timely stats collection.
This commit is contained in:
parent
031487eb3b
commit
75e78b6c77
2 changed files with 249 additions and 43 deletions
|
@ -23,6 +23,7 @@ import time
|
||||||
import sysconfig
|
import sysconfig
|
||||||
import fnmatch
|
import fnmatch
|
||||||
import logging.handlers
|
import logging.handlers
|
||||||
|
import struct
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import _thread, threading
|
import _thread, threading
|
||||||
|
@ -34,6 +35,10 @@ try:
|
||||||
except ImportError:
|
except ImportError:
|
||||||
multiprocessing = None
|
multiprocessing = None
|
||||||
|
|
||||||
|
try:
|
||||||
|
import faulthandler
|
||||||
|
except ImportError:
|
||||||
|
faulthandler = None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import zlib
|
import zlib
|
||||||
|
@ -1133,41 +1138,66 @@ def set_memlimit(limit):
|
||||||
raise ValueError('Memory limit %r too low to be useful' % (limit,))
|
raise ValueError('Memory limit %r too low to be useful' % (limit,))
|
||||||
max_memuse = memlimit
|
max_memuse = memlimit
|
||||||
|
|
||||||
def _memory_watchdog(start_evt, finish_evt, period=10.0):
|
class _MemoryWatchdog:
|
||||||
"""A function which periodically watches the process' memory consumption
|
"""An object which periodically watches the process' memory consumption
|
||||||
and prints it out.
|
and prints it out.
|
||||||
"""
|
"""
|
||||||
# XXX: because of the GIL, and because the very long operations tested
|
|
||||||
# in most bigmem tests are uninterruptible, the loop below gets woken up
|
def __init__(self):
|
||||||
# much less often than expected.
|
self.procfile = '/proc/{pid}/statm'.format(pid=os.getpid())
|
||||||
# The polling code should be rewritten in raw C, without holding the GIL,
|
self.started = False
|
||||||
# and push results onto an anonymous pipe.
|
self.thread = None
|
||||||
try:
|
try:
|
||||||
page_size = os.sysconf('SC_PAGESIZE')
|
self.page_size = os.sysconf('SC_PAGESIZE')
|
||||||
except (ValueError, AttributeError):
|
except (ValueError, AttributeError):
|
||||||
try:
|
try:
|
||||||
page_size = os.sysconf('SC_PAGE_SIZE')
|
self.page_size = os.sysconf('SC_PAGE_SIZE')
|
||||||
except (ValueError, AttributeError):
|
except (ValueError, AttributeError):
|
||||||
page_size = 4096
|
self.page_size = 4096
|
||||||
procfile = '/proc/{pid}/statm'.format(pid=os.getpid())
|
|
||||||
|
def consumer(self, fd):
|
||||||
|
HEADER = "l"
|
||||||
|
header_size = struct.calcsize(HEADER)
|
||||||
try:
|
try:
|
||||||
f = open(procfile, 'rb')
|
while True:
|
||||||
except IOError as e:
|
header = os.read(fd, header_size)
|
||||||
|
if len(header) < header_size:
|
||||||
|
# Pipe closed on other end
|
||||||
|
break
|
||||||
|
data_len, = struct.unpack(HEADER, header)
|
||||||
|
data = os.read(fd, data_len)
|
||||||
|
statm = data.decode('ascii')
|
||||||
|
data = int(statm.split()[5])
|
||||||
|
print(" ... process data size: {data:.1f}G"
|
||||||
|
.format(data=data * self.page_size / (1024 ** 3)))
|
||||||
|
finally:
|
||||||
|
os.close(fd)
|
||||||
|
|
||||||
|
def start(self):
|
||||||
|
if not faulthandler or not hasattr(faulthandler, '_file_watchdog'):
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
rfd = os.open(self.procfile, os.O_RDONLY)
|
||||||
|
except OSError as e:
|
||||||
warnings.warn('/proc not available for stats: {}'.format(e),
|
warnings.warn('/proc not available for stats: {}'.format(e),
|
||||||
RuntimeWarning)
|
RuntimeWarning)
|
||||||
sys.stderr.flush()
|
sys.stderr.flush()
|
||||||
return
|
return
|
||||||
with f:
|
pipe_fd, wfd = os.pipe()
|
||||||
start_evt.set()
|
# _file_watchdog() doesn't take the GIL in its child thread, and
|
||||||
old_data = -1
|
# therefore collects statistics timely
|
||||||
while not finish_evt.wait(period):
|
faulthandler._file_watchdog(rfd, wfd, 3.0)
|
||||||
f.seek(0)
|
self.started = True
|
||||||
statm = f.read().decode('ascii')
|
self.thread = threading.Thread(target=self.consumer, args=(pipe_fd,))
|
||||||
data = int(statm.split()[5])
|
self.thread.daemon = True
|
||||||
if data != old_data:
|
self.thread.start()
|
||||||
old_data = data
|
|
||||||
print(" ... process data size: {data:.1f}G"
|
def stop(self):
|
||||||
.format(data=data * page_size / (1024 ** 3)))
|
if not self.started:
|
||||||
|
return
|
||||||
|
faulthandler._cancel_file_watchdog()
|
||||||
|
self.thread.join()
|
||||||
|
|
||||||
|
|
||||||
def bigmemtest(size, memuse, dry_run=True):
|
def bigmemtest(size, memuse, dry_run=True):
|
||||||
"""Decorator for bigmem tests.
|
"""Decorator for bigmem tests.
|
||||||
|
@ -1194,27 +1224,20 @@ def bigmemtest(size, memuse, dry_run=True):
|
||||||
"not enough memory: %.1fG minimum needed"
|
"not enough memory: %.1fG minimum needed"
|
||||||
% (size * memuse / (1024 ** 3)))
|
% (size * memuse / (1024 ** 3)))
|
||||||
|
|
||||||
if real_max_memuse and verbose and threading:
|
if real_max_memuse and verbose and faulthandler and threading:
|
||||||
print()
|
print()
|
||||||
print(" ... expected peak memory use: {peak:.1f}G"
|
print(" ... expected peak memory use: {peak:.1f}G"
|
||||||
.format(peak=size * memuse / (1024 ** 3)))
|
.format(peak=size * memuse / (1024 ** 3)))
|
||||||
sys.stdout.flush()
|
watchdog = _MemoryWatchdog()
|
||||||
start_evt = threading.Event()
|
watchdog.start()
|
||||||
finish_evt = threading.Event()
|
|
||||||
t = threading.Thread(target=_memory_watchdog,
|
|
||||||
args=(start_evt, finish_evt, 0.5))
|
|
||||||
t.daemon = True
|
|
||||||
t.start()
|
|
||||||
start_evt.set()
|
|
||||||
else:
|
else:
|
||||||
t = None
|
watchdog = None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return f(self, maxsize)
|
return f(self, maxsize)
|
||||||
finally:
|
finally:
|
||||||
if t:
|
if watchdog:
|
||||||
finish_evt.set()
|
watchdog.stop()
|
||||||
t.join()
|
|
||||||
|
|
||||||
wrapper.size = size
|
wrapper.size = size
|
||||||
wrapper.memuse = memuse
|
wrapper.memuse = memuse
|
||||||
|
|
|
@ -13,6 +13,7 @@
|
||||||
|
|
||||||
#ifdef WITH_THREAD
|
#ifdef WITH_THREAD
|
||||||
# define FAULTHANDLER_LATER
|
# define FAULTHANDLER_LATER
|
||||||
|
# define FAULTHANDLER_WATCHDOG
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef MS_WINDOWS
|
#ifndef MS_WINDOWS
|
||||||
|
@ -65,6 +66,20 @@ static struct {
|
||||||
} thread;
|
} thread;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef FAULTHANDLER_WATCHDOG
|
||||||
|
static struct {
|
||||||
|
int rfd;
|
||||||
|
int wfd;
|
||||||
|
PY_TIMEOUT_T period_us; /* period in microseconds */
|
||||||
|
/* The main thread always holds this lock. It is only released when
|
||||||
|
faulthandler_watchdog() is interrupted before this thread exits, or at
|
||||||
|
Python exit. */
|
||||||
|
PyThread_type_lock cancel_event;
|
||||||
|
/* released by child thread when joined */
|
||||||
|
PyThread_type_lock running;
|
||||||
|
} watchdog;
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef FAULTHANDLER_USER
|
#ifdef FAULTHANDLER_USER
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int enabled;
|
int enabled;
|
||||||
|
@ -587,6 +602,138 @@ faulthandler_cancel_dump_tracebacks_later_py(PyObject *self)
|
||||||
}
|
}
|
||||||
#endif /* FAULTHANDLER_LATER */
|
#endif /* FAULTHANDLER_LATER */
|
||||||
|
|
||||||
|
#ifdef FAULTHANDLER_WATCHDOG
|
||||||
|
|
||||||
|
static void
|
||||||
|
file_watchdog(void *unused)
|
||||||
|
{
|
||||||
|
PyLockStatus st;
|
||||||
|
PY_TIMEOUT_T timeout;
|
||||||
|
|
||||||
|
const int MAXDATA = 1024;
|
||||||
|
char buf1[MAXDATA], buf2[MAXDATA];
|
||||||
|
char *data = buf1, *old_data = buf2;
|
||||||
|
Py_ssize_t data_len, old_data_len = -1;
|
||||||
|
|
||||||
|
#if defined(HAVE_PTHREAD_SIGMASK) && !defined(HAVE_BROKEN_PTHREAD_SIGMASK)
|
||||||
|
sigset_t set;
|
||||||
|
|
||||||
|
/* we don't want to receive any signal */
|
||||||
|
sigfillset(&set);
|
||||||
|
pthread_sigmask(SIG_SETMASK, &set, NULL);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* On first pass, feed file contents immediately */
|
||||||
|
timeout = 0;
|
||||||
|
do {
|
||||||
|
st = PyThread_acquire_lock_timed(watchdog.cancel_event,
|
||||||
|
timeout, 0);
|
||||||
|
timeout = watchdog.period_us;
|
||||||
|
if (st == PY_LOCK_ACQUIRED) {
|
||||||
|
PyThread_release_lock(watchdog.cancel_event);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
/* Timeout => read and write data */
|
||||||
|
assert(st == PY_LOCK_FAILURE);
|
||||||
|
|
||||||
|
if (lseek(watchdog.rfd, 0, SEEK_SET) < 0) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
data_len = read(watchdog.rfd, data, MAXDATA);
|
||||||
|
if (data_len < 0) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (data_len != old_data_len || memcmp(data, old_data, data_len)) {
|
||||||
|
char *tdata;
|
||||||
|
Py_ssize_t tlen;
|
||||||
|
/* Contents changed, feed them to wfd */
|
||||||
|
long x = (long) data_len;
|
||||||
|
/* We can't do anything if the consumer is too slow, just bail out */
|
||||||
|
if (write(watchdog.wfd, (void *) &x, sizeof(x)) < sizeof(x))
|
||||||
|
break;
|
||||||
|
if (write(watchdog.wfd, data, data_len) < data_len)
|
||||||
|
break;
|
||||||
|
tdata = data;
|
||||||
|
data = old_data;
|
||||||
|
old_data = tdata;
|
||||||
|
tlen = data_len;
|
||||||
|
data_len = old_data_len;
|
||||||
|
old_data_len = tlen;
|
||||||
|
}
|
||||||
|
} while (1);
|
||||||
|
|
||||||
|
close(watchdog.rfd);
|
||||||
|
close(watchdog.wfd);
|
||||||
|
|
||||||
|
/* The only way out */
|
||||||
|
PyThread_release_lock(watchdog.running);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
cancel_file_watchdog(void)
|
||||||
|
{
|
||||||
|
/* Notify cancellation */
|
||||||
|
PyThread_release_lock(watchdog.cancel_event);
|
||||||
|
|
||||||
|
/* Wait for thread to join */
|
||||||
|
PyThread_acquire_lock(watchdog.running, 1);
|
||||||
|
PyThread_release_lock(watchdog.running);
|
||||||
|
|
||||||
|
/* The main thread should always hold the cancel_event lock */
|
||||||
|
PyThread_acquire_lock(watchdog.cancel_event, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
static PyObject*
|
||||||
|
faulthandler_file_watchdog(PyObject *self,
|
||||||
|
PyObject *args, PyObject *kwargs)
|
||||||
|
{
|
||||||
|
static char *kwlist[] = {"rfd", "wfd", "period", NULL};
|
||||||
|
double period;
|
||||||
|
PY_TIMEOUT_T period_us;
|
||||||
|
int rfd, wfd;
|
||||||
|
|
||||||
|
if (!PyArg_ParseTupleAndKeywords(args, kwargs,
|
||||||
|
"iid:_file_watchdog", kwlist,
|
||||||
|
&rfd, &wfd, &period))
|
||||||
|
return NULL;
|
||||||
|
if ((period * 1e6) >= (double) PY_TIMEOUT_MAX) {
|
||||||
|
PyErr_SetString(PyExc_OverflowError, "period value is too large");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
period_us = (PY_TIMEOUT_T)(period * 1e6);
|
||||||
|
if (period_us <= 0) {
|
||||||
|
PyErr_SetString(PyExc_ValueError, "period must be greater than 0");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Cancel previous thread, if running */
|
||||||
|
cancel_file_watchdog();
|
||||||
|
|
||||||
|
watchdog.rfd = rfd;
|
||||||
|
watchdog.wfd = wfd;
|
||||||
|
watchdog.period_us = period_us;
|
||||||
|
|
||||||
|
/* Arm these locks to serve as events when released */
|
||||||
|
PyThread_acquire_lock(watchdog.running, 1);
|
||||||
|
|
||||||
|
if (PyThread_start_new_thread(file_watchdog, NULL) == -1) {
|
||||||
|
PyThread_release_lock(watchdog.running);
|
||||||
|
PyErr_SetString(PyExc_RuntimeError,
|
||||||
|
"unable to start file watchdog thread");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
Py_RETURN_NONE;
|
||||||
|
}
|
||||||
|
|
||||||
|
static PyObject*
|
||||||
|
faulthandler_cancel_file_watchdog(PyObject *self)
|
||||||
|
{
|
||||||
|
cancel_file_watchdog();
|
||||||
|
Py_RETURN_NONE;
|
||||||
|
}
|
||||||
|
#endif /* FAULTHANDLER_WATCHDOG */
|
||||||
|
|
||||||
#ifdef FAULTHANDLER_USER
|
#ifdef FAULTHANDLER_USER
|
||||||
static int
|
static int
|
||||||
faulthandler_register(int signum, int chain, _Py_sighandler_t *p_previous)
|
faulthandler_register(int signum, int chain, _Py_sighandler_t *p_previous)
|
||||||
|
@ -973,6 +1120,18 @@ static PyMethodDef module_methods[] = {
|
||||||
"to dump_tracebacks_later().")},
|
"to dump_tracebacks_later().")},
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef FAULTHANDLER_WATCHDOG
|
||||||
|
{"_file_watchdog",
|
||||||
|
(PyCFunction)faulthandler_file_watchdog, METH_VARARGS|METH_KEYWORDS,
|
||||||
|
PyDoc_STR("_file_watchdog(rfd, wfd, period):\n"
|
||||||
|
"feed the contents of 'rfd' to 'wfd', if changed,\n"
|
||||||
|
"every 'period seconds'.")},
|
||||||
|
{"_cancel_file_watchdog",
|
||||||
|
(PyCFunction)faulthandler_cancel_file_watchdog, METH_NOARGS,
|
||||||
|
PyDoc_STR("_cancel_file_watchdog():\ncancel the previous call "
|
||||||
|
"to _file_watchdog().")},
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef FAULTHANDLER_USER
|
#ifdef FAULTHANDLER_USER
|
||||||
{"register",
|
{"register",
|
||||||
(PyCFunction)faulthandler_register_py, METH_VARARGS|METH_KEYWORDS,
|
(PyCFunction)faulthandler_register_py, METH_VARARGS|METH_KEYWORDS,
|
||||||
|
@ -1097,6 +1256,16 @@ int _PyFaulthandler_Init(void)
|
||||||
}
|
}
|
||||||
PyThread_acquire_lock(thread.cancel_event, 1);
|
PyThread_acquire_lock(thread.cancel_event, 1);
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef FAULTHANDLER_WATCHDOG
|
||||||
|
watchdog.cancel_event = PyThread_allocate_lock();
|
||||||
|
watchdog.running = PyThread_allocate_lock();
|
||||||
|
if (!watchdog.cancel_event || !watchdog.running) {
|
||||||
|
PyErr_SetString(PyExc_RuntimeError,
|
||||||
|
"could not allocate locks for faulthandler");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
PyThread_acquire_lock(watchdog.cancel_event, 1);
|
||||||
|
#endif
|
||||||
|
|
||||||
return faulthandler_env_options();
|
return faulthandler_env_options();
|
||||||
}
|
}
|
||||||
|
@ -1121,6 +1290,20 @@ void _PyFaulthandler_Fini(void)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef FAULTHANDLER_WATCHDOG
|
||||||
|
/* file watchdog */
|
||||||
|
cancel_file_watchdog();
|
||||||
|
if (watchdog.cancel_event) {
|
||||||
|
PyThread_release_lock(watchdog.cancel_event);
|
||||||
|
PyThread_free_lock(watchdog.cancel_event);
|
||||||
|
watchdog.cancel_event = NULL;
|
||||||
|
}
|
||||||
|
if (watchdog.running) {
|
||||||
|
PyThread_free_lock(watchdog.running);
|
||||||
|
watchdog.running = NULL;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef FAULTHANDLER_USER
|
#ifdef FAULTHANDLER_USER
|
||||||
/* user */
|
/* user */
|
||||||
if (user_signals != NULL) {
|
if (user_signals != NULL) {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue