Mirror of https://github.com/python/cpython.git (synced 2025-09-26 10:19:53 +00:00)
gh-108724: Add PyMutex and _PyParkingLot APIs (gh-109344)
PyMutex is a one-byte lock with fast, inlineable lock and unlock functions for the common uncontended case. The design is based on WebKit's WTF::Lock. PyMutex is built using the _PyParkingLot APIs, which provide a cross-platform futex-like API (based on WebKit's WTF::ParkingLot). This internal API will be used to build the other synchronization primitives needed to implement PEP 703, such as one-time initialization and events.

This also includes tests and a mini benchmark in Tools/lockbench/lockbench.py to compare with the existing PyThread_type_lock.

Uncontended acquisition + release:
* Linux (x86-64): PyMutex: 11 ns, PyThread_type_lock: 44 ns
* macOS (arm64): PyMutex: 13 ns, PyThread_type_lock: 18 ns
* Windows (x86-64): PyMutex: 13 ns, PyThread_type_lock: 38 ns

PR Overview: The primary purpose of this PR is to implement PyMutex, but there are a number of supporting pieces (described below).

* PyMutex: A 1-byte lock that doesn't require memory allocation to initialize and is generally faster than the existing PyThread_type_lock. The API is internal only for now.
* _PyParkingLot: A futex-like API based on the API of the same name in WebKit. Used to implement PyMutex.
* _PyRawMutex: A word-sized lock used to implement _PyParkingLot.
* PyEvent: A one-time event. This was used a bunch in the "nogil" fork and is useful for testing the PyMutex implementation, so I've included it as part of the PR.
* pycore_llist.h: Defines common operations on a doubly-linked list. Not strictly necessary (the list operations could be done manually), but they come up frequently in the "nogil" fork. (Similar to https://man.freebsd.org/cgi/man.cgi?queue)

---------

Co-authored-by: Eric Snow <ericsnowcurrently@gmail.com>
parent 0a31ff0050
commit 0c89056fe5
29 changed files with 1665 additions and 21 deletions
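To make the description above concrete, here is a minimal sketch of the intended internal-only usage, based on the PyMutex API added in Include/internal/pycore_lock.h further down. The counter struct and function are hypothetical and not part of the commit.

// Sketch only: guarding a counter with PyMutex (hypothetical example).
// PyMutex is zero-initialized, so it can be embedded directly in a struct
// without any separate allocation or init call.
#include "Python.h"
#include "pycore_lock.h"        // PyMutex, PyMutex_Lock(), PyMutex_Unlock()

typedef struct {
    PyMutex mutex;              // all-zero bytes == unlocked
    Py_ssize_t count;           // data protected by the mutex
} counter_t;

static void
counter_increment(counter_t *c)
{
    PyMutex_Lock(&c->mutex);    // uncontended case: a single compare-exchange
    c->count++;
    PyMutex_Unlock(&c->mutex);  // wakes a parked waiter only if one exists
}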
@@ -48,6 +48,7 @@
#include "pytypedefs.h"
#include "pybuffer.h"
#include "pystats.h"
#include "pyatomic.h"
#include "object.h"
#include "objimpl.h"
#include "typeslots.h"

@@ -83,9 +83,9 @@
// # release
// ...

#ifndef Py_ATOMIC_H
#define Py_ATOMIC_H

#ifndef Py_CPYTHON_ATOMIC_H
# error "this header file must not be included directly"
#endif

// --- _Py_atomic_add --------------------------------------------------------
// Atomically adds `value` to `obj` and returns the previous value

@@ -501,6 +501,3 @@ static inline void _Py_atomic_fence_release(void);
#else
# error "no available pyatomic implementation for this platform/compiler"
#endif

#endif /* Py_ATOMIC_H */

@@ -906,7 +906,7 @@ _Py_atomic_store_ptr_release(void *obj, void *value)
#if defined(_M_X64) || defined(_M_IX86)
    *(void * volatile *)obj = value;
#elif defined(_M_ARM64)
    __stlr64(obj, (uintptr_t)value);
    __stlr64((unsigned __int64 volatile *)obj, (uintptr_t)value);
#else
# error "no implementation of _Py_atomic_store_ptr_release"
#endif
Include/internal/pycore_llist.h (new file, 107 lines)

@@ -0,0 +1,107 @@
// A doubly-linked list that can be embedded in a struct.
//
// Usage:
//  struct llist_node head = LLIST_INIT(head);
//  typedef struct {
//      ...
//      struct llist_node node;
//      ...
//  } MyObj;
//
//  llist_insert_tail(&head, &obj->node);
//  llist_remove(&obj->node);
//
//  struct llist_node *node;
//  llist_for_each(node, &head) {
//      MyObj *obj = llist_data(node, MyObj, node);
//      ...
//  }
//

#ifndef Py_INTERNAL_LLIST_H
#define Py_INTERNAL_LLIST_H

#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
# error "Py_BUILD_CORE must be defined to include this header"
#endif

struct llist_node {
    struct llist_node *next;
    struct llist_node *prev;
};

// Get the struct containing a node.
#define llist_data(node, type, member) \
    (type*)((char*)node - offsetof(type, member))

// Iterate over a list.
#define llist_for_each(node, head) \
    for (node = (head)->next; node != (head); node = node->next)

// Iterate over a list, but allow removal of the current node.
#define llist_for_each_safe(node, head) \
    for (struct llist_node *_next = (node = (head)->next, node->next); \
         node != (head); node = _next, _next = node->next)

#define LLIST_INIT(head) { &head, &head }

static inline void
llist_init(struct llist_node *head)
{
    head->next = head;
    head->prev = head;
}

// Returns 1 if the list is empty, 0 otherwise.
static inline int
llist_empty(struct llist_node *head)
{
    return head->next == head;
}

// Appends to the tail of the list.
static inline void
llist_insert_tail(struct llist_node *head, struct llist_node *node)
{
    node->prev = head->prev;
    node->next = head;
    head->prev->next = node;
    head->prev = node;
}

// Remove a node from the list.
static inline void
llist_remove(struct llist_node *node)
{
    struct llist_node *prev = node->prev;
    struct llist_node *next = node->next;
    prev->next = next;
    next->prev = prev;
    node->prev = NULL;
    node->next = NULL;
}

// Append all nodes from head2 onto head1. head2 is left empty.
static inline void
llist_concat(struct llist_node *head1, struct llist_node *head2)
{
    if (!llist_empty(head2)) {
        head1->prev->next = head2->next;
        head2->next->prev = head1->prev;

        head1->prev = head2->prev;
        head2->prev->next = head1;
        llist_init(head2);
    }
}

#ifdef __cplusplus
}
#endif
#endif /* !Py_INTERNAL_LLIST_H */
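A short sketch of how these macros combine in practice; the waiter struct and the lookup/removal loop below are illustrative only, not taken from the commit.

// Hypothetical example: a list of waiter records embedding an llist_node.
struct waiter {
    int id;
    struct llist_node node;     // embedded list node
};

static struct llist_node waiters = LLIST_INIT(waiters);

static void
example_waiter_ops(struct waiter *w)
{
    llist_insert_tail(&waiters, &w->node);   // append at the tail

    struct llist_node *n;
    llist_for_each_safe(n, &waiters) {       // safe: the current node may be removed
        struct waiter *entry = llist_data(n, struct waiter, node);
        if (entry->id == w->id) {
            llist_remove(n);
        }
    }
}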
Include/internal/pycore_lock.h (new file, 158 lines)

@@ -0,0 +1,158 @@
// Lightweight locks and other synchronization mechanisms.
//
// These implementations are based on WebKit's WTF::Lock. See
// https://webkit.org/blog/6161/locking-in-webkit/ for a description of the
// design.
#ifndef Py_INTERNAL_LOCK_H
#define Py_INTERNAL_LOCK_H
#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
# error "this header requires Py_BUILD_CORE define"
#endif

#include "pycore_time.h"   // _PyTime_t


// A mutex that occupies one byte. The lock can be zero initialized.
//
// Only the two least significant bits are used. The remaining bits should be
// zero:
//     0b00: unlocked
//     0b01: locked
//     0b10: unlocked and has parked threads
//     0b11: locked and has parked threads
//
// Typical initialization:
//     PyMutex m = (PyMutex){0};
//
// Typical usage:
//     PyMutex_Lock(&m);
//     ...
//     PyMutex_Unlock(&m);
typedef struct _PyMutex {
    uint8_t v;
} PyMutex;

#define _Py_UNLOCKED 0
#define _Py_LOCKED 1
#define _Py_HAS_PARKED 2

// (private) slow path for locking the mutex
PyAPI_FUNC(void) _PyMutex_LockSlow(PyMutex *m);

// (private) slow path for unlocking the mutex
PyAPI_FUNC(void) _PyMutex_UnlockSlow(PyMutex *m);

// Locks the mutex.
//
// If the mutex is currently locked, the calling thread will be parked until
// the mutex is unlocked. If the current thread holds the GIL, then the GIL
// will be released while the thread is parked.
static inline void
PyMutex_Lock(PyMutex *m)
{
    uint8_t expected = _Py_UNLOCKED;
    if (!_Py_atomic_compare_exchange_uint8(&m->v, &expected, _Py_LOCKED)) {
        _PyMutex_LockSlow(m);
    }
}

// Unlocks the mutex.
static inline void
PyMutex_Unlock(PyMutex *m)
{
    uint8_t expected = _Py_LOCKED;
    if (!_Py_atomic_compare_exchange_uint8(&m->v, &expected, _Py_UNLOCKED)) {
        _PyMutex_UnlockSlow(m);
    }
}

// Checks if the mutex is currently locked.
static inline int
PyMutex_IsLocked(PyMutex *m)
{
    return (_Py_atomic_load_uint8(&m->v) & _Py_LOCKED) != 0;
}

typedef enum _PyLockFlags {
    // Do not detach/release the GIL when waiting on the lock.
    _Py_LOCK_DONT_DETACH = 0,

    // Detach/release the GIL while waiting on the lock.
    _PY_LOCK_DETACH = 1,

    // Handle signals if interrupted while waiting on the lock.
    _PY_LOCK_HANDLE_SIGNALS = 2,
} _PyLockFlags;

// Lock a mutex with an optional timeout and additional options. See
// _PyLockFlags for details.
extern PyLockStatus
_PyMutex_LockTimed(PyMutex *m, _PyTime_t timeout_ns, _PyLockFlags flags);

// Unlock a mutex, returns 0 if the mutex is not locked (used for improved
// error messages).
extern int _PyMutex_TryUnlock(PyMutex *m);


// PyEvent is a one-time event notification
typedef struct {
    uint8_t v;
} PyEvent;

// Set the event and notify any waiting threads.
// Export for '_testinternalcapi' shared extension
PyAPI_FUNC(void) _PyEvent_Notify(PyEvent *evt);

// Wait for the event to be set. If the event is already set, then this returns
// immediately.
PyAPI_FUNC(void) PyEvent_Wait(PyEvent *evt);

// Wait for the event to be set, or until the timeout expires. If the event is
// already set, then this returns immediately. Returns 1 if the event was set,
// and 0 if the timeout expired or thread was interrupted.
PyAPI_FUNC(int) PyEvent_WaitTimed(PyEvent *evt, _PyTime_t timeout_ns);


// _PyRawMutex implements a word-sized mutex that does not depend on the
// parking lot API, and therefore can be used in the parking lot
// implementation.
//
// The mutex uses a packed representation: the least significant bit is used to
// indicate whether the mutex is locked or not. The remaining bits are either
// zero or a pointer to a `struct raw_mutex_entry` (see lock.c).
typedef struct {
    uintptr_t v;
} _PyRawMutex;

// Slow paths for lock/unlock
extern void _PyRawMutex_LockSlow(_PyRawMutex *m);
extern void _PyRawMutex_UnlockSlow(_PyRawMutex *m);

static inline void
_PyRawMutex_Lock(_PyRawMutex *m)
{
    uintptr_t unlocked = _Py_UNLOCKED;
    if (_Py_atomic_compare_exchange_uintptr(&m->v, &unlocked, _Py_LOCKED)) {
        return;
    }
    _PyRawMutex_LockSlow(m);
}

static inline void
_PyRawMutex_Unlock(_PyRawMutex *m)
{
    uintptr_t locked = _Py_LOCKED;
    if (_Py_atomic_compare_exchange_uintptr(&m->v, &locked, _Py_UNLOCKED)) {
        return;
    }
    _PyRawMutex_UnlockSlow(m);
}

#ifdef __cplusplus
}
#endif
#endif /* !Py_INTERNAL_LOCK_H */
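For instance, the one-time PyEvent declared above can pass a "ready" signal from one thread to any number of waiters. This is a sketch under the assumption that both sides are internal CPython code; the two function names are made up.

// Hypothetical example: one-time readiness hand-off between threads.
static PyEvent ready;            // zero-initialized, like PyMutex

static void
mark_ready(void)
{
    // Wakes every thread blocked in PyEvent_Wait()/PyEvent_WaitTimed();
    // later waiters return immediately because the event stays set.
    _PyEvent_Notify(&ready);
}

static int
wait_until_ready(_PyTime_t timeout_ns)
{
    // Returns 1 if the event was set, 0 if the timeout expired or the
    // thread was interrupted.
    return PyEvent_WaitTimed(&ready, timeout_ns);
}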
Include/internal/pycore_parking_lot.h (new file, 99 lines)

@@ -0,0 +1,99 @@
// ParkingLot is an internal API for building efficient synchronization
// primitives like mutexes and events.
//
// The API and name is inspired by WebKit's WTF::ParkingLot, which in turn
// is inspired by Linux's futex API.
// See https://webkit.org/blog/6161/locking-in-webkit/.
//
// The core functionality is an atomic "compare-and-sleep" operation along with
// an atomic "wake-up" operation.

#ifndef Py_INTERNAL_PARKING_LOT_H
#define Py_INTERNAL_PARKING_LOT_H
#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
# error "this header requires Py_BUILD_CORE define"
#endif

#include "pycore_time.h"   // _PyTime_t


enum {
    // The thread was unparked by another thread.
    Py_PARK_OK = 0,

    // The value of `address` did not match `expected`.
    Py_PARK_AGAIN = -1,

    // The thread was unparked due to a timeout.
    Py_PARK_TIMEOUT = -2,

    // The thread was interrupted by a signal.
    Py_PARK_INTR = -3,
};

// Checks that `*address == *expected` and puts the thread to sleep until an
// unpark operation is called on the same `address`. Otherwise, the function
// returns `Py_PARK_AGAIN`. The comparison behaves like memcmp, but is
// performed atomically with respect to unpark operations.
//
// The `address_size` argument is the size of the data pointed to by the
// `address` and `expected` pointers (i.e., sizeof(*address)). It must be
// 1, 2, 4, or 8.
//
// The `timeout_ns` argument specifies the maximum amount of time to wait, with
// -1 indicating an infinite wait.
//
// `park_arg`, which can be NULL, is passed to the unpark operation.
//
// If `detach` is true, then the thread will detach/release the GIL while
// waiting.
//
// Example usage:
//
//  if (_Py_atomic_compare_exchange_uint8(address, &expected, new_value)) {
//      int res = _PyParkingLot_Park(address, &new_value, sizeof(*address),
//                                   timeout_ns, NULL, 1);
//      ...
//  }
PyAPI_FUNC(int)
_PyParkingLot_Park(const void *address, const void *expected,
                   size_t address_size, _PyTime_t timeout_ns,
                   void *park_arg, int detach);

// Callback for _PyParkingLot_Unpark:
//
// `arg` is the data of the same name provided to the _PyParkingLot_Unpark()
// call.
// `park_arg` is the data provided to the _PyParkingLot_Park() call, or NULL if
// no waiting thread was found.
// `has_more_waiters` is true if there are more threads waiting on the same
// address. May be true in cases where threads are waiting on a different
// address that maps to the same internal bucket.
typedef void _Py_unpark_fn_t(void *arg, void *park_arg, int has_more_waiters);

// Unparks a single thread waiting on `address`.
//
// Note that fn() is called regardless of whether a thread was unparked. If
// no threads are waiting on `address` then the `park_arg` argument to fn()
// will be NULL.
//
// Example usage:
//  void callback(void *arg, void *park_arg, int has_more_waiters);
//  _PyParkingLot_Unpark(address, &callback, arg);
PyAPI_FUNC(void)
_PyParkingLot_Unpark(const void *address, _Py_unpark_fn_t *fn, void *arg);

// Unparks all threads waiting on `address`.
PyAPI_FUNC(void) _PyParkingLot_UnparkAll(const void *address);

// Resets the parking lot state after a fork. Forgets all parked threads.
PyAPI_FUNC(void) _PyParkingLot_AfterFork(void);

#ifdef __cplusplus
}
#endif
#endif /* !Py_INTERNAL_PARKING_LOT_H */
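As an illustration of how a primitive can be layered on this API, here is a sketch of a simple "wait for a flag" helper in the spirit of the example in the header above; the function names and the use of the pyatomic load/store helpers are assumptions, not code from the commit.

// Hypothetical sketch: block until *flag becomes nonzero.
static void
wait_for_flag(uint8_t *flag)
{
    for (;;) {
        if (_Py_atomic_load_uint8(flag) != 0) {
            return;
        }
        // Sleeps only if *flag still equals `expected` at park time;
        // otherwise Py_PARK_AGAIN is returned and the flag is re-checked.
        uint8_t expected = 0;
        _PyParkingLot_Park(flag, &expected, sizeof(*flag),
                           -1 /* no timeout */, NULL, 1 /* detach GIL */);
    }
}

// Hypothetical sketch: set the flag and wake all parked waiters.
static void
set_flag(uint8_t *flag)
{
    _Py_atomic_store_uint8(flag, 1);
    _PyParkingLot_UnparkAll(flag);
}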
Include/internal/pycore_semaphore.h (new file, 63 lines)

@@ -0,0 +1,63 @@
// The _PySemaphore API is a simplified cross-platform semaphore used to
// implement wakeup/sleep.
#ifndef Py_INTERNAL_SEMAPHORE_H
#define Py_INTERNAL_SEMAPHORE_H

#ifndef Py_BUILD_CORE
# error "this header requires Py_BUILD_CORE define"
#endif

#include "pycore_time.h"   // _PyTime_t

#ifdef MS_WINDOWS
# define WIN32_LEAN_AND_MEAN
# include <windows.h>
#elif defined(HAVE_PTHREAD_H)
# include <pthread.h>
#elif defined(HAVE_PTHREAD_STUBS)
# include "cpython/pthread_stubs.h"
#else
# error "Require native threads. See https://bugs.python.org/issue31370"
#endif

#if defined(_POSIX_SEMAPHORES) && (_POSIX_SEMAPHORES+0) != -1
# define _Py_USE_SEMAPHORES
# include <semaphore.h>
#endif

#ifdef __cplusplus
extern "C" {
#endif

typedef struct _PySemaphore {
#if defined(MS_WINDOWS)
    HANDLE platform_sem;
#elif defined(_Py_USE_SEMAPHORES)
    sem_t platform_sem;
#else
    pthread_mutex_t mutex;
    pthread_cond_t cond;
    int counter;
#endif
} _PySemaphore;

// Puts the current thread to sleep until _PySemaphore_Wakeup() is called.
// If `detach` is true, then the thread will detach/release the GIL while
// sleeping.
PyAPI_FUNC(int)
_PySemaphore_Wait(_PySemaphore *sema, _PyTime_t timeout_ns, int detach);

// Wakes up a single thread waiting on sema. Note that _PySemaphore_Wakeup()
// can be called before _PySemaphore_Wait().
PyAPI_FUNC(void)
_PySemaphore_Wakeup(_PySemaphore *sema);

// Initializes/destroys a semaphore
PyAPI_FUNC(void) _PySemaphore_Init(_PySemaphore *sema);
PyAPI_FUNC(void) _PySemaphore_Destroy(_PySemaphore *sema);


#ifdef __cplusplus
}
#endif
#endif /* !Py_INTERNAL_SEMAPHORE_H */
|
16
Include/pyatomic.h
Normal file
16
Include/pyatomic.h
Normal file
|
@ -0,0 +1,16 @@
|
|||
#ifndef Py_ATOMIC_H
|
||||
#define Py_ATOMIC_H
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef Py_LIMITED_API
|
||||
# define Py_CPYTHON_ATOMIC_H
|
||||
# include "cpython/pyatomic.h"
|
||||
# undef Py_CPYTHON_ATOMIC_H
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif /* !Py_ATOMIC_H */
|