This PR can create deadlocks - there is nothing synchronizing the mutex/condvar with the `Arc` of the `Zalsa`, therefore code may wake up by the `Condvar` but not see an updated refcount, go to sleep again, and never wake up.
This commit is contained in:
Chayim Refael Friedman 2025-12-17 10:16:07 +02:00 committed by GitHub
parent 0a3eec6e65
commit 07310800eb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -25,6 +25,8 @@ pub struct StorageHandle<Db> {
impl<Db> Clone for StorageHandle<Db> {
fn clone(&self) -> Self {
*self.coordinate.clones.lock() += 1;
Self {
zalsa_impl: self.zalsa_impl.clone(),
coordinate: CoordinateDrop(Arc::clone(&self.coordinate)),
@ -51,7 +53,7 @@ impl<Db: Database> StorageHandle<Db> {
Self {
zalsa_impl: Arc::new(Zalsa::new::<Db>(event_callback, jars)),
coordinate: CoordinateDrop(Arc::new(Coordinate {
coordinate_lock: Mutex::default(),
clones: Mutex::new(1),
cvar: Default::default(),
})),
phantom: PhantomData,
@ -93,6 +95,17 @@ impl<Db> Drop for Storage<Db> {
}
}
struct Coordinate {
/// Counter of the number of clones of actor. Begins at 1.
/// Incremented when cloned, decremented when dropped.
clones: Mutex<usize>,
cvar: Condvar,
}
// We cannot panic while holding a lock to `clones: Mutex<usize>` and therefore we cannot enter an
// inconsistent state.
impl RefUnwindSafe for Coordinate {}
impl<Db: Database> Default for Storage<Db> {
fn default() -> Self {
Self::new(None)
@ -155,15 +168,12 @@ impl<Db: Database> Storage<Db> {
.zalsa_impl
.event(&|| Event::new(EventKind::DidSetCancellationFlag));
let mut coordinate_lock = self.handle.coordinate.coordinate_lock.lock();
let zalsa = loop {
if Arc::strong_count(&self.handle.zalsa_impl) == 1 {
// SAFETY: The strong count is 1, and we never create any weak pointers,
// so we have a unique reference.
break unsafe { &mut *(Arc::as_ptr(&self.handle.zalsa_impl).cast_mut()) };
}
coordinate_lock = self.handle.coordinate.cvar.wait(coordinate_lock);
};
let mut clones = self.handle.coordinate.clones.lock();
while *clones != 1 {
clones = self.handle.coordinate.cvar.wait(clones);
}
// The ref count on the `Arc` should now be 1
let zalsa = Arc::get_mut(&mut self.handle.zalsa_impl).unwrap();
// cancellation is done, so reset the flag
zalsa.runtime_mut().reset_cancellation_flag();
zalsa
@ -250,16 +260,6 @@ impl<Db: Database> Clone for Storage<Db> {
}
}
/// A simplified `WaitGroup`, this is used together with `Arc<Zalsa>` as the actual counter
struct Coordinate {
coordinate_lock: Mutex<()>,
cvar: Condvar,
}
// We cannot panic while holding a lock to `clones: Mutex<usize>` and therefore we cannot enter an
// inconsistent state.
impl RefUnwindSafe for Coordinate {}
struct CoordinateDrop(Arc<Coordinate>);
impl std::ops::Deref for CoordinateDrop {
@ -272,6 +272,7 @@ impl std::ops::Deref for CoordinateDrop {
impl Drop for CoordinateDrop {
fn drop(&mut self) {
*self.0.clones.lock() -= 1;
self.0.cvar.notify_all();
}
}