fix: multithreaded nested fixpoint iteration (#882)
Some checks are pending
Test / Test (push) Waiting to run
Test / Miri (push) Waiting to run
Test / Shuttle (push) Waiting to run
Test / Benchmarks (push) Waiting to run
Book / Book (push) Waiting to run
Book / Deploy (push) Blocked by required conditions
Release-plz / Release-plz release (push) Waiting to run
Release-plz / Release-plz PR (push) Waiting to run

* Set `validate_final` in `execute` after removing the last cycle head

* Add runaway query repro

* Add tracing

* Fix part 1

* Fix `cycle_head_kinds` to always return provisional for memos that aren't verified final (They should be validated by `validate_same_iteration` or wait for the cycle head)

* Fix cycle error

* Documentation

* Fix await for queries depending on initial value

* correctly initialize queued

* Cleanup

* Short circuit if entire query runs on single thread

* Move parallel code into its own method

* Rename method, add self_key to queued

* Revert self-key changes

* Move check *after* `deep_verify_memo`

* Add a test for a cycle with changing cycle heads

* Short circuit more often

* Consider iteration in `validate_provisional`

* Only yield if all heads result in a cycle. Retry if even just one inner cycle made progress (in which case there's probably a new memo)

* Fix hangs

* Cargo fmt

* clippy

* Fix hang if cycle initial panics

* Rename `cycle_head_kind`, enable `cycle_a_t1_b_t2_fallback` shuttle test

* Cleanup

* Docs
This commit is contained in:
Micha Reiser 2025-06-01 10:45:37 +02:00 committed by GitHub
parent 80fb79e910
commit 2b5188778e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
25 changed files with 877 additions and 223 deletions

View file

@ -1,4 +1,4 @@
use crate::cycle::{CycleRecoveryStrategy, MAX_ITERATIONS};
use crate::cycle::{CycleRecoveryStrategy, IterationCount};
use crate::function::memo::Memo;
use crate::function::{Configuration, IngredientImpl};
use crate::sync::atomic::{AtomicBool, Ordering};
@ -74,7 +74,9 @@ where
// Cycle participants that don't have a fallback will be discarded in
// `validate_provisional()`.
let cycle_heads = std::mem::take(cycle_heads);
let active_query = db.zalsa_local().push_query(database_key_index, 0);
let active_query = db
.zalsa_local()
.push_query(database_key_index, IterationCount::initial());
new_value = C::cycle_initial(db, C::id_to_input(db, id));
revisions = active_query.pop();
// We need to set `cycle_heads` and `verified_final` because it needs to propagate to the callers.
@ -125,7 +127,7 @@ where
memo_ingredient_index: MemoIngredientIndex,
) -> (C::Output<'db>, QueryRevisions) {
let database_key_index = active_query.database_key_index;
let mut iteration_count: u32 = 0;
let mut iteration_count = IterationCount::initial();
let mut fell_back = false;
// Our provisional value from the previous iteration, when doing fixpoint iteration.
@ -189,12 +191,10 @@ where
match C::recover_from_cycle(
db,
&new_value,
iteration_count,
iteration_count.as_u32(),
C::id_to_input(db, id),
) {
crate::CycleRecoveryAction::Iterate => {
tracing::debug!("{database_key_index:?}: execute: iterate again");
}
crate::CycleRecoveryAction::Iterate => {}
crate::CycleRecoveryAction::Fallback(fallback_value) => {
tracing::debug!(
"{database_key_index:?}: execute: user cycle_fn says to fall back"
@ -208,10 +208,9 @@ where
}
// `iteration_count` can't overflow as we check it against `MAX_ITERATIONS`
// which is less than `u32::MAX`.
iteration_count += 1;
if iteration_count > MAX_ITERATIONS {
panic!("{database_key_index:?}: execute: too many cycle iterations");
}
iteration_count = iteration_count.increment().unwrap_or_else(|| {
panic!("{database_key_index:?}: execute: too many cycle iterations")
});
zalsa.event(&|| {
Event::new(EventKind::WillIterateCycle {
database_key: database_key_index,
@ -220,6 +219,10 @@ where
})
});
cycle_heads.update_iteration_count(database_key_index, iteration_count);
revisions.update_iteration_count(iteration_count);
tracing::debug!(
"{database_key_index:?}: execute: iterate again, revisions: {revisions:#?}"
);
opt_last_provisional = Some(self.insert_memo(
zalsa,
id,