Fix re-entrance deadlock in Package::files (#12948)

This commit is contained in:
Micha Reiser 2024-08-20 08:51:08 +02:00 committed by GitHub
parent abb4cdbf3d
commit 38c19fb96e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 141 additions and 125 deletions

View file

@ -127,7 +127,6 @@ impl TestCase {
fn collect_package_files(&self, path: &SystemPath) -> Vec<File> { fn collect_package_files(&self, path: &SystemPath) -> Vec<File> {
let package = self.db().workspace().package(self.db(), path).unwrap(); let package = self.db().workspace().package(self.db(), path).unwrap();
let files = package.files(self.db()); let files = package.files(self.db());
let files = files.read();
let mut collected: Vec<_> = files.into_iter().collect(); let mut collected: Vec<_> = files.into_iter().collect();
collected.sort_unstable_by_key(|file| file.path(self.db()).as_system_path().unwrap()); collected.sort_unstable_by_key(|file| file.path(self.db()).as_system_path().unwrap());
collected collected

View file

@ -11,7 +11,7 @@ use ruff_db::{
}; };
use ruff_python_ast::{name::Name, PySourceType}; use ruff_python_ast::{name::Name, PySourceType};
use crate::workspace::files::{Index, IndexedFiles, PackageFiles}; use crate::workspace::files::{Index, Indexed, PackageFiles};
use crate::{ use crate::{
db::Db, db::Db,
lint::{lint_semantic, lint_syntax, Diagnostics}, lint::{lint_semantic, lint_syntax, Diagnostics},
@ -259,7 +259,7 @@ impl Package {
/// Returns `true` if `file` is a first-party file part of this package. /// Returns `true` if `file` is a first-party file part of this package.
pub fn contains_file(self, db: &dyn Db, file: File) -> bool { pub fn contains_file(self, db: &dyn Db, file: File) -> bool {
self.files(db).read().contains(&file) self.files(db).contains(&file)
} }
#[tracing::instrument(level = "debug", skip(db))] #[tracing::instrument(level = "debug", skip(db))]
@ -292,7 +292,7 @@ impl Package {
tracing::debug!("Checking package {}", self.root(db)); tracing::debug!("Checking package {}", self.root(db));
let mut result = Vec::new(); let mut result = Vec::new();
for file in &self.files(db).read() { for file in &self.files(db) {
let diagnostics = check_file(db, file); let diagnostics = check_file(db, file);
result.extend_from_slice(&diagnostics); result.extend_from_slice(&diagnostics);
} }
@ -301,13 +301,14 @@ impl Package {
} }
/// Returns the files belonging to this package. /// Returns the files belonging to this package.
#[salsa::tracked] pub fn files(self, db: &dyn Db) -> Indexed<'_> {
pub fn files(self, db: &dyn Db) -> IndexedFiles {
let _entered = tracing::debug_span!("files").entered();
let files = self.file_set(db); let files = self.file_set(db);
let indexed = match files.get() { let indexed = match files.get() {
Index::Lazy(vacant) => { Index::Lazy(vacant) => {
let _entered =
tracing::debug_span!("index_package_files", package = %self.name(db)).entered();
tracing::debug!("Indexing files for package {}", self.name(db)); tracing::debug!("Indexing files for package {}", self.name(db));
let files = discover_package_files(db, self.root(db)); let files = discover_package_files(db, self.root(db));
vacant.set(files) vacant.set(files)

View file

@ -1,4 +1,4 @@
use std::iter::FusedIterator; use std::marker::PhantomData;
use std::ops::Deref; use std::ops::Deref;
use std::sync::Arc; use std::sync::Arc;
@ -10,6 +10,9 @@ use ruff_db::files::File;
use crate::db::Db; use crate::db::Db;
use crate::workspace::Package; use crate::workspace::Package;
/// Cheap cloneable hash set of files.
type FileSet = Arc<FxHashSet<File>>;
/// The indexed files of a package. /// The indexed files of a package.
/// ///
/// The indexing happens lazily, but the files are then cached for subsequent reads. /// The indexing happens lazily, but the files are then cached for subsequent reads.
@ -18,7 +21,7 @@ use crate::workspace::Package;
/// The implementation uses internal mutability to transition between the lazy and indexed state /// The implementation uses internal mutability to transition between the lazy and indexed state
/// without triggering a new salsa revision. This is safe because the initial indexing happens on first access, /// without triggering a new salsa revision. This is safe because the initial indexing happens on first access,
/// so no query can be depending on the contents of the indexed files before that. All subsequent mutations to /// so no query can be depending on the contents of the indexed files before that. All subsequent mutations to
/// the indexed files must go through `IndexedFilesMut`, which uses the Salsa setter `package.set_file_set` to /// the indexed files must go through `IndexedMut`, which uses the Salsa setter `package.set_file_set` to
/// ensure that Salsa always knows when the set of indexed files have changed. /// ensure that Salsa always knows when the set of indexed files have changed.
#[derive(Debug)] #[derive(Debug)]
pub struct PackageFiles { pub struct PackageFiles {
@ -32,46 +35,67 @@ impl PackageFiles {
} }
} }
fn indexed(indexed_files: IndexedFiles) -> Self { fn indexed(files: FileSet) -> Self {
Self { Self {
state: std::sync::Mutex::new(State::Indexed(indexed_files)), state: std::sync::Mutex::new(State::Indexed(files)),
} }
} }
pub fn get(&self) -> Index { pub(super) fn get(&self) -> Index {
let state = self.state.lock().unwrap(); let state = self.state.lock().unwrap();
match &*state { match &*state {
State::Lazy => Index::Lazy(LazyFiles { files: state }), State::Lazy => Index::Lazy(LazyFiles { files: state }),
State::Indexed(files) => Index::Indexed(files.clone()), State::Indexed(files) => Index::Indexed(Indexed {
files: Arc::clone(files),
_lifetime: PhantomData,
}),
} }
} }
pub fn is_lazy(&self) -> bool { pub(super) fn is_lazy(&self) -> bool {
matches!(*self.state.lock().unwrap(), State::Lazy) matches!(*self.state.lock().unwrap(), State::Lazy)
} }
/// Returns a mutable view on the index that allows cheap in-place mutations. /// Returns a mutable view on the index that allows cheap in-place mutations.
/// ///
/// The changes are automatically written back to the database once the view is dropped. /// The changes are automatically written back to the database once the view is dropped.
pub fn indexed_mut(db: &mut dyn Db, package: Package) -> Option<IndexedFilesMut> { pub(super) fn indexed_mut(db: &mut dyn Db, package: Package) -> Option<IndexedMut> {
// Calling `zalsa_mut` cancels all pending salsa queries. This ensures that there are no pending // Calling `zalsa_mut` cancels all pending salsa queries. This ensures that there are no pending
// reads to the file set. // reads to the file set.
// TODO: Use a non-internal API instead https://salsa.zulipchat.com/#narrow/stream/333573-salsa-3.2E0/topic/Expose.20an.20API.20to.20cancel.20other.20queries // TODO: Use a non-internal API instead https://salsa.zulipchat.com/#narrow/stream/333573-salsa-3.2E0/topic/Expose.20an.20API.20to.20cancel.20other.20queries
let _ = db.as_dyn_database_mut().zalsa_mut(); let _ = db.as_dyn_database_mut().zalsa_mut();
// Replace the state with lazy. The `IndexedMut` guard restores the state
// to `State::Indexed` or sets a new `PackageFiles` when it gets dropped to ensure the state
// is restored to how it has been before replacing the value.
//
// It isn't necessary to hold on to the lock after this point:
// * The above call to `zalsa_mut` guarantees that there's exactly **one** DB reference.
// * `Indexed` has a `'db` lifetime, and this method requires a `&mut db`.
// This means that there can't be any pending reference to `Indexed` because Rust
// doesn't allow borrowing `db` as mutable (to call this method) and immutable (`Indexed<'db>`) at the same time.
// There can't be any other `Indexed<'db>` references created by clones of this DB because
// all clones must have been dropped at this point and the `Indexed`
// can't outlive the database (constrained by the `db` lifetime).
let state = {
let files = package.file_set(db); let files = package.file_set(db);
let mut locked = files.state.lock().unwrap();
let indexed = match &*files.state.lock().unwrap() { std::mem::replace(&mut *locked, State::Lazy)
State::Lazy => return None,
State::Indexed(indexed) => indexed.clone(),
}; };
Some(IndexedFilesMut { let indexed = match state {
// If it's already lazy, just return. We also don't need to restore anything because the
// replace above was a no-op.
State::Lazy => return None,
State::Indexed(indexed) => indexed,
};
Some(IndexedMut {
db: Some(db), db: Some(db),
package, package,
new_revision: indexed.revision, files: indexed,
indexed, did_change: false,
}) })
} }
} }
@ -88,152 +112,93 @@ enum State {
Lazy, Lazy,
/// The files are indexed. Stores the known files of a package. /// The files are indexed. Stores the known files of a package.
Indexed(IndexedFiles), Indexed(FileSet),
} }
pub enum Index<'a> { pub(super) enum Index<'db> {
/// The index has not yet been computed. Allows inserting the files. /// The index has not yet been computed. Allows inserting the files.
Lazy(LazyFiles<'a>), Lazy(LazyFiles<'db>),
Indexed(IndexedFiles), Indexed(Indexed<'db>),
} }
/// Package files that have not been indexed yet. /// Package files that have not been indexed yet.
pub struct LazyFiles<'a> { pub(super) struct LazyFiles<'db> {
files: std::sync::MutexGuard<'a, State>, files: std::sync::MutexGuard<'db, State>,
} }
impl<'a> LazyFiles<'a> { impl<'db> LazyFiles<'db> {
/// Sets the indexed files of a package to `files`. /// Sets the indexed files of a package to `files`.
pub fn set(mut self, files: FxHashSet<File>) -> IndexedFiles { pub(super) fn set(mut self, files: FxHashSet<File>) -> Indexed<'db> {
let files = IndexedFiles::new(files); let files = Indexed {
*self.files = State::Indexed(files.clone()); files: Arc::new(files),
_lifetime: PhantomData,
};
*self.files = State::Indexed(Arc::clone(&files.files));
files files
} }
} }
/// The indexed files of a package. /// The indexed files of a package.
/// ///
/// # Salsa integration /// Note: This type is intentionally non-cloneable. Making it cloneable requires
/// The type is cheap clonable and allows for in-place mutation of the files. The in-place mutation requires /// revisiting the locking behavior in [`PackageFiles::indexed_mut`].
/// extra care because the type is used as the result of Salsa queries and Salsa relies on a type's equality #[derive(Debug, PartialEq, Eq)]
/// to determine if the output has changed. This is accomplished by using a `revision` that gets incremented pub struct Indexed<'db> {
/// whenever the files are changed. The revision ensures that salsa's comparison of the files: FileSet,
/// previous [`IndexedFiles`] with the next [`IndexedFiles`] returns false even though they both // Preserve the lifetime of `PackageFiles`.
/// point to the same underlying hash set. _lifetime: PhantomData<&'db ()>,
///
/// # Equality
/// Two [`IndexedFiles`] are only equal if they have the same revision and point to the **same** (identity) hash set.
#[derive(Debug, Clone)]
pub struct IndexedFiles {
revision: u64,
files: Arc<std::sync::Mutex<FxHashSet<File>>>,
} }
impl IndexedFiles { impl Deref for Indexed<'_> {
fn new(files: FxHashSet<File>) -> Self {
Self {
files: Arc::new(std::sync::Mutex::new(files)),
revision: 0,
}
}
/// Locks the file index for reading.
pub fn read(&self) -> IndexedFilesGuard {
IndexedFilesGuard {
guard: self.files.lock().unwrap(),
}
}
}
impl PartialEq for IndexedFiles {
fn eq(&self, other: &Self) -> bool {
self.revision == other.revision && Arc::ptr_eq(&self.files, &other.files)
}
}
impl Eq for IndexedFiles {}
pub struct IndexedFilesGuard<'a> {
guard: std::sync::MutexGuard<'a, FxHashSet<File>>,
}
impl Deref for IndexedFilesGuard<'_> {
type Target = FxHashSet<File>; type Target = FxHashSet<File>;
fn deref(&self) -> &Self::Target { fn deref(&self) -> &Self::Target {
&self.guard &self.files
} }
} }
impl<'a> IntoIterator for &'a IndexedFilesGuard<'a> { impl<'a> IntoIterator for &'a Indexed<'_> {
type Item = File; type Item = File;
type IntoIter = IndexedFilesIter<'a>; type IntoIter = std::iter::Copied<std::collections::hash_set::Iter<'a, File>>;
fn into_iter(self) -> Self::IntoIter { fn into_iter(self) -> Self::IntoIter {
IndexedFilesIter { self.files.iter().copied()
inner: self.guard.iter(),
} }
} }
}
/// Iterator over the indexed files.
///
/// # Locks
/// Holding on to the iterator locks the file index for reading.
pub struct IndexedFilesIter<'a> {
inner: std::collections::hash_set::Iter<'a, File>,
}
impl<'a> Iterator for IndexedFilesIter<'a> {
type Item = File;
fn next(&mut self) -> Option<Self::Item> {
self.inner.next().copied()
}
fn size_hint(&self) -> (usize, Option<usize>) {
self.inner.size_hint()
}
}
impl FusedIterator for IndexedFilesIter<'_> {}
impl ExactSizeIterator for IndexedFilesIter<'_> {}
/// A mutable view of a package's indexed files. /// A mutable view of a package's indexed files.
/// ///
/// Allows in-place mutation of the files without deep cloning the hash set. /// Allows in-place mutation of the files without deep cloning the hash set.
/// The changes are written back when the mutable view is dropped or by calling [`Self::set`] manually. /// The changes are written back when the mutable view is dropped or by calling [`Self::set`] manually.
pub struct IndexedFilesMut<'db> { pub(super) struct IndexedMut<'db> {
db: Option<&'db mut dyn Db>, db: Option<&'db mut dyn Db>,
package: Package, package: Package,
indexed: IndexedFiles, files: FileSet,
new_revision: u64, did_change: bool,
} }
impl IndexedFilesMut<'_> { impl IndexedMut<'_> {
pub fn insert(&mut self, file: File) -> bool { pub(super) fn insert(&mut self, file: File) -> bool {
if self.indexed.files.lock().unwrap().insert(file) { if self.files_mut().insert(file) {
self.new_revision += 1; self.did_change = true;
true true
} else { } else {
false false
} }
} }
pub fn remove(&mut self, file: File) -> bool { pub(super) fn remove(&mut self, file: File) -> bool {
if self.indexed.files.lock().unwrap().remove(&file) { if self.files_mut().remove(&file) {
self.new_revision += 1; self.did_change = true;
true true
} else { } else {
false false
} }
} }
/// Writes the changes back to the database. fn files_mut(&mut self) -> &mut FxHashSet<File> {
pub fn set(mut self) { Arc::get_mut(&mut self.files).expect("All references to `FilesSet` to have been dropped")
self.set_impl();
} }
fn set_impl(&mut self) { fn set_impl(&mut self) {
@ -241,19 +206,70 @@ impl IndexedFilesMut<'_> {
return; return;
}; };
if self.indexed.revision != self.new_revision { let files = Arc::clone(&self.files);
if self.did_change {
// If there are changes, set the new file_set to trigger a salsa revision change.
self.package self.package
.set_file_set(db) .set_file_set(db)
.to(PackageFiles::indexed(IndexedFiles { .to(PackageFiles::indexed(files));
revision: self.new_revision, } else {
files: self.indexed.files.clone(), // The `indexed_mut` replaced the `state` with Lazy. Restore it back to the indexed state.
})); *self.package.file_set(db).state.lock().unwrap() = State::Indexed(files);
} }
} }
} }
impl Drop for IndexedFilesMut<'_> { impl Drop for IndexedMut<'_> {
fn drop(&mut self) { fn drop(&mut self) {
self.set_impl(); self.set_impl();
} }
} }
#[cfg(test)]
mod tests {
use rustc_hash::FxHashSet;
use ruff_db::files::system_path_to_file;
use ruff_db::system::{DbWithTestSystem, SystemPathBuf};
use ruff_python_ast::name::Name;
use crate::db::tests::TestDb;
use crate::workspace::files::Index;
use crate::workspace::Package;
#[test]
fn re_entrance() -> anyhow::Result<()> {
let mut db = TestDb::new();
db.write_file("test.py", "")?;
let package = Package::new(&db, Name::new("test"), SystemPathBuf::from("/test"));
let file = system_path_to_file(&db, "test.py").unwrap();
let files = match package.file_set(&db).get() {
Index::Lazy(lazy) => lazy.set(FxHashSet::from_iter([file])),
Index::Indexed(files) => files,
};
// Calling files a second time should not deadlock.
// This can e.g. happen when `check_file` iterates over all files and
// `is_file_open` queries the open files.
let files_2 = package.file_set(&db).get();
match files_2 {
Index::Lazy(_) => {
panic!("Expected indexed files, got lazy files");
}
Index::Indexed(files_2) => {
assert_eq!(
files_2.iter().collect::<Vec<_>>(),
files.iter().collect::<Vec<_>>()
);
}
}
Ok(())
}
}