ruff/crates/ty_project/src/files.rs
Dhruv Manilawala 99d0ac60b4
[ty] Track open files in the server (#19264)
## Summary

This PR updates the server to keep track of open files both system and
virtual files.

This is done by updating the project by adding the file in the open file
set in `didOpen` notification and removing it in `didClose`
notification.

This does mean that for workspace diagnostics, ty will only check open
files because the behavior of different diagnostic builder is to first
check `is_file_open` and only add diagnostics for open files. So, this
required updating the `is_file_open` model to be `should_check_file`
model which validates whether the file needs to be checked based on the
`CheckMode`. If the check mode is open files only then it will check
whether the file is open. If it's all files then it'll return `true` by
default.

Closes: astral-sh/ty#619

## Test Plan

### Before

There are two files in the project: `__init__.py` and `diagnostics.py`.

In the video, I'm demonstrating the old behavior where making changes to
the (open) `diagnostics.py` file results in re-parsing the file:


https://github.com/user-attachments/assets/c2ac0ecd-9c77-42af-a924-c3744b146045

### After

Same setup as above.

In the video, I'm demonstrating the new behavior where making changes to
the (open) `diagnostics.py` file doesn't result in re-parting the file:


https://github.com/user-attachments/assets/7b82fe92-f330-44c7-b527-c841c4545f8f
2025-07-18 19:33:35 +05:30

300 lines
9.2 KiB
Rust

use std::marker::PhantomData;
use std::ops::Deref;
use std::sync::Arc;
use rustc_hash::FxHashSet;
use salsa::Setter;
use ruff_db::files::File;
use crate::db::Db;
use crate::{IOErrorDiagnostic, Project};
/// The indexed files of a project.
///
/// The indexing happens lazily, but the files are then cached for subsequent reads.
///
/// ## Implementation
/// The implementation uses internal mutability to transition between the lazy and indexed state
/// without triggering a new salsa revision. This is safe because the initial indexing happens on first access,
/// so no query can be depending on the contents of the indexed files before that. All subsequent mutations to
/// the indexed files must go through `IndexedMut`, which uses the Salsa setter `project.set_file_set` to
/// ensure that Salsa always knows when the set of indexed files have changed.
#[derive(Debug)]
pub struct IndexedFiles {
state: std::sync::Mutex<State>,
}
impl IndexedFiles {
pub fn lazy() -> Self {
Self {
state: std::sync::Mutex::new(State::Lazy),
}
}
fn indexed(inner: Arc<IndexedInner>) -> Self {
Self {
state: std::sync::Mutex::new(State::Indexed(inner)),
}
}
pub(super) fn get(&self) -> Index {
let state = self.state.lock().unwrap();
match &*state {
State::Lazy => Index::Lazy(LazyFiles { files: state }),
State::Indexed(inner) => Index::Indexed(Indexed {
inner: Arc::clone(inner),
_lifetime: PhantomData,
}),
}
}
pub(super) fn is_lazy(&self) -> bool {
matches!(*self.state.lock().unwrap(), State::Lazy)
}
/// Returns a mutable view on the index that allows cheap in-place mutations.
///
/// The changes are automatically written back to the database once the view is dropped.
pub(super) fn indexed_mut(db: &mut dyn Db, project: Project) -> Option<IndexedMut> {
// Calling `zalsa_mut` cancels all pending salsa queries. This ensures that there are no pending
// reads to the file set.
// TODO: Use a non-internal API instead https://salsa.zulipchat.com/#narrow/stream/333573-salsa-3.2E0/topic/Expose.20an.20API.20to.20cancel.20other.20queries
let _ = db.as_dyn_database_mut().zalsa_mut();
// Replace the state with lazy. The `IndexedMut` guard restores the state
// to `State::Indexed` or sets a new `PackageFiles` when it gets dropped to ensure the state
// is restored to how it has been before replacing the value.
//
// It isn't necessary to hold on to the lock after this point:
// * The above call to `zalsa_mut` guarantees that there's exactly **one** DB reference.
// * `Indexed` has a `'db` lifetime, and this method requires a `&mut db`.
// This means that there can't be any pending reference to `Indexed` because Rust
// doesn't allow borrowing `db` as mutable (to call this method) and immutable (`Indexed<'db>`) at the same time.
// There can't be any other `Indexed<'db>` references created by clones of this DB because
// all clones must have been dropped at this point and the `Indexed`
// can't outlive the database (constrained by the `db` lifetime).
let state = {
let files = project.file_set(db);
let mut locked = files.state.lock().unwrap();
std::mem::replace(&mut *locked, State::Lazy)
};
let indexed = match state {
// If it's already lazy, just return. We also don't need to restore anything because the
// replace above was a no-op.
State::Lazy => return None,
State::Indexed(indexed) => indexed,
};
Some(IndexedMut {
db: Some(db),
project,
indexed,
did_change: false,
})
}
}
impl Default for IndexedFiles {
fn default() -> Self {
Self::lazy()
}
}
#[derive(Debug)]
enum State {
/// The files of a package haven't been indexed yet.
Lazy,
/// The files are indexed. Stores the known files of a package.
Indexed(Arc<IndexedInner>),
}
pub(super) enum Index<'db> {
/// The index has not yet been computed. Allows inserting the files.
Lazy(LazyFiles<'db>),
Indexed(Indexed<'db>),
}
/// Package files that have not been indexed yet.
pub(super) struct LazyFiles<'db> {
files: std::sync::MutexGuard<'db, State>,
}
impl<'db> LazyFiles<'db> {
/// Sets the indexed files of a package to `files`.
pub(super) fn set(
mut self,
files: FxHashSet<File>,
diagnostics: Vec<IOErrorDiagnostic>,
) -> Indexed<'db> {
let files = Indexed {
inner: Arc::new(IndexedInner { files, diagnostics }),
_lifetime: PhantomData,
};
*self.files = State::Indexed(Arc::clone(&files.inner));
files
}
}
/// The indexed files of the project.
///
/// Note: This type is intentionally non-cloneable. Making it cloneable requires
/// revisiting the locking behavior in [`IndexedFiles::indexed_mut`].
#[derive(Debug)]
pub struct Indexed<'db> {
inner: Arc<IndexedInner>,
// Preserve the lifetime of `PackageFiles`.
_lifetime: PhantomData<&'db ()>,
}
#[derive(Debug)]
struct IndexedInner {
files: FxHashSet<File>,
diagnostics: Vec<IOErrorDiagnostic>,
}
impl Indexed<'_> {
pub(super) fn diagnostics(&self) -> &[IOErrorDiagnostic] {
&self.inner.diagnostics
}
pub(super) fn len(&self) -> usize {
self.inner.files.len()
}
}
impl Deref for Indexed<'_> {
type Target = FxHashSet<File>;
fn deref(&self) -> &Self::Target {
&self.inner.files
}
}
pub(super) type IndexedIter<'a> = std::iter::Copied<std::collections::hash_set::Iter<'a, File>>;
impl<'a> IntoIterator for &'a Indexed<'_> {
type Item = File;
type IntoIter = IndexedIter<'a>;
fn into_iter(self) -> Self::IntoIter {
self.inner.files.iter().copied()
}
}
/// A Mutable view of a project's indexed files.
///
/// Allows in-place mutation of the files without deep cloning the hash set.
/// The changes are written back when the mutable view is dropped or by calling [`Self::set`] manually.
pub(super) struct IndexedMut<'db> {
db: Option<&'db mut dyn Db>,
project: Project,
indexed: Arc<IndexedInner>,
did_change: bool,
}
impl IndexedMut<'_> {
pub(super) fn insert(&mut self, file: File) -> bool {
if self.inner_mut().files.insert(file) {
self.did_change = true;
true
} else {
false
}
}
pub(super) fn remove(&mut self, file: File) -> bool {
if self.inner_mut().files.remove(&file) {
self.did_change = true;
true
} else {
false
}
}
pub(super) fn set_diagnostics(&mut self, diagnostics: Vec<IOErrorDiagnostic>) {
self.inner_mut().diagnostics = diagnostics;
}
fn inner_mut(&mut self) -> &mut IndexedInner {
Arc::get_mut(&mut self.indexed)
.expect("All references to `FilesSet` should have been dropped")
}
fn set_impl(&mut self) {
let Some(db) = self.db.take() else {
return;
};
let indexed = Arc::clone(&self.indexed);
if self.did_change {
// If there are changes, set the new file_set to trigger a salsa revision change.
self.project
.set_file_set(db)
.to(IndexedFiles::indexed(indexed));
} else {
// The `indexed_mut` replaced the `state` with Lazy. Restore it back to the indexed state.
*self.project.file_set(db).state.lock().unwrap() = State::Indexed(indexed);
}
}
}
impl Drop for IndexedMut<'_> {
fn drop(&mut self) {
self.set_impl();
}
}
#[cfg(test)]
mod tests {
use rustc_hash::FxHashSet;
use crate::ProjectMetadata;
use crate::db::Db;
use crate::db::tests::TestDb;
use crate::files::Index;
use ruff_db::files::system_path_to_file;
use ruff_db::system::{DbWithWritableSystem as _, SystemPathBuf};
use ruff_python_ast::name::Name;
#[test]
fn re_entrance() -> anyhow::Result<()> {
let metadata = ProjectMetadata::new(Name::new_static("test"), SystemPathBuf::from("/test"));
let mut db = TestDb::new(metadata);
db.write_file("test.py", "")?;
let project = db.project();
let file = system_path_to_file(&db, "test.py").unwrap();
let files = match project.file_set(&db).get() {
Index::Lazy(lazy) => lazy.set(FxHashSet::from_iter([file]), Vec::new()),
Index::Indexed(files) => files,
};
// Calling files a second time should not dead-lock.
// This can e.g. happen when `check_file` iterates over all files and
// `should_check_file` queries the open files.
let files_2 = project.file_set(&db).get();
match files_2 {
Index::Lazy(_) => {
panic!("Expected indexed files, got lazy files");
}
Index::Indexed(files_2) => {
assert_eq!(
files_2.iter().collect::<Vec<_>>(),
files.iter().collect::<Vec<_>>()
);
}
}
Ok(())
}
}