[ty] Make check_file a salsa query (#19255)

## Summary
We noticed that all files get reparsed when workspace diagnostics are
enabled.

I realised that this is because `check_file_impl` access the parsed
module but itself isn't a salsa query.
This pr makes `check_file_impl` a salsa query, so that we only access
the `parsed_module` when the file actually changed. I decided to remove
the salsa query from `check_types` because most functions it calls are
salsa queries itself and having both `check_types` and `check_file` as
salsa querise has the downside that we double cache the diagnostics.

## Test Plan

**Before**

```
2025-07-10 12:54:16.620766000  WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c0c))}: File `/Users/micha/astral/test/yaml/yaml-stubs/__init__.pyi` was reparsed after being collected in the current Salsa revision
2025-07-10 12:54:16.621942000  WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c13))}: File `/Users/micha/astral/test/ignore2 2/nested-repository/main.py` was reparsed after being collected in the current Salsa revision
2025-07-10 12:54:16.622107000  WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c09))}: File `/Users/micha/astral/test/notebook.ipynb` was reparsed after being collected in the current Salsa revision
2025-07-10 12:54:16.622357000  WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c04))}: File `/Users/micha/astral/test/no-trailing.py` was reparsed after being collected in the current Salsa revision
2025-07-10 12:54:16.622634000  WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c02))}: File `/Users/micha/astral/test/simple.py` was reparsed after being collected in the current Salsa revision
2025-07-10 12:54:16.623056000  WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c07))}: File `/Users/micha/astral/test/open/more.py` was reparsed after being collected in the current Salsa revision
2025-07-10 12:54:16.623254000  WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c11))}: File `/Users/micha/astral/test/ignore-bug/backend/src/subdir/log/some_logging_lib.py` was reparsed after being collected in the current Salsa revision
2025-07-10 12:54:16.623450000  WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c0f))}: File `/Users/micha/astral/test/yaml/tomllib/__init__.py` was reparsed after being collected in the current Salsa revision
2025-07-10 12:54:16.624599000  WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c05))}: File `/Users/micha/astral/test/create.py` was reparsed after being collected in the current Salsa revision
2025-07-10 12:54:16.624784000  WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c00))}: File `/Users/micha/astral/test/lib.py` was reparsed after being collected in the current Salsa revision
2025-07-10 12:54:16.624911000  WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c0a))}: File `/Users/micha/astral/test/sub/test.py` was reparsed after being collected in the current Salsa revision
2025-07-10 12:54:16.625032000  WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c12))}: File `/Users/micha/astral/test/ignore2/nested-repository/main.py` was reparsed after being collected in the current Salsa revision
2025-07-10 12:54:16.625101000  WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c08))}: File `/Users/micha/astral/test/open/test.py` was reparsed after being collected in the current Salsa revision
2025-07-10 12:54:16.625227000  WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c03))}: File `/Users/micha/astral/test/pseudocode_with_bom.py` was reparsed after being collected in the current Salsa revision
2025-07-10 12:54:16.625353000  WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c0b))}: File `/Users/micha/astral/test/yaml/yaml-stubs/loader.pyi` was reparsed after being collected in the current Salsa revision
2025-07-10 12:54:16.625543000  WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c01))}: File `/Users/micha/astral/test/test_trailing.py` was reparsed after being collected in the current Salsa revision
2025-07-10 12:54:16.625616000  WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c0d))}: File `/Users/micha/astral/test/yaml/tomllib/_re.py` was reparsed after being collected in the current Salsa revision
2025-07-10 12:54:16.625667000  WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c06))}: File `/Users/micha/astral/test/yaml/main.py` was reparsed after being collected in the current Salsa revision
2025-07-10 12:54:16.625779000  WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c10))}: File `/Users/micha/astral/test/yaml/tomllib/_types.py` was reparsed after being collected in the current Salsa revision
2025-07-10 12:54:16.627526000  WARN request{id=19 method="workspace/diagnostic"}:Project::check:check_file{file=file(Id(c0e))}: File `/Users/micha/astral/test/yaml/tomllib/_parser.py` was reparsed after being collected in the current Salsa revision
2025-07-10 12:54:16.627959000 DEBUG request{id=19 method="workspace/diagnostic"}:Project::check: Checking all files took 0.007s
```

Now, no more logs regarding reparsing
This commit is contained in:
Micha Reiser 2025-07-10 15:16:56 +02:00 committed by GitHub
parent 59114d0301
commit 87f6f08ef5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 82 additions and 76 deletions

View file

@ -257,8 +257,11 @@ impl Project {
tracing::debug_span!(parent: project_span, "check_file", ?file);
let _entered = check_file_span.entered();
let result = self.check_file_impl(&db, file);
file_diagnostics.lock().unwrap().extend(result);
let result = check_file_impl(&db, file);
file_diagnostics
.lock()
.unwrap()
.extend(result.iter().map(Clone::clone));
reporter.report_file(&file);
});
@ -285,7 +288,7 @@ impl Project {
return Vec::new();
}
self.check_file_impl(db, file)
check_file_impl(db, file).iter().map(Clone::clone).collect()
}
/// Opens a file in the project.
@ -466,71 +469,73 @@ impl Project {
self.set_file_set(db).to(IndexedFiles::lazy());
}
}
}
fn check_file_impl(self, db: &dyn Db, file: File) -> Vec<Diagnostic> {
let mut diagnostics: Vec<Diagnostic> = Vec::new();
#[salsa::tracked(returns(deref), heap_size=get_size2::GetSize::get_heap_size)]
pub(crate) fn check_file_impl(db: &dyn Db, file: File) -> Box<[Diagnostic]> {
let mut diagnostics: Vec<Diagnostic> = Vec::new();
// Abort checking if there are IO errors.
let source = source_text(db, file);
// Abort checking if there are IO errors.
let source = source_text(db, file);
if let Some(read_error) = source.read_error() {
diagnostics.push(
IOErrorDiagnostic {
file: Some(file),
error: read_error.clone().into(),
}
.to_diagnostic(),
);
return diagnostics;
}
let parsed = parsed_module(db, file);
let parsed_ref = parsed.load(db);
diagnostics.extend(
parsed_ref
.errors()
.iter()
.map(|error| Diagnostic::invalid_syntax(file, &error.error, error)),
);
diagnostics.extend(parsed_ref.unsupported_syntax_errors().iter().map(|error| {
let mut error = Diagnostic::invalid_syntax(file, error, error);
add_inferred_python_version_hint_to_diagnostic(db, &mut error, "parsing syntax");
error
}));
{
let db = AssertUnwindSafe(db);
match catch(&**db, file, || check_types(*db, file)) {
Ok(Some(type_check_diagnostics)) => {
diagnostics.extend(type_check_diagnostics.into_iter().cloned());
}
Ok(None) => {}
Err(diagnostic) => diagnostics.push(diagnostic),
if let Some(read_error) = source.read_error() {
diagnostics.push(
IOErrorDiagnostic {
file: Some(file),
error: read_error.clone().into(),
}
}
if self
.open_fileset(db)
.is_none_or(|files| !files.contains(&file))
{
// Drop the AST now that we are done checking this file. It is not currently open,
// so it is unlikely to be accessed again soon. If any queries need to access the AST
// from across files, it will be re-parsed.
parsed.clear();
}
diagnostics.sort_unstable_by_key(|diagnostic| {
diagnostic
.primary_span()
.and_then(|span| span.range())
.unwrap_or_default()
.start()
});
diagnostics
.to_diagnostic(),
);
return diagnostics.into_boxed_slice();
}
let parsed = parsed_module(db, file);
let parsed_ref = parsed.load(db);
diagnostics.extend(
parsed_ref
.errors()
.iter()
.map(|error| Diagnostic::invalid_syntax(file, &error.error, error)),
);
diagnostics.extend(parsed_ref.unsupported_syntax_errors().iter().map(|error| {
let mut error = Diagnostic::invalid_syntax(file, error, error);
add_inferred_python_version_hint_to_diagnostic(db, &mut error, "parsing syntax");
error
}));
{
let db = AssertUnwindSafe(db);
match catch(&**db, file, || check_types(*db, file)) {
Ok(Some(type_check_diagnostics)) => {
diagnostics.extend(type_check_diagnostics);
}
Ok(None) => {}
Err(diagnostic) => diagnostics.push(diagnostic),
}
}
if db
.project()
.open_fileset(db)
.is_none_or(|files| !files.contains(&file))
{
// Drop the AST now that we are done checking this file. It is not currently open,
// so it is unlikely to be accessed again soon. If any queries need to access the AST
// from across files, it will be re-parsed.
parsed.clear();
}
diagnostics.sort_unstable_by_key(|diagnostic| {
diagnostic
.primary_span()
.and_then(|span| span.range())
.unwrap_or_default()
.start()
});
diagnostics.into_boxed_slice()
}
#[derive(Debug)]
@ -701,8 +706,8 @@ where
#[cfg(test)]
mod tests {
use crate::Db;
use crate::ProjectMetadata;
use crate::check_file_impl;
use crate::db::tests::TestDb;
use ruff_db::Db as _;
use ruff_db::files::system_path_to_file;
@ -741,9 +746,8 @@ mod tests {
assert_eq!(source_text(&db, file).as_str(), "");
assert_eq!(
db.project()
.check_file_impl(&db, file)
.into_iter()
check_file_impl(&db, file)
.iter()
.map(|diagnostic| diagnostic.primary_message().to_string())
.collect::<Vec<_>>(),
vec!["Failed to read file: No such file or directory".to_string()]
@ -758,9 +762,8 @@ mod tests {
assert_eq!(source_text(&db, file).as_str(), "");
assert_eq!(
db.project()
.check_file_impl(&db, file)
.into_iter()
check_file_impl(&db, file)
.iter()
.map(|diagnostic| diagnostic.primary_message().to_string())
.collect::<Vec<_>>(),
vec![] as Vec<String>

View file

@ -88,8 +88,7 @@ mod definition;
#[cfg(test)]
mod property_tests;
#[salsa::tracked(returns(ref), heap_size=get_size2::GetSize::get_heap_size)]
pub fn check_types(db: &dyn Db, file: File) -> TypeCheckDiagnostics {
pub fn check_types(db: &dyn Db, file: File) -> Vec<Diagnostic> {
let _span = tracing::trace_span!("check_types", ?file).entered();
tracing::debug!("Checking file '{path}'", path = file.path(db));
@ -111,7 +110,7 @@ pub fn check_types(db: &dyn Db, file: File) -> TypeCheckDiagnostics {
check_suppressions(db, file, &mut diagnostics);
diagnostics
diagnostics.into_vec()
}
/// Infer the type of a binding.

View file

@ -1598,6 +1598,10 @@ impl TypeCheckDiagnostics {
self.diagnostics.shrink_to_fit();
}
pub(crate) fn into_vec(self) -> Vec<Diagnostic> {
self.diagnostics
}
pub fn iter(&self) -> std::slice::Iter<'_, Diagnostic> {
self.diagnostics.iter()
}

View file

@ -10035,7 +10035,7 @@ mod tests {
}
#[track_caller]
fn assert_diagnostic_messages(diagnostics: &TypeCheckDiagnostics, expected: &[&str]) {
fn assert_diagnostic_messages(diagnostics: &[Diagnostic], expected: &[&str]) {
let messages: Vec<&str> = diagnostics
.iter()
.map(Diagnostic::primary_message)
@ -10048,7 +10048,7 @@ mod tests {
let file = system_path_to_file(db, filename).unwrap();
let diagnostics = check_types(db, file);
assert_diagnostic_messages(diagnostics, expected);
assert_diagnostic_messages(&diagnostics, expected);
}
#[test]

View file

@ -340,7 +340,7 @@ fn run_test(
Err(failures) => return Some(failures),
};
diagnostics.extend(type_diagnostics.into_iter().cloned());
diagnostics.extend(type_diagnostics);
diagnostics.sort_by(|left, right| {
left.rendering_sort_key(db)
.cmp(&right.rendering_sort_key(db))