[ty] Re-arrange "list modules" implementation for Salsa caching

This basically splits `list_modules` into a higher level "aggregation"
routine and a lower level "get modules for one search path" routine.
This permits Salsa to cache the lower level components, e.g., many
search paths refer to directories that rarely change. This saves us
interaction with the system.

This did require a fair bit of surgery in terms of being careful about
adding file roots. Namely, now that we rely even more on file roots
existing for correct handling of cache invalidation, there were several
spots in our code that needed to be updated to add roots (that we
weren't previously doing). This feels Not Great, and it would be better
if we had some kind of abstraction that handled this for us. But it
isn't clear to me at this time what that looks like.
This commit is contained in:
Andrew Gallant 2025-08-14 14:18:40 -04:00 committed by Andrew Gallant
parent 468eb37d75
commit ddd4bab67c
4 changed files with 177 additions and 47 deletions

View file

@ -12,25 +12,69 @@ use super::resolver::{
ModuleResolveMode, ResolverContext, is_non_shadowable, resolve_file_module, search_paths,
};
/// List all available modules.
/// List all available top-level modules.
#[salsa::tracked]
pub fn list_modules(db: &dyn Db) -> Vec<Module<'_>> {
let mut lister = Lister::new(db);
let mut modules = BTreeMap::new();
for search_path in search_paths(db, ModuleResolveMode::StubsAllowed) {
match search_path.as_path() {
SystemOrVendoredPathRef::System(system_search_path) => {
let Ok(it) = db.system().read_directory(system_search_path) else {
continue;
};
for result in it {
let Ok(entry) = result else { continue };
lister.add_path(search_path, &entry.path().into(), entry.file_type().into());
for module in list_modules_in(db, SearchPathIngredient::new(db, search_path.clone())) {
match modules.entry(module.name(db)) {
Entry::Vacant(entry) => {
entry.insert(module);
}
Entry::Occupied(mut entry) => {
// The only case where a module can override
// a module with the same name in a higher
// precedent search path is if the higher
// precedent search path contained a namespace
// package and the lower precedent search path
// contained a "regular" module.
if let (None, Some(_)) = (entry.get().search_path(db), module.search_path(db)) {
entry.insert(module);
}
}
}
SystemOrVendoredPathRef::Vendored(vendored_search_path) => {
for entry in db.vendored().read_directory(vendored_search_path) {
lister.add_path(search_path, &entry.path().into(), entry.file_type().into());
}
}
}
modules.into_values().collect()
}
#[salsa::tracked(debug, heap_size=ruff_memory_usage::heap_size)]
struct SearchPathIngredient<'db> {
#[returns(ref)]
path: SearchPath,
}
/// List all available top-level modules in the given `SearchPath`.
#[salsa::tracked]
fn list_modules_in<'db>(
db: &'db dyn Db,
search_path: SearchPathIngredient<'db>,
) -> Vec<Module<'db>> {
let mut lister = Lister::new(db, search_path.path(db));
match search_path.path(db).as_path() {
SystemOrVendoredPathRef::System(system_search_path) => {
// Read the revision on the corresponding file root to
// register an explicit dependency on this directory. When
// the revision gets bumped, the cache that Salsa creates
// for this routine will be invalidated.
let root = db
.files()
.root(db, system_search_path)
.expect("System search path should have a registered root");
let _ = root.revision(db);
let Ok(it) = db.system().read_directory(system_search_path) else {
return vec![];
};
for result in it {
let Ok(entry) = result else { continue };
lister.add_path(&entry.path().into(), entry.file_type().into());
}
}
SystemOrVendoredPathRef::Vendored(vendored_search_path) => {
for entry in db.vendored().read_directory(vendored_search_path) {
lister.add_path(&entry.path().into(), entry.file_type().into());
}
}
}
@ -47,17 +91,19 @@ pub fn list_modules(db: &dyn Db) -> Vec<Module<'_>> {
struct Lister<'db> {
db: &'db dyn Db,
program: Program,
search_path: &'db SearchPath,
modules: BTreeMap<&'db ModuleName, Module<'db>>,
}
impl<'db> Lister<'db> {
/// Create new state that can accumulate modules from a list
/// of file paths.
fn new(db: &'db dyn Db) -> Lister<'db> {
fn new(db: &'db dyn Db, search_path: &'db SearchPath) -> Lister<'db> {
let program = Program::get(db);
Lister {
db,
program,
search_path,
modules: BTreeMap::new(),
}
}
@ -67,19 +113,17 @@ impl<'db> Lister<'db> {
self.modules.into_values().collect()
}
/// Add the given `path` (from `search_path`) as a possible
/// module to this lister. The `file_type` should be the type
/// of `path` (file, directory or symlink).
/// Add the given `path` as a possible module to this lister. The
/// `file_type` should be the type of `path` (file, directory or
/// symlink).
///
/// This may decide that the given path does not correspond to
/// a valid Python module. In which case, it is dropped and this
/// is a no-op.
fn add_path(
&mut self,
search_path: &SearchPath,
path: &SystemOrVendoredPathRef<'_>,
file_type: FileType,
) {
///
/// Callers must ensure that the path given came from the same
/// `SearchPath` used to create this `Lister`.
fn add_path(&mut self, path: &SystemOrVendoredPathRef<'_>, file_type: FileType) {
let mut has_py_extension = false;
// We must have no extension, a Python source file extension (`.py`)
// or a Python stub file extension (`.pyi`).
@ -91,7 +135,7 @@ impl<'db> Lister<'db> {
}
let Some(name) = path.file_name() else { return };
let mut module_path = search_path.to_module_path();
let mut module_path = self.search_path.to_module_path();
module_path.push(name);
let Some(module_name) = module_path.to_module_name() else {
return;
@ -99,7 +143,7 @@ impl<'db> Lister<'db> {
// Some modules cannot shadow a subset of special
// modules from the standard library.
if !search_path.is_standard_library() && self.is_non_shadowable(&module_name) {
if !self.search_path.is_standard_library() && self.is_non_shadowable(&module_name) {
return;
}
@ -113,7 +157,7 @@ impl<'db> Lister<'db> {
self.db,
module_name,
ModuleKind::Package,
search_path.clone(),
self.search_path.clone(),
file,
),
);
@ -152,7 +196,7 @@ impl<'db> Lister<'db> {
let is_dir =
file_type.is_definitely_directory() || module_path.is_directory(&self.context());
if is_dir {
if !search_path.is_standard_library() {
if !self.search_path.is_standard_library() {
self.add_module(
&module_path,
Module::namespace_package(self.db, module_name),
@ -185,7 +229,7 @@ impl<'db> Lister<'db> {
self.db,
module_name,
ModuleKind::Module,
search_path.clone(),
self.search_path.clone(),
file,
),
);
@ -214,15 +258,14 @@ impl<'db> Lister<'db> {
(None, Some(_)) => {
entry.insert(module);
}
(Some(search_path_existing), Some(search_path_new)) => {
(Some(_), Some(_)) => {
// Merging across search paths is only necessary for
// namespace packages. For all other modules, entries
// from earlier search paths take precedence. Thus, all
// of the cases below require that we're in the same
// directory.
if search_path_existing != search_path_new {
return;
}
// directory. ... Which is true here, because a `Lister`
// only works for one specific search path.
// When we have a `foo/__init__.py` and a `foo.py` in
// the same directory, the former takes precedent.
// (This case can only occur when both have a search
@ -322,7 +365,7 @@ fn is_python_extension(ext: &str) -> bool {
mod tests {
use camino::{Utf8Component, Utf8Path};
use ruff_db::Db as _;
use ruff_db::files::{File, FilePath};
use ruff_db::files::{File, FilePath, FileRootKind};
use ruff_db::system::{
DbWithTestSystem, DbWithWritableSystem, OsSystem, SystemPath, SystemPathBuf,
};
@ -906,6 +949,12 @@ mod tests {
std::fs::write(foo.as_std_path(), "")?;
std::os::unix::fs::symlink(foo.as_std_path(), bar.as_std_path())?;
db.files().try_add_root(&db, &src, FileRootKind::Project);
db.files()
.try_add_root(&db, &site_packages, FileRootKind::LibrarySearchPath);
db.files()
.try_add_root(&db, &custom_typeshed, FileRootKind::LibrarySearchPath);
Program::from_settings(
&db,
ProgramSettings {
@ -966,11 +1015,13 @@ mod tests {
// Now write the foo file
db.write_file(&foo_path, "x = 1")?;
// FIXME: This is obviously wrong. The Salsa cache
// isn't being invalidated.
insta::assert_debug_snapshot!(
list_snapshot(&db),
@"[]",
@r#"
[
Module::File("foo", "first-party", "/src/foo.py", Module, None),
]
"#,
);
Ok(())
@ -1090,14 +1141,11 @@ mod tests {
let src_functools_path = src.join("functools.py");
db.write_file(&src_functools_path, "FOO: int").unwrap();
// FIXME: This looks wrong! This is a cache invalidation
// problem, not a logic problem in the "list modules"
// implementation.
insta::assert_debug_snapshot!(
list_snapshot(&db),
@r#"
[
Module::File("functools", "std-custom", "/typeshed/stdlib/functools.pyi", Module, None),
Module::File("functools", "first-party", "/src/functools.py", Module, None),
]
"#,
);
@ -1157,6 +1205,7 @@ mod tests {
let TestCase { mut db, .. } = TestCaseBuilder::new()
.with_site_packages_files(SITE_PACKAGES)
.with_library_root("/x")
.build();
db.write_files(x_directory).unwrap();
@ -1181,6 +1230,7 @@ mod tests {
let TestCase { mut db, .. } = TestCaseBuilder::new()
.with_site_packages_files(SITE_PACKAGES)
.with_library_root("/y/src")
.build();
db.write_files(external_files).unwrap();
@ -1207,6 +1257,7 @@ mod tests {
let TestCase { db, .. } = TestCaseBuilder::new()
.with_site_packages_files(SITE_PACKAGES)
.with_library_root("/x")
.build();
insta::assert_debug_snapshot!(
@ -1246,6 +1297,9 @@ not_a_directory
let TestCase { mut db, .. } = TestCaseBuilder::new()
.with_site_packages_files(SITE_PACKAGES)
.with_library_root("/x/y/src")
.with_library_root("/")
.with_library_root("/baz")
.build();
db.write_files(root_files).unwrap();
@ -1279,6 +1333,8 @@ not_a_directory
let TestCase { mut db, .. } = TestCaseBuilder::new()
.with_site_packages_files(SITE_PACKAGES)
.with_library_root("/x")
.with_library_root("/y")
.build();
db.write_files(external_directories).unwrap();
@ -1325,6 +1381,7 @@ not_a_directory
..
} = TestCaseBuilder::new()
.with_site_packages_files(SITE_PACKAGES)
.with_library_root("/x")
.build();
db.write_files(x_directory).unwrap();
@ -1360,6 +1417,7 @@ not_a_directory
let TestCase { mut db, .. } = TestCaseBuilder::new()
.with_site_packages_files(SITE_PACKAGES)
.with_library_root("/x")
.build();
let src_path = SystemPathBuf::from("/x/src");
@ -1411,6 +1469,15 @@ not_a_directory
])
.unwrap();
db.files()
.try_add_root(&db, SystemPath::new("/src"), FileRootKind::Project);
db.files()
.try_add_root(&db, &venv_site_packages, FileRootKind::LibrarySearchPath);
db.files()
.try_add_root(&db, &system_site_packages, FileRootKind::LibrarySearchPath);
db.files()
.try_add_root(&db, SystemPath::new("/x"), FileRootKind::LibrarySearchPath);
Program::from_settings(
&db,
ProgramSettings {
@ -1497,6 +1564,8 @@ not_a_directory
std::os::unix::fs::symlink(a_package_target.as_std_path(), a_src.as_std_path())
.context("Failed to symlink `src/a` to `a-package`")?;
db.files().try_add_root(&db, &root, FileRootKind::Project);
Program::from_settings(
&db,
ProgramSettings {
@ -1535,6 +1604,11 @@ not_a_directory
let mut db = TestDb::new();
db.write_file(&installed_foo_module, "").unwrap();
db.files()
.try_add_root(&db, &project_directory, FileRootKind::Project);
db.files()
.try_add_root(&db, &site_packages, FileRootKind::LibrarySearchPath);
Program::from_settings(
&db,
ProgramSettings {

View file

@ -433,13 +433,16 @@ pub(crate) fn dynamic_resolution_paths<'db>(
let site_packages_dir = site_packages_search_path
.as_system_path()
.expect("Expected site package path to be a system path");
let site_packages_dir = system
.canonicalize_path(site_packages_dir)
.unwrap_or_else(|_| site_packages_dir.to_path_buf());
if !existing_paths.insert(Cow::Borrowed(site_packages_dir)) {
if !existing_paths.insert(Cow::Owned(site_packages_dir.clone())) {
continue;
}
let site_packages_root = files
.root(db, site_packages_dir)
.root(db, &site_packages_dir)
.expect("Site-package root to have been created");
// This query needs to be re-executed each time a `.pth` file
@ -457,7 +460,7 @@ pub(crate) fn dynamic_resolution_paths<'db>(
// containing a (relative or absolute) path.
// Each of these paths may point to an editable install of a package,
// so should be considered an additional search path.
let pth_file_iterator = match PthFileIterator::new(db, site_packages_dir) {
let pth_file_iterator = match PthFileIterator::new(db, &site_packages_dir) {
Ok(iterator) => iterator,
Err(error) => {
tracing::warn!(

View file

@ -1,4 +1,5 @@
use ruff_db::Db;
use ruff_db::files::FileRootKind;
use ruff_db::system::{
DbWithTestSystem as _, DbWithWritableSystem as _, SystemPath, SystemPathBuf,
};
@ -107,6 +108,14 @@ pub(crate) struct TestCaseBuilder<T> {
python_platform: PythonPlatform,
first_party_files: Vec<FileSpec>,
site_packages_files: Vec<FileSpec>,
// Additional file roots (beyond site_packages, src and stdlib)
// that should be registered with the `Db` abstraction.
//
// This is necessary to make testing "list modules" work. Namely,
// "list modules" relies on caching via a file root's revision,
// and if file roots aren't registered, then the implementation
// can't access the root's revision.
roots: Vec<SystemPathBuf>,
}
impl<T> TestCaseBuilder<T> {
@ -128,6 +137,12 @@ impl<T> TestCaseBuilder<T> {
self
}
/// Add a "library" root to this test case.
pub(crate) fn with_library_root(mut self, root: impl AsRef<SystemPath>) -> Self {
self.roots.push(root.as_ref().to_path_buf());
self
}
fn write_mock_directory(
db: &mut TestDb,
location: impl AsRef<SystemPath>,
@ -154,6 +169,7 @@ impl TestCaseBuilder<UnspecifiedTypeshed> {
python_platform: PythonPlatform::default(),
first_party_files: vec![],
site_packages_files: vec![],
roots: vec![],
}
}
@ -165,6 +181,7 @@ impl TestCaseBuilder<UnspecifiedTypeshed> {
python_platform,
first_party_files,
site_packages_files,
roots,
} = self;
TestCaseBuilder {
typeshed_option: VendoredTypeshed,
@ -172,6 +189,7 @@ impl TestCaseBuilder<UnspecifiedTypeshed> {
python_platform,
first_party_files,
site_packages_files,
roots,
}
}
@ -186,6 +204,7 @@ impl TestCaseBuilder<UnspecifiedTypeshed> {
python_platform,
first_party_files,
site_packages_files,
roots,
} = self;
TestCaseBuilder {
@ -194,6 +213,7 @@ impl TestCaseBuilder<UnspecifiedTypeshed> {
python_platform,
first_party_files,
site_packages_files,
roots,
}
}
@ -224,6 +244,7 @@ impl TestCaseBuilder<MockedTypeshed> {
python_platform,
first_party_files,
site_packages_files,
roots,
} = self;
let mut db = TestDb::new();
@ -233,6 +254,19 @@ impl TestCaseBuilder<MockedTypeshed> {
let src = Self::write_mock_directory(&mut db, "/src", first_party_files);
let typeshed = Self::build_typeshed_mock(&mut db, &typeshed_option);
// This root is needed for correct Salsa tracking.
// Namely, a `SearchPath` is treated as an input, and
// thus the revision number must be bumped accordingly
// when the directory tree changes. We rely on detecting
// this revision from the file root. If we don't add them
// here, they won't get added.
//
// Roots for other search paths are added as part of
// search path initialization in `Program::from_settings`,
// and any remaining are added below.
db.files()
.try_add_root(&db, SystemPath::new("/src"), FileRootKind::Project);
Program::from_settings(
&db,
ProgramSettings {
@ -251,10 +285,17 @@ impl TestCaseBuilder<MockedTypeshed> {
},
);
let stdlib = typeshed.join("stdlib");
db.files()
.try_add_root(&db, &stdlib, FileRootKind::LibrarySearchPath);
for root in &roots {
db.files()
.try_add_root(&db, root, FileRootKind::LibrarySearchPath);
}
TestCase {
db,
src,
stdlib: typeshed.join("stdlib"),
stdlib,
site_packages,
python_version,
}
@ -286,6 +327,7 @@ impl TestCaseBuilder<VendoredTypeshed> {
python_platform,
first_party_files,
site_packages_files,
roots,
} = self;
let mut db = TestDb::new();
@ -294,6 +336,9 @@ impl TestCaseBuilder<VendoredTypeshed> {
Self::write_mock_directory(&mut db, "/site-packages", site_packages_files);
let src = Self::write_mock_directory(&mut db, "/src", first_party_files);
db.files()
.try_add_root(&db, SystemPath::new("/src"), FileRootKind::Project);
Program::from_settings(
&db,
ProgramSettings {
@ -311,6 +356,10 @@ impl TestCaseBuilder<VendoredTypeshed> {
},
);
for root in &roots {
db.files()
.try_add_root(&db, root, FileRootKind::LibrarySearchPath);
}
TestCase {
db,
src,