[ty] Implement stdlib stub mapping (#19529)

by using essentially the same logic for system site-packages, on the
assumption that system site-packages are always a subdir of the stdlib
we were looking for.
This commit is contained in:
Aria Desires 2025-08-08 15:52:15 -04:00 committed by GitHub
parent 0ec4801b0d
commit 7cc3f1ebe9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 495 additions and 31 deletions

View file

@ -9,7 +9,7 @@ pub use resolver::{resolve_module, resolve_real_module};
use ruff_db::system::SystemPath;
use crate::Db;
use crate::module_resolver::resolver::search_paths;
use crate::module_resolver::resolver::{ModuleResolveMode, search_paths};
use resolver::SearchPathIterator;
mod module;
@ -23,7 +23,9 @@ mod testing;
/// Returns an iterator over all search paths pointing to a system path.
///
/// The iterator wraps the result of `search_paths`, so it is built from the
/// search paths of the current program in `db`.
pub fn system_module_search_paths(db: &dyn Db) -> SystemModuleSearchPathsIter<'_> {
SystemModuleSearchPathsIter {
inner: search_paths(db),
// Always run in `StubsAllowed` mode because we want to include as much as possible
// and we don't care about the "real" stdlib
inner: search_paths(db, ModuleResolveMode::StubsAllowed),
}
}

View file

@ -76,7 +76,8 @@ impl ModulePath {
SearchPathInner::Extra(search_path)
| SearchPathInner::FirstParty(search_path)
| SearchPathInner::SitePackages(search_path)
| SearchPathInner::Editable(search_path) => {
| SearchPathInner::Editable(search_path)
| SearchPathInner::StandardLibraryReal(search_path) => {
system_path_to_file(resolver.db, search_path.join(relative_path))
== Err(FileError::IsADirectory)
}
@ -119,6 +120,11 @@ impl ModulePath {
system_path_to_file(resolver.db, absolute_path.join("__init__.py")).is_ok()
|| system_path_to_file(resolver.db, absolute_path.join("__init__.pyi")).is_ok()
}
SearchPathInner::StandardLibraryReal(search_path) => {
let absolute_path = search_path.join(relative_path);
system_path_to_file(resolver.db, absolute_path.join("__init__.py")).is_ok()
}
SearchPathInner::StandardLibraryCustom(search_path) => {
match query_stdlib_version(relative_path, resolver) {
TypeshedVersionsQueryResult::DoesNotExist => false,
@ -152,7 +158,8 @@ impl ModulePath {
| SearchPathInner::FirstParty(search_path)
| SearchPathInner::SitePackages(search_path)
| SearchPathInner::Editable(search_path) => Some(search_path.join(relative_path)),
SearchPathInner::StandardLibraryCustom(stdlib_root) => {
SearchPathInner::StandardLibraryReal(stdlib_root)
| SearchPathInner::StandardLibraryCustom(stdlib_root) => {
Some(stdlib_root.join(relative_path))
}
SearchPathInner::StandardLibraryVendored(_) => None,
@ -173,6 +180,9 @@ impl ModulePath {
| SearchPathInner::Editable(search_path) => {
system_path_to_file(db, search_path.join(relative_path)).ok()
}
SearchPathInner::StandardLibraryReal(search_path) => {
system_path_to_file(db, search_path.join(relative_path)).ok()
}
SearchPathInner::StandardLibraryCustom(stdlib_root) => {
match query_stdlib_version(relative_path, resolver) {
TypeshedVersionsQueryResult::DoesNotExist => None,
@ -381,6 +391,7 @@ enum SearchPathInner {
FirstParty(SystemPathBuf),
StandardLibraryCustom(SystemPathBuf),
StandardLibraryVendored(VendoredPathBuf),
StandardLibraryReal(SystemPathBuf),
SitePackages(SystemPathBuf),
Editable(SystemPathBuf),
}
@ -391,11 +402,13 @@ enum SearchPathInner {
/// The different kinds of search paths are:
/// - "Extra" search paths: these go at the start of the module resolution order
/// - First-party search paths: the user code that we are directly invoked on.
/// - Standard-library search paths: these come in two different forms:
/// - Standard-library search paths: these come in three different forms:
/// - Custom standard-library search paths: paths provided by the user
/// pointing to a custom typeshed directory on disk
/// - Vendored standard-library search paths: paths pointing to a directory
/// in the vendored zip archive.
/// - Real standard-library search paths: paths pointing to a directory
/// of the real Python stdlib for the environment.
/// - Site-packages search paths: search paths that point to the `site-packages`
/// directory, in which packages are installed from ``PyPI``.
/// - Editable search paths: Additional search paths added to the end of the module
@ -468,6 +481,13 @@ impl SearchPath {
)))
}
/// Builds a search path for the real standard library of a Python installation.
///
/// `root` is first passed through `Self::directory_path`; if that check fails,
/// its error is propagated to the caller via `?`.
pub(crate) fn real_stdlib(system: &dyn System, root: SystemPathBuf) -> SearchPathResult<Self> {
    let validated_root = Self::directory_path(system, root)?;
    let inner = SearchPathInner::StandardLibraryReal(validated_root);
    Ok(Self(Arc::new(inner)))
}
/// Create a new search path pointing to the `site-packages` directory on disk
///
/// TODO: the validation done here is somewhat redundant given that `site-packages`
@ -504,7 +524,9 @@ impl SearchPath {
pub(crate) fn is_standard_library(&self) -> bool {
// True for every standard-library kind of search path: a custom typeshed
// directory, the vendored typeshed, and the real stdlib of the environment.
matches!(
&*self.0,
SearchPathInner::StandardLibraryCustom(_) | SearchPathInner::StandardLibraryVendored(_)
SearchPathInner::StandardLibraryCustom(_)
| SearchPathInner::StandardLibraryVendored(_)
| SearchPathInner::StandardLibraryReal(_)
)
}
@ -533,6 +555,7 @@ impl SearchPath {
SearchPathInner::Extra(search_path)
| SearchPathInner::FirstParty(search_path)
| SearchPathInner::StandardLibraryCustom(search_path)
| SearchPathInner::StandardLibraryReal(search_path)
| SearchPathInner::SitePackages(search_path)
| SearchPathInner::Editable(search_path) => {
path.strip_prefix(search_path)
@ -559,6 +582,7 @@ impl SearchPath {
SearchPathInner::Extra(_)
| SearchPathInner::FirstParty(_)
| SearchPathInner::StandardLibraryCustom(_)
| SearchPathInner::StandardLibraryReal(_)
| SearchPathInner::SitePackages(_)
| SearchPathInner::Editable(_) => None,
SearchPathInner::StandardLibraryVendored(search_path) => path
@ -577,6 +601,7 @@ impl SearchPath {
SearchPathInner::Extra(path)
| SearchPathInner::FirstParty(path)
| SearchPathInner::StandardLibraryCustom(path)
| SearchPathInner::StandardLibraryReal(path)
| SearchPathInner::SitePackages(path)
| SearchPathInner::Editable(path) => Some(path),
SearchPathInner::StandardLibraryVendored(_) => None,
@ -590,6 +615,7 @@ impl SearchPath {
SearchPathInner::Extra(_)
| SearchPathInner::FirstParty(_)
| SearchPathInner::StandardLibraryCustom(_)
| SearchPathInner::StandardLibraryReal(_)
| SearchPathInner::SitePackages(_)
| SearchPathInner::Editable(_) => None,
}
@ -651,6 +677,7 @@ impl fmt::Display for SearchPath {
| SearchPathInner::FirstParty(system_path_buf)
| SearchPathInner::SitePackages(system_path_buf)
| SearchPathInner::Editable(system_path_buf)
| SearchPathInner::StandardLibraryReal(system_path_buf)
| SearchPathInner::StandardLibraryCustom(system_path_buf) => system_path_buf.fmt(f),
SearchPathInner::StandardLibraryVendored(vendored_path_buf) => vendored_path_buf.fmt(f),
}

View file

@ -47,10 +47,25 @@ pub fn resolve_real_module<'db>(db: &'db dyn Db, module_name: &ModuleName) -> Op
/// Which files should be visible when doing a module query.
///
/// The mode also selects which standard-library search path is used:
/// typeshed for [`ModuleResolveMode::StubsAllowed`], the real stdlib for
/// [`ModuleResolveMode::StubsNotAllowed`].
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) enum ModuleResolveMode {
/// Stubs are allowed to appear.
///
/// This is the "normal" mode almost everything uses, as type checkers are in fact supposed
/// to *prefer* stubs over the actual implementations.
StubsAllowed,
/// Stubs are not allowed to appear.
///
/// This is the "goto definition" mode, where we need to ignore the typing spec and find actual
/// implementations. When querying search paths, this also notably replaces typeshed with
/// the "real" stdlib.
StubsNotAllowed,
}
/// Salsa-interned wrapper around a [`ModuleResolveMode`].
///
/// `SearchPaths::iter` creates one for the requested mode, and the search-path
/// iterator hands it to `dynamic_resolution_paths`, which reads the mode back
/// with `mode.mode(db)`.
// NOTE(review): presumably this exists because a salsa-tracked function needs
// a salsa struct (not a plain enum) as its extra argument — confirm.
#[salsa::interned]
#[derive(Debug)]
pub(crate) struct ModuleResolveModeIngredient<'db> {
mode: ModuleResolveMode,
}
impl ModuleResolveMode {
fn stubs_allowed(self) -> bool {
matches!(self, Self::StubsAllowed)
@ -124,7 +139,7 @@ pub(crate) fn file_to_module(db: &dyn Db, file: File) -> Option<Module<'_>> {
let path = SystemOrVendoredPathRef::try_from_file(db, file)?;
let module_name = search_paths(db).find_map(|candidate| {
let module_name = search_paths(db, ModuleResolveMode::StubsAllowed).find_map(|candidate| {
let relative_path = match path {
SystemOrVendoredPathRef::System(path) => candidate.relativize_system_path(path),
SystemOrVendoredPathRef::Vendored(path) => candidate.relativize_vendored_path(path),
@ -153,8 +168,8 @@ pub(crate) fn file_to_module(db: &dyn Db, file: File) -> Option<Module<'_>> {
}
}
pub(crate) fn search_paths(db: &dyn Db) -> SearchPathIterator<'_> {
Program::get(db).search_paths(db).iter(db)
pub(crate) fn search_paths(db: &dyn Db, resolve_mode: ModuleResolveMode) -> SearchPathIterator<'_> {
Program::get(db).search_paths(db).iter(db, resolve_mode)
}
#[derive(Clone, Debug, PartialEq, Eq)]
@ -164,7 +179,16 @@ pub struct SearchPaths {
/// config settings themselves change.
static_paths: Vec<SearchPath>,
/// site-packages paths are not included in the above field:
/// Path to typeshed, which should come immediately after static paths.
///
/// This can currently only be None if the `SystemPath` this points to is already in `static_paths`.
stdlib_path: Option<SearchPath>,
/// Path to the real stdlib. This replaces typeshed (`stdlib_path`) for goto-definition
/// searches ([`ModuleResolveMode::StubsNotAllowed`]).
real_stdlib_path: Option<SearchPath>,
/// site-packages paths are not included in the above fields:
/// if there are multiple site-packages paths, editable installations can appear
/// *between* the site-packages paths on `sys.path` at runtime.
/// That means we can't know where a second or third `site-packages` path should sit
@ -198,6 +222,7 @@ impl SearchPaths {
src_roots,
custom_typeshed: typeshed,
site_packages_paths,
real_stdlib_path,
} = settings;
let mut static_paths = vec![];
@ -240,7 +265,11 @@ impl SearchPaths {
)
};
static_paths.push(stdlib_path);
let real_stdlib_path = if let Some(path) = real_stdlib_path {
Some(SearchPath::real_stdlib(system, path.clone())?)
} else {
None
};
let mut site_packages: Vec<_> = Vec::with_capacity(site_packages_paths.len());
@ -273,8 +302,37 @@ impl SearchPaths {
}
});
// Users probably shouldn't do this but... if they've shadowed their stdlib we should deduplicate it away.
// This notably will mess up anything that checks if a search path "is the standard library" as we won't
// "remember" that fact for static paths.
//
// (We used to shove these into static_paths, so the above retain implicitly did this. I am opting to
// preserve this behaviour to avoid getting into the weeds of corner cases.)
let stdlib_path_is_shadowed = stdlib_path
.as_system_path()
.map(|path| seen_paths.contains(path))
.unwrap_or(false);
let real_stdlib_path_is_shadowed = real_stdlib_path
.as_ref()
.and_then(SearchPath::as_system_path)
.map(|path| seen_paths.contains(path))
.unwrap_or(false);
let stdlib_path = if stdlib_path_is_shadowed {
None
} else {
Some(stdlib_path)
};
let real_stdlib_path = if real_stdlib_path_is_shadowed {
None
} else {
real_stdlib_path
};
Ok(SearchPaths {
static_paths,
stdlib_path,
real_stdlib_path,
site_packages,
typeshed_versions,
})
@ -291,22 +349,32 @@ impl SearchPaths {
}
}
pub(super) fn iter<'a>(&'a self, db: &'a dyn Db) -> SearchPathIterator<'a> {
pub(super) fn iter<'a>(
&'a self,
db: &'a dyn Db,
mode: ModuleResolveMode,
) -> SearchPathIterator<'a> {
let stdlib_path = self.stdlib(mode);
SearchPathIterator {
db,
static_paths: self.static_paths.iter(),
stdlib_path,
dynamic_paths: None,
mode: ModuleResolveModeIngredient::new(db, mode),
}
}
/// Returns the standard-library search path that applies to `mode`, if any.
///
/// `StubsAllowed` selects `stdlib_path` (typeshed); `StubsNotAllowed` selects
/// `real_stdlib_path` (the real stdlib). The result is `None` when the selected
/// field is unset.
pub(crate) fn stdlib(&self, mode: ModuleResolveMode) -> Option<&SearchPath> {
    // Pick the field first, then borrow its contents; the match stays
    // exhaustive so a new mode forces a decision here.
    let selected = match mode {
        ModuleResolveMode::StubsNotAllowed => &self.real_stdlib_path,
        ModuleResolveMode::StubsAllowed => &self.stdlib_path,
    };
    selected.as_ref()
}
/// Returns the system path of the standard-library (typeshed) search path.
///
/// Yields `None` when there is no standard-library search path backed by a
/// system path, e.g. because the vendored typeshed is in use
/// (`SearchPath::as_system_path` returns `None` for vendored paths).
pub(crate) fn custom_stdlib(&self) -> Option<&SystemPath> {
self.static_paths.iter().find_map(|search_path| {
if search_path.is_standard_library() {
search_path.as_system_path()
} else {
None
}
})
self.stdlib_path
.as_ref()
.and_then(SearchPath::as_system_path)
}
pub(crate) fn typeshed_versions(&self) -> &TypeshedVersions {
@ -323,13 +391,18 @@ impl SearchPaths {
/// should come between the two `site-packages` directories when it comes to
/// module-resolution priority.
#[salsa::tracked(returns(deref), heap_size=ruff_memory_usage::heap_size)]
pub(crate) fn dynamic_resolution_paths(db: &dyn Db) -> Vec<SearchPath> {
pub(crate) fn dynamic_resolution_paths<'db>(
db: &'db dyn Db,
mode: ModuleResolveModeIngredient<'db>,
) -> Vec<SearchPath> {
tracing::debug!("Resolving dynamic module resolution paths");
let SearchPaths {
static_paths,
stdlib_path,
site_packages,
typeshed_versions: _,
real_stdlib_path,
} = Program::get(db).search_paths(db);
let mut dynamic_paths = Vec::new();
@ -344,6 +417,15 @@ pub(crate) fn dynamic_resolution_paths(db: &dyn Db) -> Vec<SearchPath> {
.map(Cow::Borrowed)
.collect();
// Use the `ModuleResolveMode` to determine which stdlib (if any) to mark as existing
let stdlib = match mode.mode(db) {
ModuleResolveMode::StubsAllowed => stdlib_path,
ModuleResolveMode::StubsNotAllowed => real_stdlib_path,
};
if let Some(path) = stdlib.as_ref().and_then(SearchPath::as_system_path) {
existing_paths.insert(Cow::Borrowed(path));
}
let files = db.files();
let system = db.system();
@ -429,7 +511,9 @@ pub(crate) fn dynamic_resolution_paths(db: &dyn Db) -> Vec<SearchPath> {
/// Iterator over the search paths that apply to one [`ModuleResolveMode`].
///
/// Paths are yielded in this order: the static paths, then the stdlib path
/// (at most once), then the dynamic paths.
pub(crate) struct SearchPathIterator<'db> {
db: &'db dyn Db,
/// Static search paths; these are yielded first.
static_paths: std::slice::Iter<'db, SearchPath>,
/// Stdlib search path selected for the mode; taken (and so yielded once)
/// after the static paths are exhausted.
stdlib_path: Option<&'db SearchPath>,
/// Dynamic search paths; `None` until first needed, then filled from
/// `dynamic_resolution_paths`.
dynamic_paths: Option<std::slice::Iter<'db, SearchPath>>,
/// Interned resolve mode, forwarded to `dynamic_resolution_paths`.
mode: ModuleResolveModeIngredient<'db>,
}
impl<'db> Iterator for SearchPathIterator<'db> {
@ -439,14 +523,19 @@ impl<'db> Iterator for SearchPathIterator<'db> {
let SearchPathIterator {
db,
static_paths,
stdlib_path,
mode,
dynamic_paths,
} = self;
static_paths.next().or_else(|| {
dynamic_paths
.get_or_insert_with(|| dynamic_resolution_paths(*db).iter())
.next()
})
static_paths
.next()
.or_else(|| stdlib_path.take())
.or_else(|| {
dynamic_paths
.get_or_insert_with(|| dynamic_resolution_paths(*db, *mode).iter())
.next()
})
}
}
@ -583,7 +672,7 @@ fn resolve_name(db: &dyn Db, name: &ModuleName, mode: ModuleResolveMode) -> Opti
let stub_name = name.to_stub_package();
let mut is_namespace_package = false;
for search_path in search_paths(db) {
for search_path in search_paths(db, mode) {
// When a builtin module is imported, standard module resolution is bypassed:
// the module name always resolves to the stdlib module,
// even if there's a module of the same name in the first-party root
@ -974,9 +1063,7 @@ mod tests {
use ruff_db::Db;
use ruff_db::files::{File, FilePath, system_path_to_file};
use ruff_db::system::{DbWithTestSystem as _, DbWithWritableSystem as _};
use ruff_db::testing::{
assert_const_function_query_was_not_run, assert_function_query_was_not_run,
};
use ruff_db::testing::assert_function_query_was_not_run;
use ruff_python_ast::PythonVersion;
use crate::db::tests::TestDb;
@ -1908,7 +1995,12 @@ not_a_directory
&FilePath::system("/y/src/bar.py")
);
let events = db.take_salsa_events();
assert_const_function_query_was_not_run(&db, dynamic_resolution_paths, &events);
assert_function_query_was_not_run(
&db,
dynamic_resolution_paths,
ModuleResolveModeIngredient::new(&db, ModuleResolveMode::StubsAllowed),
&events,
);
}
#[test]
@ -1977,7 +2069,8 @@ not_a_directory
.with_site_packages_files(&[("_foo.pth", "/src")])
.build();
let search_paths: Vec<&SearchPath> = search_paths(&db).collect();
let search_paths: Vec<&SearchPath> =
search_paths(&db, ModuleResolveMode::StubsAllowed).collect();
assert!(search_paths.contains(
&&SearchPath::first_party(db.system(), SystemPathBuf::from("/src")).unwrap()