From 7cc3f1ebe9386e77e7009bc411fc6480d3851015 Mon Sep 17 00:00:00 2001 From: Aria Desires Date: Fri, 8 Aug 2025 15:52:15 -0400 Subject: [PATCH] [ty] Implement stdlib stub mapping (#19529) by using essentially the same logic for system site-packages, on the assumption that system site-packages are always a subdir of the stdlib we were looking for. --- crates/ty_project/src/metadata/options.rs | 10 + .../src/module_resolver/mod.rs | 6 +- .../src/module_resolver/path.rs | 35 +- .../src/module_resolver/resolver.rs | 143 ++++++-- crates/ty_python_semantic/src/program.rs | 7 + .../ty_python_semantic/src/site_packages.rs | 324 ++++++++++++++++++ crates/ty_test/src/lib.rs | 1 + 7 files changed, 495 insertions(+), 31 deletions(-) diff --git a/crates/ty_project/src/metadata/options.rs b/crates/ty_project/src/metadata/options.rs index 47dbbe19f6..83c23cf791 100644 --- a/crates/ty_project/src/metadata/options.rs +++ b/crates/ty_project/src/metadata/options.rs @@ -166,6 +166,13 @@ impl Options { SitePackagesPaths::default() }; + let real_stdlib_path = python_environment.as_ref().and_then(|python_environment| { + // For now this is considered non-fatal, we don't Need this for anything. 
+ python_environment.real_stdlib_path(system).map_err(|err| { + tracing::info!("No real stdlib found, stdlib goto-definition may have degraded quality: {err}"); + }).ok() + }); + let python_version = options_python_version .or_else(|| { python_environment @@ -180,6 +187,7 @@ impl Options { project_root, project_name, site_packages_paths, + real_stdlib_path, system, vendored, )?; @@ -201,6 +209,7 @@ impl Options { project_root: &SystemPath, project_name: &str, site_packages_paths: SitePackagesPaths, + real_stdlib_path: Option, system: &dyn System, vendored: &VendoredFileSystem, ) -> Result { @@ -273,6 +282,7 @@ impl Options { .as_ref() .map(|path| path.absolute(project_root, system)), site_packages_paths: site_packages_paths.into_vec(), + real_stdlib_path, }; settings.to_search_paths(system, vendored) diff --git a/crates/ty_python_semantic/src/module_resolver/mod.rs b/crates/ty_python_semantic/src/module_resolver/mod.rs index 02992026f8..5fb2fa1a52 100644 --- a/crates/ty_python_semantic/src/module_resolver/mod.rs +++ b/crates/ty_python_semantic/src/module_resolver/mod.rs @@ -9,7 +9,7 @@ pub use resolver::{resolve_module, resolve_real_module}; use ruff_db::system::SystemPath; use crate::Db; -use crate::module_resolver::resolver::search_paths; +use crate::module_resolver::resolver::{ModuleResolveMode, search_paths}; use resolver::SearchPathIterator; mod module; @@ -23,7 +23,9 @@ mod testing; /// Returns an iterator over all search paths pointing to a system path pub fn system_module_search_paths(db: &dyn Db) -> SystemModuleSearchPathsIter<'_> { SystemModuleSearchPathsIter { - inner: search_paths(db), + // Always run in `StubsAllowed` mode because we want to include as much as possible + // and we don't care about the "real" stdlib + inner: search_paths(db, ModuleResolveMode::StubsAllowed), } } diff --git a/crates/ty_python_semantic/src/module_resolver/path.rs b/crates/ty_python_semantic/src/module_resolver/path.rs index 290b20b1fa..ff07b0a7c4 100644 --- 
a/crates/ty_python_semantic/src/module_resolver/path.rs +++ b/crates/ty_python_semantic/src/module_resolver/path.rs @@ -76,7 +76,8 @@ impl ModulePath { SearchPathInner::Extra(search_path) | SearchPathInner::FirstParty(search_path) | SearchPathInner::SitePackages(search_path) - | SearchPathInner::Editable(search_path) => { + | SearchPathInner::Editable(search_path) + | SearchPathInner::StandardLibraryReal(search_path) => { system_path_to_file(resolver.db, search_path.join(relative_path)) == Err(FileError::IsADirectory) } @@ -119,6 +120,11 @@ impl ModulePath { system_path_to_file(resolver.db, absolute_path.join("__init__.py")).is_ok() || system_path_to_file(resolver.db, absolute_path.join("__init__.pyi")).is_ok() } + SearchPathInner::StandardLibraryReal(search_path) => { + let absolute_path = search_path.join(relative_path); + + system_path_to_file(resolver.db, absolute_path.join("__init__.py")).is_ok() + } SearchPathInner::StandardLibraryCustom(search_path) => { match query_stdlib_version(relative_path, resolver) { TypeshedVersionsQueryResult::DoesNotExist => false, @@ -152,7 +158,8 @@ impl ModulePath { | SearchPathInner::FirstParty(search_path) | SearchPathInner::SitePackages(search_path) | SearchPathInner::Editable(search_path) => Some(search_path.join(relative_path)), - SearchPathInner::StandardLibraryCustom(stdlib_root) => { + SearchPathInner::StandardLibraryReal(stdlib_root) + | SearchPathInner::StandardLibraryCustom(stdlib_root) => { Some(stdlib_root.join(relative_path)) } SearchPathInner::StandardLibraryVendored(_) => None, @@ -173,6 +180,9 @@ impl ModulePath { | SearchPathInner::Editable(search_path) => { system_path_to_file(db, search_path.join(relative_path)).ok() } + SearchPathInner::StandardLibraryReal(search_path) => { + system_path_to_file(db, search_path.join(relative_path)).ok() + } SearchPathInner::StandardLibraryCustom(stdlib_root) => { match query_stdlib_version(relative_path, resolver) { TypeshedVersionsQueryResult::DoesNotExist => None, @@ 
-381,6 +391,7 @@ enum SearchPathInner { FirstParty(SystemPathBuf), StandardLibraryCustom(SystemPathBuf), StandardLibraryVendored(VendoredPathBuf), + StandardLibraryReal(SystemPathBuf), SitePackages(SystemPathBuf), Editable(SystemPathBuf), } @@ -391,11 +402,13 @@ enum SearchPathInner { /// The different kinds of search paths are: /// - "Extra" search paths: these go at the start of the module resolution order /// - First-party search paths: the user code that we are directly invoked on. -/// - Standard-library search paths: these come in two different forms: +/// - Standard-library search paths: these come in three different forms: /// - Custom standard-library search paths: paths provided by the user /// pointing to a custom typeshed directory on disk /// - Vendored standard-library search paths: paths pointing to a directory /// in the vendored zip archive. +/// - Real standard-library search paths: path pointing to a directory +/// of the real python stdlib for the environment. /// - Site-packages search paths: search paths that point to the `site-packages` /// directory, in which packages are installed from ``PyPI``. 
/// - Editable search paths: Additional search paths added to the end of the module @@ -468,6 +481,13 @@ impl SearchPath { ))) } + /// Create a new search path pointing to the real stdlib of a python install + pub(crate) fn real_stdlib(system: &dyn System, root: SystemPathBuf) -> SearchPathResult { + Ok(Self(Arc::new(SearchPathInner::StandardLibraryReal( + Self::directory_path(system, root)?, + )))) + } + /// Create a new search path pointing to the `site-packages` directory on disk /// /// TODO: the validation done here is somewhat redundant given that `site-packages` @@ -504,7 +524,9 @@ impl SearchPath { pub(crate) fn is_standard_library(&self) -> bool { matches!( &*self.0, - SearchPathInner::StandardLibraryCustom(_) | SearchPathInner::StandardLibraryVendored(_) + SearchPathInner::StandardLibraryCustom(_) + | SearchPathInner::StandardLibraryVendored(_) + | SearchPathInner::StandardLibraryReal(_) ) } @@ -533,6 +555,7 @@ impl SearchPath { SearchPathInner::Extra(search_path) | SearchPathInner::FirstParty(search_path) | SearchPathInner::StandardLibraryCustom(search_path) + | SearchPathInner::StandardLibraryReal(search_path) | SearchPathInner::SitePackages(search_path) | SearchPathInner::Editable(search_path) => { path.strip_prefix(search_path) @@ -559,6 +582,7 @@ impl SearchPath { SearchPathInner::Extra(_) | SearchPathInner::FirstParty(_) | SearchPathInner::StandardLibraryCustom(_) + | SearchPathInner::StandardLibraryReal(_) | SearchPathInner::SitePackages(_) | SearchPathInner::Editable(_) => None, SearchPathInner::StandardLibraryVendored(search_path) => path @@ -577,6 +601,7 @@ impl SearchPath { SearchPathInner::Extra(path) | SearchPathInner::FirstParty(path) | SearchPathInner::StandardLibraryCustom(path) + | SearchPathInner::StandardLibraryReal(path) | SearchPathInner::SitePackages(path) | SearchPathInner::Editable(path) => Some(path), SearchPathInner::StandardLibraryVendored(_) => None, @@ -590,6 +615,7 @@ impl SearchPath { SearchPathInner::Extra(_) | 
SearchPathInner::FirstParty(_) | SearchPathInner::StandardLibraryCustom(_) + | SearchPathInner::StandardLibraryReal(_) | SearchPathInner::SitePackages(_) | SearchPathInner::Editable(_) => None, } @@ -651,6 +677,7 @@ impl fmt::Display for SearchPath { | SearchPathInner::FirstParty(system_path_buf) | SearchPathInner::SitePackages(system_path_buf) | SearchPathInner::Editable(system_path_buf) + | SearchPathInner::StandardLibraryReal(system_path_buf) | SearchPathInner::StandardLibraryCustom(system_path_buf) => system_path_buf.fmt(f), SearchPathInner::StandardLibraryVendored(vendored_path_buf) => vendored_path_buf.fmt(f), } diff --git a/crates/ty_python_semantic/src/module_resolver/resolver.rs b/crates/ty_python_semantic/src/module_resolver/resolver.rs index 8c05e10356..d178105990 100644 --- a/crates/ty_python_semantic/src/module_resolver/resolver.rs +++ b/crates/ty_python_semantic/src/module_resolver/resolver.rs @@ -47,10 +47,25 @@ pub fn resolve_real_module<'db>(db: &'db dyn Db, module_name: &ModuleName) -> Op /// Which files should be visible when doing a module query #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub(crate) enum ModuleResolveMode { + /// Stubs are allowed to appear. + /// + /// This is the "normal" mode almost everything uses, as type checkers are in fact supposed + /// to *prefer* stubs over the actual implementations. StubsAllowed, + /// Stubs are not allowed to appear. + /// + /// This is the "goto definition" mode, where we need to ignore the typing spec and find actual + /// implementations. When querying searchpaths this also notably replaces typeshed with + /// the "real" stdlib. 
StubsNotAllowed, } +#[salsa::interned] +#[derive(Debug)] +pub(crate) struct ModuleResolveModeIngredient<'db> { + mode: ModuleResolveMode, +} + impl ModuleResolveMode { fn stubs_allowed(self) -> bool { matches!(self, Self::StubsAllowed) @@ -124,7 +139,7 @@ pub(crate) fn file_to_module(db: &dyn Db, file: File) -> Option> { let path = SystemOrVendoredPathRef::try_from_file(db, file)?; - let module_name = search_paths(db).find_map(|candidate| { + let module_name = search_paths(db, ModuleResolveMode::StubsAllowed).find_map(|candidate| { let relative_path = match path { SystemOrVendoredPathRef::System(path) => candidate.relativize_system_path(path), SystemOrVendoredPathRef::Vendored(path) => candidate.relativize_vendored_path(path), @@ -153,8 +168,8 @@ pub(crate) fn file_to_module(db: &dyn Db, file: File) -> Option> { } } -pub(crate) fn search_paths(db: &dyn Db) -> SearchPathIterator<'_> { - Program::get(db).search_paths(db).iter(db) +pub(crate) fn search_paths(db: &dyn Db, resolve_mode: ModuleResolveMode) -> SearchPathIterator<'_> { + Program::get(db).search_paths(db).iter(db, resolve_mode) } #[derive(Clone, Debug, PartialEq, Eq)] @@ -164,7 +179,16 @@ pub struct SearchPaths { /// config settings themselves change. static_paths: Vec, - /// site-packages paths are not included in the above field: + /// Path to typeshed, which should come immediately after static paths. + /// + /// This can currently only be None if the `SystemPath` this points to is already in `static_paths`. + stdlib_path: Option, + + /// Path to the real stdlib, this replaces typeshed (`stdlib_path`) for goto-definition searches + /// ([`ModuleResolveMode::StubsNotAllowed`]). + real_stdlib_path: Option, + + /// site-packages paths are not included in the above fields: /// if there are multiple site-packages paths, editable installations can appear /// *between* the site-packages paths on `sys.path` at runtime. 
/// That means we can't know where a second or third `site-packages` path should sit @@ -198,6 +222,7 @@ impl SearchPaths { src_roots, custom_typeshed: typeshed, site_packages_paths, + real_stdlib_path, } = settings; let mut static_paths = vec![]; @@ -240,7 +265,11 @@ impl SearchPaths { ) }; - static_paths.push(stdlib_path); + let real_stdlib_path = if let Some(path) = real_stdlib_path { + Some(SearchPath::real_stdlib(system, path.clone())?) + } else { + None + }; let mut site_packages: Vec<_> = Vec::with_capacity(site_packages_paths.len()); @@ -273,8 +302,37 @@ impl SearchPaths { } }); + // Users probably shouldn't do this but... if they've shadowed their stdlib we should deduplicate it away. + // This notably will mess up anything that checks if a search path "is the standard library" as we won't + // "remember" that fact for static paths. + // + // (We used to shove these into static_paths, so the above retain implicitly did this. I am opting to + // preserve this behaviour to avoid getting into the weeds of corner cases.) 
+ let stdlib_path_is_shadowed = stdlib_path + .as_system_path() + .map(|path| seen_paths.contains(path)) + .unwrap_or(false); + let real_stdlib_path_is_shadowed = real_stdlib_path + .as_ref() + .and_then(SearchPath::as_system_path) + .map(|path| seen_paths.contains(path)) + .unwrap_or(false); + + let stdlib_path = if stdlib_path_is_shadowed { + None + } else { + Some(stdlib_path) + }; + let real_stdlib_path = if real_stdlib_path_is_shadowed { + None + } else { + real_stdlib_path + }; + Ok(SearchPaths { static_paths, + stdlib_path, + real_stdlib_path, site_packages, typeshed_versions, }) @@ -291,22 +349,32 @@ impl SearchPaths { } } - pub(super) fn iter<'a>(&'a self, db: &'a dyn Db) -> SearchPathIterator<'a> { + pub(super) fn iter<'a>( + &'a self, + db: &'a dyn Db, + mode: ModuleResolveMode, + ) -> SearchPathIterator<'a> { + let stdlib_path = self.stdlib(mode); SearchPathIterator { db, static_paths: self.static_paths.iter(), + stdlib_path, dynamic_paths: None, + mode: ModuleResolveModeIngredient::new(db, mode), + } + } + + pub(crate) fn stdlib(&self, mode: ModuleResolveMode) -> Option<&SearchPath> { + match mode { + ModuleResolveMode::StubsAllowed => self.stdlib_path.as_ref(), + ModuleResolveMode::StubsNotAllowed => self.real_stdlib_path.as_ref(), } } pub(crate) fn custom_stdlib(&self) -> Option<&SystemPath> { - self.static_paths.iter().find_map(|search_path| { - if search_path.is_standard_library() { - search_path.as_system_path() - } else { - None - } - }) + self.stdlib_path + .as_ref() + .and_then(SearchPath::as_system_path) } pub(crate) fn typeshed_versions(&self) -> &TypeshedVersions { @@ -323,13 +391,18 @@ impl SearchPaths { /// should come between the two `site-packages` directories when it comes to /// module-resolution priority. 
#[salsa::tracked(returns(deref), heap_size=ruff_memory_usage::heap_size)] -pub(crate) fn dynamic_resolution_paths(db: &dyn Db) -> Vec { +pub(crate) fn dynamic_resolution_paths<'db>( + db: &'db dyn Db, + mode: ModuleResolveModeIngredient<'db>, +) -> Vec { tracing::debug!("Resolving dynamic module resolution paths"); let SearchPaths { static_paths, + stdlib_path, site_packages, typeshed_versions: _, + real_stdlib_path, } = Program::get(db).search_paths(db); let mut dynamic_paths = Vec::new(); @@ -344,6 +417,15 @@ pub(crate) fn dynamic_resolution_paths(db: &dyn Db) -> Vec { .map(Cow::Borrowed) .collect(); + // Use the `ModuleResolveMode` to determine which stdlib (if any) to mark as existing + let stdlib = match mode.mode(db) { + ModuleResolveMode::StubsAllowed => stdlib_path, + ModuleResolveMode::StubsNotAllowed => real_stdlib_path, + }; + if let Some(path) = stdlib.as_ref().and_then(SearchPath::as_system_path) { + existing_paths.insert(Cow::Borrowed(path)); + } + let files = db.files(); let system = db.system(); @@ -429,7 +511,9 @@ pub(crate) fn dynamic_resolution_paths(db: &dyn Db) -> Vec { pub(crate) struct SearchPathIterator<'db> { db: &'db dyn Db, static_paths: std::slice::Iter<'db, SearchPath>, + stdlib_path: Option<&'db SearchPath>, dynamic_paths: Option>, + mode: ModuleResolveModeIngredient<'db>, } impl<'db> Iterator for SearchPathIterator<'db> { @@ -439,14 +523,19 @@ impl<'db> Iterator for SearchPathIterator<'db> { let SearchPathIterator { db, static_paths, + stdlib_path, + mode, dynamic_paths, } = self; - static_paths.next().or_else(|| { - dynamic_paths - .get_or_insert_with(|| dynamic_resolution_paths(*db).iter()) - .next() - }) + static_paths + .next() + .or_else(|| stdlib_path.take()) + .or_else(|| { + dynamic_paths + .get_or_insert_with(|| dynamic_resolution_paths(*db, *mode).iter()) + .next() + }) } } @@ -583,7 +672,7 @@ fn resolve_name(db: &dyn Db, name: &ModuleName, mode: ModuleResolveMode) -> Opti let stub_name = name.to_stub_package(); let mut 
is_namespace_package = false; - for search_path in search_paths(db) { + for search_path in search_paths(db, mode) { // When a builtin module is imported, standard module resolution is bypassed: // the module name always resolves to the stdlib module, // even if there's a module of the same name in the first-party root @@ -974,9 +1063,7 @@ mod tests { use ruff_db::Db; use ruff_db::files::{File, FilePath, system_path_to_file}; use ruff_db::system::{DbWithTestSystem as _, DbWithWritableSystem as _}; - use ruff_db::testing::{ - assert_const_function_query_was_not_run, assert_function_query_was_not_run, - }; + use ruff_db::testing::assert_function_query_was_not_run; use ruff_python_ast::PythonVersion; use crate::db::tests::TestDb; @@ -1908,7 +1995,12 @@ not_a_directory &FilePath::system("/y/src/bar.py") ); let events = db.take_salsa_events(); - assert_const_function_query_was_not_run(&db, dynamic_resolution_paths, &events); + assert_function_query_was_not_run( + &db, + dynamic_resolution_paths, + ModuleResolveModeIngredient::new(&db, ModuleResolveMode::StubsAllowed), + &events, + ); } #[test] @@ -1977,7 +2069,8 @@ not_a_directory .with_site_packages_files(&[("_foo.pth", "/src")]) .build(); - let search_paths: Vec<&SearchPath> = search_paths(&db).collect(); + let search_paths: Vec<&SearchPath> = + search_paths(&db, ModuleResolveMode::StubsAllowed).collect(); assert!(search_paths.contains( &&SearchPath::first_party(db.system(), SystemPathBuf::from("/src")).unwrap() diff --git a/crates/ty_python_semantic/src/program.rs b/crates/ty_python_semantic/src/program.rs index e674926a20..760e934d41 100644 --- a/crates/ty_python_semantic/src/program.rs +++ b/crates/ty_python_semantic/src/program.rs @@ -178,6 +178,12 @@ pub struct SearchPathSettings { /// List of site packages paths to use. pub site_packages_paths: Vec, + + /// Optional path to the real stdlib on the system, and not some instance of typeshed. 
+ /// + /// We should ideally only ever use this for things like goto-definition, + /// where typeshed isn't the right answer. + pub real_stdlib_path: Option, } impl SearchPathSettings { @@ -194,6 +200,7 @@ impl SearchPathSettings { extra_paths: vec![], custom_typeshed: None, site_packages_paths: vec![], + real_stdlib_path: None, } } diff --git a/crates/ty_python_semantic/src/site_packages.rs b/crates/ty_python_semantic/src/site_packages.rs index d69e8896d0..a278805f31 100644 --- a/crates/ty_python_semantic/src/site_packages.rs +++ b/crates/ty_python_semantic/src/site_packages.rs @@ -26,6 +26,7 @@ use ruff_text_size::{TextLen, TextRange}; use ty_static::EnvVars; type SitePackagesDiscoveryResult = Result; +type StdlibDiscoveryResult = Result; /// An ordered, deduplicated set of `site-packages` search paths. /// @@ -230,6 +231,13 @@ impl PythonEnvironment { Self::System(env) => env.site_packages_directories(system), } } + + pub fn real_stdlib_path(&self, system: &dyn System) -> StdlibDiscoveryResult { + match self { + Self::Virtual(env) => env.real_stdlib_directory(system), + Self::System(env) => env.real_stdlib_directory(system), + } + } } /// The Python runtime that produced the venv. @@ -259,6 +267,16 @@ impl PythonImplementation { Self::Unknown => None, } } + + /// Return the relative path from `sys.prefix` to the directory containing the python stdlib's + /// .pys if this is a known implementation. Return `None` if this is an unknown implementation. + fn relative_stdlib_path(self, version: Option) -> Option { + match self { + Self::CPython | Self::GraalPy => version.map(|version| format!("lib/python{version}")), + Self::PyPy => version.map(|version| format!("lib/pypy{version}")), + Self::Unknown => None, + } + } } /// Abstraction for a Python virtual environment. 
@@ -466,6 +484,59 @@ System site-packages will not be used for module resolution.", ); Ok(site_packages_directories) } + + /// Return the real stdlib path (containing actual .py files, and not some variation of typeshed). + /// + /// See the documentation for [`real_stdlib_directory_from_sys_prefix`] for more details. + pub(crate) fn real_stdlib_directory( + &self, + system: &dyn System, + ) -> StdlibDiscoveryResult { + let VirtualEnvironment { + base_executable_home_path, + implementation, + version, + // Unlike site-packages, what we're looking for is never inside the virtual environment + // so this is only used for diagnostics. + root_path, + // We don't need to respect this setting + include_system_site_packages: _, + // We don't need to inherit any info from the parent environment + parent_environment: _, + } = self; + + // Unconditionally follow the same logic that `site_packages_directories` uses when + // `include_system_site_packages` is true, as those site-packages should be a subdir + // of the dir we're looking for. + let version = version.as_ref().map(|v| v.version); + if let Some(system_sys_prefix) = + SysPrefixPath::from_executable_home_path_real(system, base_executable_home_path) + { + let real_stdlib_directory = real_stdlib_directory_from_sys_prefix( + &system_sys_prefix, + version, + *implementation, + system, + ); + match &real_stdlib_directory { + Ok(path) => tracing::debug!( + "Resolved real stdlib path for this virtual environment is: {path}" + ), + Err(_) => tracing::debug!( + "Failed to resolve real stdlib path for this virtual environment" + ), + } + real_stdlib_directory + } else { + let cfg_path = root_path.join("pyvenv.cfg"); + tracing::debug!( + "Failed to resolve `sys.prefix` of the system Python installation \ +from the `home` value in the `pyvenv.cfg` file at `{cfg_path}`. 
\ +System stdlib will not be used for module definitions.", + ); + Err(StdlibDiscoveryError::NoSysPrefixFound(cfg_path)) + } + } } /// A parser for `pyvenv.cfg` files: metadata files for virtual environments. @@ -622,6 +693,28 @@ impl SystemEnvironment { ); Ok(site_packages_directories) } + + /// Return the real stdlib path (containing actual .py files, and not some variation of typeshed). + /// + /// See the documentation for [`real_stdlib_directory_from_sys_prefix`] for more details. + pub(crate) fn real_stdlib_directory( + &self, + system: &dyn System, + ) -> StdlibDiscoveryResult { + let SystemEnvironment { root_path } = self; + + let stdlib_directory = real_stdlib_directory_from_sys_prefix( + root_path, + None, + PythonImplementation::Unknown, + system, + )?; + + tracing::debug!( + "Resolved real stdlib directory for this environment is: {stdlib_directory:?}" + ); + Ok(stdlib_directory) + } } /// Enumeration of ways in which `site-packages` discovery can fail. @@ -654,6 +747,22 @@ pub enum SitePackagesDiscoveryError { NoSitePackagesDirFound(SysPrefixPath), } +/// Enumeration of ways in which stdlib discovery can fail. +#[derive(Debug)] +pub enum StdlibDiscoveryError { + /// We looked everywhere we could think of for the standard library's directory, + /// but none could be found despite our best endeavours. + NoStdlibFound(SysPrefixPath), + /// Stdlib discovery failed because we're on a Unix system, + /// we weren't able to figure out from the `pyvenv.cfg` file exactly where the stdlib + /// would be relative to the `sys.prefix` path, and we tried to fallback to iterating + /// through the `/lib` directory looking for a stdlib directory, + /// but we came across some I/O error while trying to do so. + CouldNotReadLibDirectory(SysPrefixPath, io::Error), + /// We failed to resolve the value of `sys.prefix`. 
+ NoSysPrefixFound(SystemPathBuf), +} + impl std::error::Error for SitePackagesDiscoveryError { fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { match self { @@ -722,6 +831,43 @@ impl std::fmt::Display for SitePackagesDiscoveryError { } } +impl std::error::Error for StdlibDiscoveryError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + Self::CouldNotReadLibDirectory(_, io_err) => Some(io_err), + Self::NoStdlibFound(_) => None, + Self::NoSysPrefixFound(_) => None, + } + } +} + +impl std::fmt::Display for StdlibDiscoveryError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::NoSysPrefixFound(path) => { + write!( + f, + "Failed to resolve a `sys.prefix` from the `pyvenv.cfg` file at `{path}`" + ) + } + Self::CouldNotReadLibDirectory(SysPrefixPath { inner, origin }, _) => display_error( + f, + origin, + inner, + "Failed to iterate over the contents of the `lib` directory of the Python installation", + None, + ), + Self::NoStdlibFound(SysPrefixPath { inner, origin }) => display_error( + f, + origin, + inner, + &format!("Invalid {origin}"), + Some("Could not find a stdlib directory for this Python installation/executable"), + ), + } + } +} + fn display_error( f: &mut std::fmt::Formatter<'_>, sys_prefix_origin: &SysPrefixPathOrigin, @@ -933,6 +1079,90 @@ fn site_packages_directory_from_sys_prefix( )) } +/// Attempt to retrieve the real stdlib directory +/// associated with a given Python installation. +/// +/// The location of the stdlib directory can vary according to the +/// Python version that this installation represents. The Python version may +/// or may not be known at this point, which is why the `python_version` +/// parameter is an `Option`. 
+fn real_stdlib_directory_from_sys_prefix( + sys_prefix_path: &SysPrefixPath, + python_version: Option, + implementation: PythonImplementation, + system: &dyn System, +) -> StdlibDiscoveryResult { + tracing::debug!( + "Searching for real stdlib directory in sys.prefix {}", + sys_prefix_path.inner + ); + + if cfg!(target_os = "windows") { + let stdlib = sys_prefix_path.join("Lib"); + return system.is_directory(&stdlib).then_some(stdlib).ok_or( + StdlibDiscoveryError::NoStdlibFound(sys_prefix_path.to_owned()), + ); + } + + // If we were able to figure out what Python version this installation is, + // we should be able to avoid iterating through all items in the `lib/` directory: + if let Some(expected_relative_path) = implementation.relative_stdlib_path(python_version) { + let expected_absolute_path = sys_prefix_path.join(expected_relative_path); + if system.is_directory(&expected_absolute_path) { + return Ok(expected_absolute_path); + } + + // CPython free-threaded (3.13+) variant: pythonXYt + if matches!(implementation, PythonImplementation::CPython) + && python_version.is_some_and(PythonVersion::free_threaded_build_available) + { + let alternative_path = + sys_prefix_path.join(format!("lib/python{}t", python_version.unwrap())); + if system.is_directory(&alternative_path) { + return Ok(alternative_path); + } + } + } + + // Either we couldn't figure out the version before calling this function + // (e.g., from a `pyvenv.cfg` file if this was a venv), + // or we couldn't find a stdlib folder at the expected location given + // the parsed version + // + // Note: the `python3.x` part of the stdlib path can't be computed from + // the `--python-version` the user has passed, as they might be running Python 3.12 locally + // even if they've requested that we type check their code "as if" they're running 3.8. 
+ for entry_result in system + .read_directory(&sys_prefix_path.join("lib")) + .map_err(|io_err| { + StdlibDiscoveryError::CouldNotReadLibDirectory(sys_prefix_path.to_owned(), io_err) + })? + { + let Ok(entry) = entry_result else { + continue; + }; + + if !entry.file_type().is_directory() { + continue; + } + + let path = entry.into_path(); + + let name = path + .file_name() + .expect("File name to be non-null because path is guaranteed to be a child of `lib`"); + + if !(name.starts_with("python3.") || name.starts_with("pypy3.")) { + continue; + } + + return Ok(path); + } + Err(StdlibDiscoveryError::NoStdlibFound( + sys_prefix_path.to_owned(), + )) +} + /// A path that represents the value of [`sys.prefix`] at runtime in Python /// for a given Python executable. /// @@ -1048,6 +1278,77 @@ impl SysPrefixPath { }) } } + /// Like `from_executable_home_path` but attempts to resolve through symlink facades + /// to find a sys prefix that will actually contain the stdlib. + fn from_executable_home_path_real(system: &dyn System, path: &PythonHomePath) -> Option { + let mut home_path = path.0.clone(); + + // Try to find the python executable in the given directory and canonicalize it + // to resolve any symlink. This is (at least) necessary for homebrew pythons + // and the macOS system python. + // + // In python installations like homebrew, the home path points to a directory like + // `/opt/homebrew/opt/python@3.13/bin` and indeed if you look for `../lib/python3.13/` + // you *will* find `site-packages` but you *won't* find the stdlib! (For the macOS + // system install you won't even find `site-packages` here.) 
+ // + // However if you look at `/opt/homebrew/opt/python@3.13/bin/python3.13` (the actual + // python executable in that dir) you will find that it's a symlink to something like + // `../Frameworks/Python.framework/Versions/3.13/bin/python3.13` + // + // From this Framework binary path if you go to `../../lib/python3.13/` you will then + // find the python stdlib as expected (and a different instance of site-packages). + // + // FIXME: it would be nice to include a "we know the python name" fastpath like in + // `real_stdlib_directory_from_sys_prefix`. + if let Ok(dir) = system.read_directory(&home_path) { + for entry_result in dir { + let Ok(entry) = entry_result else { + continue; + }; + + if entry.file_type().is_directory() { + continue; + } + + let path = entry.into_path(); + + let name = path.file_name().expect( + "File name to be non-null because path is guaranteed to be a child of `lib`", + ); + + if !(name.starts_with("python3.") || name.starts_with("pypy3.")) { + continue; + } + + let Ok(canonical_path) = system.canonicalize_path(&path) else { + continue; + }; + + let Some(parent) = canonical_path.parent() else { + continue; + }; + + home_path = parent.to_path_buf(); + break; + } + } + + // No need to check whether `path.parent()` is a directory: + // the parent of a canonicalised path that is known to exist + // is guaranteed to be a directory. 
+ if cfg!(target_os = "windows") { + Some(Self { + inner: home_path.to_path_buf(), + origin: SysPrefixPathOrigin::DerivedFromPyvenvCfg, + }) + } else { + home_path.parent().map(|home_path| Self { + inner: home_path.to_path_buf(), + origin: SysPrefixPathOrigin::DerivedFromPyvenvCfg, + }) + } + } } impl Deref for SysPrefixPath { @@ -1417,6 +1718,10 @@ mod tests { &expected_venv_site_packages ); } + + let stdlib_directory = venv.real_stdlib_directory(&self.system).unwrap(); + let expected_stdlib_directory = self.expected_system_stdlib(); + assert_eq!(stdlib_directory, expected_stdlib_directory); } #[track_caller] @@ -1444,6 +1749,10 @@ mod tests { site_packages_directories, std::slice::from_ref(&expected_site_packages) ); + + let stdlib_directory = env.real_stdlib_directory(&self.system).unwrap(); + let expected_stdlib_directory = self.expected_system_stdlib(); + assert_eq!(stdlib_directory, expected_stdlib_directory); } fn expected_system_site_packages(&self) -> SystemPathBuf { @@ -1460,6 +1769,21 @@ mod tests { )) } } + + fn expected_system_stdlib(&self) -> SystemPathBuf { + let minor_version = self.minor_version; + if cfg!(target_os = "windows") { + SystemPathBuf::from(&*format!(r"\Python3.{minor_version}\Lib")) + } else if self.free_threaded { + SystemPathBuf::from(&*format!( + "/Python3.{minor_version}/lib/python3.{minor_version}t" + )) + } else { + SystemPathBuf::from(&*format!( + "/Python3.{minor_version}/lib/python3.{minor_version}" + )) + } + } } #[test] diff --git a/crates/ty_test/src/lib.rs b/crates/ty_test/src/lib.rs index fdde483c1a..10dc4a7ce1 100644 --- a/crates/ty_test/src/lib.rs +++ b/crates/ty_test/src/lib.rs @@ -283,6 +283,7 @@ fn run_test( extra_paths: configuration.extra_paths().unwrap_or_default().to_vec(), custom_typeshed: custom_typeshed_path.map(SystemPath::to_path_buf), site_packages_paths, + real_stdlib_path: None, } .to_search_paths(db.system(), db.vendored()) .expect("Failed to resolve search path settings"),