From 64edfb6ef6f26bb3618c5c888f5abf3a866a172c Mon Sep 17 00:00:00 2001 From: Aria Desires Date: Thu, 16 Oct 2025 23:16:37 -0400 Subject: [PATCH] [ty] add legacy namespace package support (#20897) Detect legacy namespace packages and treat them like namespace packages when looking them up as the *parent* of the module we're interested in. In all other cases treat them like a regular package. (This PR is coauthored by @MichaReiser in a shared coding session) Fixes https://github.com/astral-sh/ty/issues/838 --------- Co-authored-by: Micha Reiser --- .../mdtest/import/legacy_namespace.md | 221 ++++++++++++++++++ .../src/module_resolver/resolver.rs | 186 ++++++++++++++- 2 files changed, 405 insertions(+), 2 deletions(-) create mode 100644 crates/ty_python_semantic/resources/mdtest/import/legacy_namespace.md diff --git a/crates/ty_python_semantic/resources/mdtest/import/legacy_namespace.md b/crates/ty_python_semantic/resources/mdtest/import/legacy_namespace.md new file mode 100644 index 0000000000..1c3b221c90 --- /dev/null +++ b/crates/ty_python_semantic/resources/mdtest/import/legacy_namespace.md @@ -0,0 +1,221 @@ +# Legacy namespace packages + +## `__import__("pkgutil").extend_path` + +```toml +[environment] +extra-paths = ["/airflow-core/src", "/providers/amazon/src/"] +``` + +`/airflow-core/src/airflow/__init__.py`: + +```py +__path__ = __import__("pkgutil").extend_path(__path__, __name__) +__version__ = "3.2.0" +``` + +`/providers/amazon/src/airflow/__init__.py`: + +```py +__path__ = __import__("pkgutil").extend_path(__path__, __name__) +``` + +`/providers/amazon/src/airflow/providers/__init__.py`: + +```py +__path__ = __import__("pkgutil").extend_path(__path__, __name__) +``` + +`/providers/amazon/src/airflow/providers/amazon/__init__.py`: + +```py +__version__ = "9.15.0" +``` + +`test.py`: + +```py +from airflow import __version__ as airflow_version +from airflow.providers.amazon import __version__ as amazon_provider_version + +reveal_type(airflow_version) # revealed: Literal["3.2.0"] +reveal_type(amazon_provider_version) # revealed: Literal["9.15.0"] +``` + +## `pkgutil.extend_path` + +```toml +[environment] +extra-paths = ["/airflow-core/src", "/providers/amazon/src/"] +``` + +`/airflow-core/src/airflow/__init__.py`: + +```py +import pkgutil + +__path__ = pkgutil.extend_path(__path__, __name__) +__version__ = "3.2.0" +``` + +`/providers/amazon/src/airflow/__init__.py`: + +```py +import pkgutil + +__path__ = pkgutil.extend_path(__path__, __name__) +``` + +`/providers/amazon/src/airflow/providers/__init__.py`: + +```py +import pkgutil + +__path__ = pkgutil.extend_path(__path__, __name__) +``` + +`/providers/amazon/src/airflow/providers/amazon/__init__.py`: + +```py +__version__ = "9.15.0" +``` + +`test.py`: + +```py +from airflow import __version__ as airflow_version +from airflow.providers.amazon import __version__ as amazon_provider_version + +reveal_type(airflow_version) # revealed: Literal["3.2.0"] +reveal_type(amazon_provider_version) # revealed: Literal["9.15.0"] +``` + +## `extend_path` with keyword arguments + +```toml +[environment] +extra-paths = ["/airflow-core/src", "/providers/amazon/src/"] +``` + +`/airflow-core/src/airflow/__init__.py`: + +```py +import pkgutil + +__path__ = pkgutil.extend_path(name=__name__, path=__path__) +__version__ = "3.2.0" +``` + +`/providers/amazon/src/airflow/__init__.py`: + +```py +import pkgutil + +__path__ = pkgutil.extend_path(name=__name__, path=__path__) +``` + +`/providers/amazon/src/airflow/providers/__init__.py`: + +```py +import pkgutil + +__path__ = pkgutil.extend_path(name=__name__, path=__path__) +``` + +`/providers/amazon/src/airflow/providers/amazon/__init__.py`: + +```py +__version__ = "9.15.0" +``` + +`test.py`: + +```py +from airflow import __version__ as airflow_version +from airflow.providers.amazon import __version__ as amazon_provider_version + +reveal_type(airflow_version) # revealed: Literal["3.2.0"] +reveal_type(amazon_provider_version) # revealed: Literal["9.15.0"] +``` + +## incorrect `__import__` arguments + +```toml +[environment] +extra-paths = ["/airflow-core/src", "/providers/amazon/src/"] +``` + +`/airflow-core/src/airflow/__init__.py`: + +```py +__path__ = __import__("not_pkgutil").extend_path(__path__, __name__) +__version__ = "3.2.0" +``` + +`/providers/amazon/src/airflow/__init__.py`: + +```py +__path__ = __import__("not_pkgutil").extend_path(__path__, __name__) +``` + +`/providers/amazon/src/airflow/providers/__init__.py`: + +```py +__path__ = __import__("not_pkgutil").extend_path(__path__, __name__) +``` + +`/providers/amazon/src/airflow/providers/amazon/__init__.py`: + +```py +__version__ = "9.15.0" +``` + +`test.py`: + +```py +from airflow.providers.amazon import __version__ as amazon_provider_version # error: [unresolved-import] +from airflow import __version__ as airflow_version + +reveal_type(airflow_version) # revealed: Literal["3.2.0"] +``` + +## incorrect `extend_path` arguments + +```toml +[environment] +extra-paths = ["/airflow-core/src", "/providers/amazon/src/"] +``` + +`/airflow-core/src/airflow/__init__.py`: + +```py +__path__ = __import__("pkgutil").extend_path(__path__, "other_module") +__version__ = "3.2.0" +``` + +`/providers/amazon/src/airflow/__init__.py`: + +```py +__path__ = __import__("pkgutil").extend_path(__path__, "other_module") +``` + +`/providers/amazon/src/airflow/providers/__init__.py`: + +```py +__path__ = __import__("pkgutil").extend_path(__path__, "other_module") +``` + +`/providers/amazon/src/airflow/providers/amazon/__init__.py`: + +```py +__version__ = "9.15.0" +``` + +`test.py`: + +```py +from airflow.providers.amazon import __version__ as amazon_provider_version # error: [unresolved-import] +from airflow import __version__ as airflow_version + +reveal_type(airflow_version) # revealed: Literal["3.2.0"] +``` diff --git a/crates/ty_python_semantic/src/module_resolver/resolver.rs b/crates/ty_python_semantic/src/module_resolver/resolver.rs index 2f827f256f..0787859049 100644 --- a/crates/ty_python_semantic/src/module_resolver/resolver.rs +++ b/crates/ty_python_semantic/src/module_resolver/resolver.rs @@ -19,7 +19,10 @@ use rustc_hash::{FxBuildHasher, FxHashSet}; use ruff_db::files::{File, FilePath, FileRootKind}; use ruff_db::system::{DirectoryEntry, System, SystemPath, SystemPathBuf}; use ruff_db::vendored::VendoredFileSystem; -use ruff_python_ast::{PySourceType, PythonVersion}; +use ruff_python_ast::{ + self as ast, PySourceType, PythonVersion, + visitor::{Visitor, walk_body}, +}; use crate::db::Db; use crate::module_name::ModuleName; @@ -1002,7 +1005,12 @@ where let is_regular_package = package_path.is_regular_package(resolver_state); if is_regular_package { - in_namespace_package = false; + // This is the only place where we need to consider the existence of legacy namespace + // packages, as we are explicitly searching for the *parent* package of the module + // we actually want. Here, such a package should be treated as a PEP-420 ("modern") + // namespace package. In all other contexts it acts like a normal package and needs + // no special handling. + in_namespace_package = is_legacy_namespace_package(&package_path, resolver_state); } else if package_path.is_directory(resolver_state) // Pure modules hide namespace packages with the same name && resolve_file_module(&package_path, resolver_state).is_none() @@ -1039,6 +1047,62 @@ where }) } +/// Determines whether a package is a legacy namespace package. +/// +/// Before PEP 420 introduced implicit namespace packages, the ecosystem developed +/// its own form of namespace packages. These legacy namespace packages continue to persist +/// in modern codebases because they work with ancient Pythons and if it ain't broke, don't fix it. +/// +/// A legacy namespace package is distinguished by having an `__init__.py` that contains an +/// expression to the effect of: +/// +/// ```python +/// __path__ = __import__("pkgutil").extend_path(__path__, __name__) +/// ``` +/// +/// The resulting package simultaneously has properties of both regular packages and namespace ones: +/// +/// * Like regular packages, `__init__.py` is defined and can contain items other than submodules +/// * Like implicit namespace packages, multiple copies of the package may exist with different +/// submodules, and they will be merged into one namespace at runtime by the interpreter +/// +/// Now, you may rightly wonder: "What if the `__init__.py` files have different contents?" +/// The apparent official answer is: "Don't do that!" +/// And the reality is: "Of course people do that!" +/// +/// In practice we think it's fine to, just like with regular packages, use the first one +/// we find on the search paths. To the extent that the different copies "need" to have the same +/// contents, they all "need" to have the legacy namespace idiom (we do nothing to enforce that, +/// we will just get confused if you mess it up). +fn is_legacy_namespace_package( + package_path: &ModulePath, + resolver_state: &ResolverContext, +) -> bool { + // Just an optimization, the stdlib and typeshed are never legacy namespace packages + if package_path.search_path().is_standard_library() { + return false; + } + + let mut package_path = package_path.clone(); + package_path.push("__init__"); + let Some(init) = resolve_file_module(&package_path, resolver_state) else { + return false; + }; + + // This is all syntax-only analysis so it *could* be fooled but it's really unlikely. + // + // The benefit of being syntax-only is speed and avoiding circular dependencies + // between module resolution and semantic analysis. + // + // The downside is if you write slightly different syntax we will fail to detect the idiom, + // but hey, this is better than nothing! + let parsed = ruff_db::parsed::parsed_module(resolver_state.db, init); + let mut visitor = LegacyNamespacePackageVisitor::default(); + visitor.visit_body(parsed.load(resolver_state.db).suite()); + + visitor.is_legacy_namespace_package +} + #[derive(Debug)] struct ResolvedPackage { path: ModulePath, @@ -1148,6 +1212,124 @@ impl fmt::Display for RelaxedModuleName { } } +/// Detects if a module contains a statement of the form: +/// ```python +/// __path__ = pkgutil.extend_path(__path__, __name__) +/// ``` +/// or +/// ```python +/// __path__ = __import__("pkgutil").extend_path(__path__, __name__) +/// ``` +#[derive(Default)] +struct LegacyNamespacePackageVisitor { + is_legacy_namespace_package: bool, + in_body: bool, +} + +impl Visitor<'_> for LegacyNamespacePackageVisitor { + fn visit_body(&mut self, body: &[ruff_python_ast::Stmt]) { + if self.is_legacy_namespace_package { + return; + } + + // Don't traverse into nested bodies. + if self.in_body { + return; + } + + self.in_body = true; + + walk_body(self, body); + } + + fn visit_stmt(&mut self, stmt: &ast::Stmt) { + if self.is_legacy_namespace_package { + return; + } + + let ast::Stmt::Assign(ast::StmtAssign { value, targets, .. }) = stmt else { + return; + }; + + let [ast::Expr::Name(maybe_path)] = &**targets else { + return; + }; + + if &*maybe_path.id != "__path__" { + return; + } + + let ast::Expr::Call(ast::ExprCall { + func: extend_func, + arguments: extend_arguments, + .. + }) = &**value + else { + return; + }; + + let ast::Expr::Attribute(ast::ExprAttribute { + value: maybe_pkg_util, + attr: maybe_extend_path, + .. + }) = &**extend_func + else { + return; + }; + + // Match if the left side of the attribute access is either `__import__("pkgutil")` or `pkgutil` + match &**maybe_pkg_util { + // __import__("pkgutil").extend_path(__path__, __name__) + ast::Expr::Call(ruff_python_ast::ExprCall { + func: maybe_import, + arguments: import_arguments, + .. + }) => { + let ast::Expr::Name(maybe_import) = &**maybe_import else { + return; + }; + + if maybe_import.id() != "__import__" { + return; + } + + let Some(ast::Expr::StringLiteral(name)) = + import_arguments.find_argument_value("name", 0) + else { + return; + }; + + if name.value.to_str() != "pkgutil" { + return; + } + } + // "pkgutil.extend_path(__path__, __name__)" + ast::Expr::Name(name) => { + if name.id() != "pkgutil" { + return; + } + } + _ => { + return; + } + } + + // Test that this is an `extend_path(__path__, __name__)` call + if maybe_extend_path != "extend_path" { + return; + } + + let Some(ast::Expr::Name(path)) = extend_arguments.find_argument_value("path", 0) else { + return; + }; + let Some(ast::Expr::Name(name)) = extend_arguments.find_argument_value("name", 1) else { + return; + }; + + self.is_legacy_namespace_package = path.id() == "__path__" && name.id() == "__name__"; + } +} + #[cfg(test)] mod tests { #![expect(