diff --git a/crates/red_knot_python_semantic/resources/mdtest/scopes/moduletype_attrs.md b/crates/red_knot_python_semantic/resources/mdtest/scopes/moduletype_attrs.md new file mode 100644 index 0000000000..ffe73bfcef --- /dev/null +++ b/crates/red_knot_python_semantic/resources/mdtest/scopes/moduletype_attrs.md @@ -0,0 +1,141 @@ +# Implicit globals from `types.ModuleType` + +## Implicit `ModuleType` globals + +All modules are instances of `types.ModuleType`. +If a name can't be found in any local or global scope, we look it up +as an attribute on `types.ModuleType` in typeshed +before deciding that the name is unbound. + +```py +reveal_type(__name__) # revealed: str +reveal_type(__file__) # revealed: str | None +reveal_type(__loader__) # revealed: LoaderProtocol | None +reveal_type(__package__) # revealed: str | None +reveal_type(__spec__) # revealed: ModuleSpec | None + +# TODO: generics +reveal_type(__path__) # revealed: @Todo + +# TODO: this should probably be added to typeshed; not sure why it isn't? +# error: [unresolved-reference] +# revealed: Unbound +reveal_type(__doc__) + +class X: + reveal_type(__name__) # revealed: str + +def foo(): + reveal_type(__name__) # revealed: str +``` + +However, three attributes on `types.ModuleType` are not present as implicit +module globals; these are excluded: + +```py path=unbound_dunders.py +# error: [unresolved-reference] +# revealed: Unbound +reveal_type(__getattr__) + +# error: [unresolved-reference] +# revealed: Unbound +reveal_type(__dict__) + +# error: [unresolved-reference] +# revealed: Unbound +reveal_type(__init__) +``` + +## Accessed as attributes + +`ModuleType` attributes can also be accessed as attributes on module-literal types. +The special attributes `__dict__` and `__init__`, and all attributes on +`builtins.object`, can also be accessed as attributes on module-literal types, +despite the fact that these are inaccessible as globals from inside the module: + +```py +import typing + +reveal_type(typing.__name__) # revealed: str +reveal_type(typing.__init__) # revealed: Literal[__init__] + +# These come from `builtins.object`, not `types.ModuleType`: +# TODO: we don't currently understand `types.ModuleType` as inheriting from `object`; +# these should not reveal `Unbound`: +reveal_type(typing.__eq__) # revealed: Unbound +reveal_type(typing.__class__) # revealed: Unbound +reveal_type(typing.__module__) # revealed: Unbound + +# TODO: needs support for attribute access on instances, properties and generics; +# should be `dict[str, Any]` +reveal_type(typing.__dict__) # revealed: @Todo +``` + +Typeshed includes a fake `__getattr__` method in the stub for `types.ModuleType` +to help out with dynamic imports; but we ignore that for module-literal types +where we know exactly which module we're dealing with: + +```py path=__getattr__.py +import typing + +reveal_type(typing.__getattr__) # revealed: Unbound +``` + +## `types.ModuleType.__dict__` takes precedence over global variable `__dict__` + +It's impossible to override the `__dict__` attribute of `types.ModuleType` +instances from inside the module; we should prioritise the attribute in +the `types.ModuleType` stub over a variable named `__dict__` in the module's +global namespace: + +```py path=foo.py +__dict__ = "foo" + +reveal_type(__dict__) # revealed: Literal["foo"] +``` + +```py path=bar.py +import foo +from foo import __dict__ as foo_dict + +# TODO: needs support for attribute access on instances, properties, and generics; +# should be `dict[str, Any]` for both of these: +reveal_type(foo.__dict__) # revealed: @Todo +reveal_type(foo_dict) # revealed: @Todo +``` + +## Conditionally global or `ModuleType` attribute + +Attributes overridden in the module namespace take priority. +If a builtin name is conditionally defined as a global, however, +a name lookup should union the `ModuleType` type with the conditionally defined type: + +```py +__file__ = 42 + +def returns_bool() -> bool: + return True + +if returns_bool(): + __name__ = 1 + +reveal_type(__file__) # revealed: Literal[42] +reveal_type(__name__) # revealed: str | Literal[1] +``` + +## Conditionally global or `ModuleType` attribute, with annotation + +The same is true if the name is annotated: + +```py +__file__: int = 42 + +def returns_bool() -> bool: + return True + +if returns_bool(): + __name__: int = 1 + +reveal_type(__file__) # revealed: Literal[42] +reveal_type(__name__) # revealed: str | Literal[1] +``` diff --git a/crates/red_knot_python_semantic/src/semantic_index/builder.rs b/crates/red_knot_python_semantic/src/semantic_index/builder.rs index 284d582075..4ee6faac8a 100644 --- a/crates/red_knot_python_semantic/src/semantic_index/builder.rs +++ b/crates/red_knot_python_semantic/src/semantic_index/builder.rs @@ -195,6 +195,10 @@ impl<'db> SemanticIndexBuilder<'db> { self.current_symbol_table().mark_symbol_bound(id); } + fn mark_symbol_declared(&mut self, id: ScopedSymbolId) { + self.current_symbol_table().mark_symbol_declared(id); + } + fn mark_symbol_used(&mut self, id: ScopedSymbolId) { self.current_symbol_table().mark_symbol_used(id); } @@ -226,6 +230,9 @@ impl<'db> SemanticIndexBuilder<'db> { if category.is_binding() { self.mark_symbol_bound(symbol); } + if category.is_declaration() { + self.mark_symbol_declared(symbol); + } let use_def = self.current_use_def_map_mut(); match category { @@ -359,6 +366,7 @@ impl<'db> SemanticIndexBuilder<'db> { // note that the "bound" on the typevar is a totally different thing than whether // or not a name is "bound" by a typevar declaration; the latter is always true. self.mark_symbol_bound(symbol); + self.mark_symbol_declared(symbol); if let Some(bounds) = bound { self.visit_expr(bounds); } diff --git a/crates/red_knot_python_semantic/src/semantic_index/symbol.rs b/crates/red_knot_python_semantic/src/semantic_index/symbol.rs index eeab9bf169..44bf466d9d 100644 --- a/crates/red_knot_python_semantic/src/semantic_index/symbol.rs +++ b/crates/red_knot_python_semantic/src/semantic_index/symbol.rs @@ -47,17 +47,27 @@ impl Symbol { pub fn is_bound(&self) -> bool { self.flags.contains(SymbolFlags::IS_BOUND) } + + /// Is the symbol declared in its containing scope? + pub fn is_declared(&self) -> bool { + self.flags.contains(SymbolFlags::IS_DECLARED) + } } bitflags! { + /// Flags that can be queried to obtain information about a symbol in a given scope. + /// + /// See the doc-comment at the top of [`super::use_def`] for explanations of what it + /// means for a symbol to be *bound* as opposed to *declared*. #[derive(Copy, Clone, Debug, Eq, PartialEq)] struct SymbolFlags: u8 { const IS_USED = 1 << 0; - const IS_BOUND = 1 << 1; + const IS_BOUND = 1 << 1; + const IS_DECLARED = 1 << 2; /// TODO: This flag is not yet set by anything - const MARKED_GLOBAL = 1 << 2; + const MARKED_GLOBAL = 1 << 3; /// TODO: This flag is not yet set by anything - const MARKED_NONLOCAL = 1 << 3; + const MARKED_NONLOCAL = 1 << 4; } } @@ -298,6 +308,10 @@ impl SymbolTableBuilder { self.table.symbols[id].insert_flags(SymbolFlags::IS_BOUND); } + pub(super) fn mark_symbol_declared(&mut self, id: ScopedSymbolId) { + self.table.symbols[id].insert_flags(SymbolFlags::IS_DECLARED); + } + pub(super) fn mark_symbol_used(&mut self, id: ScopedSymbolId) { self.table.symbols[id].insert_flags(SymbolFlags::IS_USED); } diff --git a/crates/red_knot_python_semantic/src/types.rs b/crates/red_knot_python_semantic/src/types.rs index 11f679ef5a..9a382697ea 100644 --- a/crates/red_knot_python_semantic/src/types.rs +++ b/crates/red_knot_python_semantic/src/types.rs @@ -5,7 +5,7 @@ use ruff_python_ast as ast; use crate::module_resolver::file_to_module; use crate::semantic_index::ast_ids::HasScopedAstId; use crate::semantic_index::definition::{Definition, DefinitionKind}; -use crate::semantic_index::symbol::{ScopeId, ScopedSymbolId}; +use crate::semantic_index::symbol::{ScopeId, ScopedSymbolId, Symbol}; use crate::semantic_index::{ global_scope, semantic_index, symbol_table, use_def_map, BindingWithConstraints, BindingWithConstraintsIterator, DeclarationsIterator, @@ -88,9 +88,64 @@ fn symbol_ty<'db>(db: &'db dyn Db, scope: ScopeId<'db>, name: &str) -> Type<'db> .unwrap_or(Type::Unbound) } -/// Shorthand for `symbol_ty` that looks up a module-global symbol by name in a file. +/// Return a list of the symbols that typeshed declares in the body scope of +/// the stub for the class `types.ModuleType`. +/// +/// Conceptually this could be a `Set` rather than a list, +/// but the number of symbols declared in this scope is likely to be very small, +/// so the cost of hashing the names is likely to be more expensive than it's worth. +#[salsa::tracked(return_ref)] +fn module_type_symbols<'db>(db: &'db dyn Db) -> smallvec::SmallVec<[ast::name::Name; 8]> { + let Some(module_type) = KnownClass::ModuleType + .to_class(db) + .into_class_literal_type() + else { + // The most likely way we get here is if a user specified a `--custom-typeshed-dir` + // without a `types.pyi` stub in the `stdlib/` directory + return smallvec::SmallVec::default(); + }; + + let module_type_scope = module_type.body_scope(db); + let module_type_symbol_table = symbol_table(db, module_type_scope); + + // `__dict__` and `__init__` are very special members that can be accessed as attributes + // on the module when imported, but cannot be accessed as globals *inside* the module. + // + // `__getattr__` is even more special: it doesn't exist at runtime, but typeshed includes it + // to reduce false positives associated with functions that dynamically import modules + // and return `Instance(types.ModuleType)`. We should ignore it for any known module-literal type. + module_type_symbol_table + .symbols() + .filter(|symbol| symbol.is_declared()) + .map(Symbol::name) + .filter(|symbol_name| !matches!(&***symbol_name, "__dict__" | "__getattr__" | "__init__")) + .cloned() + .collect() +} + +/// Looks up a module-global symbol by name in a file. pub(crate) fn global_symbol_ty<'db>(db: &'db dyn Db, file: File, name: &str) -> Type<'db> { - symbol_ty(db, global_scope(db, file), name) + let explicit_ty = symbol_ty(db, global_scope(db, file), name); + if !explicit_ty.may_be_unbound(db) { + return explicit_ty; + } + + // Not defined explicitly in the global scope? + // All modules are instances of `types.ModuleType`; + // look it up there (with a few very special exceptions) + if module_type_symbols(db) + .iter() + .any(|module_type_member| &**module_type_member == name) + { + // TODO: this should use `.to_instance(db)`. but we don't understand attribute access + // on instance types yet. + let module_type_member = KnownClass::ModuleType.to_class(db).member(db, name); + if !module_type_member.is_unbound() { + return explicit_ty.replace_unbound_with(db, module_type_member); + } + } + + explicit_ty } /// Infer the type of a binding. @@ -821,7 +876,46 @@ impl<'db> Type<'db> { // TODO: attribute lookup on function type Type::Todo } - Type::ModuleLiteral(file) => global_symbol_ty(db, *file, name), + Type::ModuleLiteral(file) => { + // `__dict__` is a very special member that is never overridden by module globals; + // we should always look it up directly as an attribute on `types.ModuleType`, + // never in the global scope of the module. + if name == "__dict__" { + return KnownClass::ModuleType + .to_instance(db) + .member(db, "__dict__"); + } + + let global_lookup = symbol_ty(db, global_scope(db, *file), name); + + // If it's unbound, check if it's present as an instance on `types.ModuleType` + // or `builtins.object`. + // + // We do a more limited version of this in `global_symbol_ty`, + // but there are two crucial differences here: + // - If a member is looked up as an attribute, `__init__` is also available + // on the module, but it isn't available as a global from inside the module + // - If a member is looked up as an attribute, members on `builtins.object` + // are also available (because `types.ModuleType` inherits from `object`); + // these attributes are also not available as globals from inside the module. + // + // The same way as in `global_symbol_ty`, however, we need to be careful to + // ignore `__getattr__`. Typeshed has a fake `__getattr__` on `types.ModuleType` + // to help out with dynamic imports; we shouldn't use it for `ModuleLiteral` types + // where we know exactly which module we're dealing with. + if name != "__getattr__" && global_lookup.may_be_unbound(db) { + // TODO: this should use `.to_instance()`, but we don't understand instance attribute yet + let module_type_instance_member = + KnownClass::ModuleType.to_class(db).member(db, name); + if module_type_instance_member.is_unbound() { + global_lookup + } else { + global_lookup.replace_unbound_with(db, module_type_instance_member) + } + } else { + global_lookup + } + } Type::ClassLiteral(class) => class.class_member(db, name), Type::Instance(_) => { // TODO MRO? get_own_instance_member, get_instance_member @@ -1868,15 +1962,13 @@ impl<'db> TupleType<'db> { #[cfg(test)] mod tests { - use super::{ - builtins_symbol_ty, BytesLiteralType, IntersectionBuilder, StringLiteralType, Truthiness, - TupleType, Type, UnionType, - }; + use super::*; use crate::db::tests::TestDb; use crate::program::{Program, SearchPathSettings}; use crate::python_version::PythonVersion; use crate::ProgramSettings; use ruff_db::system::{DbWithTestSystem, SystemPathBuf}; + use ruff_python_ast as ast; use test_case::test_case; fn setup_db() -> TestDb { @@ -2256,4 +2348,16 @@ mod tests { assert_eq!(ty.into_type(&db).repr(&db), expected.into_type(&db)); } + + #[test] + fn module_type_symbols_includes_declared_types_but_not_referenced_types() { + let db = setup_db(); + let symbol_names = module_type_symbols(&db); + + let dunder_name_symbol_name = ast::name::Name::new_static("__name__"); + assert!(symbol_names.contains(&dunder_name_symbol_name)); + + let property_symbol_name = ast::name::Name::new_static("property"); + assert!(!symbol_names.contains(&property_symbol_name)); + } } diff --git a/crates/ruff_benchmark/benches/red_knot.rs b/crates/ruff_benchmark/benches/red_knot.rs index 795c333d7f..6e52ccc409 100644 --- a/crates/ruff_benchmark/benches/red_knot.rs +++ b/crates/ruff_benchmark/benches/red_knot.rs @@ -23,8 +23,6 @@ struct Case { const TOMLLIB_312_URL: &str = "https://raw.githubusercontent.com/python/cpython/8e8a4baf652f6e1cee7acde9d78c4b6154539748/Lib/tomllib"; static EXPECTED_DIAGNOSTICS: &[&str] = &[ - // We don't support `ModuleType`-attributes as globals yet: - "/src/tomllib/__init__.py:10:30: Name `__name__` used when not defined", // We don't support `*` imports yet: "/src/tomllib/_parser.py:7:29: Module `collections.abc` has no member `Iterable`", // We don't support terminal statements in control flow yet: