[red-knot] Fallback to attributes on types.ModuleType if a symbol can't be found in locals or globals (#13904)

This commit is contained in:
Alex Waygood 2024-10-29 10:59:03 +00:00 committed by GitHub
parent 7dd0c7f4bd
commit d2c9f5e43c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 278 additions and 13 deletions

View file

@ -0,0 +1,141 @@
# Implicit globals from `types.ModuleType`
## Implicit `ModuleType` globals
All modules are instances of `types.ModuleType`.
If a name can't be found in any local or global scope, we look it up
as an attribute on `types.ModuleType` in typeshed
before deciding that the name is unbound.
```py
reveal_type(__name__) # revealed: str
reveal_type(__file__) # revealed: str | None
reveal_type(__loader__) # revealed: LoaderProtocol | None
reveal_type(__package__) # revealed: str | None
reveal_type(__spec__) # revealed: ModuleSpec | None
# TODO: generics
reveal_type(__path__) # revealed: @Todo
# TODO: this should probably be added to typeshed; not sure why it isn't?
# error: [unresolved-reference]
# revealed: Unbound
reveal_type(__doc__)
class X:
reveal_type(__name__) # revealed: str
def foo():
reveal_type(__name__) # revealed: str
```
However, three attributes on `types.ModuleType` are not present as implicit
module globals; these are excluded:
```py path=unbound_dunders.py
# error: [unresolved-reference]
# revealed: Unbound
reveal_type(__getattr__)
# error: [unresolved-reference]
# revealed: Unbound
reveal_type(__dict__)
# error: [unresolved-reference]
# revealed: Unbound
reveal_type(__init__)
```
## Accessed as attributes
`ModuleType` attributes can also be accessed as attributes on module-literal types.
The special attributes `__dict__` and `__init__`, and all attributes on
`builtins.object`, can also be accessed as attributes on module-literal types,
despite the fact that these are inaccessible as globals from inside the module:
```py
import typing
reveal_type(typing.__name__) # revealed: str
reveal_type(typing.__init__) # revealed: Literal[__init__]
# These come from `builtins.object`, not `types.ModuleType`:
# TODO: we don't currently understand `types.ModuleType` as inheriting from `object`;
# these should not reveal `Unbound`:
reveal_type(typing.__eq__) # revealed: Unbound
reveal_type(typing.__class__) # revealed: Unbound
reveal_type(typing.__module__) # revealed: Unbound
# TODO: needs support for attribute access on instances, properties and generics;
# should be `dict[str, Any]`
reveal_type(typing.__dict__) # revealed: @Todo
```
Typeshed includes a fake `__getattr__` method in the stub for `types.ModuleType`
to help out with dynamic imports; but we ignore that for module-literal types
where we know exactly which module we're dealing with:
```py path=__getattr__.py
import typing
reveal_type(typing.__getattr__) # revealed: Unbound
```
## `types.ModuleType.__dict__` takes precedence over global variable `__dict__`
It's impossible to override the `__dict__` attribute of `types.ModuleType`
instances from inside the module; we should prioritise the attribute in
the `types.ModuleType` stub over a variable named `__dict__` in the module's
global namespace:
```py path=foo.py
__dict__ = "foo"
reveal_type(__dict__) # revealed: Literal["foo"]
```
```py path=bar.py
import foo
from foo import __dict__ as foo_dict
# TODO: needs support for attribute access on instances, properties, and generics;
# should be `dict[str, Any]` for both of these:
reveal_type(foo.__dict__) # revealed: @Todo
reveal_type(foo_dict) # revealed: @Todo
```
## Conditionally global or `ModuleType` attribute
Attributes overridden in the module namespace take priority.
If a builtin name is conditionally defined as a global, however,
a name lookup should union the `ModuleType` type with the conditionally defined type:
```py
__file__ = 42
def returns_bool() -> bool:
return True
if returns_bool():
__name__ = 1
reveal_type(__file__) # revealed: Literal[42]
reveal_type(__name__) # revealed: str | Literal[1]
```
## Conditionally global or `ModuleType` attribute, with annotation
The same is true if the name is annotated:
```py
__file__: int = 42
def returns_bool() -> bool:
return True
if returns_bool():
__name__: int = 1
reveal_type(__file__) # revealed: Literal[42]
reveal_type(__name__) # revealed: str | Literal[1]
```

View file

@ -195,6 +195,10 @@ impl<'db> SemanticIndexBuilder<'db> {
self.current_symbol_table().mark_symbol_bound(id);
}
fn mark_symbol_declared(&mut self, id: ScopedSymbolId) {
self.current_symbol_table().mark_symbol_declared(id);
}
fn mark_symbol_used(&mut self, id: ScopedSymbolId) {
self.current_symbol_table().mark_symbol_used(id);
}
@ -226,6 +230,9 @@ impl<'db> SemanticIndexBuilder<'db> {
if category.is_binding() {
self.mark_symbol_bound(symbol);
}
if category.is_declaration() {
self.mark_symbol_declared(symbol);
}
let use_def = self.current_use_def_map_mut();
match category {
@ -359,6 +366,7 @@ impl<'db> SemanticIndexBuilder<'db> {
// note that the "bound" on the typevar is a totally different thing than whether
// or not a name is "bound" by a typevar declaration; the latter is always true.
self.mark_symbol_bound(symbol);
self.mark_symbol_declared(symbol);
if let Some(bounds) = bound {
self.visit_expr(bounds);
}

View file

@ -47,17 +47,27 @@ impl Symbol {
pub fn is_bound(&self) -> bool {
self.flags.contains(SymbolFlags::IS_BOUND)
}
/// Is the symbol declared in its containing scope?
pub fn is_declared(&self) -> bool {
self.flags.contains(SymbolFlags::IS_DECLARED)
}
}
bitflags! {
/// Flags that can be queried to obtain information about a symbol in a given scope.
///
/// See the doc-comment at the top of [`super::use_def`] for explanations of what it
/// means for a symbol to be *bound* as opposed to *declared*.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
struct SymbolFlags: u8 {
const IS_USED = 1 << 0;
const IS_BOUND = 1 << 1;
const IS_BOUND = 1 << 1;
const IS_DECLARED = 1 << 2;
/// TODO: This flag is not yet set by anything
const MARKED_GLOBAL = 1 << 2;
const MARKED_GLOBAL = 1 << 3;
/// TODO: This flag is not yet set by anything
const MARKED_NONLOCAL = 1 << 3;
const MARKED_NONLOCAL = 1 << 4;
}
}
@ -298,6 +308,10 @@ impl SymbolTableBuilder {
self.table.symbols[id].insert_flags(SymbolFlags::IS_BOUND);
}
pub(super) fn mark_symbol_declared(&mut self, id: ScopedSymbolId) {
self.table.symbols[id].insert_flags(SymbolFlags::IS_DECLARED);
}
pub(super) fn mark_symbol_used(&mut self, id: ScopedSymbolId) {
self.table.symbols[id].insert_flags(SymbolFlags::IS_USED);
}

View file

@ -5,7 +5,7 @@ use ruff_python_ast as ast;
use crate::module_resolver::file_to_module;
use crate::semantic_index::ast_ids::HasScopedAstId;
use crate::semantic_index::definition::{Definition, DefinitionKind};
use crate::semantic_index::symbol::{ScopeId, ScopedSymbolId};
use crate::semantic_index::symbol::{ScopeId, ScopedSymbolId, Symbol};
use crate::semantic_index::{
global_scope, semantic_index, symbol_table, use_def_map, BindingWithConstraints,
BindingWithConstraintsIterator, DeclarationsIterator,
@ -88,9 +88,64 @@ fn symbol_ty<'db>(db: &'db dyn Db, scope: ScopeId<'db>, name: &str) -> Type<'db>
.unwrap_or(Type::Unbound)
}
/// Shorthand for `symbol_ty` that looks up a module-global symbol by name in a file.
/// Return a list of the symbols that typeshed declares in the body scope of
/// the stub for the class `types.ModuleType`.
///
/// Conceptually this could be a `Set` rather than a list,
/// but the number of symbols declared in this scope is likely to be very small,
/// so the cost of hashing the names is likely to be more expensive than it's worth.
#[salsa::tracked(return_ref)]
fn module_type_symbols<'db>(db: &'db dyn Db) -> smallvec::SmallVec<[ast::name::Name; 8]> {
let Some(module_type) = KnownClass::ModuleType
.to_class(db)
.into_class_literal_type()
else {
// The most likely way we get here is if a user specified a `--custom-typeshed-dir`
// without a `types.pyi` stub in the `stdlib/` directory
return smallvec::SmallVec::default();
};
let module_type_scope = module_type.body_scope(db);
let module_type_symbol_table = symbol_table(db, module_type_scope);
// `__dict__` and `__init__` are very special members that can be accessed as attributes
// on the module when imported, but cannot be accessed as globals *inside* the module.
//
// `__getattr__` is even more special: it doesn't exist at runtime, but typeshed includes it
// to reduce false positives associated with functions that dynamically import modules
// and return `Instance(types.ModuleType)`. We should ignore it for any known module-literal type.
module_type_symbol_table
.symbols()
.filter(|symbol| symbol.is_declared())
.map(Symbol::name)
.filter(|symbol_name| !matches!(&***symbol_name, "__dict__" | "__getattr__" | "__init__"))
.cloned()
.collect()
}
/// Looks up a module-global symbol by name in a file.
pub(crate) fn global_symbol_ty<'db>(db: &'db dyn Db, file: File, name: &str) -> Type<'db> {
symbol_ty(db, global_scope(db, file), name)
let explicit_ty = symbol_ty(db, global_scope(db, file), name);
if !explicit_ty.may_be_unbound(db) {
return explicit_ty;
}
// Not defined explicitly in the global scope?
// All modules are instances of `types.ModuleType`;
// look it up there (with a few very special exceptions)
if module_type_symbols(db)
.iter()
.any(|module_type_member| &**module_type_member == name)
{
// TODO: this should use `.to_instance(db)`. but we don't understand attribute access
// on instance types yet.
let module_type_member = KnownClass::ModuleType.to_class(db).member(db, name);
if !module_type_member.is_unbound() {
return explicit_ty.replace_unbound_with(db, module_type_member);
}
}
explicit_ty
}
/// Infer the type of a binding.
@ -821,7 +876,46 @@ impl<'db> Type<'db> {
// TODO: attribute lookup on function type
Type::Todo
}
Type::ModuleLiteral(file) => global_symbol_ty(db, *file, name),
Type::ModuleLiteral(file) => {
// `__dict__` is a very special member that is never overridden by module globals;
// we should always look it up directly as an attribute on `types.ModuleType`,
// never in the global scope of the module.
if name == "__dict__" {
return KnownClass::ModuleType
.to_instance(db)
.member(db, "__dict__");
}
let global_lookup = symbol_ty(db, global_scope(db, *file), name);
// If it's unbound, check if it's present as an instance on `types.ModuleType`
// or `builtins.object`.
//
// We do a more limited version of this in `global_symbol_ty`,
// but there are two crucial differences here:
// - If a member is looked up as an attribute, `__init__` is also available
// on the module, but it isn't available as a global from inside the module
// - If a member is looked up as an attribute, members on `builtins.object`
// are also available (because `types.ModuleType` inherits from `object`);
// these attributes are also not available as globals from inside the module.
//
// The same way as in `global_symbol_ty`, however, we need to be careful to
// ignore `__getattr__`. Typeshed has a fake `__getattr__` on `types.ModuleType`
// to help out with dynamic imports; we shouldn't use it for `ModuleLiteral` types
// where we know exactly which module we're dealing with.
if name != "__getattr__" && global_lookup.may_be_unbound(db) {
// TODO: this should use `.to_instance()`, but we don't understand instance attribute yet
let module_type_instance_member =
KnownClass::ModuleType.to_class(db).member(db, name);
if module_type_instance_member.is_unbound() {
global_lookup
} else {
global_lookup.replace_unbound_with(db, module_type_instance_member)
}
} else {
global_lookup
}
}
Type::ClassLiteral(class) => class.class_member(db, name),
Type::Instance(_) => {
// TODO MRO? get_own_instance_member, get_instance_member
@ -1868,15 +1962,13 @@ impl<'db> TupleType<'db> {
#[cfg(test)]
mod tests {
use super::{
builtins_symbol_ty, BytesLiteralType, IntersectionBuilder, StringLiteralType, Truthiness,
TupleType, Type, UnionType,
};
use super::*;
use crate::db::tests::TestDb;
use crate::program::{Program, SearchPathSettings};
use crate::python_version::PythonVersion;
use crate::ProgramSettings;
use ruff_db::system::{DbWithTestSystem, SystemPathBuf};
use ruff_python_ast as ast;
use test_case::test_case;
fn setup_db() -> TestDb {
@ -2256,4 +2348,16 @@ mod tests {
assert_eq!(ty.into_type(&db).repr(&db), expected.into_type(&db));
}
#[test]
fn module_type_symbols_includes_declared_types_but_not_referenced_types() {
let db = setup_db();
let symbol_names = module_type_symbols(&db);
let dunder_name_symbol_name = ast::name::Name::new_static("__name__");
assert!(symbol_names.contains(&dunder_name_symbol_name));
let property_symbol_name = ast::name::Name::new_static("property");
assert!(!symbol_names.contains(&property_symbol_name));
}
}