mirror of
https://github.com/astral-sh/ruff.git
synced 2025-07-29 07:53:52 +00:00
[red-knot] Fallback to attributes on types.ModuleType if a symbol can't be found in locals or globals (#13904)
This commit is contained in:
parent
7dd0c7f4bd
commit
d2c9f5e43c
5 changed files with 278 additions and 13 deletions
|
@ -0,0 +1,141 @@
|
|||
# Implicit globals from `types.ModuleType`
|
||||
|
||||
## Implicit `ModuleType` globals
|
||||
|
||||
All modules are instances of `types.ModuleType`.
|
||||
If a name can't be found in any local or global scope, we look it up
|
||||
as an attribute on `types.ModuleType` in typeshed
|
||||
before deciding that the name is unbound.
|
||||
|
||||
```py
|
||||
reveal_type(__name__) # revealed: str
|
||||
reveal_type(__file__) # revealed: str | None
|
||||
reveal_type(__loader__) # revealed: LoaderProtocol | None
|
||||
reveal_type(__package__) # revealed: str | None
|
||||
reveal_type(__spec__) # revealed: ModuleSpec | None
|
||||
|
||||
# TODO: generics
|
||||
reveal_type(__path__) # revealed: @Todo
|
||||
|
||||
# TODO: this should probably be added to typeshed; not sure why it isn't?
|
||||
# error: [unresolved-reference]
|
||||
# revealed: Unbound
|
||||
reveal_type(__doc__)
|
||||
|
||||
class X:
|
||||
reveal_type(__name__) # revealed: str
|
||||
|
||||
def foo():
|
||||
reveal_type(__name__) # revealed: str
|
||||
```
|
||||
|
||||
However, three attributes on `types.ModuleType` are not present as implicit
|
||||
module globals; these are excluded:
|
||||
|
||||
```py path=unbound_dunders.py
|
||||
# error: [unresolved-reference]
|
||||
# revealed: Unbound
|
||||
reveal_type(__getattr__)
|
||||
|
||||
# error: [unresolved-reference]
|
||||
# revealed: Unbound
|
||||
reveal_type(__dict__)
|
||||
|
||||
# error: [unresolved-reference]
|
||||
# revealed: Unbound
|
||||
reveal_type(__init__)
|
||||
```
|
||||
|
||||
## Accessed as attributes
|
||||
|
||||
`ModuleType` attributes can also be accessed as attributes on module-literal types.
|
||||
The special attributes `__dict__` and `__init__`, and all attributes on
|
||||
`builtins.object`, can also be accessed as attributes on module-literal types,
|
||||
despite the fact that these are inaccessible as globals from inside the module:
|
||||
|
||||
```py
|
||||
import typing
|
||||
|
||||
reveal_type(typing.__name__) # revealed: str
|
||||
reveal_type(typing.__init__) # revealed: Literal[__init__]
|
||||
|
||||
# These come from `builtins.object`, not `types.ModuleType`:
|
||||
# TODO: we don't currently understand `types.ModuleType` as inheriting from `object`;
|
||||
# these should not reveal `Unbound`:
|
||||
reveal_type(typing.__eq__) # revealed: Unbound
|
||||
reveal_type(typing.__class__) # revealed: Unbound
|
||||
reveal_type(typing.__module__) # revealed: Unbound
|
||||
|
||||
# TODO: needs support for attribute access on instances, properties and generics;
|
||||
# should be `dict[str, Any]`
|
||||
reveal_type(typing.__dict__) # revealed: @Todo
|
||||
```
|
||||
|
||||
Typeshed includes a fake `__getattr__` method in the stub for `types.ModuleType`
|
||||
to help out with dynamic imports; but we ignore that for module-literal types
|
||||
where we know exactly which module we're dealing with:
|
||||
|
||||
```py path=__getattr__.py
|
||||
import typing
|
||||
|
||||
reveal_type(typing.__getattr__) # revealed: Unbound
|
||||
```
|
||||
|
||||
## `types.ModuleType.__dict__` takes precedence over global variable `__dict__`
|
||||
|
||||
It's impossible to override the `__dict__` attribute of `types.ModuleType`
|
||||
instances from inside the module; we should prioritise the attribute in
|
||||
the `types.ModuleType` stub over a variable named `__dict__` in the module's
|
||||
global namespace:
|
||||
|
||||
```py path=foo.py
|
||||
__dict__ = "foo"
|
||||
|
||||
reveal_type(__dict__) # revealed: Literal["foo"]
|
||||
```
|
||||
|
||||
```py path=bar.py
|
||||
import foo
|
||||
from foo import __dict__ as foo_dict
|
||||
|
||||
# TODO: needs support for attribute access on instances, properties, and generics;
|
||||
# should be `dict[str, Any]` for both of these:
|
||||
reveal_type(foo.__dict__) # revealed: @Todo
|
||||
reveal_type(foo_dict) # revealed: @Todo
|
||||
```
|
||||
|
||||
## Conditionally global or `ModuleType` attribute
|
||||
|
||||
Attributes overridden in the module namespace take priority.
|
||||
If a builtin name is conditionally defined as a global, however,
|
||||
a name lookup should union the `ModuleType` type with the conditionally defined type:
|
||||
|
||||
```py
|
||||
__file__ = 42
|
||||
|
||||
def returns_bool() -> bool:
|
||||
return True
|
||||
|
||||
if returns_bool():
|
||||
__name__ = 1
|
||||
|
||||
reveal_type(__file__) # revealed: Literal[42]
|
||||
reveal_type(__name__) # revealed: str | Literal[1]
|
||||
```
|
||||
|
||||
## Conditionally global or `ModuleType` attribute, with annotation
|
||||
|
||||
The same is true if the name is annotated:
|
||||
|
||||
```py
|
||||
__file__: int = 42
|
||||
|
||||
def returns_bool() -> bool:
|
||||
return True
|
||||
|
||||
if returns_bool():
|
||||
__name__: int = 1
|
||||
|
||||
reveal_type(__file__) # revealed: Literal[42]
|
||||
reveal_type(__name__) # revealed: str | Literal[1]
|
||||
```
|
|
@ -195,6 +195,10 @@ impl<'db> SemanticIndexBuilder<'db> {
|
|||
self.current_symbol_table().mark_symbol_bound(id);
|
||||
}
|
||||
|
||||
fn mark_symbol_declared(&mut self, id: ScopedSymbolId) {
|
||||
self.current_symbol_table().mark_symbol_declared(id);
|
||||
}
|
||||
|
||||
fn mark_symbol_used(&mut self, id: ScopedSymbolId) {
|
||||
self.current_symbol_table().mark_symbol_used(id);
|
||||
}
|
||||
|
@ -226,6 +230,9 @@ impl<'db> SemanticIndexBuilder<'db> {
|
|||
if category.is_binding() {
|
||||
self.mark_symbol_bound(symbol);
|
||||
}
|
||||
if category.is_declaration() {
|
||||
self.mark_symbol_declared(symbol);
|
||||
}
|
||||
|
||||
let use_def = self.current_use_def_map_mut();
|
||||
match category {
|
||||
|
@ -359,6 +366,7 @@ impl<'db> SemanticIndexBuilder<'db> {
|
|||
// note that the "bound" on the typevar is a totally different thing than whether
|
||||
// or not a name is "bound" by a typevar declaration; the latter is always true.
|
||||
self.mark_symbol_bound(symbol);
|
||||
self.mark_symbol_declared(symbol);
|
||||
if let Some(bounds) = bound {
|
||||
self.visit_expr(bounds);
|
||||
}
|
||||
|
|
|
@ -47,17 +47,27 @@ impl Symbol {
|
|||
pub fn is_bound(&self) -> bool {
|
||||
self.flags.contains(SymbolFlags::IS_BOUND)
|
||||
}
|
||||
|
||||
/// Is the symbol declared in its containing scope?
|
||||
pub fn is_declared(&self) -> bool {
|
||||
self.flags.contains(SymbolFlags::IS_DECLARED)
|
||||
}
|
||||
}
|
||||
|
||||
bitflags! {
|
||||
/// Flags that can be queried to obtain information about a symbol in a given scope.
|
||||
///
|
||||
/// See the doc-comment at the top of [`super::use_def`] for explanations of what it
|
||||
/// means for a symbol to be *bound* as opposed to *declared*.
|
||||
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
|
||||
struct SymbolFlags: u8 {
|
||||
const IS_USED = 1 << 0;
|
||||
const IS_BOUND = 1 << 1;
|
||||
const IS_BOUND = 1 << 1;
|
||||
const IS_DECLARED = 1 << 2;
|
||||
/// TODO: This flag is not yet set by anything
|
||||
const MARKED_GLOBAL = 1 << 2;
|
||||
const MARKED_GLOBAL = 1 << 3;
|
||||
/// TODO: This flag is not yet set by anything
|
||||
const MARKED_NONLOCAL = 1 << 3;
|
||||
const MARKED_NONLOCAL = 1 << 4;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -298,6 +308,10 @@ impl SymbolTableBuilder {
|
|||
self.table.symbols[id].insert_flags(SymbolFlags::IS_BOUND);
|
||||
}
|
||||
|
||||
pub(super) fn mark_symbol_declared(&mut self, id: ScopedSymbolId) {
|
||||
self.table.symbols[id].insert_flags(SymbolFlags::IS_DECLARED);
|
||||
}
|
||||
|
||||
pub(super) fn mark_symbol_used(&mut self, id: ScopedSymbolId) {
|
||||
self.table.symbols[id].insert_flags(SymbolFlags::IS_USED);
|
||||
}
|
||||
|
|
|
@ -5,7 +5,7 @@ use ruff_python_ast as ast;
|
|||
use crate::module_resolver::file_to_module;
|
||||
use crate::semantic_index::ast_ids::HasScopedAstId;
|
||||
use crate::semantic_index::definition::{Definition, DefinitionKind};
|
||||
use crate::semantic_index::symbol::{ScopeId, ScopedSymbolId};
|
||||
use crate::semantic_index::symbol::{ScopeId, ScopedSymbolId, Symbol};
|
||||
use crate::semantic_index::{
|
||||
global_scope, semantic_index, symbol_table, use_def_map, BindingWithConstraints,
|
||||
BindingWithConstraintsIterator, DeclarationsIterator,
|
||||
|
@ -88,9 +88,64 @@ fn symbol_ty<'db>(db: &'db dyn Db, scope: ScopeId<'db>, name: &str) -> Type<'db>
|
|||
.unwrap_or(Type::Unbound)
|
||||
}
|
||||
|
||||
/// Shorthand for `symbol_ty` that looks up a module-global symbol by name in a file.
|
||||
/// Return a list of the symbols that typeshed declares in the body scope of
|
||||
/// the stub for the class `types.ModuleType`.
|
||||
///
|
||||
/// Conceptually this could be a `Set` rather than a list,
|
||||
/// but the number of symbols declared in this scope is likely to be very small,
|
||||
/// so the cost of hashing the names is likely to be more expensive than it's worth.
|
||||
#[salsa::tracked(return_ref)]
|
||||
fn module_type_symbols<'db>(db: &'db dyn Db) -> smallvec::SmallVec<[ast::name::Name; 8]> {
|
||||
let Some(module_type) = KnownClass::ModuleType
|
||||
.to_class(db)
|
||||
.into_class_literal_type()
|
||||
else {
|
||||
// The most likely way we get here is if a user specified a `--custom-typeshed-dir`
|
||||
// without a `types.pyi` stub in the `stdlib/` directory
|
||||
return smallvec::SmallVec::default();
|
||||
};
|
||||
|
||||
let module_type_scope = module_type.body_scope(db);
|
||||
let module_type_symbol_table = symbol_table(db, module_type_scope);
|
||||
|
||||
// `__dict__` and `__init__` are very special members that can be accessed as attributes
|
||||
// on the module when imported, but cannot be accessed as globals *inside* the module.
|
||||
//
|
||||
// `__getattr__` is even more special: it doesn't exist at runtime, but typeshed includes it
|
||||
// to reduce false positives associated with functions that dynamically import modules
|
||||
// and return `Instance(types.ModuleType)`. We should ignore it for any known module-literal type.
|
||||
module_type_symbol_table
|
||||
.symbols()
|
||||
.filter(|symbol| symbol.is_declared())
|
||||
.map(Symbol::name)
|
||||
.filter(|symbol_name| !matches!(&***symbol_name, "__dict__" | "__getattr__" | "__init__"))
|
||||
.cloned()
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Looks up a module-global symbol by name in a file.
|
||||
pub(crate) fn global_symbol_ty<'db>(db: &'db dyn Db, file: File, name: &str) -> Type<'db> {
|
||||
symbol_ty(db, global_scope(db, file), name)
|
||||
let explicit_ty = symbol_ty(db, global_scope(db, file), name);
|
||||
if !explicit_ty.may_be_unbound(db) {
|
||||
return explicit_ty;
|
||||
}
|
||||
|
||||
// Not defined explicitly in the global scope?
|
||||
// All modules are instances of `types.ModuleType`;
|
||||
// look it up there (with a few very special exceptions)
|
||||
if module_type_symbols(db)
|
||||
.iter()
|
||||
.any(|module_type_member| &**module_type_member == name)
|
||||
{
|
||||
// TODO: this should use `.to_instance(db)`. but we don't understand attribute access
|
||||
// on instance types yet.
|
||||
let module_type_member = KnownClass::ModuleType.to_class(db).member(db, name);
|
||||
if !module_type_member.is_unbound() {
|
||||
return explicit_ty.replace_unbound_with(db, module_type_member);
|
||||
}
|
||||
}
|
||||
|
||||
explicit_ty
|
||||
}
|
||||
|
||||
/// Infer the type of a binding.
|
||||
|
@ -821,7 +876,46 @@ impl<'db> Type<'db> {
|
|||
// TODO: attribute lookup on function type
|
||||
Type::Todo
|
||||
}
|
||||
Type::ModuleLiteral(file) => global_symbol_ty(db, *file, name),
|
||||
Type::ModuleLiteral(file) => {
|
||||
// `__dict__` is a very special member that is never overridden by module globals;
|
||||
// we should always look it up directly as an attribute on `types.ModuleType`,
|
||||
// never in the global scope of the module.
|
||||
if name == "__dict__" {
|
||||
return KnownClass::ModuleType
|
||||
.to_instance(db)
|
||||
.member(db, "__dict__");
|
||||
}
|
||||
|
||||
let global_lookup = symbol_ty(db, global_scope(db, *file), name);
|
||||
|
||||
// If it's unbound, check if it's present as an instance on `types.ModuleType`
|
||||
// or `builtins.object`.
|
||||
//
|
||||
// We do a more limited version of this in `global_symbol_ty`,
|
||||
// but there are two crucial differences here:
|
||||
// - If a member is looked up as an attribute, `__init__` is also available
|
||||
// on the module, but it isn't available as a global from inside the module
|
||||
// - If a member is looked up as an attribute, members on `builtins.object`
|
||||
// are also available (because `types.ModuleType` inherits from `object`);
|
||||
// these attributes are also not available as globals from inside the module.
|
||||
//
|
||||
// The same way as in `global_symbol_ty`, however, we need to be careful to
|
||||
// ignore `__getattr__`. Typeshed has a fake `__getattr__` on `types.ModuleType`
|
||||
// to help out with dynamic imports; we shouldn't use it for `ModuleLiteral` types
|
||||
// where we know exactly which module we're dealing with.
|
||||
if name != "__getattr__" && global_lookup.may_be_unbound(db) {
|
||||
// TODO: this should use `.to_instance()`, but we don't understand instance attribute yet
|
||||
let module_type_instance_member =
|
||||
KnownClass::ModuleType.to_class(db).member(db, name);
|
||||
if module_type_instance_member.is_unbound() {
|
||||
global_lookup
|
||||
} else {
|
||||
global_lookup.replace_unbound_with(db, module_type_instance_member)
|
||||
}
|
||||
} else {
|
||||
global_lookup
|
||||
}
|
||||
}
|
||||
Type::ClassLiteral(class) => class.class_member(db, name),
|
||||
Type::Instance(_) => {
|
||||
// TODO MRO? get_own_instance_member, get_instance_member
|
||||
|
@ -1868,15 +1962,13 @@ impl<'db> TupleType<'db> {
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{
|
||||
builtins_symbol_ty, BytesLiteralType, IntersectionBuilder, StringLiteralType, Truthiness,
|
||||
TupleType, Type, UnionType,
|
||||
};
|
||||
use super::*;
|
||||
use crate::db::tests::TestDb;
|
||||
use crate::program::{Program, SearchPathSettings};
|
||||
use crate::python_version::PythonVersion;
|
||||
use crate::ProgramSettings;
|
||||
use ruff_db::system::{DbWithTestSystem, SystemPathBuf};
|
||||
use ruff_python_ast as ast;
|
||||
use test_case::test_case;
|
||||
|
||||
fn setup_db() -> TestDb {
|
||||
|
@ -2256,4 +2348,16 @@ mod tests {
|
|||
|
||||
assert_eq!(ty.into_type(&db).repr(&db), expected.into_type(&db));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn module_type_symbols_includes_declared_types_but_not_referenced_types() {
|
||||
let db = setup_db();
|
||||
let symbol_names = module_type_symbols(&db);
|
||||
|
||||
let dunder_name_symbol_name = ast::name::Name::new_static("__name__");
|
||||
assert!(symbol_names.contains(&dunder_name_symbol_name));
|
||||
|
||||
let property_symbol_name = ast::name::Name::new_static("property");
|
||||
assert!(!symbol_names.contains(&property_symbol_name));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue