mirror of
https://github.com/astral-sh/ruff.git
synced 2025-07-30 16:33:49 +00:00
[red-knot] Fallback to attributes on types.ModuleType if a symbol can't be found in locals or globals (#13904)
This commit is contained in:
parent
7dd0c7f4bd
commit
d2c9f5e43c
5 changed files with 278 additions and 13 deletions
|
@ -0,0 +1,141 @@
|
||||||
|
# Implicit globals from `types.ModuleType`
|
||||||
|
|
||||||
|
## Implicit `ModuleType` globals
|
||||||
|
|
||||||
|
All modules are instances of `types.ModuleType`.
|
||||||
|
If a name can't be found in any local or global scope, we look it up
|
||||||
|
as an attribute on `types.ModuleType` in typeshed
|
||||||
|
before deciding that the name is unbound.
|
||||||
|
|
||||||
|
```py
|
||||||
|
reveal_type(__name__) # revealed: str
|
||||||
|
reveal_type(__file__) # revealed: str | None
|
||||||
|
reveal_type(__loader__) # revealed: LoaderProtocol | None
|
||||||
|
reveal_type(__package__) # revealed: str | None
|
||||||
|
reveal_type(__spec__) # revealed: ModuleSpec | None
|
||||||
|
|
||||||
|
# TODO: generics
|
||||||
|
reveal_type(__path__) # revealed: @Todo
|
||||||
|
|
||||||
|
# TODO: this should probably be added to typeshed; not sure why it isn't?
|
||||||
|
# error: [unresolved-reference]
|
||||||
|
# revealed: Unbound
|
||||||
|
reveal_type(__doc__)
|
||||||
|
|
||||||
|
class X:
|
||||||
|
reveal_type(__name__) # revealed: str
|
||||||
|
|
||||||
|
def foo():
|
||||||
|
reveal_type(__name__) # revealed: str
|
||||||
|
```
|
||||||
|
|
||||||
|
However, three attributes on `types.ModuleType` are not present as implicit
|
||||||
|
module globals; these are excluded:
|
||||||
|
|
||||||
|
```py path=unbound_dunders.py
|
||||||
|
# error: [unresolved-reference]
|
||||||
|
# revealed: Unbound
|
||||||
|
reveal_type(__getattr__)
|
||||||
|
|
||||||
|
# error: [unresolved-reference]
|
||||||
|
# revealed: Unbound
|
||||||
|
reveal_type(__dict__)
|
||||||
|
|
||||||
|
# error: [unresolved-reference]
|
||||||
|
# revealed: Unbound
|
||||||
|
reveal_type(__init__)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Accessed as attributes
|
||||||
|
|
||||||
|
`ModuleType` attributes can also be accessed as attributes on module-literal types.
|
||||||
|
The special attributes `__dict__` and `__init__`, and all attributes on
|
||||||
|
`builtins.object`, can also be accessed as attributes on module-literal types,
|
||||||
|
despite the fact that these are inaccessible as globals from inside the module:
|
||||||
|
|
||||||
|
```py
|
||||||
|
import typing
|
||||||
|
|
||||||
|
reveal_type(typing.__name__) # revealed: str
|
||||||
|
reveal_type(typing.__init__) # revealed: Literal[__init__]
|
||||||
|
|
||||||
|
# These come from `builtins.object`, not `types.ModuleType`:
|
||||||
|
# TODO: we don't currently understand `types.ModuleType` as inheriting from `object`;
|
||||||
|
# these should not reveal `Unbound`:
|
||||||
|
reveal_type(typing.__eq__) # revealed: Unbound
|
||||||
|
reveal_type(typing.__class__) # revealed: Unbound
|
||||||
|
reveal_type(typing.__module__) # revealed: Unbound
|
||||||
|
|
||||||
|
# TODO: needs support for attribute access on instances, properties and generics;
|
||||||
|
# should be `dict[str, Any]`
|
||||||
|
reveal_type(typing.__dict__) # revealed: @Todo
|
||||||
|
```
|
||||||
|
|
||||||
|
Typeshed includes a fake `__getattr__` method in the stub for `types.ModuleType`
|
||||||
|
to help out with dynamic imports; but we ignore that for module-literal types
|
||||||
|
where we know exactly which module we're dealing with:
|
||||||
|
|
||||||
|
```py path=__getattr__.py
|
||||||
|
import typing
|
||||||
|
|
||||||
|
reveal_type(typing.__getattr__) # revealed: Unbound
|
||||||
|
```
|
||||||
|
|
||||||
|
## `types.ModuleType.__dict__` takes precedence over global variable `__dict__`
|
||||||
|
|
||||||
|
It's impossible to override the `__dict__` attribute of `types.ModuleType`
|
||||||
|
instances from inside the module; we should prioritise the attribute in
|
||||||
|
the `types.ModuleType` stub over a variable named `__dict__` in the module's
|
||||||
|
global namespace:
|
||||||
|
|
||||||
|
```py path=foo.py
|
||||||
|
__dict__ = "foo"
|
||||||
|
|
||||||
|
reveal_type(__dict__) # revealed: Literal["foo"]
|
||||||
|
```
|
||||||
|
|
||||||
|
```py path=bar.py
|
||||||
|
import foo
|
||||||
|
from foo import __dict__ as foo_dict
|
||||||
|
|
||||||
|
# TODO: needs support for attribute access on instances, properties, and generics;
|
||||||
|
# should be `dict[str, Any]` for both of these:
|
||||||
|
reveal_type(foo.__dict__) # revealed: @Todo
|
||||||
|
reveal_type(foo_dict) # revealed: @Todo
|
||||||
|
```
|
||||||
|
|
||||||
|
## Conditionally global or `ModuleType` attribute
|
||||||
|
|
||||||
|
Attributes overridden in the module namespace take priority.
|
||||||
|
If a builtin name is conditionally defined as a global, however,
|
||||||
|
a name lookup should union the `ModuleType` type with the conditionally defined type:
|
||||||
|
|
||||||
|
```py
|
||||||
|
__file__ = 42
|
||||||
|
|
||||||
|
def returns_bool() -> bool:
|
||||||
|
return True
|
||||||
|
|
||||||
|
if returns_bool():
|
||||||
|
__name__ = 1
|
||||||
|
|
||||||
|
reveal_type(__file__) # revealed: Literal[42]
|
||||||
|
reveal_type(__name__) # revealed: str | Literal[1]
|
||||||
|
```
|
||||||
|
|
||||||
|
## Conditionally global or `ModuleType` attribute, with annotation
|
||||||
|
|
||||||
|
The same is true if the name is annotated:
|
||||||
|
|
||||||
|
```py
|
||||||
|
__file__: int = 42
|
||||||
|
|
||||||
|
def returns_bool() -> bool:
|
||||||
|
return True
|
||||||
|
|
||||||
|
if returns_bool():
|
||||||
|
__name__: int = 1
|
||||||
|
|
||||||
|
reveal_type(__file__) # revealed: Literal[42]
|
||||||
|
reveal_type(__name__) # revealed: str | Literal[1]
|
||||||
|
```
|
|
@ -195,6 +195,10 @@ impl<'db> SemanticIndexBuilder<'db> {
|
||||||
self.current_symbol_table().mark_symbol_bound(id);
|
self.current_symbol_table().mark_symbol_bound(id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn mark_symbol_declared(&mut self, id: ScopedSymbolId) {
|
||||||
|
self.current_symbol_table().mark_symbol_declared(id);
|
||||||
|
}
|
||||||
|
|
||||||
fn mark_symbol_used(&mut self, id: ScopedSymbolId) {
|
fn mark_symbol_used(&mut self, id: ScopedSymbolId) {
|
||||||
self.current_symbol_table().mark_symbol_used(id);
|
self.current_symbol_table().mark_symbol_used(id);
|
||||||
}
|
}
|
||||||
|
@ -226,6 +230,9 @@ impl<'db> SemanticIndexBuilder<'db> {
|
||||||
if category.is_binding() {
|
if category.is_binding() {
|
||||||
self.mark_symbol_bound(symbol);
|
self.mark_symbol_bound(symbol);
|
||||||
}
|
}
|
||||||
|
if category.is_declaration() {
|
||||||
|
self.mark_symbol_declared(symbol);
|
||||||
|
}
|
||||||
|
|
||||||
let use_def = self.current_use_def_map_mut();
|
let use_def = self.current_use_def_map_mut();
|
||||||
match category {
|
match category {
|
||||||
|
@ -359,6 +366,7 @@ impl<'db> SemanticIndexBuilder<'db> {
|
||||||
// note that the "bound" on the typevar is a totally different thing than whether
|
// note that the "bound" on the typevar is a totally different thing than whether
|
||||||
// or not a name is "bound" by a typevar declaration; the latter is always true.
|
// or not a name is "bound" by a typevar declaration; the latter is always true.
|
||||||
self.mark_symbol_bound(symbol);
|
self.mark_symbol_bound(symbol);
|
||||||
|
self.mark_symbol_declared(symbol);
|
||||||
if let Some(bounds) = bound {
|
if let Some(bounds) = bound {
|
||||||
self.visit_expr(bounds);
|
self.visit_expr(bounds);
|
||||||
}
|
}
|
||||||
|
|
|
@ -47,17 +47,27 @@ impl Symbol {
|
||||||
pub fn is_bound(&self) -> bool {
|
pub fn is_bound(&self) -> bool {
|
||||||
self.flags.contains(SymbolFlags::IS_BOUND)
|
self.flags.contains(SymbolFlags::IS_BOUND)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Is the symbol declared in its containing scope?
|
||||||
|
pub fn is_declared(&self) -> bool {
|
||||||
|
self.flags.contains(SymbolFlags::IS_DECLARED)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bitflags! {
|
bitflags! {
|
||||||
|
/// Flags that can be queried to obtain information about a symbol in a given scope.
|
||||||
|
///
|
||||||
|
/// See the doc-comment at the top of [`super::use_def`] for explanations of what it
|
||||||
|
/// means for a symbol to be *bound* as opposed to *declared*.
|
||||||
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
|
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
|
||||||
struct SymbolFlags: u8 {
|
struct SymbolFlags: u8 {
|
||||||
const IS_USED = 1 << 0;
|
const IS_USED = 1 << 0;
|
||||||
const IS_BOUND = 1 << 1;
|
const IS_BOUND = 1 << 1;
|
||||||
|
const IS_DECLARED = 1 << 2;
|
||||||
/// TODO: This flag is not yet set by anything
|
/// TODO: This flag is not yet set by anything
|
||||||
const MARKED_GLOBAL = 1 << 2;
|
const MARKED_GLOBAL = 1 << 3;
|
||||||
/// TODO: This flag is not yet set by anything
|
/// TODO: This flag is not yet set by anything
|
||||||
const MARKED_NONLOCAL = 1 << 3;
|
const MARKED_NONLOCAL = 1 << 4;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -298,6 +308,10 @@ impl SymbolTableBuilder {
|
||||||
self.table.symbols[id].insert_flags(SymbolFlags::IS_BOUND);
|
self.table.symbols[id].insert_flags(SymbolFlags::IS_BOUND);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(super) fn mark_symbol_declared(&mut self, id: ScopedSymbolId) {
|
||||||
|
self.table.symbols[id].insert_flags(SymbolFlags::IS_DECLARED);
|
||||||
|
}
|
||||||
|
|
||||||
pub(super) fn mark_symbol_used(&mut self, id: ScopedSymbolId) {
|
pub(super) fn mark_symbol_used(&mut self, id: ScopedSymbolId) {
|
||||||
self.table.symbols[id].insert_flags(SymbolFlags::IS_USED);
|
self.table.symbols[id].insert_flags(SymbolFlags::IS_USED);
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,7 +5,7 @@ use ruff_python_ast as ast;
|
||||||
use crate::module_resolver::file_to_module;
|
use crate::module_resolver::file_to_module;
|
||||||
use crate::semantic_index::ast_ids::HasScopedAstId;
|
use crate::semantic_index::ast_ids::HasScopedAstId;
|
||||||
use crate::semantic_index::definition::{Definition, DefinitionKind};
|
use crate::semantic_index::definition::{Definition, DefinitionKind};
|
||||||
use crate::semantic_index::symbol::{ScopeId, ScopedSymbolId};
|
use crate::semantic_index::symbol::{ScopeId, ScopedSymbolId, Symbol};
|
||||||
use crate::semantic_index::{
|
use crate::semantic_index::{
|
||||||
global_scope, semantic_index, symbol_table, use_def_map, BindingWithConstraints,
|
global_scope, semantic_index, symbol_table, use_def_map, BindingWithConstraints,
|
||||||
BindingWithConstraintsIterator, DeclarationsIterator,
|
BindingWithConstraintsIterator, DeclarationsIterator,
|
||||||
|
@ -88,9 +88,64 @@ fn symbol_ty<'db>(db: &'db dyn Db, scope: ScopeId<'db>, name: &str) -> Type<'db>
|
||||||
.unwrap_or(Type::Unbound)
|
.unwrap_or(Type::Unbound)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Shorthand for `symbol_ty` that looks up a module-global symbol by name in a file.
|
/// Return a list of the symbols that typeshed declares in the body scope of
|
||||||
|
/// the stub for the class `types.ModuleType`.
|
||||||
|
///
|
||||||
|
/// Conceptually this could be a `Set` rather than a list,
|
||||||
|
/// but the number of symbols declared in this scope is likely to be very small,
|
||||||
|
/// so the cost of hashing the names is likely to be more expensive than it's worth.
|
||||||
|
#[salsa::tracked(return_ref)]
|
||||||
|
fn module_type_symbols<'db>(db: &'db dyn Db) -> smallvec::SmallVec<[ast::name::Name; 8]> {
|
||||||
|
let Some(module_type) = KnownClass::ModuleType
|
||||||
|
.to_class(db)
|
||||||
|
.into_class_literal_type()
|
||||||
|
else {
|
||||||
|
// The most likely way we get here is if a user specified a `--custom-typeshed-dir`
|
||||||
|
// without a `types.pyi` stub in the `stdlib/` directory
|
||||||
|
return smallvec::SmallVec::default();
|
||||||
|
};
|
||||||
|
|
||||||
|
let module_type_scope = module_type.body_scope(db);
|
||||||
|
let module_type_symbol_table = symbol_table(db, module_type_scope);
|
||||||
|
|
||||||
|
// `__dict__` and `__init__` are very special members that can be accessed as attributes
|
||||||
|
// on the module when imported, but cannot be accessed as globals *inside* the module.
|
||||||
|
//
|
||||||
|
// `__getattr__` is even more special: it doesn't exist at runtime, but typeshed includes it
|
||||||
|
// to reduce false positives associated with functions that dynamically import modules
|
||||||
|
// and return `Instance(types.ModuleType)`. We should ignore it for any known module-literal type.
|
||||||
|
module_type_symbol_table
|
||||||
|
.symbols()
|
||||||
|
.filter(|symbol| symbol.is_declared())
|
||||||
|
.map(Symbol::name)
|
||||||
|
.filter(|symbol_name| !matches!(&***symbol_name, "__dict__" | "__getattr__" | "__init__"))
|
||||||
|
.cloned()
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Looks up a module-global symbol by name in a file.
|
||||||
pub(crate) fn global_symbol_ty<'db>(db: &'db dyn Db, file: File, name: &str) -> Type<'db> {
|
pub(crate) fn global_symbol_ty<'db>(db: &'db dyn Db, file: File, name: &str) -> Type<'db> {
|
||||||
symbol_ty(db, global_scope(db, file), name)
|
let explicit_ty = symbol_ty(db, global_scope(db, file), name);
|
||||||
|
if !explicit_ty.may_be_unbound(db) {
|
||||||
|
return explicit_ty;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Not defined explicitly in the global scope?
|
||||||
|
// All modules are instances of `types.ModuleType`;
|
||||||
|
// look it up there (with a few very special exceptions)
|
||||||
|
if module_type_symbols(db)
|
||||||
|
.iter()
|
||||||
|
.any(|module_type_member| &**module_type_member == name)
|
||||||
|
{
|
||||||
|
// TODO: this should use `.to_instance(db)`. but we don't understand attribute access
|
||||||
|
// on instance types yet.
|
||||||
|
let module_type_member = KnownClass::ModuleType.to_class(db).member(db, name);
|
||||||
|
if !module_type_member.is_unbound() {
|
||||||
|
return explicit_ty.replace_unbound_with(db, module_type_member);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
explicit_ty
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Infer the type of a binding.
|
/// Infer the type of a binding.
|
||||||
|
@ -821,7 +876,46 @@ impl<'db> Type<'db> {
|
||||||
// TODO: attribute lookup on function type
|
// TODO: attribute lookup on function type
|
||||||
Type::Todo
|
Type::Todo
|
||||||
}
|
}
|
||||||
Type::ModuleLiteral(file) => global_symbol_ty(db, *file, name),
|
Type::ModuleLiteral(file) => {
|
||||||
|
// `__dict__` is a very special member that is never overridden by module globals;
|
||||||
|
// we should always look it up directly as an attribute on `types.ModuleType`,
|
||||||
|
// never in the global scope of the module.
|
||||||
|
if name == "__dict__" {
|
||||||
|
return KnownClass::ModuleType
|
||||||
|
.to_instance(db)
|
||||||
|
.member(db, "__dict__");
|
||||||
|
}
|
||||||
|
|
||||||
|
let global_lookup = symbol_ty(db, global_scope(db, *file), name);
|
||||||
|
|
||||||
|
// If it's unbound, check if it's present as an instance on `types.ModuleType`
|
||||||
|
// or `builtins.object`.
|
||||||
|
//
|
||||||
|
// We do a more limited version of this in `global_symbol_ty`,
|
||||||
|
// but there are two crucial differences here:
|
||||||
|
// - If a member is looked up as an attribute, `__init__` is also available
|
||||||
|
// on the module, but it isn't available as a global from inside the module
|
||||||
|
// - If a member is looked up as an attribute, members on `builtins.object`
|
||||||
|
// are also available (because `types.ModuleType` inherits from `object`);
|
||||||
|
// these attributes are also not available as globals from inside the module.
|
||||||
|
//
|
||||||
|
// The same way as in `global_symbol_ty`, however, we need to be careful to
|
||||||
|
// ignore `__getattr__`. Typeshed has a fake `__getattr__` on `types.ModuleType`
|
||||||
|
// to help out with dynamic imports; we shouldn't use it for `ModuleLiteral` types
|
||||||
|
// where we know exactly which module we're dealing with.
|
||||||
|
if name != "__getattr__" && global_lookup.may_be_unbound(db) {
|
||||||
|
// TODO: this should use `.to_instance()`, but we don't understand instance attribute yet
|
||||||
|
let module_type_instance_member =
|
||||||
|
KnownClass::ModuleType.to_class(db).member(db, name);
|
||||||
|
if module_type_instance_member.is_unbound() {
|
||||||
|
global_lookup
|
||||||
|
} else {
|
||||||
|
global_lookup.replace_unbound_with(db, module_type_instance_member)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
global_lookup
|
||||||
|
}
|
||||||
|
}
|
||||||
Type::ClassLiteral(class) => class.class_member(db, name),
|
Type::ClassLiteral(class) => class.class_member(db, name),
|
||||||
Type::Instance(_) => {
|
Type::Instance(_) => {
|
||||||
// TODO MRO? get_own_instance_member, get_instance_member
|
// TODO MRO? get_own_instance_member, get_instance_member
|
||||||
|
@ -1868,15 +1962,13 @@ impl<'db> TupleType<'db> {
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::{
|
use super::*;
|
||||||
builtins_symbol_ty, BytesLiteralType, IntersectionBuilder, StringLiteralType, Truthiness,
|
|
||||||
TupleType, Type, UnionType,
|
|
||||||
};
|
|
||||||
use crate::db::tests::TestDb;
|
use crate::db::tests::TestDb;
|
||||||
use crate::program::{Program, SearchPathSettings};
|
use crate::program::{Program, SearchPathSettings};
|
||||||
use crate::python_version::PythonVersion;
|
use crate::python_version::PythonVersion;
|
||||||
use crate::ProgramSettings;
|
use crate::ProgramSettings;
|
||||||
use ruff_db::system::{DbWithTestSystem, SystemPathBuf};
|
use ruff_db::system::{DbWithTestSystem, SystemPathBuf};
|
||||||
|
use ruff_python_ast as ast;
|
||||||
use test_case::test_case;
|
use test_case::test_case;
|
||||||
|
|
||||||
fn setup_db() -> TestDb {
|
fn setup_db() -> TestDb {
|
||||||
|
@ -2256,4 +2348,16 @@ mod tests {
|
||||||
|
|
||||||
assert_eq!(ty.into_type(&db).repr(&db), expected.into_type(&db));
|
assert_eq!(ty.into_type(&db).repr(&db), expected.into_type(&db));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn module_type_symbols_includes_declared_types_but_not_referenced_types() {
|
||||||
|
let db = setup_db();
|
||||||
|
let symbol_names = module_type_symbols(&db);
|
||||||
|
|
||||||
|
let dunder_name_symbol_name = ast::name::Name::new_static("__name__");
|
||||||
|
assert!(symbol_names.contains(&dunder_name_symbol_name));
|
||||||
|
|
||||||
|
let property_symbol_name = ast::name::Name::new_static("property");
|
||||||
|
assert!(!symbol_names.contains(&property_symbol_name));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,8 +23,6 @@ struct Case {
|
||||||
const TOMLLIB_312_URL: &str = "https://raw.githubusercontent.com/python/cpython/8e8a4baf652f6e1cee7acde9d78c4b6154539748/Lib/tomllib";
|
const TOMLLIB_312_URL: &str = "https://raw.githubusercontent.com/python/cpython/8e8a4baf652f6e1cee7acde9d78c4b6154539748/Lib/tomllib";
|
||||||
|
|
||||||
static EXPECTED_DIAGNOSTICS: &[&str] = &[
|
static EXPECTED_DIAGNOSTICS: &[&str] = &[
|
||||||
// We don't support `ModuleType`-attributes as globals yet:
|
|
||||||
"/src/tomllib/__init__.py:10:30: Name `__name__` used when not defined",
|
|
||||||
// We don't support `*` imports yet:
|
// We don't support `*` imports yet:
|
||||||
"/src/tomllib/_parser.py:7:29: Module `collections.abc` has no member `Iterable`",
|
"/src/tomllib/_parser.py:7:29: Module `collections.abc` has no member `Iterable`",
|
||||||
// We don't support terminal statements in control flow yet:
|
// We don't support terminal statements in control flow yet:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue