[ty] Strict validation of protocol members (#17750)

This commit is contained in:
Alex Waygood 2025-08-19 23:45:41 +01:00 committed by GitHub
parent e0f4cec7a1
commit 656fc335f2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 619 additions and 78 deletions

View file

@ -482,6 +482,8 @@ reveal_type(get_protocol_members(Baz2))
## Protocol members in statically known branches
<!-- snapshot-diagnostics -->
The list of protocol members does not include any members declared in branches that are statically
known to be unreachable:
@ -492,7 +494,7 @@ python-version = "3.9"
```py
import sys
from typing_extensions import Protocol, get_protocol_members
from typing_extensions import Protocol, get_protocol_members, reveal_type
class Foo(Protocol):
if sys.version_info >= (3, 10):
@ -501,7 +503,7 @@ class Foo(Protocol):
def c(self) -> None: ...
else:
d: int
e = 56
e = 56 # error: [ambiguous-protocol-member]
def f(self) -> None: ...
reveal_type(get_protocol_members(Foo)) # revealed: frozenset[Literal["d", "e", "f"]]
@ -797,9 +799,9 @@ def f(arg: HasXWithDefault):
```
Assignments in a class body of a protocol -- of any kind -- are not permitted by ty unless the
symbol being assigned to is also explicitly declared in the protocol's class body. Note that this is
stricter validation of protocol members than many other type checkers currently apply (as of
2025/04/21).
symbol being assigned to is also explicitly declared in the body of the protocol class or one of its
superclasses. Note that this is stricter validation of protocol members than many other type
checkers currently apply (as of 2025/04/21).
The reason for this strict validation is that undeclared variables in the class body would lead to
an ambiguous interface being declared by the protocol.
@ -823,24 +825,75 @@ class LotsOfBindings(Protocol):
class Nested: ... # also weird, but we should also probably allow it
class NestedProtocol(Protocol): ... # same here...
e = 72 # TODO: this should error with `[invalid-protocol]` (`e` is not declared)
e = 72 # error: [ambiguous-protocol-member]
f, g = (1, 2) # TODO: this should error with `[invalid-protocol]` (`f` and `g` are not declared)
# error: [ambiguous-protocol-member] "Consider adding an annotation, e.g. `f: int = ...`"
# error: [ambiguous-protocol-member] "Consider adding an annotation, e.g. `g: int = ...`"
f, g = (1, 2)
h: int = (i := 3) # TODO: this should error with `[invalid-protocol]` (`i` is not declared)
h: int = (i := 3) # error: [ambiguous-protocol-member]
for j in range(42): # TODO: this should error with `[invalid-protocol]` (`j` is not declared)
for j in range(42): # error: [ambiguous-protocol-member]
pass
with MyContext() as k: # TODO: this should error with `[invalid-protocol]` (`k` is not declared)
with MyContext() as k: # error: [ambiguous-protocol-member]
pass
match object():
case l: # TODO: this should error with `[invalid-protocol]` (`l` is not declared)
case l: # error: [ambiguous-protocol-member]
...
# revealed: frozenset[Literal["Nested", "NestedProtocol", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l"]]
reveal_type(get_protocol_members(LotsOfBindings))
class Foo(Protocol):
a: int
class Bar(Foo, Protocol):
a = 42 # fine, because it's declared in the superclass
reveal_type(get_protocol_members(Bar)) # revealed: frozenset[Literal["a"]]
```
A binding-without-declaration will not be reported if it occurs in a branch that we can statically
determine to be unreachable. The reason is that we don't consider it to be a protocol member at all
if all definitions for the variable are in unreachable blocks:
```py
import sys
class Protocol694(Protocol):
if sys.version_info > (3, 694):
x = 42 # no error!
```
If there are multiple bindings of the variable in the class body, however, and at least one of the
bindings occurs in a block of code that is understood to be (possibly) reachable, a diagnostic will
be reported. The diagnostic will be attached to the first binding that occurs in the class body,
even if that first definition occurs in an unreachable block:
```py
class Protocol695(Protocol):
if sys.version_info > (3, 695):
x = 42
else:
x = 42
x = 56 # error: [ambiguous-protocol-member]
```
In order for the variable to be considered declared, the declaration of the variable must also take
place in a block of code that is understood to be (possibly) reachable:
```py
class Protocol696(Protocol):
if sys.version_info > (3, 696):
x: int
else:
x = 42 # error: [ambiguous-protocol-member]
y: int
y = 56 # no error
```
Attribute members are allowed to have assignments in methods on the protocol class, just like
@ -943,6 +996,40 @@ static_assert(not is_assignable_to(HasX, Foo))
static_assert(not is_subtype_of(HasX, Foo))
```
## Diagnostics for protocols with invalid attribute members
This is a short appendix to the previous section with the `snapshot-diagnostics` directive enabled
(enabling snapshots for the previous section in its entirety would lead to a huge snapshot, since
it's a large section).
<!-- snapshot-diagnostics -->
```py
from typing import Protocol
def coinflip() -> bool:
return True
class A(Protocol):
# The `x` and `y` members attempt to use Python-2-style type comments
# to indicate that the type should be `int | None` and `str` respectively,
# but we don't support those
# error: [ambiguous-protocol-member]
a = None # type: int
# error: [ambiguous-protocol-member]
b = ... # type: str
if coinflip():
c = 1 # error: [ambiguous-protocol-member]
else:
c = 2
# error: [ambiguous-protocol-member]
for d in range(42):
pass
```
## Equivalence of protocols
Two protocols are considered equivalent types if they specify the same interface, even if they have

View file

@ -0,0 +1,140 @@
---
source: crates/ty_test/src/lib.rs
expression: snapshot
---
---
mdtest name: protocols.md - Protocols - Diagnostics for protocols with invalid attribute members
mdtest path: crates/ty_python_semantic/resources/mdtest/protocols.md
---
# Python source files
## mdtest_snippet.py
```
1 | from typing import Protocol
2 |
3 | def coinflip() -> bool:
4 | return True
5 |
6 | class A(Protocol):
7 | # The `x` and `y` members attempt to use Python-2-style type comments
8 | # to indicate that the type should be `int | None` and `str` respectively,
9 | # but we don't support those
10 |
11 | # error: [ambiguous-protocol-member]
12 | a = None # type: int
13 | # error: [ambiguous-protocol-member]
14 | b = ... # type: str
15 |
16 | if coinflip():
17 | c = 1 # error: [ambiguous-protocol-member]
18 | else:
19 | c = 2
20 |
21 | # error: [ambiguous-protocol-member]
22 | for d in range(42):
23 | pass
```
# Diagnostics
```
warning[ambiguous-protocol-member]: Cannot assign to undeclared variable in the body of a protocol class
--> src/mdtest_snippet.py:12:5
|
11 | # error: [ambiguous-protocol-member]
12 | a = None # type: int
| ^^^^^^^^ Consider adding an annotation for `a`
13 | # error: [ambiguous-protocol-member]
14 | b = ... # type: str
|
info: Assigning to an undeclared variable in a protocol class leads to an ambiguous interface
--> src/mdtest_snippet.py:6:7
|
4 | return True
5 |
6 | class A(Protocol):
| ^^^^^^^^^^^ `A` declared as a protocol here
7 | # The `x` and `y` members attempt to use Python-2-style type comments
8 | # to indicate that the type should be `int | None` and `str` respectively,
|
info: No declarations found for `a` in the body of `A` or any of its superclasses
info: rule `ambiguous-protocol-member` is enabled by default
```
```
warning[ambiguous-protocol-member]: Cannot assign to undeclared variable in the body of a protocol class
--> src/mdtest_snippet.py:14:5
|
12 | a = None # type: int
13 | # error: [ambiguous-protocol-member]
14 | b = ... # type: str
| ^^^^^^^ Consider adding an annotation for `b`
15 |
16 | if coinflip():
|
info: Assigning to an undeclared variable in a protocol class leads to an ambiguous interface
--> src/mdtest_snippet.py:6:7
|
4 | return True
5 |
6 | class A(Protocol):
| ^^^^^^^^^^^ `A` declared as a protocol here
7 | # The `x` and `y` members attempt to use Python-2-style type comments
8 | # to indicate that the type should be `int | None` and `str` respectively,
|
info: No declarations found for `b` in the body of `A` or any of its superclasses
info: rule `ambiguous-protocol-member` is enabled by default
```
```
warning[ambiguous-protocol-member]: Cannot assign to undeclared variable in the body of a protocol class
--> src/mdtest_snippet.py:17:9
|
16 | if coinflip():
17 | c = 1 # error: [ambiguous-protocol-member]
| ^^^^^ Consider adding an annotation, e.g. `c: int = ...`
18 | else:
19 | c = 2
|
info: Assigning to an undeclared variable in a protocol class leads to an ambiguous interface
--> src/mdtest_snippet.py:6:7
|
4 | return True
5 |
6 | class A(Protocol):
| ^^^^^^^^^^^ `A` declared as a protocol here
7 | # The `x` and `y` members attempt to use Python-2-style type comments
8 | # to indicate that the type should be `int | None` and `str` respectively,
|
info: No declarations found for `c` in the body of `A` or any of its superclasses
info: rule `ambiguous-protocol-member` is enabled by default
```
```
warning[ambiguous-protocol-member]: Cannot assign to undeclared variable in the body of a protocol class
--> src/mdtest_snippet.py:22:9
|
21 | # error: [ambiguous-protocol-member]
22 | for d in range(42):
| ^ `d` is not declared as a protocol member
23 | pass
|
info: Assigning to an undeclared variable in a protocol class leads to an ambiguous interface
--> src/mdtest_snippet.py:6:7
|
4 | return True
5 |
6 | class A(Protocol):
| ^^^^^^^^^^^ `A` declared as a protocol here
7 | # The `x` and `y` members attempt to use Python-2-style type comments
8 | # to indicate that the type should be `int | None` and `str` respectively,
|
info: No declarations found for `d` in the body of `A` or any of its superclasses
info: rule `ambiguous-protocol-member` is enabled by default
```

View file

@ -0,0 +1,68 @@
---
source: crates/ty_test/src/lib.rs
expression: snapshot
---
---
mdtest name: protocols.md - Protocols - Protocol members in statically known branches
mdtest path: crates/ty_python_semantic/resources/mdtest/protocols.md
---
# Python source files
## mdtest_snippet.py
```
1 | import sys
2 | from typing_extensions import Protocol, get_protocol_members, reveal_type
3 |
4 | class Foo(Protocol):
5 | if sys.version_info >= (3, 10):
6 | a: int
7 | b = 42
8 | def c(self) -> None: ...
9 | else:
10 | d: int
11 | e = 56 # error: [ambiguous-protocol-member]
12 | def f(self) -> None: ...
13 |
14 | reveal_type(get_protocol_members(Foo)) # revealed: frozenset[Literal["d", "e", "f"]]
```
# Diagnostics
```
warning[ambiguous-protocol-member]: Cannot assign to undeclared variable in the body of a protocol class
--> src/mdtest_snippet.py:11:9
|
9 | else:
10 | d: int
11 | e = 56 # error: [ambiguous-protocol-member]
| ^^^^^^ Consider adding an annotation, e.g. `e: int = ...`
12 | def f(self) -> None: ...
|
info: Assigning to an undeclared variable in a protocol class leads to an ambiguous interface
--> src/mdtest_snippet.py:4:7
|
2 | from typing_extensions import Protocol, get_protocol_members, reveal_type
3 |
4 | class Foo(Protocol):
| ^^^^^^^^^^^^^ `Foo` declared as a protocol here
5 | if sys.version_info >= (3, 10):
6 | a: int
|
info: No declarations found for `e` in the body of `Foo` or any of its superclasses
info: rule `ambiguous-protocol-member` is enabled by default
```
```
info[revealed-type]: Revealed type
--> src/mdtest_snippet.py:14:13
|
12 | def f(self) -> None: ...
13 |
14 | reveal_type(get_protocol_members(Foo)) # revealed: frozenset[Literal["d", "e", "f"]]
| ^^^^^^^^^^^^^^^^^^^^^^^^^ `frozenset[Literal["d", "e", "f"]]`
|
```

View file

@ -702,6 +702,10 @@ impl DefinitionKind<'_> {
)
}
pub(crate) const fn is_unannotated_assignment(&self) -> bool {
matches!(self, DefinitionKind::Assignment(_))
}
pub(crate) fn as_typevar(&self) -> Option<&AstNodeRef<ast::TypeParamTypeVar>> {
match self {
DefinitionKind::TypeVar(type_var) => Some(type_var),

View file

@ -7,8 +7,9 @@ use super::{
};
use crate::lint::{Level, LintRegistryBuilder, LintStatus};
use crate::semantic_index::SemanticIndex;
use crate::semantic_index::definition::Definition;
use crate::semantic_index::place::{PlaceTable, ScopedPlaceId};
use crate::suppression::FileSuppressionId;
use crate::types::LintDiagnosticGuard;
use crate::types::class::{Field, SolidBase, SolidBaseKind};
use crate::types::function::KnownFunction;
use crate::types::string_annotation::{
@ -16,6 +17,9 @@ use crate::types::string_annotation::{
IMPLICIT_CONCATENATED_STRING_TYPE_ANNOTATION, INVALID_SYNTAX_IN_FORWARD_ANNOTATION,
RAW_STRING_TYPE_ANNOTATION,
};
use crate::types::{
DynamicType, LintDiagnosticGuard, Protocol, ProtocolInstanceType, SubclassOfInner, binding_type,
};
use crate::types::{SpecialFormType, Type, protocol_class::ProtocolClassLiteral};
use crate::util::diagnostics::format_enumeration;
use crate::{Db, FxIndexMap, FxOrderMap, Module, ModuleName, Program, declare_lint};
@ -29,6 +33,7 @@ use std::fmt::Formatter;
/// Registers all known type check lints.
pub(crate) fn register_lints(registry: &mut LintRegistryBuilder) {
registry.register_lint(&AMBIGUOUS_PROTOCOL_MEMBER);
registry.register_lint(&CALL_NON_CALLABLE);
registry.register_lint(&POSSIBLY_UNBOUND_IMPLICIT_CALL);
registry.register_lint(&CONFLICTING_ARGUMENT_FORMS);
@ -424,7 +429,7 @@ declare_lint! {
declare_lint! {
/// ## What it does
/// Checks for invalidly defined protocol classes.
/// Checks for protocol classes that will raise `TypeError` at runtime.
///
/// ## Why is this bad?
/// An invalidly defined protocol class may lead to the type checker inferring
@ -450,6 +455,41 @@ declare_lint! {
}
}
declare_lint! {
/// ## What it does
/// Checks for protocol classes with members that will lead to ambiguous interfaces.
///
/// ## Why is this bad?
/// Assigning to an undeclared variable in a protocol class leads to an ambiguous
/// interface which may lead to the type checker inferring unexpected things. It's
/// recommended to ensure that all members of a protocol class are explicitly declared.
///
/// ## Examples
///
/// ```py
/// from typing import Protocol
///
/// class BaseProto(Protocol):
/// a: int # fine (explicitly declared as `int`)
/// def method_member(self) -> int: ... # fine: a method definition using `def` is considered a declaration
/// c = "some variable" # error: no explicit declaration, leading to ambiguity
/// b = method_member # error: no explicit declaration, leading to ambiguity
///
/// # error: this creates implicit assignments of `d` and `e` in the protocol class body.
/// # Were they really meant to be considered protocol members?
/// for d, e in enumerate(range(42)):
/// pass
///
/// class SubProto(BaseProto, Protocol):
/// a = 42 # fine (declared in superclass)
/// ```
pub(crate) static AMBIGUOUS_PROTOCOL_MEMBER = {
summary: "detects protocol classes with ambiguous interfaces",
status: LintStatus::preview("1.0.0"),
default_level: Level::Warn,
}
}
declare_lint! {
/// ## What it does
/// Checks for invalidly defined `NamedTuple` classes.
@ -2456,6 +2496,95 @@ pub(crate) fn report_attempted_protocol_instantiation(
diagnostic.sub(class_def_diagnostic);
}
pub(crate) fn report_undeclared_protocol_member(
context: &InferContext,
definition: Definition,
protocol_class: ProtocolClassLiteral,
class_symbol_table: &PlaceTable,
) {
/// We want to avoid suggesting an annotation for e.g. `x = None`,
/// because the user almost certainly doesn't want to write `x: None = None`.
/// We also want to avoid suggesting invalid syntax such as `x: <class 'int'> = int`.
fn should_give_hint<'db>(db: &'db dyn Db, ty: Type<'db>) -> bool {
let class = match ty {
Type::ProtocolInstance(ProtocolInstanceType {
inner: Protocol::FromClass(_),
..
}) => return true,
Type::SubclassOf(subclass_of) => match subclass_of.subclass_of() {
SubclassOfInner::Class(class) => class,
SubclassOfInner::Dynamic(DynamicType::Any) => return true,
SubclassOfInner::Dynamic(_) => return false,
},
Type::NominalInstance(instance) => instance.class(db),
_ => return false,
};
!matches!(
class.known(db),
Some(KnownClass::NoneType | KnownClass::EllipsisType)
)
}
let db = context.db();
let Some(builder) = context.report_lint(
&AMBIGUOUS_PROTOCOL_MEMBER,
definition.full_range(db, context.module()),
) else {
return;
};
let ScopedPlaceId::Symbol(symbol_id) = definition.place(db) else {
return;
};
let symbol_name = class_symbol_table.symbol(symbol_id).name();
let class_name = protocol_class.name(db);
let mut diagnostic = builder
.into_diagnostic("Cannot assign to undeclared variable in the body of a protocol class");
if definition.kind(db).is_unannotated_assignment() {
let binding_type = binding_type(db, definition);
let suggestion = binding_type
.literal_fallback_instance(db)
.unwrap_or(binding_type);
if should_give_hint(db, suggestion) {
diagnostic.set_primary_message(format_args!(
"Consider adding an annotation, e.g. `{symbol_name}: {} = ...`",
suggestion.display(db)
));
} else {
diagnostic.set_primary_message(format_args!(
"Consider adding an annotation for `{symbol_name}`"
));
}
} else {
diagnostic.set_primary_message(format_args!(
"`{symbol_name}` is not declared as a protocol member"
));
}
let mut class_def_diagnostic = SubDiagnostic::new(
SubDiagnosticSeverity::Info,
"Assigning to an undeclared variable in a protocol class \
leads to an ambiguous interface",
);
class_def_diagnostic.annotate(
Annotation::primary(protocol_class.header_span(db))
.message(format_args!("`{class_name}` declared as a protocol here",)),
);
diagnostic.sub(class_def_diagnostic);
diagnostic.info(format_args!(
"No declarations found for `{symbol_name}` \
in the body of `{class_name}` or any of its superclasses"
));
}
pub(crate) fn report_duplicate_bases(
context: &InferContext,
class: ClassLiteral,

View file

@ -1434,6 +1434,10 @@ impl<'db, 'ast> TypeInferenceBuilder<'db, 'ast> {
}
}
}
if let Some(protocol) = class.into_protocol_class(self.db()) {
protocol.validate_members(&self.context, self.index);
}
}
}

View file

@ -7,7 +7,10 @@ use ruff_python_ast::name::Name;
use rustc_hash::FxHashMap;
use super::TypeVarVariance;
use crate::semantic_index::place_table;
use crate::semantic_index::place::ScopedPlaceId;
use crate::semantic_index::{SemanticIndex, place_table};
use crate::types::context::InferContext;
use crate::types::diagnostic::report_undeclared_protocol_member;
use crate::{
Db, FxOrderSet,
place::{Boundness, Place, PlaceAndQualifiers, place_from_bindings, place_from_declarations},
@ -55,6 +58,59 @@ impl<'db> ProtocolClassLiteral<'db> {
self.known_function_decorators(db)
.contains(&KnownFunction::RuntimeCheckable)
}
/// Iterate through the body of the protocol class. Check that all definitions
/// in the protocol class body are either explicitly declared directly in the
/// class body, or are declared in a superclass of the protocol class.
pub(super) fn validate_members(self, context: &InferContext, index: &SemanticIndex<'db>) {
let db = context.db();
let interface = self.interface(db);
let class_place_table = index.place_table(self.body_scope(db).file_scope_id(db));
for (symbol_id, mut bindings_iterator) in
use_def_map(db, self.body_scope(db)).all_end_of_scope_symbol_bindings()
{
let symbol_name = class_place_table.symbol(symbol_id).name();
if !interface.includes_member(db, symbol_name) {
continue;
}
let has_declaration = self
.iter_mro(db, None)
.filter_map(ClassBase::into_class)
.any(|superclass| {
let superclass_scope = superclass.class_literal(db).0.body_scope(db);
let Some(scoped_symbol_id) =
place_table(db, superclass_scope).symbol_id(symbol_name)
else {
return false;
};
!place_from_declarations(
db,
index
.use_def_map(superclass_scope.file_scope_id(db))
.end_of_scope_declarations(ScopedPlaceId::Symbol(scoped_symbol_id)),
)
.into_place_and_conflicting_declarations()
.0
.place
.is_unbound()
});
if has_declaration {
continue;
}
let Some(first_definition) =
bindings_iterator.find_map(|binding| binding.binding.definition())
else {
continue;
};
report_undeclared_protocol_member(context, first_definition, self, class_place_table);
}
}
}
impl<'db> Deref for ProtocolClassLiteral<'db> {
@ -147,6 +203,10 @@ impl<'db> ProtocolInterface<'db> {
})
}
pub(super) fn includes_member(self, db: &'db dyn Db, name: &str) -> bool {
self.inner(db).contains_key(name)
}
pub(super) fn instance_member(self, db: &'db dyn Db, name: &str) -> PlaceAndQualifiers<'db> {
self.member_by_name(db, name)
.map(|member| PlaceAndQualifiers {