[ty] Return a tuple spec from the iterator protocol (#19496)
Some checks are pending
CI / Determine changes (push) Waiting to run
CI / cargo fmt (push) Waiting to run
CI / cargo clippy (push) Blocked by required conditions
CI / cargo test (linux) (push) Blocked by required conditions
CI / cargo test (linux, release) (push) Blocked by required conditions
CI / cargo test (windows) (push) Blocked by required conditions
CI / cargo test (wasm) (push) Blocked by required conditions
CI / cargo build (release) (push) Waiting to run
CI / cargo build (msrv) (push) Blocked by required conditions
CI / cargo fuzz build (push) Blocked by required conditions
CI / fuzz parser (push) Blocked by required conditions
CI / test scripts (push) Blocked by required conditions
CI / ecosystem (push) Blocked by required conditions
CI / Fuzz for new ty panics (push) Blocked by required conditions
CI / cargo shear (push) Blocked by required conditions
CI / python package (push) Waiting to run
CI / pre-commit (push) Waiting to run
CI / mkdocs (push) Waiting to run
CI / formatter instabilities and black similarity (push) Blocked by required conditions
CI / test ruff-lsp (push) Blocked by required conditions
CI / check playground (push) Blocked by required conditions
CI / benchmarks-instrumented (push) Blocked by required conditions
CI / benchmarks-walltime (push) Blocked by required conditions
[ty Playground] Release / publish (push) Waiting to run

This PR updates our iterator protocol machinery to return a tuple spec
describing the elements that are returned, instead of a type. That
allows us to track heterogeneous iterators more precisely, and
consolidates the logic in unpacking and splatting, which are the two
places where we can take advantage of that more precise information.
(Other iterator consumers, like `for` loops, have to collapse the
iterated elements down to a single type regardless, and we provide a new
helper method on `TupleSpec` to perform that summarization.)
This commit is contained in:
Douglas Creager 2025-07-23 17:11:44 -04:00 committed by GitHub
parent 2a00eca66b
commit e0149cd9f3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 204 additions and 125 deletions

View file

@ -259,6 +259,81 @@ def _(args: tuple[int, *tuple[str, ...], int]) -> None:
takes_at_least_two_positional_only(*args) # error: [invalid-argument-type]
```
### String argument
```py
from typing import Literal
def takes_zero() -> None: ...
def takes_one(x: str) -> None: ...
def takes_two(x: str, y: str) -> None: ...
def takes_two_positional_only(x: str, y: str, /) -> None: ...
def takes_two_different(x: int, y: str) -> None: ...
def takes_two_different_positional_only(x: int, y: str, /) -> None: ...
def takes_at_least_zero(*args) -> None: ...
def takes_at_least_one(x: str, *args) -> None: ...
def takes_at_least_two(x: str, y: str, *args) -> None: ...
def takes_at_least_two_positional_only(x: str, y: str, /, *args) -> None: ...
# Test all of the above with a number of different splatted argument types
def _(args: Literal["a"]) -> None:
takes_zero(*args) # error: [too-many-positional-arguments]
takes_one(*args)
takes_two(*args) # error: [missing-argument]
takes_two_positional_only(*args) # error: [missing-argument]
# error: [invalid-argument-type]
# error: [missing-argument]
takes_two_different(*args)
# error: [invalid-argument-type]
# error: [missing-argument]
takes_two_different_positional_only(*args)
takes_at_least_zero(*args)
takes_at_least_one(*args)
takes_at_least_two(*args) # error: [missing-argument]
takes_at_least_two_positional_only(*args) # error: [missing-argument]
def _(args: Literal["ab"]) -> None:
takes_zero(*args) # error: [too-many-positional-arguments]
takes_one(*args) # error: [too-many-positional-arguments]
takes_two(*args)
takes_two_positional_only(*args)
takes_two_different(*args) # error: [invalid-argument-type]
takes_two_different_positional_only(*args) # error: [invalid-argument-type]
takes_at_least_zero(*args)
takes_at_least_one(*args)
takes_at_least_two(*args)
takes_at_least_two_positional_only(*args)
def _(args: Literal["abc"]) -> None:
takes_zero(*args) # error: [too-many-positional-arguments]
takes_one(*args) # error: [too-many-positional-arguments]
takes_two(*args) # error: [too-many-positional-arguments]
takes_two_positional_only(*args) # error: [too-many-positional-arguments]
# error: [invalid-argument-type]
# error: [too-many-positional-arguments]
takes_two_different(*args)
# error: [invalid-argument-type]
# error: [too-many-positional-arguments]
takes_two_different_positional_only(*args)
takes_at_least_zero(*args)
takes_at_least_one(*args)
takes_at_least_two(*args)
takes_at_least_two_positional_only(*args)
def _(args: str) -> None:
takes_zero(*args)
takes_one(*args)
takes_two(*args)
takes_two_positional_only(*args)
takes_two_different(*args) # error: [invalid-argument-type]
takes_two_different_positional_only(*args) # error: [invalid-argument-type]
takes_at_least_zero(*args)
takes_at_least_one(*args)
takes_at_least_two(*args)
takes_at_least_two_positional_only(*args)
```
### Argument expansion regression
This is a regression that was highlighted by the ecosystem check, which shows that we might need to

View file

@ -738,6 +738,13 @@ def _(flag: bool, flag2: bool):
reveal_type(y) # revealed: bytes | str | int
```
## Empty tuple is iterable
```py
for x in ():
reveal_type(x) # revealed: Never
```
## Never is iterable
```py
@ -745,5 +752,5 @@ from typing_extensions import Never
def f(never: Never):
for x in never:
reveal_type(x) # revealed: Never
reveal_type(x) # revealed: Unknown
```

View file

@ -2,6 +2,7 @@ use infer::nearest_enclosing_class;
use itertools::{Either, Itertools};
use ruff_db::parsed::parsed_module;
use std::borrow::Cow;
use std::slice::Iter;
use bitflags::bitflags;
@ -56,7 +57,7 @@ use crate::types::infer::infer_unpack_types;
use crate::types::mro::{Mro, MroError, MroIterator};
pub(crate) use crate::types::narrow::infer_narrowing_constraint;
use crate::types::signatures::{Parameter, ParameterForm, Parameters, walk_signature};
use crate::types::tuple::TupleType;
use crate::types::tuple::{TupleSpec, TupleType};
pub use crate::util::diagnostics::add_inferred_python_version_hint_to_diagnostic;
use crate::{Db, FxOrderSet, Module, Program};
pub(crate) use class::{ClassLiteral, ClassType, GenericAlias, KnownClass};
@ -813,13 +814,6 @@ impl<'db> Type<'db> {
.expect("Expected a Type::EnumLiteral variant")
}
pub(crate) const fn into_tuple(self) -> Option<TupleType<'db>> {
match self {
Type::Tuple(tuple_type) => Some(tuple_type),
_ => None,
}
}
/// Turn a class literal (`Type::ClassLiteral` or `Type::GenericAlias`) into a `ClassType`.
/// Since a `ClassType` must be specialized, apply the default specialization to any
/// unspecialized generic class literal.
@ -4615,35 +4609,50 @@ impl<'db> Type<'db> {
}
}
/// Returns the element type when iterating over `self`.
/// Returns a tuple spec describing the elements that are produced when iterating over `self`.
///
/// This method should only be used outside of type checking because it omits any errors.
/// For type checking, use [`try_iterate`](Self::try_iterate) instead.
fn iterate(self, db: &'db dyn Db) -> Type<'db> {
fn iterate(self, db: &'db dyn Db) -> Cow<'db, TupleSpec<'db>> {
self.try_iterate(db)
.unwrap_or_else(|err| err.fallback_element_type(db))
.unwrap_or_else(|err| Cow::Owned(TupleSpec::homogeneous(err.fallback_element_type(db))))
}
/// Given the type of an object that is iterated over in some way,
/// return the type of objects that are yielded by that iteration.
/// return a tuple spec describing the type of objects that are yielded by that iteration.
///
/// E.g., for the following loop, given the type of `x`, infer the type of `y`:
/// E.g., for the following call, given the type of `x`, infer the types of the values that are
/// splatted into `y`'s positional arguments:
/// ```python
/// for y in x:
/// pass
/// y(*x)
/// ```
fn try_iterate(self, db: &'db dyn Db) -> Result<Type<'db>, IterationError<'db>> {
if let Type::Tuple(tuple_type) = self {
return Ok(UnionType::from_elements(
db,
tuple_type.tuple(db).all_elements(),
));
fn try_iterate(self, db: &'db dyn Db) -> Result<Cow<'db, TupleSpec<'db>>, IterationError<'db>> {
match self {
Type::Tuple(tuple_type) => return Ok(Cow::Borrowed(tuple_type.tuple(db))),
Type::GenericAlias(alias) if alias.origin(db).is_tuple(db) => {
return Ok(Cow::Owned(TupleSpec::homogeneous(todo_type!(
"*tuple[] annotations"
))));
}
if let Type::GenericAlias(alias) = self {
if alias.origin(db).is_tuple(db) {
return Ok(todo_type!("*tuple[] annotations"));
Type::StringLiteral(string_literal_ty) => {
// We could go further and deconstruct to an array of `StringLiteral`
// with each individual character, instead of just an array of
// `LiteralString`, but there would be a cost and it's not clear that
// it's worth it.
return Ok(Cow::Owned(TupleSpec::from_elements(std::iter::repeat_n(
Type::LiteralString,
string_literal_ty.python_len(db),
))));
}
Type::Never => {
// The dunder logic below would have us return `tuple[Never, ...]`, which eagerly
// simplifies to `tuple[()]`. That will will cause us to emit false positives if we
// index into the tuple. Using `tuple[Unknown, ...]` avoids these false positives.
// TODO: Consider removing this special case, and instead hide the indexing
// diagnostic in unreachable code.
return Ok(Cow::Owned(TupleSpec::homogeneous(Type::unknown())));
}
_ => {}
}
let try_call_dunder_getitem = || {
@ -4669,12 +4678,14 @@ impl<'db> Type<'db> {
Ok(iterator) => {
// `__iter__` is definitely bound and calling it succeeds.
// See what calling `__next__` on the object returned by `__iter__` gives us...
try_call_dunder_next_on_iterator(iterator).map_err(|dunder_next_error| {
IterationError::IterReturnsInvalidIterator {
try_call_dunder_next_on_iterator(iterator)
.map(|ty| Cow::Owned(TupleSpec::homogeneous(ty)))
.map_err(
|dunder_next_error| IterationError::IterReturnsInvalidIterator {
iterator,
dunder_next_error,
}
})
},
)
}
// `__iter__` is possibly unbound...
@ -4692,10 +4703,10 @@ impl<'db> Type<'db> {
// and the type returned by the `__getitem__` method.
//
// No diagnostic is emitted; iteration will always succeed!
UnionType::from_elements(
Cow::Owned(TupleSpec::homogeneous(UnionType::from_elements(
db,
[dunder_next_return, dunder_getitem_return_type],
)
)))
})
.map_err(|dunder_getitem_error| {
IterationError::PossiblyUnboundIterAndGetitemError {
@ -4718,13 +4729,13 @@ impl<'db> Type<'db> {
}
// There's no `__iter__` method. Try `__getitem__` instead...
Err(CallDunderError::MethodNotAvailable) => {
try_call_dunder_getitem().map_err(|dunder_getitem_error| {
IterationError::UnboundIterAndGetitemError {
Err(CallDunderError::MethodNotAvailable) => try_call_dunder_getitem()
.map(|ty| Cow::Owned(TupleSpec::homogeneous(ty)))
.map_err(
|dunder_getitem_error| IterationError::UnboundIterAndGetitemError {
dunder_getitem_error,
}
})
}
},
),
}
}

View file

@ -51,12 +51,10 @@ impl<'a, 'db> CallArguments<'a, 'db> {
ast::ArgOrKeyword::Arg(arg) => match arg {
ast::Expr::Starred(ast::ExprStarred { value, .. }) => {
let ty = infer_argument_type(arg, value);
let length = match ty {
Type::Tuple(tuple) => tuple.tuple(db).len(),
// TODO: have `Type::try_iterator` return a tuple spec, and use its
// length as this argument's arity
_ => TupleLength::unknown(),
};
let length = ty
.try_iterate(db)
.map(|tuple| tuple.len())
.unwrap_or(TupleLength::unknown());
(Argument::Variadic(length), Some(ty))
}
_ => (Argument::Positional, None),

View file

@ -977,24 +977,14 @@ impl<'db> Bindings<'db> {
// `tuple(range(42))` => `tuple[int, ...]`
// BUT `tuple((1, 2))` => `tuple[Literal[1], Literal[2]]` rather than `tuple[Literal[1, 2], ...]`
if let [Some(argument)] = overload.parameter_types() {
let overridden_return =
argument.into_tuple().map(Type::Tuple).unwrap_or_else(|| {
// Some awkward special handling is required here because of the fact
// that calling `try_iterate()` on `Never` returns `Never`,
// but `tuple[Never, ...]` eagerly simplifies to `tuple[()]`,
// which will cause us to emit false positives if we index into the tuple.
// Using `tuple[Unknown, ...]` avoids these false positives.
let specialization = if argument.is_never() {
Type::unknown()
} else {
argument.try_iterate(db).expect(
let tuple_spec = argument.try_iterate(db).expect(
"try_iterate() should not fail on a type \
assignable to `Iterable`",
)
};
TupleType::homogeneous(db, specialization)
});
overload.set_return_type(overridden_return);
);
overload.set_return_type(Type::tuple(TupleType::new(
db,
tuple_spec.as_ref(),
)));
}
}
@ -2097,14 +2087,7 @@ impl<'a, 'db> ArgumentTypeChecker<'a, 'db> {
// elements. For tuples, we don't have to do anything! For other types, we treat it as
// an iterator, and create a homogeneous tuple of its output type, since we don't know
// how many elements the iterator will produce.
// TODO: update `Type::try_iterate` to return this tuple type for us.
let argument_types = match argument_type {
Type::Tuple(tuple) => Cow::Borrowed(tuple.tuple(self.db)),
_ => {
let element_type = argument_type.iterate(self.db);
Cow::Owned(Tuple::homogeneous(element_type))
}
};
let argument_types = argument_type.iterate(self.db);
// TODO: When we perform argument expansion during overload resolution, we might need
// to retry both `match_parameters` _and_ `check_types` for each expansion. Currently

View file

@ -618,6 +618,8 @@ impl<'db> ClassType<'db> {
match specialization {
Some(spec) => {
// TODO: Once we support PEP 646 annotations for `*args` parameters, we can
// use the tuple itself as the argument type.
let tuple = spec.tuple(db);
let tuple_len = tuple.len();
@ -2137,7 +2139,8 @@ impl<'db> ClassLiteral<'db> {
index.expression(for_stmt.iterable(&module)),
);
// TODO: Potential diagnostics resulting from the iterable are currently not reported.
let inferred_ty = iterable_ty.iterate(db);
let inferred_ty =
iterable_ty.iterate(db).homogeneous_element_type(db);
union_of_inferred_types = union_of_inferred_types.add(inferred_ty);
}
@ -2195,7 +2198,8 @@ impl<'db> ClassLiteral<'db> {
index.expression(comprehension.iterable(&module)),
);
// TODO: Potential diagnostics resulting from the iterable are currently not reported.
let inferred_ty = iterable_ty.iterate(db);
let inferred_ty =
iterable_ty.iterate(db).homogeneous_element_type(db);
union_of_inferred_types = union_of_inferred_types.add(inferred_ty);
}

View file

@ -194,7 +194,7 @@ impl<'db> GenericContext<'db> {
db: &'db dyn Db,
tuple: TupleType<'db>,
) -> Specialization<'db> {
let element_type = UnionType::from_elements(db, tuple.tuple(db).all_elements());
let element_type = tuple.tuple(db).homogeneous_element_type(db);
Specialization::new(db, self, Box::from([element_type]), Some(tuple))
}

View file

@ -4533,6 +4533,7 @@ impl<'db, 'ast> TypeInferenceBuilder<'db, 'ast> {
builder
.infer_standalone_expression(iter_expr)
.iterate(builder.db())
.homogeneous_element_type(builder.db())
});
self.infer_body(body);
@ -4562,7 +4563,10 @@ impl<'db, 'ast> TypeInferenceBuilder<'db, 'ast> {
}
TargetKind::Single => {
let iterable_type = self.infer_standalone_expression(iterable);
iterable_type.try_iterate(self.db()).unwrap_or_else(|err| {
iterable_type
.try_iterate(self.db())
.map(|tuple| tuple.homogeneous_element_type(self.db()))
.unwrap_or_else(|err| {
err.report_diagnostic(&self.context, iterable_type, iterable.into());
err.fallback_element_type(self.db())
})
@ -5671,6 +5675,7 @@ impl<'db, 'ast> TypeInferenceBuilder<'db, 'ast> {
builder.infer_standalone_expression(iter_expr)
}
.iterate(builder.db())
.homogeneous_element_type(builder.db())
});
for expr in ifs {
self.infer_standalone_expression(expr);
@ -5719,7 +5724,10 @@ impl<'db, 'ast> TypeInferenceBuilder<'db, 'ast> {
}
TargetKind::Single => {
let iterable_type = infer_iterable_type();
iterable_type.try_iterate(self.db()).unwrap_or_else(|err| {
iterable_type
.try_iterate(self.db())
.map(|tuple| tuple.homogeneous_element_type(self.db()))
.unwrap_or_else(|err| {
err.report_diagnostic(&self.context, iterable_type, iterable.into());
err.fallback_element_type(self.db())
})
@ -6062,7 +6070,10 @@ impl<'db, 'ast> TypeInferenceBuilder<'db, 'ast> {
} = starred;
let iterable_type = self.infer_expression(value);
iterable_type.try_iterate(self.db()).unwrap_or_else(|err| {
iterable_type
.try_iterate(self.db())
.map(|tuple| tuple.homogeneous_element_type(self.db()))
.unwrap_or_else(|err| {
err.report_diagnostic(&self.context, iterable_type, value.as_ref().into());
err.fallback_element_type(self.db())
});
@ -6089,7 +6100,10 @@ impl<'db, 'ast> TypeInferenceBuilder<'db, 'ast> {
} = yield_from;
let iterable_type = self.infer_expression(value);
iterable_type.try_iterate(self.db()).unwrap_or_else(|err| {
iterable_type
.try_iterate(self.db())
.map(|tuple| tuple.homogeneous_element_type(self.db()))
.unwrap_or_else(|err| {
err.report_diagnostic(&self.context, iterable_type, value.as_ref().into());
err.fallback_element_type(self.db())
});

View file

@ -1014,6 +1014,10 @@ impl<T> Tuple<T> {
}
impl<'db> Tuple<Type<'db>> {
pub(crate) fn homogeneous_element_type(&self, db: &'db dyn Db) -> Type<'db> {
UnionType::from_elements(db, self.all_elements())
}
/// Concatenates another tuple to the end of this tuple, returning a new tuple.
pub(crate) fn concat(&self, db: &'db dyn Db, other: &Self) -> Self {
match self {

View file

@ -8,7 +8,7 @@ use ruff_python_ast::{self as ast, AnyNodeRef};
use crate::Db;
use crate::semantic_index::ast_ids::node_key::ExpressionNodeKey;
use crate::semantic_index::place::ScopeId;
use crate::types::tuple::{ResizeTupleError, Tuple, TupleLength, TupleUnpacker};
use crate::types::tuple::{ResizeTupleError, Tuple, TupleLength, TupleSpec, TupleUnpacker};
use crate::types::{Type, TypeCheckDiagnostics, infer_expression_types};
use crate::unpack::{UnpackKind, UnpackValue};
@ -64,7 +64,10 @@ impl<'db, 'ast> Unpacker<'db, 'ast> {
value_type
}
}
UnpackKind::Iterable => value_type.try_iterate(self.db()).unwrap_or_else(|err| {
UnpackKind::Iterable => value_type
.try_iterate(self.db())
.map(|tuple| tuple.homogeneous_element_type(self.db()))
.unwrap_or_else(|err| {
err.report_diagnostic(
&self.context,
value_type,
@ -118,30 +121,10 @@ impl<'db, 'ast> Unpacker<'db, 'ast> {
};
for ty in unpack_types.iter().copied() {
let tuple = match ty {
Type::Tuple(tuple_ty) => Cow::Borrowed(tuple_ty.tuple(self.db())),
Type::StringLiteral(string_literal_ty) => {
// We could go further and deconstruct to an array of `StringLiteral`
// with each individual character, instead of just an array of
// `LiteralString`, but there would be a cost and it's not clear that
// it's worth it.
Cow::Owned(Tuple::from_elements(std::iter::repeat_n(
Type::LiteralString,
string_literal_ty.python_len(self.db()),
)))
}
Type::LiteralString => Cow::Owned(Tuple::homogeneous(Type::LiteralString)),
_ => {
// TODO: Update our iterator protocol machinery to return a tuple
// describing the returned values in more detail, when we can.
Cow::Owned(Tuple::homogeneous(
ty.try_iterate(self.db()).unwrap_or_else(|err| {
let tuple = ty.try_iterate(self.db()).unwrap_or_else(|err| {
err.report_diagnostic(&self.context, ty, value_expr);
err.fallback_element_type(self.db())
}),
))
}
};
Cow::Owned(TupleSpec::homogeneous(err.fallback_element_type(self.db())))
});
if let Err(err) = unpacker.unpack_tuple(tuple.as_ref()) {
unpacker