[ty] Return a tuple spec from the iterator protocol (#19496)

This PR updates our iterator protocol machinery to return a tuple spec describing the elements that are returned, instead of a type. That allows us to track heterogeneous iterators more precisely, and consolidates the logic in unpacking and splatting, which are the two places where we can take advantage of that more precise information. (Other iterator consumers, like `for` loops, have to collapse the iterated elements down to a single type regardless, and we provide a new helper method on `TupleSpec` to perform that summarization.)
2025-11-25 06:13:01 +00:00 · 2025-07-23 17:11:44 -04:00 · 2025-07-23 17:11:44 -04:00 · e0149cd9f3
commit e0149cd9f3
parent 2a00eca66b
10 changed files with 204 additions and 125 deletions
--- a/crates/ty_python_semantic/resources/mdtest/call/function.md
+++ b/crates/ty_python_semantic/resources/mdtest/call/function.md
@ -259,6 +259,81 @@ def _(args: tuple[int, *tuple[str, ...], int]) -> None:
    takes_at_least_two_positional_only(*args)  # error: [invalid-argument-type]
 ```

+### String argument
+
+```py
+from typing import Literal
+
+def takes_zero() -> None: ...
+def takes_one(x: str) -> None: ...
+def takes_two(x: str, y: str) -> None: ...
+def takes_two_positional_only(x: str, y: str, /) -> None: ...
+def takes_two_different(x: int, y: str) -> None: ...
+def takes_two_different_positional_only(x: int, y: str, /) -> None: ...
+def takes_at_least_zero(*args) -> None: ...
+def takes_at_least_one(x: str, *args) -> None: ...
+def takes_at_least_two(x: str, y: str, *args) -> None: ...
+def takes_at_least_two_positional_only(x: str, y: str, /, *args) -> None: ...
+
+# Test all of the above with a number of different splatted argument types
+
+def _(args: Literal["a"]) -> None:
+    takes_zero(*args)  # error: [too-many-positional-arguments]
+    takes_one(*args)
+    takes_two(*args)  # error: [missing-argument]
+    takes_two_positional_only(*args)  # error: [missing-argument]
+    # error: [invalid-argument-type]
+    # error: [missing-argument]
+    takes_two_different(*args)
+    # error: [invalid-argument-type]
+    # error: [missing-argument]
+    takes_two_different_positional_only(*args)
+    takes_at_least_zero(*args)
+    takes_at_least_one(*args)
+    takes_at_least_two(*args)  # error: [missing-argument]
+    takes_at_least_two_positional_only(*args)  # error: [missing-argument]
+
+def _(args: Literal["ab"]) -> None:
+    takes_zero(*args)  # error: [too-many-positional-arguments]
+    takes_one(*args)  # error: [too-many-positional-arguments]
+    takes_two(*args)
+    takes_two_positional_only(*args)
+    takes_two_different(*args)  # error: [invalid-argument-type]
+    takes_two_different_positional_only(*args)  # error: [invalid-argument-type]
+    takes_at_least_zero(*args)
+    takes_at_least_one(*args)
+    takes_at_least_two(*args)
+    takes_at_least_two_positional_only(*args)
+
+def _(args: Literal["abc"]) -> None:
+    takes_zero(*args)  # error: [too-many-positional-arguments]
+    takes_one(*args)  # error: [too-many-positional-arguments]
+    takes_two(*args)  # error: [too-many-positional-arguments]
+    takes_two_positional_only(*args)  # error: [too-many-positional-arguments]
+    # error: [invalid-argument-type]
+    # error: [too-many-positional-arguments]
+    takes_two_different(*args)
+    # error: [invalid-argument-type]
+    # error: [too-many-positional-arguments]
+    takes_two_different_positional_only(*args)
+    takes_at_least_zero(*args)
+    takes_at_least_one(*args)
+    takes_at_least_two(*args)
+    takes_at_least_two_positional_only(*args)
+
+def _(args: str) -> None:
+    takes_zero(*args)
+    takes_one(*args)
+    takes_two(*args)
+    takes_two_positional_only(*args)
+    takes_two_different(*args)  # error: [invalid-argument-type]
+    takes_two_different_positional_only(*args)  # error: [invalid-argument-type]
+    takes_at_least_zero(*args)
+    takes_at_least_one(*args)
+    takes_at_least_two(*args)
+    takes_at_least_two_positional_only(*args)
+```
+
 ### Argument expansion regression

 This is a regression that was highlighted by the ecosystem check, which shows that we might need to
--- a/crates/ty_python_semantic/resources/mdtest/loops/for.md
+++ b/crates/ty_python_semantic/resources/mdtest/loops/for.md
@ -738,6 +738,13 @@ def _(flag: bool, flag2: bool):
        reveal_type(y)  # revealed: bytes | str | int
 ```

+## Empty tuple is iterable
+
+```py
+for x in ():
+    reveal_type(x)  # revealed: Never
+```
+
 ## Never is iterable

 ```py
@ -745,5 +752,5 @@ from typing_extensions import Never

 def f(never: Never):
    for x in never:
-        reveal_type(x)  # revealed: Never
+        reveal_type(x)  # revealed: Unknown
 ```
--- a/crates/ty_python_semantic/src/types.rs
+++ b/crates/ty_python_semantic/src/types.rs
@ -2,6 +2,7 @@ use infer::nearest_enclosing_class;
 use itertools::{Either, Itertools};
 use ruff_db::parsed::parsed_module;

+use std::borrow::Cow;
 use std::slice::Iter;

 use bitflags::bitflags;
@ -56,7 +57,7 @@ use crate::types::infer::infer_unpack_types;
 use crate::types::mro::{Mro, MroError, MroIterator};
 pub(crate) use crate::types::narrow::infer_narrowing_constraint;
 use crate::types::signatures::{Parameter, ParameterForm, Parameters, walk_signature};
-use crate::types::tuple::TupleType;
+use crate::types::tuple::{TupleSpec, TupleType};
 pub use crate::util::diagnostics::add_inferred_python_version_hint_to_diagnostic;
 use crate::{Db, FxOrderSet, Module, Program};
 pub(crate) use class::{ClassLiteral, ClassType, GenericAlias, KnownClass};
@ -813,13 +814,6 @@ impl<'db> Type<'db> {
            .expect("Expected a Type::EnumLiteral variant")
    }

-    pub(crate) const fn into_tuple(self) -> Option<TupleType<'db>> {
-        match self {
-            Type::Tuple(tuple_type) => Some(tuple_type),
-            _ => None,
-        }
-    }
-
    /// Turn a class literal (`Type::ClassLiteral` or `Type::GenericAlias`) into a `ClassType`.
    /// Since a `ClassType` must be specialized, apply the default specialization to any
    /// unspecialized generic class literal.
@ -4615,35 +4609,50 @@ impl<'db> Type<'db> {
        }
    }

-    /// Returns the element type when iterating over `self`.
+    /// Returns a tuple spec describing the elements that are produced when iterating over `self`.
    ///
    /// This method should only be used outside of type checking because it omits any errors.
    /// For type checking, use [`try_iterate`](Self::try_iterate) instead.
-    fn iterate(self, db: &'db dyn Db) -> Type<'db> {
+    fn iterate(self, db: &'db dyn Db) -> Cow<'db, TupleSpec<'db>> {
        self.try_iterate(db)
-            .unwrap_or_else(|err| err.fallback_element_type(db))
+            .unwrap_or_else(|err| Cow::Owned(TupleSpec::homogeneous(err.fallback_element_type(db))))
    }

    /// Given the type of an object that is iterated over in some way,
-    /// return the type of objects that are yielded by that iteration.
+    /// return a tuple spec describing the type of objects that are yielded by that iteration.
    ///
-    /// E.g., for the following loop, given the type of `x`, infer the type of `y`:
+    /// E.g., for the following call, given the type of `x`, infer the types of the values that are
+    /// splatted into `y`'s positional arguments:
    /// ```python
-    /// for y in x:
-    ///     pass
+    /// y(*x)
    /// ```
-    fn try_iterate(self, db: &'db dyn Db) -> Result<Type<'db>, IterationError<'db>> {
-        if let Type::Tuple(tuple_type) = self {
-            return Ok(UnionType::from_elements(
-                db,
-                tuple_type.tuple(db).all_elements(),
-            ));
+    fn try_iterate(self, db: &'db dyn Db) -> Result<Cow<'db, TupleSpec<'db>>, IterationError<'db>> {
+        match self {
+            Type::Tuple(tuple_type) => return Ok(Cow::Borrowed(tuple_type.tuple(db))),
+            Type::GenericAlias(alias) if alias.origin(db).is_tuple(db) => {
+                return Ok(Cow::Owned(TupleSpec::homogeneous(todo_type!(
+                    "*tuple[] annotations"
+                ))));
            }
-
-        if let Type::GenericAlias(alias) = self {
-            if alias.origin(db).is_tuple(db) {
-                return Ok(todo_type!("*tuple[] annotations"));
+            Type::StringLiteral(string_literal_ty) => {
+                // We could go further and deconstruct to an array of `StringLiteral`
+                // with each individual character, instead of just an array of
+                // `LiteralString`, but there would be a cost and it's not clear that
+                // it's worth it.
+                return Ok(Cow::Owned(TupleSpec::from_elements(std::iter::repeat_n(
+                    Type::LiteralString,
+                    string_literal_ty.python_len(db),
+                ))));
            }
+            Type::Never => {
+                // The dunder logic below would have us return `tuple[Never, ...]`, which eagerly
+                // simplifies to `tuple[()]`. That will will cause us to emit false positives if we
+                // index into the tuple. Using `tuple[Unknown, ...]` avoids these false positives.
+                // TODO: Consider removing this special case, and instead hide the indexing
+                // diagnostic in unreachable code.
+                return Ok(Cow::Owned(TupleSpec::homogeneous(Type::unknown())));
+            }
+            _ => {}
        }

        let try_call_dunder_getitem = || {
@ -4669,12 +4678,14 @@ impl<'db> Type<'db> {
            Ok(iterator) => {
                // `__iter__` is definitely bound and calling it succeeds.
                // See what calling `__next__` on the object returned by `__iter__` gives us...
-                try_call_dunder_next_on_iterator(iterator).map_err(|dunder_next_error| {
-                    IterationError::IterReturnsInvalidIterator {
+                try_call_dunder_next_on_iterator(iterator)
+                    .map(|ty| Cow::Owned(TupleSpec::homogeneous(ty)))
+                    .map_err(
+                        |dunder_next_error| IterationError::IterReturnsInvalidIterator {
                            iterator,
                            dunder_next_error,
-                    }
-                })
+                        },
+                    )
            }

            // `__iter__` is possibly unbound...
@ -4692,10 +4703,10 @@ impl<'db> Type<'db> {
                                // and the type returned by the `__getitem__` method.
                                //
                                // No diagnostic is emitted; iteration will always succeed!
-                                UnionType::from_elements(
+                                Cow::Owned(TupleSpec::homogeneous(UnionType::from_elements(
                                    db,
                                    [dunder_next_return, dunder_getitem_return_type],
-                                )
+                                )))
                            })
                            .map_err(|dunder_getitem_error| {
                                IterationError::PossiblyUnboundIterAndGetitemError {
@ -4718,13 +4729,13 @@ impl<'db> Type<'db> {
            }

            // There's no `__iter__` method. Try `__getitem__` instead...
-            Err(CallDunderError::MethodNotAvailable) => {
-                try_call_dunder_getitem().map_err(|dunder_getitem_error| {
-                    IterationError::UnboundIterAndGetitemError {
+            Err(CallDunderError::MethodNotAvailable) => try_call_dunder_getitem()
+                .map(|ty| Cow::Owned(TupleSpec::homogeneous(ty)))
+                .map_err(
+                    |dunder_getitem_error| IterationError::UnboundIterAndGetitemError {
                        dunder_getitem_error,
-                    }
-                })
-            }
+                    },
+                ),
        }
    }

--- a/crates/ty_python_semantic/src/types/call/arguments.rs
+++ b/crates/ty_python_semantic/src/types/call/arguments.rs
@ -51,12 +51,10 @@ impl<'a, 'db> CallArguments<'a, 'db> {
                ast::ArgOrKeyword::Arg(arg) => match arg {
                    ast::Expr::Starred(ast::ExprStarred { value, .. }) => {
                        let ty = infer_argument_type(arg, value);
-                        let length = match ty {
-                            Type::Tuple(tuple) => tuple.tuple(db).len(),
-                            // TODO: have `Type::try_iterator` return a tuple spec, and use its
-                            // length as this argument's arity
-                            _ => TupleLength::unknown(),
-                        };
+                        let length = ty
+                            .try_iterate(db)
+                            .map(|tuple| tuple.len())
+                            .unwrap_or(TupleLength::unknown());
                        (Argument::Variadic(length), Some(ty))
                    }
                    _ => (Argument::Positional, None),
--- a/crates/ty_python_semantic/src/types/call/bind.rs
+++ b/crates/ty_python_semantic/src/types/call/bind.rs
@ -977,24 +977,14 @@ impl<'db> Bindings<'db> {
                            // `tuple(range(42))` => `tuple[int, ...]`
                            // BUT `tuple((1, 2))` => `tuple[Literal[1], Literal[2]]` rather than `tuple[Literal[1, 2], ...]`
                            if let [Some(argument)] = overload.parameter_types() {
-                                let overridden_return =
-                                    argument.into_tuple().map(Type::Tuple).unwrap_or_else(|| {
-                                        // Some awkward special handling is required here because of the fact
-                                        // that calling `try_iterate()` on `Never` returns `Never`,
-                                        // but `tuple[Never, ...]` eagerly simplifies to `tuple[()]`,
-                                        // which will cause us to emit false positives if we index into the tuple.
-                                        // Using `tuple[Unknown, ...]` avoids these false positives.
-                                        let specialization = if argument.is_never() {
-                                            Type::unknown()
-                                        } else {
-                                            argument.try_iterate(db).expect(
+                                let tuple_spec = argument.try_iterate(db).expect(
                                    "try_iterate() should not fail on a type \
                                        assignable to `Iterable`",
-                                            )
-                                        };
-                                        TupleType::homogeneous(db, specialization)
-                                    });
-                                overload.set_return_type(overridden_return);
+                                );
+                                overload.set_return_type(Type::tuple(TupleType::new(
+                                    db,
+                                    tuple_spec.as_ref(),
+                                )));
                            }
                        }

@ -2097,14 +2087,7 @@ impl<'a, 'db> ArgumentTypeChecker<'a, 'db> {
            // elements. For tuples, we don't have to do anything! For other types, we treat it as
            // an iterator, and create a homogeneous tuple of its output type, since we don't know
            // how many elements the iterator will produce.
-            // TODO: update `Type::try_iterate` to return this tuple type for us.
-            let argument_types = match argument_type {
-                Type::Tuple(tuple) => Cow::Borrowed(tuple.tuple(self.db)),
-                _ => {
-                    let element_type = argument_type.iterate(self.db);
-                    Cow::Owned(Tuple::homogeneous(element_type))
-                }
-            };
+            let argument_types = argument_type.iterate(self.db);

            // TODO: When we perform argument expansion during overload resolution, we might need
            // to retry both `match_parameters` _and_ `check_types` for each expansion. Currently
--- a/crates/ty_python_semantic/src/types/class.rs
+++ b/crates/ty_python_semantic/src/types/class.rs
@ -618,6 +618,8 @@ impl<'db> ClassType<'db> {

                match specialization {
                    Some(spec) => {
+                        // TODO: Once we support PEP 646 annotations for `*args` parameters, we can
+                        // use the tuple itself as the argument type.
                        let tuple = spec.tuple(db);
                        let tuple_len = tuple.len();

@ -2137,7 +2139,8 @@ impl<'db> ClassLiteral<'db> {
                                    index.expression(for_stmt.iterable(&module)),
                                );
                                // TODO: Potential diagnostics resulting from the iterable are currently not reported.
-                                let inferred_ty = iterable_ty.iterate(db);
+                                let inferred_ty =
+                                    iterable_ty.iterate(db).homogeneous_element_type(db);

                                union_of_inferred_types = union_of_inferred_types.add(inferred_ty);
                            }
@ -2195,7 +2198,8 @@ impl<'db> ClassLiteral<'db> {
                                    index.expression(comprehension.iterable(&module)),
                                );
                                // TODO: Potential diagnostics resulting from the iterable are currently not reported.
-                                let inferred_ty = iterable_ty.iterate(db);
+                                let inferred_ty =
+                                    iterable_ty.iterate(db).homogeneous_element_type(db);

                                union_of_inferred_types = union_of_inferred_types.add(inferred_ty);
                            }
--- a/crates/ty_python_semantic/src/types/generics.rs
+++ b/crates/ty_python_semantic/src/types/generics.rs
@ -194,7 +194,7 @@ impl<'db> GenericContext<'db> {
        db: &'db dyn Db,
        tuple: TupleType<'db>,
    ) -> Specialization<'db> {
-        let element_type = UnionType::from_elements(db, tuple.tuple(db).all_elements());
+        let element_type = tuple.tuple(db).homogeneous_element_type(db);
        Specialization::new(db, self, Box::from([element_type]), Some(tuple))
    }

--- a/crates/ty_python_semantic/src/types/infer.rs
+++ b/crates/ty_python_semantic/src/types/infer.rs
@ -4533,6 +4533,7 @@ impl<'db, 'ast> TypeInferenceBuilder<'db, 'ast> {
            builder
                .infer_standalone_expression(iter_expr)
                .iterate(builder.db())
+                .homogeneous_element_type(builder.db())
        });

        self.infer_body(body);
@ -4562,7 +4563,10 @@ impl<'db, 'ast> TypeInferenceBuilder<'db, 'ast> {
                }
                TargetKind::Single => {
                    let iterable_type = self.infer_standalone_expression(iterable);
-                    iterable_type.try_iterate(self.db()).unwrap_or_else(|err| {
+                    iterable_type
+                        .try_iterate(self.db())
+                        .map(|tuple| tuple.homogeneous_element_type(self.db()))
+                        .unwrap_or_else(|err| {
                            err.report_diagnostic(&self.context, iterable_type, iterable.into());
                            err.fallback_element_type(self.db())
                        })
@ -5671,6 +5675,7 @@ impl<'db, 'ast> TypeInferenceBuilder<'db, 'ast> {
                builder.infer_standalone_expression(iter_expr)
            }
            .iterate(builder.db())
+            .homogeneous_element_type(builder.db())
        });
        for expr in ifs {
            self.infer_standalone_expression(expr);
@ -5719,7 +5724,10 @@ impl<'db, 'ast> TypeInferenceBuilder<'db, 'ast> {
                }
                TargetKind::Single => {
                    let iterable_type = infer_iterable_type();
-                    iterable_type.try_iterate(self.db()).unwrap_or_else(|err| {
+                    iterable_type
+                        .try_iterate(self.db())
+                        .map(|tuple| tuple.homogeneous_element_type(self.db()))
+                        .unwrap_or_else(|err| {
                            err.report_diagnostic(&self.context, iterable_type, iterable.into());
                            err.fallback_element_type(self.db())
                        })
@ -6062,7 +6070,10 @@ impl<'db, 'ast> TypeInferenceBuilder<'db, 'ast> {
        } = starred;

        let iterable_type = self.infer_expression(value);
-        iterable_type.try_iterate(self.db()).unwrap_or_else(|err| {
+        iterable_type
+            .try_iterate(self.db())
+            .map(|tuple| tuple.homogeneous_element_type(self.db()))
+            .unwrap_or_else(|err| {
                err.report_diagnostic(&self.context, iterable_type, value.as_ref().into());
                err.fallback_element_type(self.db())
            });
@ -6089,7 +6100,10 @@ impl<'db, 'ast> TypeInferenceBuilder<'db, 'ast> {
        } = yield_from;

        let iterable_type = self.infer_expression(value);
-        iterable_type.try_iterate(self.db()).unwrap_or_else(|err| {
+        iterable_type
+            .try_iterate(self.db())
+            .map(|tuple| tuple.homogeneous_element_type(self.db()))
+            .unwrap_or_else(|err| {
                err.report_diagnostic(&self.context, iterable_type, value.as_ref().into());
                err.fallback_element_type(self.db())
            });
--- a/crates/ty_python_semantic/src/types/tuple.rs
+++ b/crates/ty_python_semantic/src/types/tuple.rs
@ -1014,6 +1014,10 @@ impl<T> Tuple<T> {
 }

 impl<'db> Tuple<Type<'db>> {
+    pub(crate) fn homogeneous_element_type(&self, db: &'db dyn Db) -> Type<'db> {
+        UnionType::from_elements(db, self.all_elements())
+    }
+
    /// Concatenates another tuple to the end of this tuple, returning a new tuple.
    pub(crate) fn concat(&self, db: &'db dyn Db, other: &Self) -> Self {
        match self {
--- a/crates/ty_python_semantic/src/types/unpacker.rs
+++ b/crates/ty_python_semantic/src/types/unpacker.rs
@ -8,7 +8,7 @@ use ruff_python_ast::{self as ast, AnyNodeRef};
 use crate::Db;
 use crate::semantic_index::ast_ids::node_key::ExpressionNodeKey;
 use crate::semantic_index::place::ScopeId;
-use crate::types::tuple::{ResizeTupleError, Tuple, TupleLength, TupleUnpacker};
+use crate::types::tuple::{ResizeTupleError, Tuple, TupleLength, TupleSpec, TupleUnpacker};
 use crate::types::{Type, TypeCheckDiagnostics, infer_expression_types};
 use crate::unpack::{UnpackKind, UnpackValue};

@ -64,7 +64,10 @@ impl<'db, 'ast> Unpacker<'db, 'ast> {
                    value_type
                }
            }
-            UnpackKind::Iterable => value_type.try_iterate(self.db()).unwrap_or_else(|err| {
+            UnpackKind::Iterable => value_type
+                .try_iterate(self.db())
+                .map(|tuple| tuple.homogeneous_element_type(self.db()))
+                .unwrap_or_else(|err| {
                    err.report_diagnostic(
                        &self.context,
                        value_type,
@ -118,30 +121,10 @@ impl<'db, 'ast> Unpacker<'db, 'ast> {
                };

                for ty in unpack_types.iter().copied() {
-                    let tuple = match ty {
-                        Type::Tuple(tuple_ty) => Cow::Borrowed(tuple_ty.tuple(self.db())),
-                        Type::StringLiteral(string_literal_ty) => {
-                            // We could go further and deconstruct to an array of `StringLiteral`
-                            // with each individual character, instead of just an array of
-                            // `LiteralString`, but there would be a cost and it's not clear that
-                            // it's worth it.
-                            Cow::Owned(Tuple::from_elements(std::iter::repeat_n(
-                                Type::LiteralString,
-                                string_literal_ty.python_len(self.db()),
-                            )))
-                        }
-                        Type::LiteralString => Cow::Owned(Tuple::homogeneous(Type::LiteralString)),
-                        _ => {
-                            // TODO: Update our iterator protocol machinery to return a tuple
-                            // describing the returned values in more detail, when we can.
-                            Cow::Owned(Tuple::homogeneous(
-                                ty.try_iterate(self.db()).unwrap_or_else(|err| {
+                    let tuple = ty.try_iterate(self.db()).unwrap_or_else(|err| {
                        err.report_diagnostic(&self.context, ty, value_expr);
-                                    err.fallback_element_type(self.db())
-                                }),
-                            ))
-                        }
-                    };
+                        Cow::Owned(TupleSpec::homogeneous(err.fallback_element_type(self.db())))
+                    });

                    if let Err(err) = unpacker.unpack_tuple(tuple.as_ref()) {
                        unpacker