[ty] Heterogeneous unpacking support for unions (#20377)

This commit is contained in:
Alex Waygood 2025-10-15 19:30:03 +01:00 committed by GitHub
parent 9de34e7ac1
commit fd568f0221
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 250 additions and 103 deletions

View file

@ -694,6 +694,51 @@ def _(
f1(*args10) # error: [invalid-argument-type]
```
A union of heterogeneous tuples provided to a variadic parameter:
```py
# Test inspired by ecosystem code at:
# - <https://github.com/home-assistant/core/blob/bde4eb50111a72f9717fe73ee5929e50eb06911b/homeassistant/components/lovelace/websocket.py#L50-L59>
# - <https://github.com/pydata/xarray/blob/3572f4e70f2b12ef9935c1f8c3c1b74045d2a092/xarray/tests/test_groupby.py#L3058-L3059>
def f2(a: str, b: bool): ...
def f3(coinflip: bool):
if coinflip:
args = "foo", True
else:
args = "bar", False
# revealed: tuple[Literal["foo"], Literal[True]] | tuple[Literal["bar"], Literal[False]]
reveal_type(args)
f2(*args) # fine
if coinflip:
other_args = "foo", True
else:
other_args = "bar", (True,)
# revealed: tuple[Literal["foo"], Literal[True]] | tuple[Literal["bar"], tuple[Literal[True]]]
reveal_type(other_args)
# error: [invalid-argument-type] "Argument to function `f2` is incorrect: Expected `bool`, found `Literal[True] | tuple[Literal[True]]`"
f2(*other_args)
def f4(a=None, b=None, c=None, d=None, e=None): ...
my_args = ((1, 2), (3, 4), (5, 6))
for tup in my_args:
f4(*tup, e=None) # fine
my_other_args = (
(1, 2, 3, 4, 5),
(6, 7, 8, 9, 10),
)
for tup in my_other_args:
# error: [parameter-already-assigned] "Multiple values provided for parameter `e` of function `f4`"
f4(*tup, e=None)
```
### Mixed argument and parameter containing variadic
```toml

View file

@ -260,8 +260,22 @@ def g(
reveal_type(x) # revealed: int | str
for y in b:
reveal_type(y) # revealed: str | int
for z in c:
reveal_type(z) # revealed: LiteralString | int
```
## Union type as iterable where some elements in the union have precise tuple specs
If all elements in a union can be iterated over, we "union together" their "tuple specs" and are
able to infer the iterable element precisely when iterating over the union, in the same way that we
infer a precise type for the iterable element when iterating over a `Literal` string or bytes type:
```py
from typing import Literal
def f(x: Literal["foo", b"bar"], y: Literal["foo"] | range):
for item in x:
reveal_type(item) # revealed: Literal["f", "o", 98, 97, 114]
for item in y:
reveal_type(item) # revealed: Literal["f", "o"] | int
```
## Union type as iterable where one union element has no `__iter__` method

View file

@ -68,6 +68,10 @@ reveal_type((1,).__class__()) # revealed: tuple[Literal[1]]
# error: [missing-argument] "No argument provided for required parameter `iterable`"
reveal_type((1, 2).__class__()) # revealed: tuple[Literal[1], Literal[2]]
def g(x: tuple[int, str] | tuple[bytes, bool], y: tuple[int, str] | tuple[bytes, bool, bytes]):
reveal_type(tuple(x)) # revealed: tuple[int, str] | tuple[bytes, bool]
reveal_type(tuple(y)) # revealed: tuple[int, str] | tuple[bytes, bool, bytes]
```
## Instantiating tuple subclasses

View file

@ -60,7 +60,7 @@ use crate::types::infer::infer_unpack_types;
use crate::types::mro::{Mro, MroError, MroIterator};
pub(crate) use crate::types::narrow::infer_narrowing_constraint;
use crate::types::signatures::{ParameterForm, walk_signature};
use crate::types::tuple::TupleSpec;
use crate::types::tuple::{TupleSpec, TupleSpecBuilder};
pub(crate) use crate::types::typed_dict::{TypedDictParams, TypedDictType, walk_typed_dict_type};
use crate::types::variance::{TypeVarVariance, VarianceInferable};
use crate::types::visitor::any_over_type;
@ -5534,11 +5534,117 @@ impl<'db> Type<'db> {
db: &'db dyn Db,
mode: EvaluationMode,
) -> Result<Cow<'db, TupleSpec<'db>>, IterationError<'db>> {
// We will not infer precise heterogeneous tuple specs for literals with lengths above this threshold.
// The threshold here is somewhat arbitrary and conservative; it could be increased if needed.
// However, it's probably very rare to need heterogeneous unpacking inference for long string literals
// or bytes literals, and creating long heterogeneous tuple specs has a performance cost.
const MAX_TUPLE_LENGTH: usize = 128;
fn non_async_special_case<'db>(
db: &'db dyn Db,
ty: Type<'db>,
) -> Option<Cow<'db, TupleSpec<'db>>> {
// We will not infer precise heterogeneous tuple specs for literals with lengths above this threshold.
// The threshold here is somewhat arbitrary and conservative; it could be increased if needed.
// However, it's probably very rare to need heterogeneous unpacking inference for long string literals
// or bytes literals, and creating long heterogeneous tuple specs has a performance cost.
const MAX_TUPLE_LENGTH: usize = 128;
match ty {
Type::NominalInstance(nominal) => nominal.tuple_spec(db),
Type::GenericAlias(alias) if alias.origin(db).is_tuple(db) => {
Some(Cow::Owned(TupleSpec::homogeneous(todo_type!(
"*tuple[] annotations"
))))
}
Type::StringLiteral(string_literal_ty) => {
let string_literal = string_literal_ty.value(db);
let spec = if string_literal.len() < MAX_TUPLE_LENGTH {
TupleSpec::heterogeneous(
string_literal
.chars()
.map(|c| Type::string_literal(db, &c.to_string())),
)
} else {
TupleSpec::homogeneous(Type::LiteralString)
};
Some(Cow::Owned(spec))
}
Type::BytesLiteral(bytes) => {
let bytes_literal = bytes.value(db);
let spec = if bytes_literal.len() < MAX_TUPLE_LENGTH {
TupleSpec::heterogeneous(
bytes_literal
.iter()
.map(|b| Type::IntLiteral(i64::from(*b))),
)
} else {
TupleSpec::homogeneous(KnownClass::Int.to_instance(db))
};
Some(Cow::Owned(spec))
}
Type::Never => {
// The dunder logic below would have us return `tuple[Never, ...]`, which eagerly
// simplifies to `tuple[()]`. That will will cause us to emit false positives if we
// index into the tuple. Using `tuple[Unknown, ...]` avoids these false positives.
// TODO: Consider removing this special case, and instead hide the indexing
// diagnostic in unreachable code.
Some(Cow::Owned(TupleSpec::homogeneous(Type::unknown())))
}
Type::TypeAlias(alias) => {
non_async_special_case(db, alias.value_type(db))
}
Type::NonInferableTypeVar(tvar) => match tvar.typevar(db).bound_or_constraints(db)? {
TypeVarBoundOrConstraints::UpperBound(bound) => {
non_async_special_case(db, bound)
}
TypeVarBoundOrConstraints::Constraints(union) => non_async_special_case(db, Type::Union(union)),
},
Type::TypeVar(_) => unreachable!(
"should not be able to iterate over type variable {} in inferable position",
ty.display(db)
),
Type::Union(union) => {
let elements = union.elements(db);
if elements.len() < MAX_TUPLE_LENGTH {
let mut elements_iter = elements.iter();
let first_element_spec = elements_iter.next()?.try_iterate_with_mode(db, EvaluationMode::Sync).ok()?;
let mut builder = TupleSpecBuilder::from(&*first_element_spec);
for element in elements_iter {
builder = builder.union(db, &*element.try_iterate_with_mode(db, EvaluationMode::Sync).ok()?);
}
Some(Cow::Owned(builder.build()))
} else {
None
}
}
// N.B. These special cases aren't strictly necessary, they're just obvious optimizations
Type::LiteralString | Type::Dynamic(_) => Some(Cow::Owned(TupleSpec::homogeneous(ty))),
Type::FunctionLiteral(_)
| Type::GenericAlias(_)
| Type::BoundMethod(_)
| Type::KnownBoundMethod(_)
| Type::WrapperDescriptor(_)
| Type::DataclassDecorator(_)
| Type::DataclassTransformer(_)
| Type::Callable(_)
| Type::ModuleLiteral(_)
// We could infer a precise tuple spec for enum classes with members,
// but it's not clear whether that's worth the added complexity:
// you'd have to check that `EnumMeta.__iter__` is not overridden for it to be sound
// (enums can have `EnumMeta` subclasses as their metaclasses).
| Type::ClassLiteral(_)
| Type::SubclassOf(_)
| Type::ProtocolInstance(_)
| Type::SpecialForm(_)
| Type::KnownInstance(_)
| Type::PropertyInstance(_)
| Type::Intersection(_)
| Type::AlwaysTruthy
| Type::AlwaysFalsy
| Type::IntLiteral(_)
| Type::BooleanLiteral(_)
| Type::EnumLiteral(_)
| Type::BoundSuper(_)
| Type::TypeIs(_)
| Type::TypedDict(_) => None
}
}
if mode.is_async() {
let try_call_dunder_anext_on_iterator = |iterator: Type<'db>| -> Result<
@ -5605,97 +5711,7 @@ impl<'db> Type<'db> {
};
}
let special_case = match self {
Type::NominalInstance(nominal) => nominal.tuple_spec(db),
Type::GenericAlias(alias) if alias.origin(db).is_tuple(db) => {
Some(Cow::Owned(TupleSpec::homogeneous(todo_type!(
"*tuple[] annotations"
))))
}
Type::StringLiteral(string_literal_ty) => {
let string_literal = string_literal_ty.value(db);
let spec = if string_literal.len() < MAX_TUPLE_LENGTH {
TupleSpec::heterogeneous(
string_literal
.chars()
.map(|c| Type::string_literal(db, &c.to_string())),
)
} else {
TupleSpec::homogeneous(Type::LiteralString)
};
Some(Cow::Owned(spec))
}
Type::BytesLiteral(bytes) => {
let bytes_literal = bytes.value(db);
let spec = if bytes_literal.len() < MAX_TUPLE_LENGTH {
TupleSpec::heterogeneous(
bytes_literal
.iter()
.map(|b| Type::IntLiteral(i64::from(*b))),
)
} else {
TupleSpec::homogeneous(KnownClass::Int.to_instance(db))
};
Some(Cow::Owned(spec))
}
Type::Never => {
// The dunder logic below would have us return `tuple[Never, ...]`, which eagerly
// simplifies to `tuple[()]`. That will will cause us to emit false positives if we
// index into the tuple. Using `tuple[Unknown, ...]` avoids these false positives.
// TODO: Consider removing this special case, and instead hide the indexing
// diagnostic in unreachable code.
Some(Cow::Owned(TupleSpec::homogeneous(Type::unknown())))
}
Type::TypeAlias(alias) => {
Some(alias.value_type(db).try_iterate_with_mode(db, mode)?)
}
Type::NonInferableTypeVar(tvar) => match tvar.typevar(db).bound_or_constraints(db) {
Some(TypeVarBoundOrConstraints::UpperBound(bound)) => {
Some(bound.try_iterate_with_mode(db, mode)?)
}
// TODO: could we create a "union of tuple specs"...?
// (Same question applies to the `Type::Union()` branch lower down)
Some(TypeVarBoundOrConstraints::Constraints(_)) | None => None
},
Type::TypeVar(_) => unreachable!(
"should not be able to iterate over type variable {} in inferable position",
self.display(db)
),
// N.B. These special cases aren't strictly necessary, they're just obvious optimizations
Type::LiteralString | Type::Dynamic(_) => Some(Cow::Owned(TupleSpec::homogeneous(self))),
Type::FunctionLiteral(_)
| Type::GenericAlias(_)
| Type::BoundMethod(_)
| Type::KnownBoundMethod(_)
| Type::WrapperDescriptor(_)
| Type::DataclassDecorator(_)
| Type::DataclassTransformer(_)
| Type::Callable(_)
| Type::ModuleLiteral(_)
// We could infer a precise tuple spec for enum classes with members,
// but it's not clear whether that's worth the added complexity:
// you'd have to check that `EnumMeta.__iter__` is not overridden for it to be sound
// (enums can have `EnumMeta` subclasses as their metaclasses).
| Type::ClassLiteral(_)
| Type::SubclassOf(_)
| Type::ProtocolInstance(_)
| Type::SpecialForm(_)
| Type::KnownInstance(_)
| Type::PropertyInstance(_)
| Type::Union(_)
| Type::Intersection(_)
| Type::AlwaysTruthy
| Type::AlwaysFalsy
| Type::IntLiteral(_)
| Type::BooleanLiteral(_)
| Type::EnumLiteral(_)
| Type::BoundSuper(_)
| Type::TypeIs(_)
| Type::TypedDict(_) => None
};
if let Some(special_case) = special_case {
if let Some(special_case) = non_async_special_case(db, self) {
return Ok(special_case);
}

View file

@ -1106,10 +1106,14 @@ impl<'db> Bindings<'db> {
// iterable (it could be a Liskov-uncompliant subtype of the `Iterable` class that sets
// `__iter__ = None`, for example). That would be badly written Python code, but we still
// need to be able to handle it without crashing.
overload.set_return_type(Type::tuple(TupleType::new(
db,
&argument.iterate(db),
)));
let return_type = if let Type::Union(union) = argument {
union.map(db, |element| {
Type::tuple(TupleType::new(db, &element.iterate(db)))
})
} else {
Type::tuple(TupleType::new(db, &argument.iterate(db)))
};
overload.set_return_type(return_type);
}
}
@ -2309,6 +2313,12 @@ impl<'a, 'db> ArgumentMatcher<'a, 'db> {
argument: Argument<'a>,
argument_type: Option<Type<'db>>,
) -> Result<(), ()> {
// TODO: `Type::iterate` internally handles unions, but in a lossy way.
// It might be superior here to manually map over the union and call `try_iterate`
// on each element, similar to the way that `unpacker.rs` does in the `unpack_inner` method.
// It might be a bit of a refactor, though.
// See <https://github.com/astral-sh/ruff/pull/20377#issuecomment-3401380305>
// for more details. --Alex
let tuple = argument_type.map(|ty| ty.iterate(db));
let (mut argument_types, length, variable_element) = match tuple.as_ref() {
Some(tuple) => (

View file

@ -1583,6 +1583,59 @@ impl<'db> TupleSpecBuilder<'db> {
}
}
fn all_elements(&self) -> impl Iterator<Item = &Type<'db>> {
match self {
TupleSpecBuilder::Fixed(elements) => Either::Left(elements.iter()),
TupleSpecBuilder::Variable {
prefix,
variable,
suffix,
} => Either::Right(prefix.iter().chain(std::iter::once(variable)).chain(suffix)),
}
}
/// Return a new tuple-spec builder that reflects the union of this tuple and another tuple.
///
/// For example, if `self` is a tuple-spec builder for `tuple[Literal[42], str]` and `other` is a
/// tuple-spec for `tuple[Literal[56], str]`, the result will be a tuple-spec builder for
/// `tuple[Literal[42, 56], str]`.
///
/// To keep things simple, we currently only attempt to preserve the "fixed-length-ness" of
/// a tuple spec if both `self` and `other` have the exact same length. For example,
/// if `self` is a tuple-spec builder for `tuple[int, str]` and `other` is a tuple-spec for
/// `tuple[int, str, bytes]`, the result will be a tuple-spec builder for
/// `tuple[int | str | bytes, ...]`. We could consider improving this in the future if real-world
/// use cases arise.
pub(crate) fn union(mut self, db: &'db dyn Db, other: &TupleSpec<'db>) -> Self {
match (&mut self, other) {
(TupleSpecBuilder::Fixed(our_elements), TupleSpec::Fixed(new_elements))
if our_elements.len() == new_elements.len() =>
{
for (existing, new) in our_elements.iter_mut().zip(new_elements.elements()) {
*existing = UnionType::from_elements(db, [*existing, *new]);
}
self
}
// We *could* have a branch here where both `self` and `other` are mixed tuples
// with same-length prefixes and same-length suffixes. We *could* zip the two
// `prefix` vecs together, unioning each pair of elements to create a new `prefix`
// vec, and do the same for the `suffix` vecs. This would preserve the tuple specs
// of the union elements more closely. But it's hard to think of a test where this
// would actually lead to more precise inference, so it's probably not worth the
// complexity.
_ => {
let unioned =
UnionType::from_elements(db, self.all_elements().chain(other.all_elements()));
TupleSpecBuilder::Variable {
prefix: vec![],
variable: unioned,
suffix: vec![],
}
}
}
}
pub(super) fn build(self) -> TupleSpec<'db> {
match self {
TupleSpecBuilder::Fixed(elements) => {

View file

@ -118,6 +118,11 @@ impl<'db, 'ast> Unpacker<'db, 'ast> {
};
let mut unpacker = TupleUnpacker::new(self.db(), target_len);
// N.B. `Type::try_iterate` internally handles unions, but in a lossy way.
// For our purposes here, we get better error messages and more precise inference
// if we manually map over the union and call `try_iterate` on each union element.
// See <https://github.com/astral-sh/ruff/pull/20377#issuecomment-3401380305>
// for more discussion.
let unpack_types = match value_ty {
Type::Union(union_ty) => union_ty.elements(self.db()),
_ => std::slice::from_ref(&value_ty),