[ty] Heterogeneous unpacking support for unions (#20377)

This commit is contained in:
Alex Waygood 2025-10-15 19:30:03 +01:00 committed by GitHub
parent 9de34e7ac1
commit fd568f0221
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 250 additions and 103 deletions

View file

@ -60,7 +60,7 @@ use crate::types::infer::infer_unpack_types;
use crate::types::mro::{Mro, MroError, MroIterator};
pub(crate) use crate::types::narrow::infer_narrowing_constraint;
use crate::types::signatures::{ParameterForm, walk_signature};
use crate::types::tuple::TupleSpec;
use crate::types::tuple::{TupleSpec, TupleSpecBuilder};
pub(crate) use crate::types::typed_dict::{TypedDictParams, TypedDictType, walk_typed_dict_type};
use crate::types::variance::{TypeVarVariance, VarianceInferable};
use crate::types::visitor::any_over_type;
@ -5534,11 +5534,117 @@ impl<'db> Type<'db> {
db: &'db dyn Db,
mode: EvaluationMode,
) -> Result<Cow<'db, TupleSpec<'db>>, IterationError<'db>> {
// We will not infer precise heterogeneous tuple specs for literals with lengths above this threshold.
// The threshold here is somewhat arbitrary and conservative; it could be increased if needed.
// However, it's probably very rare to need heterogeneous unpacking inference for long string literals
// or bytes literals, and creating long heterogeneous tuple specs has a performance cost.
const MAX_TUPLE_LENGTH: usize = 128;
fn non_async_special_case<'db>(
db: &'db dyn Db,
ty: Type<'db>,
) -> Option<Cow<'db, TupleSpec<'db>>> {
// We will not infer precise heterogeneous tuple specs for literals with lengths above this threshold.
// The threshold here is somewhat arbitrary and conservative; it could be increased if needed.
// However, it's probably very rare to need heterogeneous unpacking inference for long string literals
// or bytes literals, and creating long heterogeneous tuple specs has a performance cost.
const MAX_TUPLE_LENGTH: usize = 128;
match ty {
Type::NominalInstance(nominal) => nominal.tuple_spec(db),
Type::GenericAlias(alias) if alias.origin(db).is_tuple(db) => {
Some(Cow::Owned(TupleSpec::homogeneous(todo_type!(
"*tuple[] annotations"
))))
}
Type::StringLiteral(string_literal_ty) => {
let string_literal = string_literal_ty.value(db);
let spec = if string_literal.len() < MAX_TUPLE_LENGTH {
TupleSpec::heterogeneous(
string_literal
.chars()
.map(|c| Type::string_literal(db, &c.to_string())),
)
} else {
TupleSpec::homogeneous(Type::LiteralString)
};
Some(Cow::Owned(spec))
}
Type::BytesLiteral(bytes) => {
let bytes_literal = bytes.value(db);
let spec = if bytes_literal.len() < MAX_TUPLE_LENGTH {
TupleSpec::heterogeneous(
bytes_literal
.iter()
.map(|b| Type::IntLiteral(i64::from(*b))),
)
} else {
TupleSpec::homogeneous(KnownClass::Int.to_instance(db))
};
Some(Cow::Owned(spec))
}
Type::Never => {
// The dunder logic below would have us return `tuple[Never, ...]`, which eagerly
// simplifies to `tuple[()]`. That will will cause us to emit false positives if we
// index into the tuple. Using `tuple[Unknown, ...]` avoids these false positives.
// TODO: Consider removing this special case, and instead hide the indexing
// diagnostic in unreachable code.
Some(Cow::Owned(TupleSpec::homogeneous(Type::unknown())))
}
Type::TypeAlias(alias) => {
non_async_special_case(db, alias.value_type(db))
}
Type::NonInferableTypeVar(tvar) => match tvar.typevar(db).bound_or_constraints(db)? {
TypeVarBoundOrConstraints::UpperBound(bound) => {
non_async_special_case(db, bound)
}
TypeVarBoundOrConstraints::Constraints(union) => non_async_special_case(db, Type::Union(union)),
},
Type::TypeVar(_) => unreachable!(
"should not be able to iterate over type variable {} in inferable position",
ty.display(db)
),
Type::Union(union) => {
let elements = union.elements(db);
if elements.len() < MAX_TUPLE_LENGTH {
let mut elements_iter = elements.iter();
let first_element_spec = elements_iter.next()?.try_iterate_with_mode(db, EvaluationMode::Sync).ok()?;
let mut builder = TupleSpecBuilder::from(&*first_element_spec);
for element in elements_iter {
builder = builder.union(db, &*element.try_iterate_with_mode(db, EvaluationMode::Sync).ok()?);
}
Some(Cow::Owned(builder.build()))
} else {
None
}
}
// N.B. These special cases aren't strictly necessary, they're just obvious optimizations
Type::LiteralString | Type::Dynamic(_) => Some(Cow::Owned(TupleSpec::homogeneous(ty))),
Type::FunctionLiteral(_)
| Type::GenericAlias(_)
| Type::BoundMethod(_)
| Type::KnownBoundMethod(_)
| Type::WrapperDescriptor(_)
| Type::DataclassDecorator(_)
| Type::DataclassTransformer(_)
| Type::Callable(_)
| Type::ModuleLiteral(_)
// We could infer a precise tuple spec for enum classes with members,
// but it's not clear whether that's worth the added complexity:
// you'd have to check that `EnumMeta.__iter__` is not overridden for it to be sound
// (enums can have `EnumMeta` subclasses as their metaclasses).
| Type::ClassLiteral(_)
| Type::SubclassOf(_)
| Type::ProtocolInstance(_)
| Type::SpecialForm(_)
| Type::KnownInstance(_)
| Type::PropertyInstance(_)
| Type::Intersection(_)
| Type::AlwaysTruthy
| Type::AlwaysFalsy
| Type::IntLiteral(_)
| Type::BooleanLiteral(_)
| Type::EnumLiteral(_)
| Type::BoundSuper(_)
| Type::TypeIs(_)
| Type::TypedDict(_) => None
}
}
if mode.is_async() {
let try_call_dunder_anext_on_iterator = |iterator: Type<'db>| -> Result<
@ -5605,97 +5711,7 @@ impl<'db> Type<'db> {
};
}
let special_case = match self {
Type::NominalInstance(nominal) => nominal.tuple_spec(db),
Type::GenericAlias(alias) if alias.origin(db).is_tuple(db) => {
Some(Cow::Owned(TupleSpec::homogeneous(todo_type!(
"*tuple[] annotations"
))))
}
Type::StringLiteral(string_literal_ty) => {
let string_literal = string_literal_ty.value(db);
let spec = if string_literal.len() < MAX_TUPLE_LENGTH {
TupleSpec::heterogeneous(
string_literal
.chars()
.map(|c| Type::string_literal(db, &c.to_string())),
)
} else {
TupleSpec::homogeneous(Type::LiteralString)
};
Some(Cow::Owned(spec))
}
Type::BytesLiteral(bytes) => {
let bytes_literal = bytes.value(db);
let spec = if bytes_literal.len() < MAX_TUPLE_LENGTH {
TupleSpec::heterogeneous(
bytes_literal
.iter()
.map(|b| Type::IntLiteral(i64::from(*b))),
)
} else {
TupleSpec::homogeneous(KnownClass::Int.to_instance(db))
};
Some(Cow::Owned(spec))
}
Type::Never => {
// The dunder logic below would have us return `tuple[Never, ...]`, which eagerly
// simplifies to `tuple[()]`. That will will cause us to emit false positives if we
// index into the tuple. Using `tuple[Unknown, ...]` avoids these false positives.
// TODO: Consider removing this special case, and instead hide the indexing
// diagnostic in unreachable code.
Some(Cow::Owned(TupleSpec::homogeneous(Type::unknown())))
}
Type::TypeAlias(alias) => {
Some(alias.value_type(db).try_iterate_with_mode(db, mode)?)
}
Type::NonInferableTypeVar(tvar) => match tvar.typevar(db).bound_or_constraints(db) {
Some(TypeVarBoundOrConstraints::UpperBound(bound)) => {
Some(bound.try_iterate_with_mode(db, mode)?)
}
// TODO: could we create a "union of tuple specs"...?
// (Same question applies to the `Type::Union()` branch lower down)
Some(TypeVarBoundOrConstraints::Constraints(_)) | None => None
},
Type::TypeVar(_) => unreachable!(
"should not be able to iterate over type variable {} in inferable position",
self.display(db)
),
// N.B. These special cases aren't strictly necessary, they're just obvious optimizations
Type::LiteralString | Type::Dynamic(_) => Some(Cow::Owned(TupleSpec::homogeneous(self))),
Type::FunctionLiteral(_)
| Type::GenericAlias(_)
| Type::BoundMethod(_)
| Type::KnownBoundMethod(_)
| Type::WrapperDescriptor(_)
| Type::DataclassDecorator(_)
| Type::DataclassTransformer(_)
| Type::Callable(_)
| Type::ModuleLiteral(_)
// We could infer a precise tuple spec for enum classes with members,
// but it's not clear whether that's worth the added complexity:
// you'd have to check that `EnumMeta.__iter__` is not overridden for it to be sound
// (enums can have `EnumMeta` subclasses as their metaclasses).
| Type::ClassLiteral(_)
| Type::SubclassOf(_)
| Type::ProtocolInstance(_)
| Type::SpecialForm(_)
| Type::KnownInstance(_)
| Type::PropertyInstance(_)
| Type::Union(_)
| Type::Intersection(_)
| Type::AlwaysTruthy
| Type::AlwaysFalsy
| Type::IntLiteral(_)
| Type::BooleanLiteral(_)
| Type::EnumLiteral(_)
| Type::BoundSuper(_)
| Type::TypeIs(_)
| Type::TypedDict(_) => None
};
if let Some(special_case) = special_case {
if let Some(special_case) = non_async_special_case(db, self) {
return Ok(special_case);
}

View file

@ -1106,10 +1106,14 @@ impl<'db> Bindings<'db> {
// iterable (it could be a Liskov-uncompliant subtype of the `Iterable` class that sets
// `__iter__ = None`, for example). That would be badly written Python code, but we still
// need to be able to handle it without crashing.
overload.set_return_type(Type::tuple(TupleType::new(
db,
&argument.iterate(db),
)));
let return_type = if let Type::Union(union) = argument {
union.map(db, |element| {
Type::tuple(TupleType::new(db, &element.iterate(db)))
})
} else {
Type::tuple(TupleType::new(db, &argument.iterate(db)))
};
overload.set_return_type(return_type);
}
}
@ -2309,6 +2313,12 @@ impl<'a, 'db> ArgumentMatcher<'a, 'db> {
argument: Argument<'a>,
argument_type: Option<Type<'db>>,
) -> Result<(), ()> {
// TODO: `Type::iterate` internally handles unions, but in a lossy way.
// It might be superior here to manually map over the union and call `try_iterate`
// on each element, similar to the way that `unpacker.rs` does in the `unpack_inner` method.
// It might be a bit of a refactor, though.
// See <https://github.com/astral-sh/ruff/pull/20377#issuecomment-3401380305>
// for more details. --Alex
let tuple = argument_type.map(|ty| ty.iterate(db));
let (mut argument_types, length, variable_element) = match tuple.as_ref() {
Some(tuple) => (

View file

@ -1583,6 +1583,59 @@ impl<'db> TupleSpecBuilder<'db> {
}
}
fn all_elements(&self) -> impl Iterator<Item = &Type<'db>> {
match self {
TupleSpecBuilder::Fixed(elements) => Either::Left(elements.iter()),
TupleSpecBuilder::Variable {
prefix,
variable,
suffix,
} => Either::Right(prefix.iter().chain(std::iter::once(variable)).chain(suffix)),
}
}
/// Return a new tuple-spec builder that reflects the union of this tuple and another tuple.
///
/// For example, if `self` is a tuple-spec builder for `tuple[Literal[42], str]` and `other` is a
/// tuple-spec for `tuple[Literal[56], str]`, the result will be a tuple-spec builder for
/// `tuple[Literal[42, 56], str]`.
///
/// To keep things simple, we currently only attempt to preserve the "fixed-length-ness" of
/// a tuple spec if both `self` and `other` have the exact same length. For example,
/// if `self` is a tuple-spec builder for `tuple[int, str]` and `other` is a tuple-spec for
/// `tuple[int, str, bytes]`, the result will be a tuple-spec builder for
/// `tuple[int | str | bytes, ...]`. We could consider improving this in the future if real-world
/// use cases arise.
pub(crate) fn union(mut self, db: &'db dyn Db, other: &TupleSpec<'db>) -> Self {
match (&mut self, other) {
(TupleSpecBuilder::Fixed(our_elements), TupleSpec::Fixed(new_elements))
if our_elements.len() == new_elements.len() =>
{
for (existing, new) in our_elements.iter_mut().zip(new_elements.elements()) {
*existing = UnionType::from_elements(db, [*existing, *new]);
}
self
}
// We *could* have a branch here where both `self` and `other` are mixed tuples
// with same-length prefixes and same-length suffixes. We *could* zip the two
// `prefix` vecs together, unioning each pair of elements to create a new `prefix`
// vec, and do the same for the `suffix` vecs. This would preserve the tuple specs
// of the union elements more closely. But it's hard to think of a test where this
// would actually lead to more precise inference, so it's probably not worth the
// complexity.
_ => {
let unioned =
UnionType::from_elements(db, self.all_elements().chain(other.all_elements()));
TupleSpecBuilder::Variable {
prefix: vec![],
variable: unioned,
suffix: vec![],
}
}
}
}
pub(super) fn build(self) -> TupleSpec<'db> {
match self {
TupleSpecBuilder::Fixed(elements) => {

View file

@ -118,6 +118,11 @@ impl<'db, 'ast> Unpacker<'db, 'ast> {
};
let mut unpacker = TupleUnpacker::new(self.db(), target_len);
// N.B. `Type::try_iterate` internally handles unions, but in a lossy way.
// For our purposes here, we get better error messages and more precise inference
// if we manually map over the union and call `try_iterate` on each union element.
// See <https://github.com/astral-sh/ruff/pull/20377#issuecomment-3401380305>
// for more discussion.
let unpack_types = match value_ty {
Type::Union(union_ty) => union_ty.elements(self.db()),
_ => std::slice::from_ref(&value_ty),