mirror of
https://github.com/astral-sh/ruff.git
synced 2025-10-17 22:07:42 +00:00
[ty] Heterogeneous unpacking support for unions (#20377)
This commit is contained in:
parent
9de34e7ac1
commit
fd568f0221
7 changed files with 250 additions and 103 deletions
|
@ -694,6 +694,51 @@ def _(
|
|||
f1(*args10) # error: [invalid-argument-type]
|
||||
```
|
||||
|
||||
A union of heterogeneous tuples provided to a variadic parameter:
|
||||
|
||||
```py
|
||||
# Test inspired by ecosystem code at:
|
||||
# - <https://github.com/home-assistant/core/blob/bde4eb50111a72f9717fe73ee5929e50eb06911b/homeassistant/components/lovelace/websocket.py#L50-L59>
|
||||
# - <https://github.com/pydata/xarray/blob/3572f4e70f2b12ef9935c1f8c3c1b74045d2a092/xarray/tests/test_groupby.py#L3058-L3059>
|
||||
|
||||
def f2(a: str, b: bool): ...
|
||||
def f3(coinflip: bool):
|
||||
if coinflip:
|
||||
args = "foo", True
|
||||
else:
|
||||
args = "bar", False
|
||||
|
||||
# revealed: tuple[Literal["foo"], Literal[True]] | tuple[Literal["bar"], Literal[False]]
|
||||
reveal_type(args)
|
||||
f2(*args) # fine
|
||||
|
||||
if coinflip:
|
||||
other_args = "foo", True
|
||||
else:
|
||||
other_args = "bar", (True,)
|
||||
|
||||
# revealed: tuple[Literal["foo"], Literal[True]] | tuple[Literal["bar"], tuple[Literal[True]]]
|
||||
reveal_type(other_args)
|
||||
# error: [invalid-argument-type] "Argument to function `f2` is incorrect: Expected `bool`, found `Literal[True] | tuple[Literal[True]]`"
|
||||
f2(*other_args)
|
||||
|
||||
def f4(a=None, b=None, c=None, d=None, e=None): ...
|
||||
|
||||
my_args = ((1, 2), (3, 4), (5, 6))
|
||||
|
||||
for tup in my_args:
|
||||
f4(*tup, e=None) # fine
|
||||
|
||||
my_other_args = (
|
||||
(1, 2, 3, 4, 5),
|
||||
(6, 7, 8, 9, 10),
|
||||
)
|
||||
|
||||
for tup in my_other_args:
|
||||
# error: [parameter-already-assigned] "Multiple values provided for parameter `e` of function `f4`"
|
||||
f4(*tup, e=None)
|
||||
```
|
||||
|
||||
### Mixed argument and parameter containing variadic
|
||||
|
||||
```toml
|
||||
|
|
|
@ -260,8 +260,22 @@ def g(
|
|||
reveal_type(x) # revealed: int | str
|
||||
for y in b:
|
||||
reveal_type(y) # revealed: str | int
|
||||
for z in c:
|
||||
reveal_type(z) # revealed: LiteralString | int
|
||||
```
|
||||
|
||||
## Union type as iterable where some elements in the union have precise tuple specs
|
||||
|
||||
If all elements in a union can be iterated over, we "union together" their "tuple specs" and are
|
||||
able to infer the iterable element precisely when iterating over the union, in the same way that we
|
||||
infer a precise type for the iterable element when iterating over a `Literal` string or bytes type:
|
||||
|
||||
```py
|
||||
from typing import Literal
|
||||
|
||||
def f(x: Literal["foo", b"bar"], y: Literal["foo"] | range):
|
||||
for item in x:
|
||||
reveal_type(item) # revealed: Literal["f", "o", 98, 97, 114]
|
||||
for item in y:
|
||||
reveal_type(item) # revealed: Literal["f", "o"] | int
|
||||
```
|
||||
|
||||
## Union type as iterable where one union element has no `__iter__` method
|
||||
|
|
|
@ -68,6 +68,10 @@ reveal_type((1,).__class__()) # revealed: tuple[Literal[1]]
|
|||
|
||||
# error: [missing-argument] "No argument provided for required parameter `iterable`"
|
||||
reveal_type((1, 2).__class__()) # revealed: tuple[Literal[1], Literal[2]]
|
||||
|
||||
def g(x: tuple[int, str] | tuple[bytes, bool], y: tuple[int, str] | tuple[bytes, bool, bytes]):
|
||||
reveal_type(tuple(x)) # revealed: tuple[int, str] | tuple[bytes, bool]
|
||||
reveal_type(tuple(y)) # revealed: tuple[int, str] | tuple[bytes, bool, bytes]
|
||||
```
|
||||
|
||||
## Instantiating tuple subclasses
|
||||
|
|
|
@ -60,7 +60,7 @@ use crate::types::infer::infer_unpack_types;
|
|||
use crate::types::mro::{Mro, MroError, MroIterator};
|
||||
pub(crate) use crate::types::narrow::infer_narrowing_constraint;
|
||||
use crate::types::signatures::{ParameterForm, walk_signature};
|
||||
use crate::types::tuple::TupleSpec;
|
||||
use crate::types::tuple::{TupleSpec, TupleSpecBuilder};
|
||||
pub(crate) use crate::types::typed_dict::{TypedDictParams, TypedDictType, walk_typed_dict_type};
|
||||
use crate::types::variance::{TypeVarVariance, VarianceInferable};
|
||||
use crate::types::visitor::any_over_type;
|
||||
|
@ -5534,11 +5534,117 @@ impl<'db> Type<'db> {
|
|||
db: &'db dyn Db,
|
||||
mode: EvaluationMode,
|
||||
) -> Result<Cow<'db, TupleSpec<'db>>, IterationError<'db>> {
|
||||
// We will not infer precise heterogeneous tuple specs for literals with lengths above this threshold.
|
||||
// The threshold here is somewhat arbitrary and conservative; it could be increased if needed.
|
||||
// However, it's probably very rare to need heterogeneous unpacking inference for long string literals
|
||||
// or bytes literals, and creating long heterogeneous tuple specs has a performance cost.
|
||||
const MAX_TUPLE_LENGTH: usize = 128;
|
||||
fn non_async_special_case<'db>(
|
||||
db: &'db dyn Db,
|
||||
ty: Type<'db>,
|
||||
) -> Option<Cow<'db, TupleSpec<'db>>> {
|
||||
// We will not infer precise heterogeneous tuple specs for literals with lengths above this threshold.
|
||||
// The threshold here is somewhat arbitrary and conservative; it could be increased if needed.
|
||||
// However, it's probably very rare to need heterogeneous unpacking inference for long string literals
|
||||
// or bytes literals, and creating long heterogeneous tuple specs has a performance cost.
|
||||
const MAX_TUPLE_LENGTH: usize = 128;
|
||||
|
||||
match ty {
|
||||
Type::NominalInstance(nominal) => nominal.tuple_spec(db),
|
||||
Type::GenericAlias(alias) if alias.origin(db).is_tuple(db) => {
|
||||
Some(Cow::Owned(TupleSpec::homogeneous(todo_type!(
|
||||
"*tuple[] annotations"
|
||||
))))
|
||||
}
|
||||
Type::StringLiteral(string_literal_ty) => {
|
||||
let string_literal = string_literal_ty.value(db);
|
||||
let spec = if string_literal.len() < MAX_TUPLE_LENGTH {
|
||||
TupleSpec::heterogeneous(
|
||||
string_literal
|
||||
.chars()
|
||||
.map(|c| Type::string_literal(db, &c.to_string())),
|
||||
)
|
||||
} else {
|
||||
TupleSpec::homogeneous(Type::LiteralString)
|
||||
};
|
||||
Some(Cow::Owned(spec))
|
||||
}
|
||||
Type::BytesLiteral(bytes) => {
|
||||
let bytes_literal = bytes.value(db);
|
||||
let spec = if bytes_literal.len() < MAX_TUPLE_LENGTH {
|
||||
TupleSpec::heterogeneous(
|
||||
bytes_literal
|
||||
.iter()
|
||||
.map(|b| Type::IntLiteral(i64::from(*b))),
|
||||
)
|
||||
} else {
|
||||
TupleSpec::homogeneous(KnownClass::Int.to_instance(db))
|
||||
};
|
||||
Some(Cow::Owned(spec))
|
||||
}
|
||||
Type::Never => {
|
||||
// The dunder logic below would have us return `tuple[Never, ...]`, which eagerly
|
||||
// simplifies to `tuple[()]`. That will cause us to emit false positives if we
|
||||
// index into the tuple. Using `tuple[Unknown, ...]` avoids these false positives.
|
||||
// TODO: Consider removing this special case, and instead hide the indexing
|
||||
// diagnostic in unreachable code.
|
||||
Some(Cow::Owned(TupleSpec::homogeneous(Type::unknown())))
|
||||
}
|
||||
Type::TypeAlias(alias) => {
|
||||
non_async_special_case(db, alias.value_type(db))
|
||||
}
|
||||
Type::NonInferableTypeVar(tvar) => match tvar.typevar(db).bound_or_constraints(db)? {
|
||||
TypeVarBoundOrConstraints::UpperBound(bound) => {
|
||||
non_async_special_case(db, bound)
|
||||
}
|
||||
TypeVarBoundOrConstraints::Constraints(union) => non_async_special_case(db, Type::Union(union)),
|
||||
},
|
||||
Type::TypeVar(_) => unreachable!(
|
||||
"should not be able to iterate over type variable {} in inferable position",
|
||||
ty.display(db)
|
||||
),
|
||||
Type::Union(union) => {
|
||||
let elements = union.elements(db);
|
||||
if elements.len() < MAX_TUPLE_LENGTH {
|
||||
let mut elements_iter = elements.iter();
|
||||
let first_element_spec = elements_iter.next()?.try_iterate_with_mode(db, EvaluationMode::Sync).ok()?;
|
||||
let mut builder = TupleSpecBuilder::from(&*first_element_spec);
|
||||
for element in elements_iter {
|
||||
builder = builder.union(db, &*element.try_iterate_with_mode(db, EvaluationMode::Sync).ok()?);
|
||||
}
|
||||
Some(Cow::Owned(builder.build()))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
// N.B. These special cases aren't strictly necessary, they're just obvious optimizations
|
||||
Type::LiteralString | Type::Dynamic(_) => Some(Cow::Owned(TupleSpec::homogeneous(ty))),
|
||||
|
||||
Type::FunctionLiteral(_)
|
||||
| Type::GenericAlias(_)
|
||||
| Type::BoundMethod(_)
|
||||
| Type::KnownBoundMethod(_)
|
||||
| Type::WrapperDescriptor(_)
|
||||
| Type::DataclassDecorator(_)
|
||||
| Type::DataclassTransformer(_)
|
||||
| Type::Callable(_)
|
||||
| Type::ModuleLiteral(_)
|
||||
// We could infer a precise tuple spec for enum classes with members,
|
||||
// but it's not clear whether that's worth the added complexity:
|
||||
// you'd have to check that `EnumMeta.__iter__` is not overridden for it to be sound
|
||||
// (enums can have `EnumMeta` subclasses as their metaclasses).
|
||||
| Type::ClassLiteral(_)
|
||||
| Type::SubclassOf(_)
|
||||
| Type::ProtocolInstance(_)
|
||||
| Type::SpecialForm(_)
|
||||
| Type::KnownInstance(_)
|
||||
| Type::PropertyInstance(_)
|
||||
| Type::Intersection(_)
|
||||
| Type::AlwaysTruthy
|
||||
| Type::AlwaysFalsy
|
||||
| Type::IntLiteral(_)
|
||||
| Type::BooleanLiteral(_)
|
||||
| Type::EnumLiteral(_)
|
||||
| Type::BoundSuper(_)
|
||||
| Type::TypeIs(_)
|
||||
| Type::TypedDict(_) => None
|
||||
}
|
||||
}
|
||||
|
||||
if mode.is_async() {
|
||||
let try_call_dunder_anext_on_iterator = |iterator: Type<'db>| -> Result<
|
||||
|
@ -5605,97 +5711,7 @@ impl<'db> Type<'db> {
|
|||
};
|
||||
}
|
||||
|
||||
let special_case = match self {
|
||||
Type::NominalInstance(nominal) => nominal.tuple_spec(db),
|
||||
Type::GenericAlias(alias) if alias.origin(db).is_tuple(db) => {
|
||||
Some(Cow::Owned(TupleSpec::homogeneous(todo_type!(
|
||||
"*tuple[] annotations"
|
||||
))))
|
||||
}
|
||||
Type::StringLiteral(string_literal_ty) => {
|
||||
let string_literal = string_literal_ty.value(db);
|
||||
let spec = if string_literal.len() < MAX_TUPLE_LENGTH {
|
||||
TupleSpec::heterogeneous(
|
||||
string_literal
|
||||
.chars()
|
||||
.map(|c| Type::string_literal(db, &c.to_string())),
|
||||
)
|
||||
} else {
|
||||
TupleSpec::homogeneous(Type::LiteralString)
|
||||
};
|
||||
Some(Cow::Owned(spec))
|
||||
}
|
||||
Type::BytesLiteral(bytes) => {
|
||||
let bytes_literal = bytes.value(db);
|
||||
let spec = if bytes_literal.len() < MAX_TUPLE_LENGTH {
|
||||
TupleSpec::heterogeneous(
|
||||
bytes_literal
|
||||
.iter()
|
||||
.map(|b| Type::IntLiteral(i64::from(*b))),
|
||||
)
|
||||
} else {
|
||||
TupleSpec::homogeneous(KnownClass::Int.to_instance(db))
|
||||
};
|
||||
Some(Cow::Owned(spec))
|
||||
}
|
||||
Type::Never => {
|
||||
// The dunder logic below would have us return `tuple[Never, ...]`, which eagerly
|
||||
// simplifies to `tuple[()]`. That will cause us to emit false positives if we
|
||||
// index into the tuple. Using `tuple[Unknown, ...]` avoids these false positives.
|
||||
// TODO: Consider removing this special case, and instead hide the indexing
|
||||
// diagnostic in unreachable code.
|
||||
Some(Cow::Owned(TupleSpec::homogeneous(Type::unknown())))
|
||||
}
|
||||
Type::TypeAlias(alias) => {
|
||||
Some(alias.value_type(db).try_iterate_with_mode(db, mode)?)
|
||||
}
|
||||
Type::NonInferableTypeVar(tvar) => match tvar.typevar(db).bound_or_constraints(db) {
|
||||
Some(TypeVarBoundOrConstraints::UpperBound(bound)) => {
|
||||
Some(bound.try_iterate_with_mode(db, mode)?)
|
||||
}
|
||||
// TODO: could we create a "union of tuple specs"...?
|
||||
// (Same question applies to the `Type::Union()` branch lower down)
|
||||
Some(TypeVarBoundOrConstraints::Constraints(_)) | None => None
|
||||
},
|
||||
Type::TypeVar(_) => unreachable!(
|
||||
"should not be able to iterate over type variable {} in inferable position",
|
||||
self.display(db)
|
||||
),
|
||||
// N.B. These special cases aren't strictly necessary, they're just obvious optimizations
|
||||
Type::LiteralString | Type::Dynamic(_) => Some(Cow::Owned(TupleSpec::homogeneous(self))),
|
||||
|
||||
Type::FunctionLiteral(_)
|
||||
| Type::GenericAlias(_)
|
||||
| Type::BoundMethod(_)
|
||||
| Type::KnownBoundMethod(_)
|
||||
| Type::WrapperDescriptor(_)
|
||||
| Type::DataclassDecorator(_)
|
||||
| Type::DataclassTransformer(_)
|
||||
| Type::Callable(_)
|
||||
| Type::ModuleLiteral(_)
|
||||
// We could infer a precise tuple spec for enum classes with members,
|
||||
// but it's not clear whether that's worth the added complexity:
|
||||
// you'd have to check that `EnumMeta.__iter__` is not overridden for it to be sound
|
||||
// (enums can have `EnumMeta` subclasses as their metaclasses).
|
||||
| Type::ClassLiteral(_)
|
||||
| Type::SubclassOf(_)
|
||||
| Type::ProtocolInstance(_)
|
||||
| Type::SpecialForm(_)
|
||||
| Type::KnownInstance(_)
|
||||
| Type::PropertyInstance(_)
|
||||
| Type::Union(_)
|
||||
| Type::Intersection(_)
|
||||
| Type::AlwaysTruthy
|
||||
| Type::AlwaysFalsy
|
||||
| Type::IntLiteral(_)
|
||||
| Type::BooleanLiteral(_)
|
||||
| Type::EnumLiteral(_)
|
||||
| Type::BoundSuper(_)
|
||||
| Type::TypeIs(_)
|
||||
| Type::TypedDict(_) => None
|
||||
};
|
||||
|
||||
if let Some(special_case) = special_case {
|
||||
if let Some(special_case) = non_async_special_case(db, self) {
|
||||
return Ok(special_case);
|
||||
}
|
||||
|
||||
|
|
|
@ -1106,10 +1106,14 @@ impl<'db> Bindings<'db> {
|
|||
// iterable (it could be a Liskov-uncompliant subtype of the `Iterable` class that sets
|
||||
// `__iter__ = None`, for example). That would be badly written Python code, but we still
|
||||
// need to be able to handle it without crashing.
|
||||
overload.set_return_type(Type::tuple(TupleType::new(
|
||||
db,
|
||||
&argument.iterate(db),
|
||||
)));
|
||||
let return_type = if let Type::Union(union) = argument {
|
||||
union.map(db, |element| {
|
||||
Type::tuple(TupleType::new(db, &element.iterate(db)))
|
||||
})
|
||||
} else {
|
||||
Type::tuple(TupleType::new(db, &argument.iterate(db)))
|
||||
};
|
||||
overload.set_return_type(return_type);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2309,6 +2313,12 @@ impl<'a, 'db> ArgumentMatcher<'a, 'db> {
|
|||
argument: Argument<'a>,
|
||||
argument_type: Option<Type<'db>>,
|
||||
) -> Result<(), ()> {
|
||||
// TODO: `Type::iterate` internally handles unions, but in a lossy way.
|
||||
// It might be superior here to manually map over the union and call `try_iterate`
|
||||
// on each element, similar to the way that `unpacker.rs` does in the `unpack_inner` method.
|
||||
// It might be a bit of a refactor, though.
|
||||
// See <https://github.com/astral-sh/ruff/pull/20377#issuecomment-3401380305>
|
||||
// for more details. --Alex
|
||||
let tuple = argument_type.map(|ty| ty.iterate(db));
|
||||
let (mut argument_types, length, variable_element) = match tuple.as_ref() {
|
||||
Some(tuple) => (
|
||||
|
|
|
@ -1583,6 +1583,59 @@ impl<'db> TupleSpecBuilder<'db> {
|
|||
}
|
||||
}
|
||||
|
||||
fn all_elements(&self) -> impl Iterator<Item = &Type<'db>> {
|
||||
match self {
|
||||
TupleSpecBuilder::Fixed(elements) => Either::Left(elements.iter()),
|
||||
TupleSpecBuilder::Variable {
|
||||
prefix,
|
||||
variable,
|
||||
suffix,
|
||||
} => Either::Right(prefix.iter().chain(std::iter::once(variable)).chain(suffix)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Return a new tuple-spec builder that reflects the union of this tuple and another tuple.
|
||||
///
|
||||
/// For example, if `self` is a tuple-spec builder for `tuple[Literal[42], str]` and `other` is a
|
||||
/// tuple-spec for `tuple[Literal[56], str]`, the result will be a tuple-spec builder for
|
||||
/// `tuple[Literal[42, 56], str]`.
|
||||
///
|
||||
/// To keep things simple, we currently only attempt to preserve the "fixed-length-ness" of
|
||||
/// a tuple spec if both `self` and `other` have the exact same length. For example,
|
||||
/// if `self` is a tuple-spec builder for `tuple[int, str]` and `other` is a tuple-spec for
|
||||
/// `tuple[int, str, bytes]`, the result will be a tuple-spec builder for
|
||||
/// `tuple[int | str | bytes, ...]`. We could consider improving this in the future if real-world
|
||||
/// use cases arise.
|
||||
pub(crate) fn union(mut self, db: &'db dyn Db, other: &TupleSpec<'db>) -> Self {
|
||||
match (&mut self, other) {
|
||||
(TupleSpecBuilder::Fixed(our_elements), TupleSpec::Fixed(new_elements))
|
||||
if our_elements.len() == new_elements.len() =>
|
||||
{
|
||||
for (existing, new) in our_elements.iter_mut().zip(new_elements.elements()) {
|
||||
*existing = UnionType::from_elements(db, [*existing, *new]);
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
// We *could* have a branch here where both `self` and `other` are mixed tuples
|
||||
// with same-length prefixes and same-length suffixes. We *could* zip the two
|
||||
// `prefix` vecs together, unioning each pair of elements to create a new `prefix`
|
||||
// vec, and do the same for the `suffix` vecs. This would preserve the tuple specs
|
||||
// of the union elements more closely. But it's hard to think of a test where this
|
||||
// would actually lead to more precise inference, so it's probably not worth the
|
||||
// complexity.
|
||||
_ => {
|
||||
let unioned =
|
||||
UnionType::from_elements(db, self.all_elements().chain(other.all_elements()));
|
||||
TupleSpecBuilder::Variable {
|
||||
prefix: vec![],
|
||||
variable: unioned,
|
||||
suffix: vec![],
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn build(self) -> TupleSpec<'db> {
|
||||
match self {
|
||||
TupleSpecBuilder::Fixed(elements) => {
|
||||
|
|
|
@ -118,6 +118,11 @@ impl<'db, 'ast> Unpacker<'db, 'ast> {
|
|||
};
|
||||
let mut unpacker = TupleUnpacker::new(self.db(), target_len);
|
||||
|
||||
// N.B. `Type::try_iterate` internally handles unions, but in a lossy way.
|
||||
// For our purposes here, we get better error messages and more precise inference
|
||||
// if we manually map over the union and call `try_iterate` on each union element.
|
||||
// See <https://github.com/astral-sh/ruff/pull/20377#issuecomment-3401380305>
|
||||
// for more discussion.
|
||||
let unpack_types = match value_ty {
|
||||
Type::Union(union_ty) => union_ty.elements(self.db()),
|
||||
_ => std::slice::from_ref(&value_ty),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue