mirror of
https://github.com/astral-sh/ruff.git
synced 2025-09-29 13:24:57 +00:00
[ty] Add precise iteration and unpacking inference for string literals and bytes literals (#20023)
## Summary

Previously we held off from doing this because we weren't sure that it was worth the added complexity cost. But our code has changed in the months since we made that initial decision, and I think the structure of the code is such that it no longer really leads to much added complexity to add precise inference when unpacking a string literal or a bytes literal. The improved inference we gain from this has real benefits to users (see the mypy_primer report), and this PR doesn't appear to have a performance impact.

## Test plan

mdtests
This commit is contained in:
parent
796819e7a0
commit
bc6ea68733
4 changed files with 203 additions and 43 deletions
|
@ -74,6 +74,22 @@ def match_non_exhaustive(x: Literal[0, 1, "a"]):
|
||||||
|
|
||||||
# this diagnostic is correct: the inferred type of `x` is `Literal[1]`
|
# this diagnostic is correct: the inferred type of `x` is `Literal[1]`
|
||||||
assert_never(x) # error: [type-assertion-failure]
|
assert_never(x) # error: [type-assertion-failure]
|
||||||
|
|
||||||
|
# This is based on real-world code:
|
||||||
|
# https://github.com/scipy/scipy/blob/99c0ef6af161a4d8157cae5276a20c30b7677c6f/scipy/linalg/tests/test_lapack.py#L147-L171
|
||||||
|
def exhaustiveness_using_containment_checks():
|
||||||
|
for norm_str in "Mm1OoIiFfEe":
|
||||||
|
if norm_str in "FfEe":
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
if norm_str in "Mm":
|
||||||
|
return
|
||||||
|
elif norm_str in "1Oo":
|
||||||
|
return
|
||||||
|
elif norm_str in "Ii":
|
||||||
|
return
|
||||||
|
|
||||||
|
assert_never(norm_str)
|
||||||
```
|
```
|
||||||
|
|
||||||
## Checks on enum literals
|
## Checks on enum literals
|
||||||
|
|
|
@ -755,6 +755,18 @@ def f(never: Never):
|
||||||
reveal_type(x) # revealed: Unknown
|
reveal_type(x) # revealed: Unknown
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Iterating over literals
|
||||||
|
|
||||||
|
```py
|
||||||
|
from typing import Literal
|
||||||
|
|
||||||
|
for char in "abcde":
|
||||||
|
reveal_type(char) # revealed: Literal["a", "b", "c", "d", "e"]
|
||||||
|
|
||||||
|
for char in b"abcde":
|
||||||
|
reveal_type(char) # revealed: Literal[97, 98, 99, 100, 101]
|
||||||
|
```
|
||||||
|
|
||||||
## A class literal is iterable if it inherits from `Any`
|
## A class literal is iterable if it inherits from `Any`
|
||||||
|
|
||||||
A class literal can be iterated over if it has `Any` or `Unknown` in its MRO, since the
|
A class literal can be iterated over if it has `Any` or `Unknown` in its MRO, since the
|
||||||
|
|
|
@ -523,8 +523,8 @@ def f(x: MixedTupleSubclass):
|
||||||
|
|
||||||
```py
|
```py
|
||||||
a, b = "ab"
|
a, b = "ab"
|
||||||
reveal_type(a) # revealed: LiteralString
|
reveal_type(a) # revealed: Literal["a"]
|
||||||
reveal_type(b) # revealed: LiteralString
|
reveal_type(b) # revealed: Literal["b"]
|
||||||
```
|
```
|
||||||
|
|
||||||
### Uneven unpacking (1)
|
### Uneven unpacking (1)
|
||||||
|
@ -570,37 +570,37 @@ reveal_type(d) # revealed: Unknown
|
||||||
|
|
||||||
```py
|
```py
|
||||||
(a, *b, c) = "ab"
|
(a, *b, c) = "ab"
|
||||||
reveal_type(a) # revealed: LiteralString
|
reveal_type(a) # revealed: Literal["a"]
|
||||||
reveal_type(b) # revealed: list[Never]
|
reveal_type(b) # revealed: list[Never]
|
||||||
reveal_type(c) # revealed: LiteralString
|
reveal_type(c) # revealed: Literal["b"]
|
||||||
```
|
```
|
||||||
|
|
||||||
### Starred expression (3)
|
### Starred expression (3)
|
||||||
|
|
||||||
```py
|
```py
|
||||||
(a, *b, c) = "abc"
|
(a, *b, c) = "abc"
|
||||||
reveal_type(a) # revealed: LiteralString
|
reveal_type(a) # revealed: Literal["a"]
|
||||||
reveal_type(b) # revealed: list[LiteralString]
|
reveal_type(b) # revealed: list[Literal["b"]]
|
||||||
reveal_type(c) # revealed: LiteralString
|
reveal_type(c) # revealed: Literal["c"]
|
||||||
```
|
```
|
||||||
|
|
||||||
### Starred expression (4)
|
### Starred expression (4)
|
||||||
|
|
||||||
```py
|
```py
|
||||||
(a, *b, c, d) = "abcdef"
|
(a, *b, c, d) = "abcdef"
|
||||||
reveal_type(a) # revealed: LiteralString
|
reveal_type(a) # revealed: Literal["a"]
|
||||||
reveal_type(b) # revealed: list[LiteralString]
|
reveal_type(b) # revealed: list[Literal["b", "c", "d"]]
|
||||||
reveal_type(c) # revealed: LiteralString
|
reveal_type(c) # revealed: Literal["e"]
|
||||||
reveal_type(d) # revealed: LiteralString
|
reveal_type(d) # revealed: Literal["f"]
|
||||||
```
|
```
|
||||||
|
|
||||||
### Starred expression (5)
|
### Starred expression (5)
|
||||||
|
|
||||||
```py
|
```py
|
||||||
(a, b, *c) = "abcd"
|
(a, b, *c) = "abcd"
|
||||||
reveal_type(a) # revealed: LiteralString
|
reveal_type(a) # revealed: Literal["a"]
|
||||||
reveal_type(b) # revealed: LiteralString
|
reveal_type(b) # revealed: Literal["b"]
|
||||||
reveal_type(c) # revealed: list[LiteralString]
|
reveal_type(c) # revealed: list[Literal["c", "d"]]
|
||||||
```
|
```
|
||||||
|
|
||||||
### Starred expression (6)
|
### Starred expression (6)
|
||||||
|
@ -650,8 +650,114 @@ reveal_type(b) # revealed: Unknown
|
||||||
```py
|
```py
|
||||||
(a, b) = "\ud800\udfff"
|
(a, b) = "\ud800\udfff"
|
||||||
|
|
||||||
|
reveal_type(a) # revealed: Literal["<22>"]
|
||||||
|
reveal_type(b) # revealed: Literal["<22>"]
|
||||||
|
```
|
||||||
|
|
||||||
|
### Very long literal
|
||||||
|
|
||||||
|
```py
|
||||||
|
string = "very long stringgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg"
|
||||||
|
|
||||||
|
a, *b = string
|
||||||
reveal_type(a) # revealed: LiteralString
|
reveal_type(a) # revealed: LiteralString
|
||||||
reveal_type(b) # revealed: LiteralString
|
reveal_type(b) # revealed: list[LiteralString]
|
||||||
|
```
|
||||||
|
|
||||||
|
## Bytes
|
||||||
|
|
||||||
|
### Simple unpacking
|
||||||
|
|
||||||
|
```py
|
||||||
|
a, b = b"ab"
|
||||||
|
reveal_type(a) # revealed: Literal[97]
|
||||||
|
reveal_type(b) # revealed: Literal[98]
|
||||||
|
```
|
||||||
|
|
||||||
|
### Uneven unpacking (1)
|
||||||
|
|
||||||
|
```py
|
||||||
|
# error: [invalid-assignment] "Not enough values to unpack: Expected 3"
|
||||||
|
a, b, c = b"ab"
|
||||||
|
reveal_type(a) # revealed: Unknown
|
||||||
|
reveal_type(b) # revealed: Unknown
|
||||||
|
reveal_type(c) # revealed: Unknown
|
||||||
|
```
|
||||||
|
|
||||||
|
### Uneven unpacking (2)
|
||||||
|
|
||||||
|
```py
|
||||||
|
# error: [invalid-assignment] "Too many values to unpack: Expected 2"
|
||||||
|
a, b = b"abc"
|
||||||
|
reveal_type(a) # revealed: Unknown
|
||||||
|
reveal_type(b) # revealed: Unknown
|
||||||
|
```
|
||||||
|
|
||||||
|
### Starred expression (1)
|
||||||
|
|
||||||
|
```py
|
||||||
|
# error: [invalid-assignment] "Not enough values to unpack: Expected at least 3"
|
||||||
|
(a, *b, c, d) = b"ab"
|
||||||
|
reveal_type(a) # revealed: Unknown
|
||||||
|
reveal_type(b) # revealed: list[Unknown]
|
||||||
|
reveal_type(c) # revealed: Unknown
|
||||||
|
reveal_type(d) # revealed: Unknown
|
||||||
|
```
|
||||||
|
|
||||||
|
```py
|
||||||
|
# error: [invalid-assignment] "Not enough values to unpack: Expected at least 3"
|
||||||
|
(a, b, *c, d) = b"a"
|
||||||
|
reveal_type(a) # revealed: Unknown
|
||||||
|
reveal_type(b) # revealed: Unknown
|
||||||
|
reveal_type(c) # revealed: list[Unknown]
|
||||||
|
reveal_type(d) # revealed: Unknown
|
||||||
|
```
|
||||||
|
|
||||||
|
### Starred expression (2)
|
||||||
|
|
||||||
|
```py
|
||||||
|
(a, *b, c) = b"ab"
|
||||||
|
reveal_type(a) # revealed: Literal[97]
|
||||||
|
reveal_type(b) # revealed: list[Never]
|
||||||
|
reveal_type(c) # revealed: Literal[98]
|
||||||
|
```
|
||||||
|
|
||||||
|
### Starred expression (3)
|
||||||
|
|
||||||
|
```py
|
||||||
|
(a, *b, c) = b"abc"
|
||||||
|
reveal_type(a) # revealed: Literal[97]
|
||||||
|
reveal_type(b) # revealed: list[Literal[98]]
|
||||||
|
reveal_type(c) # revealed: Literal[99]
|
||||||
|
```
|
||||||
|
|
||||||
|
### Starred expression (4)
|
||||||
|
|
||||||
|
```py
|
||||||
|
(a, *b, c, d) = b"abcdef"
|
||||||
|
reveal_type(a) # revealed: Literal[97]
|
||||||
|
reveal_type(b) # revealed: list[Literal[98, 99, 100]]
|
||||||
|
reveal_type(c) # revealed: Literal[101]
|
||||||
|
reveal_type(d) # revealed: Literal[102]
|
||||||
|
```
|
||||||
|
|
||||||
|
### Starred expression (5)
|
||||||
|
|
||||||
|
```py
|
||||||
|
(a, b, *c) = b"abcd"
|
||||||
|
reveal_type(a) # revealed: Literal[97]
|
||||||
|
reveal_type(b) # revealed: Literal[98]
|
||||||
|
reveal_type(c) # revealed: list[Literal[99, 100]]
|
||||||
|
```
|
||||||
|
|
||||||
|
### Very long literal
|
||||||
|
|
||||||
|
```py
|
||||||
|
too_long = b"very long bytes stringggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg"
|
||||||
|
|
||||||
|
a, *b = too_long
|
||||||
|
reveal_type(a) # revealed: int
|
||||||
|
reveal_type(b) # revealed: list[int]
|
||||||
```
|
```
|
||||||
|
|
||||||
## Union
|
## Union
|
||||||
|
@ -714,7 +820,7 @@ def _(arg: tuple[int, tuple[str, bytes]] | tuple[tuple[int, bytes], Literal["ab"
|
||||||
a, (b, c) = arg
|
a, (b, c) = arg
|
||||||
reveal_type(a) # revealed: int | tuple[int, bytes]
|
reveal_type(a) # revealed: int | tuple[int, bytes]
|
||||||
reveal_type(b) # revealed: str
|
reveal_type(b) # revealed: str
|
||||||
reveal_type(c) # revealed: bytes | LiteralString
|
reveal_type(c) # revealed: bytes | Literal["b"]
|
||||||
```
|
```
|
||||||
|
|
||||||
### Starred expression
|
### Starred expression
|
||||||
|
@ -785,8 +891,8 @@ from typing import Literal
|
||||||
|
|
||||||
def _(arg: tuple[int, int] | Literal["ab"]):
|
def _(arg: tuple[int, int] | Literal["ab"]):
|
||||||
a, b = arg
|
a, b = arg
|
||||||
reveal_type(a) # revealed: int | LiteralString
|
reveal_type(a) # revealed: int | Literal["a"]
|
||||||
reveal_type(b) # revealed: int | LiteralString
|
reveal_type(b) # revealed: int | Literal["b"]
|
||||||
```
|
```
|
||||||
|
|
||||||
### Custom iterator (1)
|
### Custom iterator (1)
|
||||||
|
|
|
@ -4917,6 +4917,12 @@ impl<'db> Type<'db> {
|
||||||
db: &'db dyn Db,
|
db: &'db dyn Db,
|
||||||
mode: EvaluationMode,
|
mode: EvaluationMode,
|
||||||
) -> Result<Cow<'db, TupleSpec<'db>>, IterationError<'db>> {
|
) -> Result<Cow<'db, TupleSpec<'db>>, IterationError<'db>> {
|
||||||
|
// We will not infer precise heterogeneous tuple specs for literals with lengths above this threshold.
|
||||||
|
// The threshold here is somewhat arbitrary and conservative; it could be increased if needed.
|
||||||
|
// However, it's probably very rare to need heterogeneous unpacking inference for long string literals
|
||||||
|
// or bytes literals, and creating long heterogeneous tuple specs has a performance cost.
|
||||||
|
const MAX_TUPLE_LENGTH: usize = 128;
|
||||||
|
|
||||||
if mode.is_async() {
|
if mode.is_async() {
|
||||||
let try_call_dunder_anext_on_iterator = |iterator: Type<'db>| -> Result<
|
let try_call_dunder_anext_on_iterator = |iterator: Type<'db>| -> Result<
|
||||||
Result<Type<'db>, AwaitError<'db>>,
|
Result<Type<'db>, AwaitError<'db>>,
|
||||||
|
@ -4972,26 +4978,38 @@ impl<'db> Type<'db> {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
match self {
|
let special_case = match self {
|
||||||
Type::NominalInstance(nominal) => {
|
Type::NominalInstance(nominal) => nominal.tuple_spec(db),
|
||||||
if let Some(spec) = nominal.tuple_spec(db) {
|
|
||||||
return Ok(spec);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Type::GenericAlias(alias) if alias.origin(db).is_tuple(db) => {
|
Type::GenericAlias(alias) if alias.origin(db).is_tuple(db) => {
|
||||||
return Ok(Cow::Owned(TupleSpec::homogeneous(todo_type!(
|
Some(Cow::Owned(TupleSpec::homogeneous(todo_type!(
|
||||||
"*tuple[] annotations"
|
"*tuple[] annotations"
|
||||||
))));
|
))))
|
||||||
}
|
}
|
||||||
Type::StringLiteral(string_literal_ty) => {
|
Type::StringLiteral(string_literal_ty) => {
|
||||||
// We could go further and deconstruct to an array of `StringLiteral`
|
let string_literal = string_literal_ty.value(db);
|
||||||
// with each individual character, instead of just an array of
|
let spec = if string_literal.len() < MAX_TUPLE_LENGTH {
|
||||||
// `LiteralString`, but there would be a cost and it's not clear that
|
TupleSpec::heterogeneous(
|
||||||
// it's worth it.
|
string_literal
|
||||||
return Ok(Cow::Owned(TupleSpec::heterogeneous(std::iter::repeat_n(
|
.chars()
|
||||||
Type::LiteralString,
|
.map(|c| Type::string_literal(db, &c.to_string())),
|
||||||
string_literal_ty.python_len(db),
|
)
|
||||||
))));
|
} else {
|
||||||
|
TupleSpec::homogeneous(Type::LiteralString)
|
||||||
|
};
|
||||||
|
Some(Cow::Owned(spec))
|
||||||
|
}
|
||||||
|
Type::BytesLiteral(bytes) => {
|
||||||
|
let bytes_literal = bytes.value(db);
|
||||||
|
let spec = if bytes_literal.len() < MAX_TUPLE_LENGTH {
|
||||||
|
TupleSpec::heterogeneous(
|
||||||
|
bytes_literal
|
||||||
|
.iter()
|
||||||
|
.map(|b| Type::IntLiteral(i64::from(*b))),
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
TupleSpec::homogeneous(KnownClass::Int.to_instance(db))
|
||||||
|
};
|
||||||
|
Some(Cow::Owned(spec))
|
||||||
}
|
}
|
||||||
Type::Never => {
|
Type::Never => {
|
||||||
// The dunder logic below would have us return `tuple[Never, ...]`, which eagerly
|
// The dunder logic below would have us return `tuple[Never, ...]`, which eagerly
|
||||||
|
@ -4999,25 +5017,27 @@ impl<'db> Type<'db> {
|
||||||
// index into the tuple. Using `tuple[Unknown, ...]` avoids these false positives.
|
// index into the tuple. Using `tuple[Unknown, ...]` avoids these false positives.
|
||||||
// TODO: Consider removing this special case, and instead hide the indexing
|
// TODO: Consider removing this special case, and instead hide the indexing
|
||||||
// diagnostic in unreachable code.
|
// diagnostic in unreachable code.
|
||||||
return Ok(Cow::Owned(TupleSpec::homogeneous(Type::unknown())));
|
Some(Cow::Owned(TupleSpec::homogeneous(Type::unknown())))
|
||||||
}
|
}
|
||||||
Type::TypeAlias(alias) => {
|
Type::TypeAlias(alias) => {
|
||||||
return alias.value_type(db).try_iterate_with_mode(db, mode);
|
Some(alias.value_type(db).try_iterate_with_mode(db, mode)?)
|
||||||
}
|
}
|
||||||
Type::NonInferableTypeVar(tvar) => match tvar.typevar(db).bound_or_constraints(db) {
|
Type::NonInferableTypeVar(tvar) => match tvar.typevar(db).bound_or_constraints(db) {
|
||||||
Some(TypeVarBoundOrConstraints::UpperBound(bound)) => {
|
Some(TypeVarBoundOrConstraints::UpperBound(bound)) => {
|
||||||
return bound.try_iterate_with_mode(db, mode);
|
Some(bound.try_iterate_with_mode(db, mode)?)
|
||||||
}
|
}
|
||||||
// TODO: could we create a "union of tuple specs"...?
|
// TODO: could we create a "union of tuple specs"...?
|
||||||
// (Same question applies to the `Type::Union()` branch lower down)
|
// (Same question applies to the `Type::Union()` branch lower down)
|
||||||
Some(TypeVarBoundOrConstraints::Constraints(_)) | None => {}
|
Some(TypeVarBoundOrConstraints::Constraints(_)) | None => None
|
||||||
},
|
},
|
||||||
Type::TypeVar(_) => unreachable!(
|
Type::TypeVar(_) => unreachable!(
|
||||||
"should not be able to iterate over type variable {} in inferable position",
|
"should not be able to iterate over type variable {} in inferable position",
|
||||||
self.display(db)
|
self.display(db)
|
||||||
),
|
),
|
||||||
Type::Dynamic(_)
|
// N.B. These special cases aren't strictly necessary, they're just obvious optimizations
|
||||||
| Type::FunctionLiteral(_)
|
Type::LiteralString | Type::Dynamic(_) => Some(Cow::Owned(TupleSpec::homogeneous(self))),
|
||||||
|
|
||||||
|
Type::FunctionLiteral(_)
|
||||||
| Type::GenericAlias(_)
|
| Type::GenericAlias(_)
|
||||||
| Type::BoundMethod(_)
|
| Type::BoundMethod(_)
|
||||||
| Type::MethodWrapper(_)
|
| Type::MethodWrapper(_)
|
||||||
|
@ -5026,6 +5046,10 @@ impl<'db> Type<'db> {
|
||||||
| Type::DataclassTransformer(_)
|
| Type::DataclassTransformer(_)
|
||||||
| Type::Callable(_)
|
| Type::Callable(_)
|
||||||
| Type::ModuleLiteral(_)
|
| Type::ModuleLiteral(_)
|
||||||
|
// We could infer a precise tuple spec for enum classes with members,
|
||||||
|
// but it's not clear whether that's worth the added complexity:
|
||||||
|
// you'd have to check that `EnumMeta.__iter__` is not overridden for it to be sound
|
||||||
|
// (enums can have `EnumMeta` subclasses as their metaclasses).
|
||||||
| Type::ClassLiteral(_)
|
| Type::ClassLiteral(_)
|
||||||
| Type::SubclassOf(_)
|
| Type::SubclassOf(_)
|
||||||
| Type::ProtocolInstance(_)
|
| Type::ProtocolInstance(_)
|
||||||
|
@ -5039,11 +5063,13 @@ impl<'db> Type<'db> {
|
||||||
| Type::IntLiteral(_)
|
| Type::IntLiteral(_)
|
||||||
| Type::BooleanLiteral(_)
|
| Type::BooleanLiteral(_)
|
||||||
| Type::EnumLiteral(_)
|
| Type::EnumLiteral(_)
|
||||||
| Type::LiteralString
|
|
||||||
| Type::BytesLiteral(_)
|
|
||||||
| Type::BoundSuper(_)
|
| Type::BoundSuper(_)
|
||||||
| Type::TypeIs(_)
|
| Type::TypeIs(_)
|
||||||
| Type::TypedDict(_) => {}
|
| Type::TypedDict(_) => None
|
||||||
|
};
|
||||||
|
|
||||||
|
if let Some(special_case) = special_case {
|
||||||
|
return Ok(special_case);
|
||||||
}
|
}
|
||||||
|
|
||||||
let try_call_dunder_getitem = || {
|
let try_call_dunder_getitem = || {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue