mirror of
https://github.com/astral-sh/ruff.git
synced 2025-09-29 13:25:17 +00:00
[red-knot] Infer precise types for len()
calls (#14599)
## Summary Resolves #14598. ## Test Plan Markdown tests. --------- Co-authored-by: Carl Meyer <carl@astral.sh>
This commit is contained in:
parent
04c887c8fc
commit
155d34bbb9
4 changed files with 337 additions and 13 deletions
|
@ -0,0 +1,219 @@
|
|||
# Length (`len()`)
|
||||
|
||||
## Literal and constructed iterables
|
||||
|
||||
### Strings and bytes literals
|
||||
|
||||
```py
|
||||
reveal_type(len("no\rmal")) # revealed: Literal[6]
|
||||
reveal_type(len(r"aw stri\ng")) # revealed: Literal[10]
|
||||
reveal_type(len(r"conca\t" "ena\tion")) # revealed: Literal[14]
|
||||
reveal_type(len(b"ytes lite" rb"al")) # revealed: Literal[11]
|
||||
reveal_type(len("𝒰𝕹🄸©🕲𝕕ℇ")) # revealed: Literal[7]
|
||||
|
||||
reveal_type( # revealed: Literal[7]
|
||||
len(
|
||||
"""foo
|
||||
bar"""
|
||||
)
|
||||
)
|
||||
reveal_type( # revealed: Literal[9]
|
||||
len(
|
||||
r"""foo\r
|
||||
bar"""
|
||||
)
|
||||
)
|
||||
reveal_type( # revealed: Literal[7]
|
||||
len(
|
||||
b"""foo
|
||||
bar"""
|
||||
)
|
||||
)
|
||||
reveal_type( # revealed: Literal[9]
|
||||
len(
|
||||
rb"""foo\r
|
||||
bar"""
|
||||
)
|
||||
)
|
||||
```
|
||||
|
||||
### Tuples
|
||||
|
||||
```py
|
||||
reveal_type(len(())) # revealed: Literal[0]
|
||||
reveal_type(len((1,))) # revealed: Literal[1]
|
||||
reveal_type(len((1, 2))) # revealed: Literal[2]
|
||||
|
||||
# TODO: Handle constructor calls
|
||||
reveal_type(len(tuple())) # revealed: int
|
||||
|
||||
# TODO: Handle star unpacks; Should be: Literal[0]
|
||||
reveal_type(len((*[],))) # revealed: Literal[1]
|
||||
|
||||
# TODO: Handle star unpacks; Should be: Literal[1]
|
||||
reveal_type( # revealed: Literal[2]
|
||||
len(
|
||||
(
|
||||
*[],
|
||||
1,
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
# TODO: Handle star unpacks; Should be: Literal[2]
|
||||
reveal_type(len((*[], 1, 2))) # revealed: Literal[3]
|
||||
|
||||
# TODO: Handle star unpacks; Should be: Literal[0]
|
||||
reveal_type(len((*[], *{}))) # revealed: Literal[2]
|
||||
```
|
||||
|
||||
### Lists, sets and dictionaries
|
||||
|
||||
```py
|
||||
reveal_type(len([])) # revealed: int
|
||||
reveal_type(len([1])) # revealed: int
|
||||
reveal_type(len([1, 2])) # revealed: int
|
||||
reveal_type(len([*{}, *dict()])) # revealed: int
|
||||
|
||||
reveal_type(len({})) # revealed: int
|
||||
reveal_type(len({**{}})) # revealed: int
|
||||
reveal_type(len({**{}, **{}})) # revealed: int
|
||||
|
||||
reveal_type(len({1})) # revealed: int
|
||||
reveal_type(len({1, 2})) # revealed: int
|
||||
reveal_type(len({*[], 2})) # revealed: int
|
||||
|
||||
reveal_type(len(list())) # revealed: int
|
||||
reveal_type(len(set())) # revealed: int
|
||||
reveal_type(len(dict())) # revealed: int
|
||||
reveal_type(len(frozenset())) # revealed: int
|
||||
```
|
||||
|
||||
## `__len__`
|
||||
|
||||
The returned value of `__len__` is implicitly and recursively converted to `int`.
|
||||
|
||||
### Literal integers
|
||||
|
||||
```py
|
||||
from typing import Literal
|
||||
|
||||
class Zero:
|
||||
def __len__(self) -> Literal[0]: ...
|
||||
|
||||
class ZeroOrOne:
|
||||
def __len__(self) -> Literal[0, 1]: ...
|
||||
|
||||
class ZeroOrTrue:
|
||||
def __len__(self) -> Literal[0, True]: ...
|
||||
|
||||
class OneOrFalse:
|
||||
def __len__(self) -> Literal[1] | Literal[False]: ...
|
||||
|
||||
class OneOrFoo:
|
||||
def __len__(self) -> Literal[1, "foo"]: ...
|
||||
|
||||
class ZeroOrStr:
|
||||
def __len__(self) -> Literal[0] | str: ...
|
||||
|
||||
reveal_type(len(Zero())) # revealed: Literal[0]
|
||||
reveal_type(len(ZeroOrOne())) # revealed: Literal[0, 1]
|
||||
reveal_type(len(ZeroOrTrue())) # revealed: Literal[0, 1]
|
||||
reveal_type(len(OneOrFalse())) # revealed: Literal[0, 1]
|
||||
|
||||
# TODO: Emit a diagnostic
|
||||
reveal_type(len(OneOrFoo())) # revealed: int
|
||||
|
||||
# TODO: Emit a diagnostic
|
||||
reveal_type(len(ZeroOrStr())) # revealed: int
|
||||
```
|
||||
|
||||
### Literal booleans
|
||||
|
||||
```py
|
||||
from typing import Literal
|
||||
|
||||
class LiteralTrue:
|
||||
def __len__(self) -> Literal[True]: ...
|
||||
|
||||
class LiteralFalse:
|
||||
def __len__(self) -> Literal[False]: ...
|
||||
|
||||
reveal_type(len(LiteralTrue())) # revealed: Literal[1]
|
||||
reveal_type(len(LiteralFalse())) # revealed: Literal[0]
|
||||
```
|
||||
|
||||
### Enums
|
||||
|
||||
```py
|
||||
from enum import Enum, auto
|
||||
from typing import Literal
|
||||
|
||||
class SomeEnum(Enum):
|
||||
AUTO = auto()
|
||||
INT = 2
|
||||
STR = "4"
|
||||
TUPLE = (8, "16")
|
||||
INT_2 = 3_2
|
||||
|
||||
class Auto:
|
||||
def __len__(self) -> Literal[SomeEnum.AUTO]: ...
|
||||
|
||||
class Int:
|
||||
def __len__(self) -> Literal[SomeEnum.INT]: ...
|
||||
|
||||
class Str:
|
||||
def __len__(self) -> Literal[SomeEnum.STR]: ...
|
||||
|
||||
class Tuple:
|
||||
def __len__(self) -> Literal[SomeEnum.TUPLE]: ...
|
||||
|
||||
class IntUnion:
|
||||
def __len__(self) -> Literal[SomeEnum.INT, SomeEnum.INT_2]: ...
|
||||
|
||||
reveal_type(len(Auto())) # revealed: int
|
||||
reveal_type(len(Int())) # revealed: Literal[2]
|
||||
reveal_type(len(Str())) # revealed: int
|
||||
reveal_type(len(Tuple())) # revealed: int
|
||||
reveal_type(len(IntUnion())) # revealed: Literal[2, 32]
|
||||
```
|
||||
|
||||
### Negative integers
|
||||
|
||||
```py
|
||||
from typing import Literal
|
||||
|
||||
class Negative:
|
||||
def __len__(self) -> Literal[-1]: ...
|
||||
|
||||
# TODO: Emit a diagnostic
|
||||
reveal_type(len(Negative())) # revealed: int
|
||||
```
|
||||
|
||||
### Wrong signature
|
||||
|
||||
```py
|
||||
from typing import Literal
|
||||
|
||||
class SecondOptionalArgument:
|
||||
def __len__(self, v: int = 0) -> Literal[0]: ...
|
||||
|
||||
class SecondRequiredArgument:
|
||||
def __len__(self, v: int) -> Literal[1]: ...
|
||||
|
||||
# TODO: Emit a diagnostic
|
||||
reveal_type(len(SecondOptionalArgument())) # revealed: Literal[0]
|
||||
|
||||
# TODO: Emit a diagnostic
|
||||
reveal_type(len(SecondRequiredArgument())) # revealed: Literal[1]
|
||||
```
|
||||
|
||||
### No `__len__`
|
||||
|
||||
```py
|
||||
class NoDunderLen:
|
||||
pass
|
||||
|
||||
# TODO: Emit a diagnostic
|
||||
reveal_type(len(NoDunderLen())) # revealed: int
|
||||
```
|
|
@ -267,3 +267,42 @@ reveal_type(b) # revealed: LiteralString
|
|||
# TODO: Should be list[int] once support for assigning to starred expression is added
|
||||
reveal_type(c) # revealed: @Todo(starred unpacking)
|
||||
```
|
||||
|
||||
### Unicode
|
||||
|
||||
```py
|
||||
# TODO: Add diagnostic (need more values to unpack)
|
||||
(a, b) = "é"
|
||||
|
||||
reveal_type(a) # revealed: LiteralString
|
||||
reveal_type(b) # revealed: Unknown
|
||||
```
|
||||
|
||||
### Unicode escape (1)
|
||||
|
||||
```py
|
||||
# TODO: Add diagnostic (need more values to unpack)
|
||||
(a, b) = "\u9E6C"
|
||||
|
||||
reveal_type(a) # revealed: LiteralString
|
||||
reveal_type(b) # revealed: Unknown
|
||||
```
|
||||
|
||||
### Unicode escape (2)
|
||||
|
||||
```py
|
||||
# TODO: Add diagnostic (need more values to unpack)
|
||||
(a, b) = "\U0010FFFF"
|
||||
|
||||
reveal_type(a) # revealed: LiteralString
|
||||
reveal_type(b) # revealed: Unknown
|
||||
```
|
||||
|
||||
### Surrogates
|
||||
|
||||
```py
|
||||
(a, b) = "\uD800\uDFFF"
|
||||
|
||||
reveal_type(a) # revealed: LiteralString
|
||||
reveal_type(b) # revealed: LiteralString
|
||||
```
|
||||
|
|
|
@ -1417,22 +1417,77 @@ impl<'db> Type<'db> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Return the type of `len()` on a type if it is known more precisely than `int`,
|
||||
/// or `None` otherwise.
|
||||
///
|
||||
/// In the second case, the return type of `len()` in `typeshed` (`int`)
|
||||
/// is used as a fallback.
///
/// Bytes literals, string literals and tuples report their length directly.
/// For any other type, the return type of its `__len__` dunder is inspected and
/// accepted only when it consists solely of non-negative integer literals
/// and/or boolean literals (possibly combined in a union).
|
||||
fn len(&self, db: &'db dyn Db) -> Option<Type<'db>> {
|
||||
// Normalizes a `__len__` return type into non-negative integer literal(s).
// Yields `None` as soon as any part of `ty` is not such a literal.
fn non_negative_int_literal<'db>(db: &'db dyn Db, ty: Type<'db>) -> Option<Type<'db>> {
|
||||
match ty {
|
||||
// TODO: Emit diagnostic for non-integers and negative integers
|
||||
// Negative literals are rejected here (`None`); non-negative ones pass through unchanged.
Type::IntLiteral(value) => (value >= 0).then_some(ty),
|
||||
// A boolean literal is converted to the corresponding integer literal via `value.into()`.
Type::BooleanLiteral(value) => Some(Type::IntLiteral(value.into())),
|
||||
Type::Union(union) => {
|
||||
let mut builder = UnionBuilder::new(db);
|
||||
for element in union.elements(db) {
|
||||
// `?` makes the whole union yield `None` if any single element is rejected.
builder = builder.add(non_negative_int_literal(db, *element)?);
|
||||
}
|
||||
Some(builder.build())
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
// Types whose length can be read off directly, without consulting `__len__`.
let usize_len = match self {
|
||||
Type::BytesLiteral(bytes) => Some(bytes.python_len(db)),
|
||||
Type::StringLiteral(string) => Some(string.python_len(db)),
|
||||
Type::Tuple(tuple) => Some(tuple.len(db)),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
if let Some(usize_len) = usize_len {
|
||||
// If the length cannot be converted into the payload of `Type::IntLiteral`,
// this returns `None` rather than falling through to the `__len__` lookup.
return usize_len.try_into().ok().map(Type::IntLiteral);
|
||||
}
|
||||
|
||||
// Otherwise, look at what the type's `__len__` dunder returns.
let return_ty = match self.call_dunder(db, "__len__", &[*self]) {
|
||||
// TODO: emit a diagnostic
|
||||
CallDunderResult::MethodNotAvailable => return None,
|
||||
|
||||
// A possibly-unbound `__len__` is treated the same as a definitely-bound one here.
CallDunderResult::CallOutcome(outcome) | CallDunderResult::PossiblyUnbound(outcome) => {
|
||||
// `?` propagates `None` when the call outcome has no return type.
outcome.return_ty(db)?
|
||||
}
|
||||
};
|
||||
|
||||
non_negative_int_literal(db, return_ty)
|
||||
}
|
||||
|
||||
/// Return the outcome of calling an object of this type.
|
||||
#[must_use]
|
||||
fn call(self, db: &'db dyn Db, arg_types: &[Type<'db>]) -> CallOutcome<'db> {
|
||||
match self {
|
||||
// TODO validate typed call arguments vs callable signature
|
||||
Type::FunctionLiteral(function_type) => {
|
||||
if function_type.is_known(db, KnownFunction::RevealType) {
|
||||
CallOutcome::revealed(
|
||||
Type::FunctionLiteral(function_type) => match function_type.known(db) {
|
||||
Some(KnownFunction::RevealType) => CallOutcome::revealed(
|
||||
function_type.signature(db).return_ty,
|
||||
*arg_types.first().unwrap_or(&Type::Unknown),
|
||||
)
|
||||
} else {
|
||||
CallOutcome::callable(function_type.signature(db).return_ty)
|
||||
}
|
||||
),
|
||||
|
||||
Some(KnownFunction::Len) => {
|
||||
let normal_return_ty = function_type.signature(db).return_ty;
|
||||
|
||||
let [only_arg] = arg_types else {
|
||||
// TODO: Emit a diagnostic
|
||||
return CallOutcome::callable(normal_return_ty);
|
||||
};
|
||||
let len_ty = only_arg.len(db);
|
||||
|
||||
CallOutcome::callable(len_ty.unwrap_or(normal_return_ty))
|
||||
}
|
||||
|
||||
_ => CallOutcome::callable(function_type.signature(db).return_ty),
|
||||
},
|
||||
|
||||
// TODO annotated return type on `__new__` or metaclass `__call__`
|
||||
Type::ClassLiteral(ClassLiteralType { class }) => {
|
||||
CallOutcome::callable(match class.known(db) {
|
||||
|
@ -2597,13 +2652,15 @@ pub enum KnownFunction {
|
|||
ConstraintFunction(KnownConstraintFunction),
|
||||
/// `builtins.reveal_type`, `typing.reveal_type` or `typing_extensions.reveal_type`
|
||||
RevealType,
|
||||
/// `builtins.len`
|
||||
Len,
|
||||
}
|
||||
|
||||
impl KnownFunction {
|
||||
pub fn constraint_function(self) -> Option<KnownConstraintFunction> {
|
||||
match self {
|
||||
Self::ConstraintFunction(f) => Some(f),
|
||||
Self::RevealType => None,
|
||||
Self::RevealType | Self::Len => None,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2620,6 +2677,7 @@ impl KnownFunction {
|
|||
"issubclass" if definition.is_builtin_definition(db) => Some(
|
||||
KnownFunction::ConstraintFunction(KnownConstraintFunction::IsSubclass),
|
||||
),
|
||||
"len" if definition.is_builtin_definition(db) => Some(KnownFunction::Len),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
@ -3074,8 +3132,9 @@ pub struct StringLiteralType<'db> {
|
|||
}
|
||||
|
||||
impl<'db> StringLiteralType<'db> {
|
||||
pub fn len(&self, db: &'db dyn Db) -> usize {
|
||||
self.value(db).len()
|
||||
/// The length of the string, as would be returned by Python's `len()`.
|
||||
pub fn python_len(&self, db: &'db dyn Db) -> usize {
|
||||
self.value(db).chars().count()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3085,6 +3144,12 @@ pub struct BytesLiteralType<'db> {
|
|||
value: Box<[u8]>,
|
||||
}
|
||||
|
||||
impl<'db> BytesLiteralType<'db> {
|
||||
/// The length of the bytes literal, as would be returned by Python's `len()`.
///
/// This is the number of bytes in the stored value.
pub fn python_len(&self, db: &'db dyn Db) -> usize {
|
||||
self.value(db).len()
|
||||
}
|
||||
}
|
||||
|
||||
#[salsa::interned]
|
||||
pub struct SliceLiteralType<'db> {
|
||||
start: Option<i32>,
|
||||
|
|
|
@ -95,7 +95,8 @@ impl<'db> Unpacker<'db> {
|
|||
// there would be a cost and it's not clear that it's worth it.
|
||||
let value_ty = Type::tuple(
|
||||
self.db,
|
||||
std::iter::repeat(Type::LiteralString).take(string_literal_ty.len(self.db)),
|
||||
std::iter::repeat(Type::LiteralString)
|
||||
.take(string_literal_ty.python_len(self.db)),
|
||||
);
|
||||
self.unpack(target, value_ty, scope);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue