[red-knot] Infer precise types for len() calls (#14599)

## Summary

Resolves #14598.

## Test Plan

Markdown tests.

---------

Co-authored-by: Carl Meyer <carl@astral.sh>
This commit is contained in:
InSync 2024-12-05 02:16:53 +07:00 committed by GitHub
parent 04c887c8fc
commit 155d34bbb9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 337 additions and 13 deletions

View file

@ -0,0 +1,219 @@
# Length (`len()`)
## Literal and constructed iterables
### Strings and bytes literals
```py
reveal_type(len("no\rmal")) # revealed: Literal[6]
reveal_type(len(r"aw stri\ng")) # revealed: Literal[10]
reveal_type(len(r"conca\t" "ena\tion")) # revealed: Literal[14]
reveal_type(len(b"ytes lite" rb"al")) # revealed: Literal[11]
reveal_type(len("𝒰𝕹🄸©🕲𝕕ℇ")) # revealed: Literal[7]
reveal_type( # revealed: Literal[7]
len(
"""foo
bar"""
)
)
reveal_type( # revealed: Literal[9]
len(
r"""foo\r
bar"""
)
)
reveal_type( # revealed: Literal[7]
len(
b"""foo
bar"""
)
)
reveal_type( # revealed: Literal[9]
len(
rb"""foo\r
bar"""
)
)
```
### Tuples
```py
reveal_type(len(())) # revealed: Literal[0]
reveal_type(len((1,))) # revealed: Literal[1]
reveal_type(len((1, 2))) # revealed: Literal[2]
# TODO: Handle constructor calls
reveal_type(len(tuple())) # revealed: int
# TODO: Handle star unpacks; Should be: Literal[0]
reveal_type(len((*[],))) # revealed: Literal[1]
# TODO: Handle star unpacks; Should be: Literal[1]
reveal_type( # revealed: Literal[2]
len(
(
*[],
1,
)
)
)
# TODO: Handle star unpacks; Should be: Literal[2]
reveal_type(len((*[], 1, 2))) # revealed: Literal[3]
# TODO: Handle star unpacks; Should be: Literal[0]
reveal_type(len((*[], *{}))) # revealed: Literal[2]
```
### Lists, sets and dictionaries
```py
reveal_type(len([])) # revealed: int
reveal_type(len([1])) # revealed: int
reveal_type(len([1, 2])) # revealed: int
reveal_type(len([*{}, *dict()])) # revealed: int
reveal_type(len({})) # revealed: int
reveal_type(len({**{}})) # revealed: int
reveal_type(len({**{}, **{}})) # revealed: int
reveal_type(len({1})) # revealed: int
reveal_type(len({1, 2})) # revealed: int
reveal_type(len({*[], 2})) # revealed: int
reveal_type(len(list())) # revealed: int
reveal_type(len(set())) # revealed: int
reveal_type(len(dict())) # revealed: int
reveal_type(len(frozenset())) # revealed: int
```
## `__len__`
The returned value of `__len__` is implicitly and recursively converted to `int`.
### Literal integers
```py
from typing import Literal
class Zero:
def __len__(self) -> Literal[0]: ...
class ZeroOrOne:
def __len__(self) -> Literal[0, 1]: ...
class ZeroOrTrue:
def __len__(self) -> Literal[0, True]: ...
class OneOrFalse:
def __len__(self) -> Literal[1] | Literal[False]: ...
class OneOrFoo:
def __len__(self) -> Literal[1, "foo"]: ...
class ZeroOrStr:
def __len__(self) -> Literal[0] | str: ...
reveal_type(len(Zero())) # revealed: Literal[0]
reveal_type(len(ZeroOrOne())) # revealed: Literal[0, 1]
reveal_type(len(ZeroOrTrue())) # revealed: Literal[0, 1]
reveal_type(len(OneOrFalse())) # revealed: Literal[0, 1]
# TODO: Emit a diagnostic
reveal_type(len(OneOrFoo())) # revealed: int
# TODO: Emit a diagnostic
reveal_type(len(ZeroOrStr())) # revealed: int
```
### Literal booleans
```py
from typing import Literal
class LiteralTrue:
def __len__(self) -> Literal[True]: ...
class LiteralFalse:
def __len__(self) -> Literal[False]: ...
reveal_type(len(LiteralTrue())) # revealed: Literal[1]
reveal_type(len(LiteralFalse())) # revealed: Literal[0]
```
### Enums
```py
from enum import Enum, auto
from typing import Literal
class SomeEnum(Enum):
AUTO = auto()
INT = 2
STR = "4"
TUPLE = (8, "16")
INT_2 = 3_2
class Auto:
def __len__(self) -> Literal[SomeEnum.AUTO]: ...
class Int:
def __len__(self) -> Literal[SomeEnum.INT]: ...
class Str:
def __len__(self) -> Literal[SomeEnum.STR]: ...
class Tuple:
def __len__(self) -> Literal[SomeEnum.TUPLE]: ...
class IntUnion:
def __len__(self) -> Literal[SomeEnum.INT, SomeEnum.INT_2]: ...
reveal_type(len(Auto())) # revealed: int
reveal_type(len(Int())) # revealed: Literal[2]
reveal_type(len(Str())) # revealed: int
reveal_type(len(Tuple())) # revealed: int
reveal_type(len(IntUnion())) # revealed: Literal[2, 32]
```
### Negative integers
```py
from typing import Literal
class Negative:
def __len__(self) -> Literal[-1]: ...
# TODO: Emit a diagnostic
reveal_type(len(Negative())) # revealed: int
```
### Wrong signature
```py
from typing import Literal
class SecondOptionalArgument:
def __len__(self, v: int = 0) -> Literal[0]: ...
class SecondRequiredArgument:
def __len__(self, v: int) -> Literal[1]: ...
# TODO: Emit a diagnostic
reveal_type(len(SecondOptionalArgument())) # revealed: Literal[0]
# TODO: Emit a diagnostic
reveal_type(len(SecondRequiredArgument())) # revealed: Literal[1]
```
### No `__len__`
```py
class NoDunderLen:
pass
# TODO: Emit a diagnostic
reveal_type(len(NoDunderLen())) # revealed: int
```

View file

@ -267,3 +267,42 @@ reveal_type(b) # revealed: LiteralString
# TODO: Should be list[int] once support for assigning to starred expression is added # TODO: Should be list[int] once support for assigning to starred expression is added
reveal_type(c) # revealed: @Todo(starred unpacking) reveal_type(c) # revealed: @Todo(starred unpacking)
``` ```
### Unicode
```py
# TODO: Add diagnostic (need more values to unpack)
(a, b) = "é"
reveal_type(a) # revealed: LiteralString
reveal_type(b) # revealed: Unknown
```
### Unicode escape (1)
```py
# TODO: Add diagnostic (need more values to unpack)
(a, b) = "\u9E6C"
reveal_type(a) # revealed: LiteralString
reveal_type(b) # revealed: Unknown
```
### Unicode escape (2)
```py
# TODO: Add diagnostic (need more values to unpack)
(a, b) = "\U0010FFFF"
reveal_type(a) # revealed: LiteralString
reveal_type(b) # revealed: Unknown
```
### Surrogates
```py
(a, b) = "\uD800\uDFFF"
reveal_type(a) # revealed: LiteralString
reveal_type(b) # revealed: LiteralString
```

View file

@ -1417,22 +1417,77 @@ impl<'db> Type<'db> {
} }
} }
/// Infer the type of `len(x)` for a value of this type when it can be narrowed beyond `int`.
///
/// Returns `Some` with an integer-literal type (or a union of them) when the length is
/// statically known, and `None` otherwise. On `None`, the caller falls back to the return
/// type of `len()` declared in `typeshed` (`int`).
fn len(&self, db: &'db dyn Db) -> Option<Type<'db>> {
    /// Normalize the return type of `__len__` into non-negative integer literals.
    ///
    /// Boolean literals are converted to the corresponding integer literal, unions are
    /// normalized element by element, and anything else (including negative integer
    /// literals) makes the whole result `None`.
    fn non_negative_int_literal<'db>(db: &'db dyn Db, ty: Type<'db>) -> Option<Type<'db>> {
        match ty {
            // TODO: Emit diagnostic for non-integers and negative integers
            Type::IntLiteral(value) if value >= 0 => Some(ty),
            Type::BooleanLiteral(value) => Some(Type::IntLiteral(value.into())),
            // A single unsupported element short-circuits the fold and rejects the union.
            Type::Union(union) => union
                .elements(db)
                .iter()
                .try_fold(UnionBuilder::new(db), |builder, element| {
                    Some(builder.add(non_negative_int_literal(db, *element)?))
                })
                .map(UnionBuilder::build),
            _ => None,
        }
    }

    // String/bytes literals and tuples expose their length directly on the type.
    let literal_len = match self {
        Type::StringLiteral(string) => Some(string.python_len(db)),
        Type::BytesLiteral(bytes) => Some(bytes.python_len(db)),
        Type::Tuple(tuple) => Some(tuple.len(db)),
        _ => None,
    };

    match literal_len {
        // A length that does not fit the integer-literal payload yields `None`.
        Some(count) => count.try_into().ok().map(Type::IntLiteral),
        // Otherwise, consult the return type of the `__len__` dunder method.
        None => match self.call_dunder(db, "__len__", &[*self]) {
            // TODO: emit a diagnostic
            CallDunderResult::MethodNotAvailable => None,
            CallDunderResult::CallOutcome(outcome)
            | CallDunderResult::PossiblyUnbound(outcome) => outcome
                .return_ty(db)
                .and_then(|return_ty| non_negative_int_literal(db, return_ty)),
        },
    }
}
/// Return the outcome of calling an object of this type. /// Return the outcome of calling an object of this type.
#[must_use] #[must_use]
fn call(self, db: &'db dyn Db, arg_types: &[Type<'db>]) -> CallOutcome<'db> { fn call(self, db: &'db dyn Db, arg_types: &[Type<'db>]) -> CallOutcome<'db> {
match self { match self {
// TODO validate typed call arguments vs callable signature // TODO validate typed call arguments vs callable signature
Type::FunctionLiteral(function_type) => { Type::FunctionLiteral(function_type) => match function_type.known(db) {
if function_type.is_known(db, KnownFunction::RevealType) { Some(KnownFunction::RevealType) => CallOutcome::revealed(
CallOutcome::revealed(
function_type.signature(db).return_ty, function_type.signature(db).return_ty,
*arg_types.first().unwrap_or(&Type::Unknown), *arg_types.first().unwrap_or(&Type::Unknown),
) ),
} else {
CallOutcome::callable(function_type.signature(db).return_ty) Some(KnownFunction::Len) => {
} let normal_return_ty = function_type.signature(db).return_ty;
let [only_arg] = arg_types else {
// TODO: Emit a diagnostic
return CallOutcome::callable(normal_return_ty);
};
let len_ty = only_arg.len(db);
CallOutcome::callable(len_ty.unwrap_or(normal_return_ty))
} }
_ => CallOutcome::callable(function_type.signature(db).return_ty),
},
// TODO annotated return type on `__new__` or metaclass `__call__` // TODO annotated return type on `__new__` or metaclass `__call__`
Type::ClassLiteral(ClassLiteralType { class }) => { Type::ClassLiteral(ClassLiteralType { class }) => {
CallOutcome::callable(match class.known(db) { CallOutcome::callable(match class.known(db) {
@ -2597,13 +2652,15 @@ pub enum KnownFunction {
ConstraintFunction(KnownConstraintFunction), ConstraintFunction(KnownConstraintFunction),
/// `builtins.reveal_type`, `typing.reveal_type` or `typing_extensions.reveal_type` /// `builtins.reveal_type`, `typing.reveal_type` or `typing_extensions.reveal_type`
RevealType, RevealType,
/// `builtins.len`
Len,
} }
impl KnownFunction { impl KnownFunction {
pub fn constraint_function(self) -> Option<KnownConstraintFunction> { pub fn constraint_function(self) -> Option<KnownConstraintFunction> {
match self { match self {
Self::ConstraintFunction(f) => Some(f), Self::ConstraintFunction(f) => Some(f),
Self::RevealType => None, Self::RevealType | Self::Len => None,
} }
} }
@ -2620,6 +2677,7 @@ impl KnownFunction {
"issubclass" if definition.is_builtin_definition(db) => Some( "issubclass" if definition.is_builtin_definition(db) => Some(
KnownFunction::ConstraintFunction(KnownConstraintFunction::IsSubclass), KnownFunction::ConstraintFunction(KnownConstraintFunction::IsSubclass),
), ),
"len" if definition.is_builtin_definition(db) => Some(KnownFunction::Len),
_ => None, _ => None,
} }
} }
@ -3074,8 +3132,9 @@ pub struct StringLiteralType<'db> {
} }
impl<'db> StringLiteralType<'db> { impl<'db> StringLiteralType<'db> {
pub fn len(&self, db: &'db dyn Db) -> usize { /// The length of the string, as would be returned by Python's `len()`.
self.value(db).len() pub fn python_len(&self, db: &'db dyn Db) -> usize {
self.value(db).chars().count()
} }
} }
@ -3085,6 +3144,12 @@ pub struct BytesLiteralType<'db> {
value: Box<[u8]>, value: Box<[u8]>,
} }
impl<'db> BytesLiteralType<'db> {
/// The length of the bytes literal, as would be returned by Python's `len()`.
///
/// This is the number of bytes in the stored value.
pub fn python_len(&self, db: &'db dyn Db) -> usize {
self.value(db).len()
}
}
#[salsa::interned] #[salsa::interned]
pub struct SliceLiteralType<'db> { pub struct SliceLiteralType<'db> {
start: Option<i32>, start: Option<i32>,

View file

@ -95,7 +95,8 @@ impl<'db> Unpacker<'db> {
// there would be a cost and it's not clear that it's worth it. // there would be a cost and it's not clear that it's worth it.
let value_ty = Type::tuple( let value_ty = Type::tuple(
self.db, self.db,
std::iter::repeat(Type::LiteralString).take(string_literal_ty.len(self.db)), std::iter::repeat(Type::LiteralString)
.take(string_literal_ty.python_len(self.db)),
); );
self.unpack(target, value_ty, scope); self.unpack(target, value_ty, scope);
} }