[ty] Implicit type aliases: Support for PEP 604 unions (#21195)

## Summary

Add support for implicit type aliases that use PEP 604 unions:
```py
IntOrStr = int | str

reveal_type(IntOrStr)  # UnionType

def _(int_or_str: IntOrStr):
    reveal_type(int_or_str)  # int | str
```

## Typing conformance

The changes are either removed false positives, or new diagnostics due
to known limitations unrelated to this PR.

## Ecosystem impact

Spot checked, a mix of true positives and known limitations.

## Test Plan

New Markdown tests.
This commit is contained in:
David Peter 2025-11-03 21:50:25 +01:00 committed by GitHub
parent fe4ee81b97
commit 1fe958c694
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 334 additions and 36 deletions

View file

@ -146,7 +146,7 @@ static FREQTRADE: Benchmark = Benchmark::new(
max_dep_date: "2025-06-17",
python_version: PythonVersion::PY312,
},
400,
500,
);
static PANDAS: Benchmark = Benchmark::new(

View file

@ -72,9 +72,6 @@ def f(x: Union) -> None:
## Implicit type aliases using new-style unions
We don't recognize these as type aliases yet, but we also don't emit false-positive diagnostics if
you use them in type expressions:
```toml
[environment]
python-version = "3.10"
@ -84,5 +81,5 @@ python-version = "3.10"
X = int | str
def f(y: X):
reveal_type(y) # revealed: @Todo(Support for `types.UnionType` instances in type expressions)
reveal_type(y) # revealed: int | str
```

View file

@ -17,6 +17,209 @@ def f(x: MyInt):
f(1)
```
## None
```py
MyNone = None
# TODO: this should not be an error
# error: [invalid-type-form] "Variable of type `None` is not allowed in a type expression"
def g(x: MyNone):
# TODO: this should be `None`
reveal_type(x) # revealed: Unknown
g(None)
```
## Unions
We also support unions in type aliases:
```py
from typing_extensions import Any, Never
from ty_extensions import Unknown
IntOrStr = int | str
IntOrStrOrBytes1 = int | str | bytes
IntOrStrOrBytes2 = (int | str) | bytes
IntOrStrOrBytes3 = int | (str | bytes)
IntOrStrOrBytes4 = IntOrStr | bytes
BytesOrIntOrStr = bytes | IntOrStr
IntOrNone = int | None
NoneOrInt = None | int
IntOrStrOrNone = IntOrStr | None
NoneOrIntOrStr = None | IntOrStr
IntOrAny = int | Any
AnyOrInt = Any | int
NoneOrAny = None | Any
AnyOrNone = Any | None
NeverOrAny = Never | Any
AnyOrNever = Any | Never
UnknownOrInt = Unknown | int
IntOrUnknown = int | Unknown
reveal_type(IntOrStr) # revealed: UnionType
reveal_type(IntOrStrOrBytes1) # revealed: UnionType
reveal_type(IntOrStrOrBytes2) # revealed: UnionType
reveal_type(IntOrStrOrBytes3) # revealed: UnionType
reveal_type(IntOrStrOrBytes4) # revealed: UnionType
reveal_type(BytesOrIntOrStr) # revealed: UnionType
reveal_type(IntOrNone) # revealed: UnionType
reveal_type(NoneOrInt) # revealed: UnionType
reveal_type(IntOrStrOrNone) # revealed: UnionType
reveal_type(NoneOrIntOrStr) # revealed: UnionType
reveal_type(IntOrAny) # revealed: UnionType
reveal_type(AnyOrInt) # revealed: UnionType
reveal_type(NoneOrAny) # revealed: UnionType
reveal_type(AnyOrNone) # revealed: UnionType
reveal_type(NeverOrAny) # revealed: UnionType
reveal_type(AnyOrNever) # revealed: UnionType
reveal_type(UnknownOrInt) # revealed: UnionType
reveal_type(IntOrUnknown) # revealed: UnionType
def _(
int_or_str: IntOrStr,
int_or_str_or_bytes1: IntOrStrOrBytes1,
int_or_str_or_bytes2: IntOrStrOrBytes2,
int_or_str_or_bytes3: IntOrStrOrBytes3,
int_or_str_or_bytes4: IntOrStrOrBytes4,
bytes_or_int_or_str: BytesOrIntOrStr,
int_or_none: IntOrNone,
none_or_int: NoneOrInt,
int_or_str_or_none: IntOrStrOrNone,
none_or_int_or_str: NoneOrIntOrStr,
int_or_any: IntOrAny,
any_or_int: AnyOrInt,
none_or_any: NoneOrAny,
any_or_none: AnyOrNone,
never_or_any: NeverOrAny,
any_or_never: AnyOrNever,
unknown_or_int: UnknownOrInt,
int_or_unknown: IntOrUnknown,
):
reveal_type(int_or_str) # revealed: int | str
reveal_type(int_or_str_or_bytes1) # revealed: int | str | bytes
reveal_type(int_or_str_or_bytes2) # revealed: int | str | bytes
reveal_type(int_or_str_or_bytes3) # revealed: int | str | bytes
reveal_type(int_or_str_or_bytes4) # revealed: int | str | bytes
reveal_type(bytes_or_int_or_str) # revealed: bytes | int | str
reveal_type(int_or_none) # revealed: int | None
reveal_type(none_or_int) # revealed: None | int
reveal_type(int_or_str_or_none) # revealed: int | str | None
reveal_type(none_or_int_or_str) # revealed: None | int | str
reveal_type(int_or_any) # revealed: int | Any
reveal_type(any_or_int) # revealed: Any | int
reveal_type(none_or_any) # revealed: None | Any
reveal_type(any_or_none) # revealed: Any | None
reveal_type(never_or_any) # revealed: Any
reveal_type(any_or_never) # revealed: Any
reveal_type(unknown_or_int) # revealed: Unknown | int
reveal_type(int_or_unknown) # revealed: int | Unknown
```
If a type is unioned with itself in a value expression, the result is just that type. No
`types.UnionType` instance is created:
```py
IntOrInt = int | int
ListOfIntOrListOfInt = list[int] | list[int]
reveal_type(IntOrInt) # revealed: <class 'int'>
reveal_type(ListOfIntOrListOfInt) # revealed: <class 'list[int]'>
def _(int_or_int: IntOrInt, list_of_int_or_list_of_int: ListOfIntOrListOfInt):
reveal_type(int_or_int) # revealed: int
reveal_type(list_of_int_or_list_of_int) # revealed: list[int]
```
`NoneType` has no special or-operator behavior, so this is an error:
```py
None | None # error: [unsupported-operator] "Operator `|` is unsupported between objects of type `None` and `None`"
```
When constructing something nonsensical like `int | 1`, we could ideally emit a diagnostic for the
expression itself, as it leads to a `TypeError` at runtime. No other type checker supports this, so
for now we only emit an error when it is used in a type expression:
```py
IntOrOne = int | 1
# error: [invalid-type-form] "Variable of type `Literal[1]` is not allowed in a type expression"
def _(int_or_one: IntOrOne):
reveal_type(int_or_one) # revealed: Unknown
```
If you were to somehow get hold of an opaque instance of `types.UnionType`, that could not be used
as a type expression:
```py
from types import UnionType
def f(SomeUnionType: UnionType):
# error: [invalid-type-form] "Variable of type `UnionType` is not allowed in a type expression"
some_union: SomeUnionType
f(int | str)
```
## Generic types
Implicit type aliases can also refer to generic types:
```py
from typing_extensions import TypeVar
T = TypeVar("T")
MyList = list[T]
def _(my_list: MyList[int]):
# TODO: This should be `list[int]`
reveal_type(my_list) # revealed: @Todo(unknown type subscript)
ListOrTuple = list[T] | tuple[T, ...]
reveal_type(ListOrTuple) # revealed: UnionType
def _(list_or_tuple: ListOrTuple[int]):
reveal_type(list_or_tuple) # revealed: @Todo(Generic specialization of types.UnionType)
```
## Stringified annotations?
From the [typing spec on type aliases](https://typing.python.org/en/latest/spec/aliases.html):
> Type aliases may be as complex as type hints in annotations – anything that is acceptable as a
> type hint is acceptable in a type alias
However, no other type checker seems to support stringified annotations in implicit type aliases. We
currently also do not support them:
```py
AliasForStr = "str"
# error: [invalid-type-form] "Variable of type `Literal["str"]` is not allowed in a type expression"
def _(s: AliasForStr):
reveal_type(s) # revealed: Unknown
IntOrStr = int | "str"
# error: [invalid-type-form] "Variable of type `Literal["str"]` is not allowed in a type expression"
def _(int_or_str: IntOrStr):
reveal_type(int_or_str) # revealed: Unknown
```
We *do* support stringified annotations if they appear in a position where a type expression is
syntactically expected:
```py
ListOfInts = list["int"]
def _(list_of_ints: ListOfInts):
reveal_type(list_of_ints) # revealed: list[int]
```
## Recursive
### Old union syntax

View file

@ -291,6 +291,20 @@ class Foo(x): ...
reveal_mro(Foo) # revealed: (<class 'Foo'>, Unknown, <class 'object'>)
```
## `UnionType` instances are not allowed as a base
This is not legal:
```py
class A: ...
class B: ...
EitherOr = A | B
# error: [invalid-base] "Invalid class base with type `UnionType`"
class Foo(EitherOr): ...
```
## `__bases__` is a union of a dynamic type and valid bases
If a dynamic type such as `Any` or `Unknown` is one of the elements in the union, and all other

View file

@ -146,13 +146,11 @@ def _(flag: bool):
def _(flag: bool):
x = 1 if flag else "a"
# TODO: this should cause us to emit a diagnostic during
# type checking
# error: [invalid-argument-type] "Argument to function `isinstance` is incorrect: Expected `type | UnionType | tuple[Unknown, ...]`, found `Literal["a"]"
if isinstance(x, "a"):
reveal_type(x) # revealed: Literal[1, "a"]
# TODO: this should cause us to emit a diagnostic during
# type checking
# error: [invalid-argument-type] "Argument to function `isinstance` is incorrect: Expected `type | UnionType | tuple[Unknown, ...]`, found `Literal["int"]"
if isinstance(x, "int"):
reveal_type(x) # revealed: Literal[1, "a"]
```

View file

@ -214,8 +214,7 @@ def flag() -> bool:
t = int if flag() else str
# TODO: this should cause us to emit a diagnostic during
# type checking
# error: [invalid-argument-type] "Argument to function `issubclass` is incorrect: Expected `type | UnionType | tuple[Unknown, ...]`, found `Literal["str"]"
if issubclass(t, "str"):
reveal_type(t) # revealed: <class 'int'> | <class 'str'>

View file

@ -817,13 +817,11 @@ impl<'db> Type<'db> {
}
fn is_none(&self, db: &'db dyn Db) -> bool {
self.as_nominal_instance()
.is_some_and(|instance| instance.has_known_class(db, KnownClass::NoneType))
self.is_instance_of(db, KnownClass::NoneType)
}
fn is_bool(&self, db: &'db dyn Db) -> bool {
self.as_nominal_instance()
.is_some_and(|instance| instance.has_known_class(db, KnownClass::Bool))
self.is_instance_of(db, KnownClass::Bool)
}
fn is_enum(&self, db: &'db dyn Db) -> bool {
@ -857,8 +855,7 @@ impl<'db> Type<'db> {
}
pub(crate) fn is_notimplemented(&self, db: &'db dyn Db) -> bool {
self.as_nominal_instance()
.is_some_and(|instance| instance.has_known_class(db, KnownClass::NotImplementedType))
self.is_instance_of(db, KnownClass::NotImplementedType)
}
pub(crate) const fn is_todo(&self) -> bool {
@ -6436,6 +6433,17 @@ impl<'db> Type<'db> {
invalid_expressions: smallvec::smallvec_inline![InvalidTypeExpression::Generic],
fallback_type: Type::unknown(),
}),
KnownInstanceType::UnionType(union_type) => {
let mut builder = UnionBuilder::new(db);
for element in union_type.elements(db) {
builder = builder.add(element.in_type_expression(
db,
scope_id,
typevar_binding_context,
)?);
}
Ok(builder.build())
}
},
Type::SpecialForm(special_form) => match special_form {
@ -6604,9 +6612,6 @@ impl<'db> Type<'db> {
Some(KnownClass::GenericAlias) => Ok(todo_type!(
"Support for `typing.GenericAlias` instances in type expressions"
)),
Some(KnownClass::UnionType) => Ok(todo_type!(
"Support for `types.UnionType` instances in type expressions"
)),
_ => Err(InvalidTypeExpressionError {
invalid_expressions: smallvec::smallvec_inline![
InvalidTypeExpression::InvalidType(*self, scope_id)
@ -7646,6 +7651,10 @@ pub enum KnownInstanceType<'db> {
/// A constraint set, which is exposed in mdtests as an instance of
/// `ty_extensions.ConstraintSet`.
ConstraintSet(TrackedConstraintSet<'db>),
/// A single instance of `types.UnionType`, which stores the left- and
/// right-hand sides of a PEP 604 union.
UnionType(UnionTypeInstance<'db>),
}
fn walk_known_instance_type<'db, V: visitor::TypeVisitor<'db> + ?Sized>(
@ -7672,6 +7681,11 @@ fn walk_known_instance_type<'db, V: visitor::TypeVisitor<'db> + ?Sized>(
visitor.visit_type(db, default_ty);
}
}
KnownInstanceType::UnionType(union_type) => {
for element in union_type.elements(db) {
visitor.visit_type(db, element);
}
}
}
}
@ -7708,6 +7722,7 @@ impl<'db> KnownInstanceType<'db> {
// Nothing to normalize
Self::ConstraintSet(set)
}
Self::UnionType(union_type) => Self::UnionType(union_type.normalized_impl(db, visitor)),
}
}
@ -7722,6 +7737,7 @@ impl<'db> KnownInstanceType<'db> {
Self::Deprecated(_) => KnownClass::Deprecated,
Self::Field(_) => KnownClass::Field,
Self::ConstraintSet(_) => KnownClass::ConstraintSet,
Self::UnionType(_) => KnownClass::UnionType,
}
}
@ -7795,6 +7811,7 @@ impl<'db> KnownInstanceType<'db> {
constraints.display(self.db)
)
}
KnownInstanceType::UnionType(_) => f.write_str("UnionType"),
}
}
}
@ -8918,6 +8935,34 @@ impl<'db> TypeVarBoundOrConstraints<'db> {
}
}
/// An instance of `types.UnionType`.
///
/// Holds the two operands of a single `|` expression; see the `left` and `right` fields.
///
/// # Ordering
/// Ordering is based on the instance's salsa-assigned id and not on its values.
/// The id may change between runs, or when the instance was garbage collected and recreated.
#[salsa::interned(debug)]
#[derive(PartialOrd, Ord)]
pub struct UnionTypeInstance<'db> {
/// The left-hand operand of the `|` expression.
left: Type<'db>,
/// The right-hand operand of the `|` expression.
right: Type<'db>,
}
impl get_size2::GetSize for UnionTypeInstance<'_> {}
impl<'db> UnionTypeInstance<'db> {
/// Returns the two stored operands as `[left, right]`, in that order.
pub(crate) fn elements(self, db: &'db dyn Db) -> [Type<'db>; 2] {
[self.left(db), self.right(db)]
}
/// Returns a `UnionTypeInstance` built from the results of calling `normalized_impl`
/// on the left and right operands with the given `visitor`.
pub(crate) fn normalized_impl(self, db: &'db dyn Db, visitor: &NormalizedVisitor<'db>) -> Self {
UnionTypeInstance::new(
db,
self.left(db).normalized_impl(db, visitor),
self.right(db).normalized_impl(db, visitor),
)
}
}
/// Error returned if a type is not awaitable.
#[derive(Debug)]
enum AwaitError<'db> {

View file

@ -1307,9 +1307,7 @@ impl<'db> Field<'db> {
/// Returns true if this field is a `dataclasses.KW_ONLY` sentinel.
/// <https://docs.python.org/3/library/dataclasses.html#dataclasses.KW_ONLY>
pub(crate) fn is_kw_only_sentinel(&self, db: &'db dyn Db) -> bool {
self.declared_ty
.as_nominal_instance()
.is_some_and(|instance| instance.has_known_class(db, KnownClass::KwOnly))
self.declared_ty.is_instance_of(db, KnownClass::KwOnly)
}
}

View file

@ -170,7 +170,8 @@ impl<'db> ClassBase<'db> {
| KnownInstanceType::TypeVar(_)
| KnownInstanceType::Deprecated(_)
| KnownInstanceType::Field(_)
| KnownInstanceType::ConstraintSet(_) => None,
| KnownInstanceType::ConstraintSet(_)
| KnownInstanceType::UnionType(_) => None,
},
Type::SpecialForm(special_form) => match special_form {

View file

@ -290,7 +290,9 @@ impl<'db> AllMembers<'db> {
}
Type::ClassLiteral(class) if class.is_protocol(db) => continue,
Type::KnownInstance(
KnownInstanceType::TypeVar(_) | KnownInstanceType::TypeAliasType(_),
KnownInstanceType::TypeVar(_)
| KnownInstanceType::TypeAliasType(_)
| KnownInstanceType::UnionType(_),
) => continue,
Type::Dynamic(DynamicType::TodoTypeAlias) => continue,
_ => {}

View file

@ -103,7 +103,7 @@ use crate::types::{
TypeAliasType, TypeAndQualifiers, TypeContext, TypeQualifiers,
TypeVarBoundOrConstraintsEvaluation, TypeVarDefaultEvaluation, TypeVarIdentity,
TypeVarInstance, TypeVarKind, TypeVarVariance, TypedDictType, UnionBuilder, UnionType,
binding_type, todo_type,
UnionTypeInstance, binding_type, todo_type,
};
use crate::types::{ClassBase, add_inferred_python_version_hint_to_diagnostic};
use crate::unpack::{EvaluationMode, UnpackPosition};
@ -8449,19 +8449,48 @@ impl<'db, 'ast> TypeInferenceBuilder<'db, 'ast> {
)))
}
// Special-case `X | Y` with `X` and `Y` instances of `type` to produce a `types.UnionType` instance, in order to
// overwrite the typeshed return type for `type.__or__`, which would result in `types.UnionType | X`. We currently
// do this to avoid false positives when a legacy type alias like `IntOrStr = int | str` is later used in a type
// expression, because `types.UnionType` will result in a `@Todo` type, while `types.UnionType | <class 'int'>` does
// not.
//
// TODO: Remove this special case once we add support for legacy type aliases.
// PEP 604-style union types using the `|` operator.
(
Type::ClassLiteral(..) | Type::SubclassOf(..) | Type::GenericAlias(..),
Type::ClassLiteral(..) | Type::SubclassOf(..) | Type::GenericAlias(..),
Type::ClassLiteral(..)
| Type::SubclassOf(..)
| Type::GenericAlias(..)
| Type::SpecialForm(_)
| Type::KnownInstance(KnownInstanceType::UnionType(_)),
_,
ast::Operator::BitOr,
)
| (
_,
Type::ClassLiteral(..)
| Type::SubclassOf(..)
| Type::GenericAlias(..)
| Type::SpecialForm(_)
| Type::KnownInstance(KnownInstanceType::UnionType(_)),
ast::Operator::BitOr,
) if Program::get(self.db()).python_version(self.db()) >= PythonVersion::PY310 => {
Some(KnownClass::UnionType.to_instance(self.db()))
// For a value expression like `int | None`, the inferred type for `None` will be
// a nominal instance of `NoneType`, so we need to convert it to a class literal
// such that it can later be converted back to a nominal instance type when calling
// `.in_type_expression` on the `UnionType` instance.
let convert_none_type = |ty: Type<'db>| {
if ty.is_none(self.db()) {
KnownClass::NoneType.to_class_literal(self.db())
} else {
ty
}
};
if left_ty.is_equivalent_to(self.db(), right_ty) {
Some(left_ty)
} else {
Some(Type::KnownInstance(KnownInstanceType::UnionType(
UnionTypeInstance::new(
self.db(),
convert_none_type(left_ty),
convert_none_type(right_ty),
),
)))
}
}
// We've handled all of the special cases that we support for literals, so we need to

View file

@ -810,6 +810,10 @@ impl<'db> TypeInferenceBuilder<'db, '_> {
self.infer_type_expression(slice);
todo_type!("Generic manual PEP-695 type alias")
}
KnownInstanceType::UnionType(_) => {
self.infer_type_expression(slice);
todo_type!("Generic specialization of types.UnionType")
}
},
Type::Dynamic(DynamicType::Todo(_)) => {
self.infer_type_expression(slice);

View file

@ -95,6 +95,14 @@ impl<'db> Type<'db> {
}
}
/// Return `true` if `self` is a nominal instance of the given known class.
///
/// Only the `Type::NominalInstance` variant can match; every other variant yields `false`.
pub(crate) fn is_instance_of(self, db: &'db dyn Db, known_class: KnownClass) -> bool {
match self {
// Ask the instance's class whether it is `known_class`.
Type::NominalInstance(instance) => instance.class(db).is_known(db, known_class),
_ => false,
}
}
/// Synthesize a protocol instance type with a given set of read-only property members.
pub(super) fn protocol_with_readonly_members<'a, M>(db: &'db dyn Db, members: M) -> Self
where