[red-knot] function signature representation (#14304)

## Summary

Add a typed representation of function signatures (parameters and return
type) and infer it correctly from a function.

Convert existing usage of function return types to use the signature
representation.

This does not yet add inferred types for parameters within function body
scopes based on the annotations, but it should be easy to add as a next
step.

Part of #14161 and #13693.

## Test Plan

Added tests.
This commit is contained in:
Carl Meyer 2024-11-14 15:34:24 -08:00 committed by GitHub
parent ba6c7f6897
commit a48d779c4e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 559 additions and 67 deletions

View file

@ -12,6 +12,7 @@ pub(crate) use self::display::TypeArrayDisplay;
pub(crate) use self::infer::{
infer_deferred_types, infer_definition_types, infer_expression_types, infer_scope_types,
};
pub(crate) use self::signatures::Signature;
use crate::module_resolver::file_to_module;
use crate::semantic_index::ast_ids::HasScopedAstId;
use crate::semantic_index::definition::Definition;
@ -35,6 +36,7 @@ mod display;
mod infer;
mod mro;
mod narrow;
mod signatures;
mod unpacker;
#[salsa::tracked(return_ref)]
@ -1271,11 +1273,11 @@ impl<'db> Type<'db> {
Type::FunctionLiteral(function_type) => {
if function_type.is_known(db, KnownFunction::RevealType) {
CallOutcome::revealed(
function_type.return_ty(db),
function_type.signature(db).return_ty,
*arg_types.first().unwrap_or(&Type::Unknown),
)
} else {
CallOutcome::callable(function_type.return_ty(db))
CallOutcome::callable(function_type.signature(db).return_ty)
}
}
@ -1461,6 +1463,24 @@ impl<'db> Type<'db> {
}
}
/// Interpret a value of this type as a type expression and return the type it names.
///
/// A value expression and a type expression spelled the same way mean different things: the
/// value `int` has type `Type::ClassLiteral(builtins.int)` (the class object itself), whereas
/// the annotation `int` names `Type::Instance(builtins.int)` (every object whose `__class__`
/// is `int`).
#[must_use]
pub fn in_type_expression(&self, db: &'db dyn Db) -> Type<'db> {
    match self {
        // Unknown stays Unknown.
        Type::Unknown => Type::Unknown,
        // TODO map this to a new `Type::TypeVar` variant
        Type::KnownInstance(KnownInstanceType::TypeVar(_)) => *self,
        // A union of values names the union of what each member names.
        Type::Union(members) => members.map(db, |member| member.in_type_expression(db)),
        // Class objects name their instances.
        Type::ClassLiteral(_) | Type::SubclassOf(_) => self.to_instance(db),
        // Everything else is not handled yet.
        _ => Type::Todo,
    }
}
/// The type `NoneType` / `None`
pub fn none(db: &'db dyn Db) -> Type<'db> {
KnownClass::NoneType.to_instance(db)
@ -2322,7 +2342,10 @@ impl<'db> FunctionType<'db> {
self.decorators(db).contains(&decorator)
}
/// inferred return type for this function
/// Typed externally-visible signature for this function.
///
/// This is the signature as seen by external callers, possibly modified by decorators and/or
/// overloaded.
///
/// ## Why is this a salsa query?
///
@ -2331,34 +2354,32 @@ impl<'db> FunctionType<'db> {
///
/// Were this not a salsa query, then the calling query
/// would depend on the function's AST and rerun for every change in that file.
#[salsa::tracked]
pub fn return_ty(self, db: &'db dyn Db) -> Type<'db> {
#[salsa::tracked(return_ref)]
pub fn signature(self, db: &'db dyn Db) -> Signature<'db> {
let function_stmt_node = self.body_scope(db).node(db).expect_function();
let internal_signature = self.internal_signature(db);
if function_stmt_node.decorator_list.is_empty() {
return internal_signature;
}
// TODO process the effect of decorators on the signature
Signature::todo()
}
/// Typed internally-visible signature for this function.
///
/// This represents the annotations on the function itself, unmodified by decorators and
/// overloads.
///
/// These are the parameter and return types that should be used for type checking the body of
/// the function.
///
/// Don't call this when checking any other file; only when type-checking the function body
/// scope.
fn internal_signature(self, db: &'db dyn Db) -> Signature<'db> {
let scope = self.body_scope(db);
let function_stmt_node = scope.node(db).expect_function();
// TODO if a function `bar` is decorated by `foo`,
// where `foo` is annotated as returning a type `X` that is a subtype of `Callable`,
// we need to infer the return type from `X`'s return annotation
// rather than from `bar`'s return annotation
// in order to determine the type that `bar` returns
if !function_stmt_node.decorator_list.is_empty() {
return Type::Todo;
}
function_stmt_node
.returns
.as_ref()
.map(|returns| {
if function_stmt_node.is_async {
// TODO: generic `types.CoroutineType`!
Type::Todo
} else {
let definition =
semantic_index(db, scope.file(db)).definition(function_stmt_node);
definition_expression_ty(db, definition, returns.as_ref())
}
})
.unwrap_or(Type::Unknown)
let definition = semantic_index(db, scope.file(db)).definition(function_stmt_node);
Signature::from_function(db, definition, function_stmt_node)
}
pub fn is_known(self, db: &'db dyn Db, known_function: KnownFunction) -> bool {

View file

@ -85,7 +85,7 @@ impl Display for DisplayRepresentation<'_> {
Type::SubclassOf(SubclassOfType { class }) => {
write!(f, "type[{}]", class.name(self.db))
}
Type::KnownInstance(known_instance) => f.write_str(known_instance.as_str()),
Type::KnownInstance(known_instance) => f.write_str(known_instance.repr(self.db)),
Type::FunctionLiteral(function) => f.write_str(function.name(self.db)),
Type::Union(union) => union.display(self.db).fmt(f),
Type::Intersection(intersection) => intersection.display(self.db).fmt(f),

View file

@ -822,8 +822,7 @@ impl<'db> TypeInferenceBuilder<'db> {
.as_deref()
.expect("function type params scope without type params");
// TODO: defer annotation resolution in stubs, with __future__.annotations, or stringified
self.infer_optional_expression(function.returns.as_deref());
self.infer_optional_annotation_expression(function.returns.as_deref());
self.infer_type_parameters(type_params);
self.infer_parameters(&function.parameters);
}
@ -915,13 +914,11 @@ impl<'db> TypeInferenceBuilder<'db> {
// If there are type params, parameters and returns are evaluated in that scope, that is, in
// `infer_function_type_params`, rather than here.
if type_params.is_none() {
self.infer_parameters(parameters);
// TODO: this should also be applied to parameter annotations.
if self.are_all_types_deferred() {
self.types.has_deferred = true;
} else {
self.infer_optional_annotation_expression(returns.as_deref());
self.infer_parameters(parameters);
}
}
@ -971,7 +968,7 @@ impl<'db> TypeInferenceBuilder<'db> {
default: _,
} = parameter_with_default;
self.infer_optional_expression(parameter.annotation.as_deref());
self.infer_optional_annotation_expression(parameter.annotation.as_deref());
}
fn infer_parameter(&mut self, parameter: &ast::Parameter) {
@ -981,7 +978,7 @@ impl<'db> TypeInferenceBuilder<'db> {
annotation,
} = parameter;
self.infer_optional_expression(annotation.as_deref());
self.infer_optional_annotation_expression(annotation.as_deref());
}
fn infer_parameter_with_default_definition(
@ -1069,6 +1066,7 @@ impl<'db> TypeInferenceBuilder<'db> {
/// Infer the annotation expressions of `function`: first its return annotation (if any), then
/// its parameters.
///
/// NOTE(review): judging by the name, this is the counterpart run for functions whose
/// annotation inference was postponed (`has_deferred`) — confirm against the caller.
fn infer_function_deferred(&mut self, function: &ast::StmtFunctionDef) {
self.infer_optional_annotation_expression(function.returns.as_deref());
self.infer_parameters(function.parameters.as_ref());
}
fn infer_class_deferred(&mut self, class: &ast::StmtClassDef) {
@ -4099,7 +4097,9 @@ impl<'db> TypeInferenceBuilder<'db> {
match expression {
ast::Expr::Name(name) => match name.ctx {
ast::ExprContext::Load => self.infer_name_expression(name).to_instance(self.db),
ast::ExprContext::Load => {
self.infer_name_expression(name).in_type_expression(self.db)
}
ast::ExprContext::Invalid => Type::Unknown,
ast::ExprContext::Store | ast::ExprContext::Del => Type::Todo,
},
@ -4107,7 +4107,7 @@ impl<'db> TypeInferenceBuilder<'db> {
ast::Expr::Attribute(attribute_expression) => match attribute_expression.ctx {
ast::ExprContext::Load => self
.infer_attribute_expression(attribute_expression)
.to_instance(self.db),
.in_type_expression(self.db),
ast::ExprContext::Invalid => Type::Unknown,
ast::ExprContext::Store | ast::ExprContext::Del => Type::Todo,
},
@ -5019,24 +5019,8 @@ mod tests {
",
)?;
// TODO: sys.version_info, and need to understand @final and @type_check_only
assert_public_ty(&db, "src/a.py", "x", "EllipsisType | Unknown");
Ok(())
}
#[test]
fn function_return_type() -> anyhow::Result<()> {
let mut db = setup_db();
db.write_file("src/a.py", "def example() -> int: return 42")?;
let mod_file = system_path_to_file(&db, "src/a.py").unwrap();
let function = global_symbol(&db, mod_file, "example")
.expect_type()
.expect_function_literal();
let returns = function.return_ty(&db);
assert_eq!(returns.display(&db).to_string(), "int");
// TODO: sys.version_info
assert_public_ty(&db, "src/a.py", "x", "EllipsisType | ellipsis");
Ok(())
}
@ -5251,7 +5235,7 @@ mod tests {
fn deferred_annotations_regular_source_fails() -> anyhow::Result<()> {
let mut db = setup_db();
// In (regular) source files, deferred annotations are *not* resolved
// In (regular) source files, annotations are *not* deferred
// Also tests imports from `__future__` that are not annotations
db.write_dedented(
"/src/source.py",

View file

@ -0,0 +1,480 @@
#![allow(dead_code)]
use super::{definition_expression_ty, Type};
use crate::semantic_index::definition::Definition;
use crate::Db;
use ruff_python_ast::{self as ast, name::Name};
/// A typed callable signature.
///
/// Pairs the typed parameter list with the annotated return type. Two signatures compare equal
/// when both parts are equal (derived `PartialEq`).
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct Signature<'db> {
/// Typed parameters of the callable, grouped by parameter kind.
parameters: Parameters<'db>,
/// Annotated return type (Unknown if no annotation.)
pub(crate) return_ty: Type<'db>,
}
impl<'db> Signature<'db> {
    /// Build the placeholder signature `(*args: Todo, **kwargs: Todo) -> Todo`.
    pub(crate) fn todo() -> Self {
        let parameters = Parameters::todo();
        Self {
            parameters,
            return_ty: Type::Todo,
        }
    }

    /// Build the typed signature described by the annotations of a function definition.
    ///
    /// The return type is `Unknown` when there is no return annotation, `Todo` when an
    /// annotated function is `async`, and otherwise the type of the return annotation.
    pub(super) fn from_function(
        db: &'db dyn Db,
        definition: Definition<'db>,
        function_node: &'db ast::StmtFunctionDef,
    ) -> Self {
        // Resolve the return type before the parameters, as the annotation order dictates.
        let return_ty = match function_node.returns.as_deref() {
            None => Type::Unknown,
            // TODO: generic `types.CoroutineType`!
            Some(_) if function_node.is_async => Type::Todo,
            Some(returns) => definition_expression_ty(db, definition, returns),
        };

        let parameters =
            Parameters::from_parameters(db, definition, function_node.parameters.as_ref());

        Self {
            parameters,
            return_ty,
        }
    }
}
/// The parameters portion of a typed signature.
///
/// The ordering of parameters is always as given in this struct: first positional-only parameters,
/// then positional-or-keyword, then optionally the variadic parameter, then keyword-only
/// parameters, and last, optionally the variadic keywords parameter.
///
/// The derived `Default` is the empty parameter list: every group is empty and neither variadic
/// parameter is present.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub(super) struct Parameters<'db> {
/// Parameters which may only be filled by positional arguments.
positional_only: Box<[ParameterWithDefault<'db>]>,
/// Parameters which may be filled by positional or keyword arguments.
positional_or_keyword: Box<[ParameterWithDefault<'db>]>,
/// The `*args` variadic parameter, if any.
variadic: Option<Parameter<'db>>,
/// Parameters which may only be filled by keyword arguments.
keyword_only: Box<[ParameterWithDefault<'db>]>,
/// The `**kwargs` variadic keywords parameter, if any.
keywords: Option<Parameter<'db>>,
}
impl<'db> Parameters<'db> {
    /// Build the placeholder parameter list `(*args: Todo, **kwargs: Todo)`.
    fn todo() -> Self {
        // Both variadic parameters share the same shape; only the name differs.
        let todo_parameter = |name: &'static str| Parameter {
            name: Some(Name::new_static(name)),
            annotated_ty: Type::Todo,
        };

        Self {
            variadic: Some(todo_parameter("args")),
            keywords: Some(todo_parameter("kwargs")),
            ..Default::default()
        }
    }

    /// Convert the AST parameter list of a function definition into its typed form.
    fn from_parameters(
        db: &'db dyn Db,
        definition: Definition<'db>,
        parameters: &'db ast::Parameters,
    ) -> Self {
        // Exhaustive destructuring: a new field on `ast::Parameters` becomes a compile error here.
        let ast::Parameters {
            posonlyargs,
            args,
            vararg,
            kwonlyargs,
            kwarg,
            range: _,
        } = parameters;

        let with_default = |node: &'db ast::ParameterWithDefault| {
            ParameterWithDefault::from_node(db, definition, node)
        };
        let plain = |node: &'db ast::Parameter| Parameter::from_node(db, definition, node);

        // Fields are initialized in signature order, matching the order parameters are declared.
        Self {
            positional_only: posonlyargs.iter().map(with_default).collect(),
            positional_or_keyword: args.iter().map(with_default).collect(),
            variadic: vararg.as_deref().map(plain),
            keyword_only: kwonlyargs.iter().map(with_default).collect(),
            keywords: kwarg.as_deref().map(plain),
        }
    }
}
/// A single parameter of a typed signature, with optional default value.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(super) struct ParameterWithDefault<'db> {
/// The parameter itself: its name and annotated type.
parameter: Parameter<'db>,
/// Type of the default value, if any.
default_ty: Option<Type<'db>>,
}
impl<'db> ParameterWithDefault<'db> {
    /// Convert an AST parameter (with its optional default expression) into its typed form.
    fn from_node(
        db: &'db dyn Db,
        definition: Definition<'db>,
        parameter_with_default: &'db ast::ParameterWithDefault,
    ) -> Self {
        // The default expression is typed first, then the parameter itself.
        let default_ty = parameter_with_default
            .default
            .as_deref()
            .map(|expr| definition_expression_ty(db, definition, expr));
        let parameter = Parameter::from_node(db, definition, &parameter_with_default.parameter);

        Self {
            parameter,
            default_ty,
        }
    }
}
/// A single parameter of a typed signature.
///
/// Holds only the name and annotated type; a default value, where one is allowed, is carried by
/// `ParameterWithDefault`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(super) struct Parameter<'db> {
/// Parameter name.
///
/// It is possible for signatures to be defined in ways that leave positional-only parameters
/// nameless (e.g. via `Callable` annotations).
name: Option<Name>,
/// Annotated type of the parameter (Unknown if no annotation.)
annotated_ty: Type<'db>,
}
impl<'db> Parameter<'db> {
    /// Convert an AST parameter into its typed form.
    ///
    /// The name is always taken from the AST node; the annotated type is `Unknown` when the
    /// parameter carries no annotation.
    fn from_node(
        db: &'db dyn Db,
        definition: Definition<'db>,
        parameter: &'db ast::Parameter,
    ) -> Self {
        let name = Some(parameter.name.id.clone());
        let annotated_ty = match parameter.annotation.as_deref() {
            Some(annotation) => definition_expression_ty(db, definition, annotation),
            None => Type::Unknown,
        };

        Self { name, annotated_ty }
    }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::db::tests::TestDb;
use crate::program::{Program, SearchPathSettings};
use crate::python_version::PythonVersion;
use crate::types::{global_symbol, FunctionType};
use crate::ProgramSettings;
use ruff_db::system::{DbWithTestSystem, SystemPathBuf};
/// Create a test database whose in-memory file system has a `/src` directory, registered as
/// the program's search-path root with the default Python target version.
pub(crate) fn setup_db() -> TestDb {
    let db = TestDb::new();

    let src_root = SystemPathBuf::from("/src");
    db.memory_file_system()
        .create_directory_all(&src_root)
        .unwrap();

    let settings = ProgramSettings {
        target_version: PythonVersion::default(),
        search_paths: SearchPathSettings::new(src_root),
    };
    Program::from_settings(&db, &settings).expect("Valid search path settings");

    db
}
/// Look up the global symbol `f` in `file` and return it as a function literal.
///
/// Panics if the file cannot be resolved or if `f` is not a function.
#[track_caller]
fn get_function_f<'db>(db: &'db TestDb, file: &'static str) -> FunctionType<'db> {
    let module = ruff_db::files::system_path_to_file(db, file).unwrap();
    let symbol = global_symbol(db, module, "f");
    symbol.expect_type().expect_function_literal()
}
/// Assert the displayed default type of `param_with_default` (or its absence), then the name
/// and displayed annotation type of the wrapped parameter.
#[track_caller]
fn assert_param_with_default<'db>(
    db: &'db TestDb,
    param_with_default: &ParameterWithDefault<'db>,
    expected_name: &'static str,
    expected_annotation_ty_display: &'static str,
    expected_default_ty_display: Option<&'static str>,
) {
    let actual_default = param_with_default
        .default_ty
        .map(|ty| ty.display(db).to_string());
    let expected_default = expected_default_ty_display.map(String::from);
    assert_eq!(actual_default, expected_default);

    assert_param(
        db,
        &param_with_default.parameter,
        expected_name,
        expected_annotation_ty_display,
    );
}
/// Assert the name and displayed annotation type of `param`.
///
/// Panics if the parameter is nameless.
#[track_caller]
fn assert_param<'db>(
    db: &'db TestDb,
    param: &Parameter<'db>,
    expected_name: &'static str,
    expected_annotation_ty_display: &'static str,
) {
    let actual_name = param.name.as_ref().unwrap();
    assert_eq!(actual_name, expected_name);

    let actual_annotation = param.annotated_ty.display(db).to_string();
    assert_eq!(actual_annotation, expected_annotation_ty_display);
}
/// A function without parameters or annotations has an `Unknown` return type and an entirely
/// empty parameter list.
#[test]
fn empty() {
    let mut db = setup_db();
    db.write_dedented("/src/a.py", "def f(): ...").unwrap();
    let func = get_function_f(&db, "/src/a.py");

    let Signature {
        parameters,
        return_ty,
    } = func.internal_signature(&db);

    assert_eq!(return_ty.display(&db).to_string(), "Unknown");
    assert!(parameters.positional_only.is_empty());
    assert!(parameters.positional_or_keyword.is_empty());
    assert!(parameters.variadic.is_none());
    assert!(parameters.keyword_only.is_empty());
    assert!(parameters.keywords.is_none());
}
/// A function using every parameter kind, with and without annotations and defaults: each
/// parameter must land in the right group with the expected annotated and default types.
#[test]
#[allow(clippy::many_single_char_names)]
fn full() {
let mut db = setup_db();
db.write_dedented(
"/src/a.py",
"
def f(a, b: int, c = 1, d: int = 2, /,
e = 3, f: Literal[4] = 4, *args: object,
g = 5, h: Literal[6] = 6, **kwargs: str) -> bytes: ...
",
)
.unwrap();
let func = get_function_f(&db, "/src/a.py");
let sig = func.internal_signature(&db);
assert_eq!(sig.return_ty.display(&db).to_string(), "bytes");
let params = sig.parameters;
// Pull each parameter group apart; a wrong group size fails the test immediately.
let [a, b, c, d] = &params.positional_only[..] else {
panic!("expected four positional-only parameters");
};
let [e, f] = &params.positional_or_keyword[..] else {
panic!("expected two positional-or-keyword parameters");
};
let Some(args) = params.variadic else {
panic!("expected a variadic parameter");
};
let [g, h] = &params.keyword_only[..] else {
panic!("expected two keyword-only parameters");
};
let Some(kwargs) = params.keywords else {
panic!("expected a kwargs parameter");
};
// Unannotated parameters are `Unknown`; defaults are typed as literals.
assert_param_with_default(&db, a, "a", "Unknown", None);
assert_param_with_default(&db, b, "b", "int", None);
assert_param_with_default(&db, c, "c", "Unknown", Some("Literal[1]"));
assert_param_with_default(&db, d, "d", "int", Some("Literal[2]"));
assert_param_with_default(&db, e, "e", "Unknown", Some("Literal[3]"));
assert_param_with_default(&db, f, "f", "Literal[4]", Some("Literal[4]"));
assert_param_with_default(&db, g, "g", "Unknown", Some("Literal[5]"));
assert_param_with_default(&db, h, "h", "Literal[6]", Some("Literal[6]"));
assert_param(&db, &args, "args", "object");
assert_param(&db, &kwargs, "kwargs", "str");
}
/// In a regular `.py` file a parameter annotation is resolved at the point of definition, so a
/// later rebinding of the annotated name does not affect the signature.
#[test]
fn not_deferred() {
let mut db = setup_db();
db.write_dedented(
"/src/a.py",
"
class A: ...
class B: ...
alias = A
def f(a: alias): ...
alias = B
",
)
.unwrap();
let func = get_function_f(&db, "/src/a.py");
let sig = func.internal_signature(&db);
let [a] = &sig.parameters.positional_or_keyword[..] else {
panic!("expected one positional-or-keyword parameter");
};
// Parameter resolution not deferred; we should see A not B
assert_param_with_default(&db, a, "a", "A", None);
}
/// In a `.pyi` stub file parameter annotations are resolved lazily, so the annotated name
/// refers to its final binding in the module.
#[test]
fn deferred_in_stub() {
let mut db = setup_db();
db.write_dedented(
"/src/a.pyi",
"
class A: ...
class B: ...
alias = A
def f(a: alias): ...
alias = B
",
)
.unwrap();
let func = get_function_f(&db, "/src/a.pyi");
let sig = func.internal_signature(&db);
let [a] = &sig.parameters.positional_or_keyword[..] else {
panic!("expected one positional-or-keyword parameter");
};
// Parameter resolution deferred; we should see B
assert_param_with_default(&db, a, "a", "B", None);
}
/// Generic function (PEP 695 type parameters) in a regular `.py` file.
///
/// This pins the current behavior, which the TODO below marks as incorrect: the annotation is
/// resolved as if deferred, so `a` is seen as `B` rather than `A`.
#[test]
fn generic_not_deferred() {
let mut db = setup_db();
db.write_dedented(
"/src/a.py",
"
class A: ...
class B: ...
alias = A
def f[T](a: alias, b: T) -> T: ...
alias = B
",
)
.unwrap();
let func = get_function_f(&db, "/src/a.py");
let sig = func.internal_signature(&db);
let [a, b] = &sig.parameters.positional_or_keyword[..] else {
panic!("expected two positional-or-keyword parameters");
};
// TODO resolution should not be deferred; we should see A not B
assert_param_with_default(&db, a, "a", "B", None);
assert_param_with_default(&db, b, "b", "T", None);
}
/// Generic function (PEP 695 type parameters) in a `.pyi` stub: annotations are resolved
/// lazily, so `a` refers to the final binding of `alias`, and `b` is the type variable `T`.
#[test]
fn generic_deferred_in_stub() {
let mut db = setup_db();
db.write_dedented(
"/src/a.pyi",
"
class A: ...
class B: ...
alias = A
def f[T](a: alias, b: T) -> T: ...
alias = B
",
)
.unwrap();
let func = get_function_f(&db, "/src/a.pyi");
let sig = func.internal_signature(&db);
let [a, b] = &sig.parameters.positional_or_keyword[..] else {
panic!("expected two positional-or-keyword parameters");
};
// Parameter resolution deferred; we should see B
assert_param_with_default(&db, a, "a", "B", None);
assert_param_with_default(&db, b, "b", "T", None);
}
/// For an undecorated function the externally-visible signature equals the internal one.
#[test]
fn external_signature_no_decorator() {
let mut db = setup_db();
db.write_dedented(
"/src/a.py",
"
def f(a: int) -> int: ...
",
)
.unwrap();
let func = get_function_f(&db, "/src/a.py");
let expected_sig = func.internal_signature(&db);
// With no decorators, internal and external signature are the same
assert_eq!(func.signature(&db), &expected_sig);
}
/// For a decorated function the externally-visible signature is the todo signature, because
/// the effect of decorators on the signature is not modeled yet.
#[test]
fn external_signature_decorated() {
let mut db = setup_db();
db.write_dedented(
"/src/a.py",
"
def deco(func): ...
@deco
def f(a: int) -> int: ...
",
)
.unwrap();
let func = get_function_f(&db, "/src/a.py");
let expected_sig = Signature::todo();
// With a decorator, the external signature is (for now) the todo signature
assert_eq!(func.signature(&db), &expected_sig);
}
}