[red-knot] add Declarations support to semantic indexing (#13334)

Add support for declared types to the semantic index. This involves a
lot of renaming to clarify the distinction between bindings and
declarations. The Definition (or more specifically, the DefinitionKind)
becomes responsible for determining which definitions are bindings,
which are declarations, and which are both, and the symbol table
building is refactored a bit so that the `IS_BOUND` (renamed from
`IS_DEFINED` for consistent terminology) flag is always set when a
binding is added, rather than being set separately (and requiring us to
ensure it is set properly).

The `SymbolState` is split into two parts, `SymbolBindings` and
`SymbolDeclarations`, because we need to store live bindings for every
declaration and live declarations for every binding; the split lets us
do this without storing more than we need.

The massive doc comment in `use_def.rs` is updated to reflect bindings
vs declarations.

The `UseDefMap` gains some new APIs which are allow-unused for now,
since this PR doesn't yet update type inference to take declarations
into account.
This commit is contained in:
Carl Meyer 2024-09-13 13:55:22 -04:00 committed by GitHub
parent 8558126df1
commit d988204b1b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 962 additions and 478 deletions

View file

@ -27,7 +27,7 @@ pub mod expression;
pub mod symbol;
mod use_def;
pub(crate) use self::use_def::{DefinitionWithConstraints, DefinitionWithConstraintsIterator};
pub(crate) use self::use_def::{BindingWithConstraints, BindingWithConstraintsIterator};
type SymbolMap = hashbrown::HashMap<ScopedSymbolId, (), ()>;
@ -326,16 +326,16 @@ mod tests {
use crate::Db;
impl UseDefMap<'_> {
fn first_public_definition(&self, symbol: ScopedSymbolId) -> Option<Definition<'_>> {
self.public_definitions(symbol)
fn first_public_binding(&self, symbol: ScopedSymbolId) -> Option<Definition<'_>> {
self.public_bindings(symbol)
.next()
.map(|constrained_definition| constrained_definition.definition)
.map(|constrained_binding| constrained_binding.binding)
}
fn first_use_definition(&self, use_id: ScopedUseId) -> Option<Definition<'_>> {
self.use_definitions(use_id)
fn first_binding_at_use(&self, use_id: ScopedUseId) -> Option<Definition<'_>> {
self.bindings_at_use(use_id)
.next()
.map(|constrained_definition| constrained_definition.definition)
.map(|constrained_binding| constrained_binding.binding)
}
}
@ -397,8 +397,8 @@ mod tests {
let foo = global_table.symbol_id_by_name("foo").unwrap();
let use_def = use_def_map(&db, scope);
let definition = use_def.first_public_definition(foo).unwrap();
assert!(matches!(definition.node(&db), DefinitionKind::Import(_)));
let binding = use_def.first_public_binding(foo).unwrap();
assert!(matches!(binding.kind(&db), DefinitionKind::Import(_)));
}
#[test]
@ -427,22 +427,19 @@ mod tests {
assert!(
global_table
.symbol_by_name("foo")
.is_some_and(|symbol| { symbol.is_defined() && !symbol.is_used() }),
.is_some_and(|symbol| { symbol.is_bound() && !symbol.is_used() }),
"symbols that are defined get the defined flag"
);
let use_def = use_def_map(&db, scope);
let definition = use_def
.first_public_definition(
let binding = use_def
.first_public_binding(
global_table
.symbol_id_by_name("foo")
.expect("symbol to exist"),
)
.unwrap();
assert!(matches!(
definition.node(&db),
DefinitionKind::ImportFrom(_)
));
assert!(matches!(binding.kind(&db), DefinitionKind::ImportFrom(_)));
}
#[test]
@ -455,17 +452,14 @@ mod tests {
assert!(
global_table
.symbol_by_name("foo")
.is_some_and(|symbol| { !symbol.is_defined() && symbol.is_used() }),
"a symbol used but not defined in a scope should have only the used flag"
.is_some_and(|symbol| { !symbol.is_bound() && symbol.is_used() }),
"a symbol used but not bound in a scope should have only the used flag"
);
let use_def = use_def_map(&db, scope);
let definition = use_def
.first_public_definition(global_table.symbol_id_by_name("x").expect("symbol exists"))
let binding = use_def
.first_public_binding(global_table.symbol_id_by_name("x").expect("symbol exists"))
.unwrap();
assert!(matches!(
definition.node(&db),
DefinitionKind::Assignment(_)
));
assert!(matches!(binding.kind(&db), DefinitionKind::Assignment(_)));
}
#[test]
@ -477,12 +471,12 @@ mod tests {
assert_eq!(names(&global_table), vec!["x"]);
let use_def = use_def_map(&db, scope);
let definition = use_def
.first_public_definition(global_table.symbol_id_by_name("x").unwrap())
let binding = use_def
.first_public_binding(global_table.symbol_id_by_name("x").unwrap())
.unwrap();
assert!(matches!(
definition.node(&db),
binding.kind(&db),
DefinitionKind::AugmentedAssignment(_)
));
}
@ -515,13 +509,10 @@ y = 2
assert_eq!(names(&class_table), vec!["x"]);
let use_def = index.use_def_map(class_scope_id);
let definition = use_def
.first_public_definition(class_table.symbol_id_by_name("x").expect("symbol exists"))
let binding = use_def
.first_public_binding(class_table.symbol_id_by_name("x").expect("symbol exists"))
.unwrap();
assert!(matches!(
definition.node(&db),
DefinitionKind::Assignment(_)
));
assert!(matches!(binding.kind(&db), DefinitionKind::Assignment(_)));
}
#[test]
@ -551,17 +542,14 @@ y = 2
assert_eq!(names(&function_table), vec!["x"]);
let use_def = index.use_def_map(function_scope_id);
let definition = use_def
.first_public_definition(
let binding = use_def
.first_public_binding(
function_table
.symbol_id_by_name("x")
.expect("symbol exists"),
)
.unwrap();
assert!(matches!(
definition.node(&db),
DefinitionKind::Assignment(_)
));
assert!(matches!(binding.kind(&db), DefinitionKind::Assignment(_)));
}
#[test]
@ -593,27 +581,27 @@ def f(a: str, /, b: str, c: int = 1, *args, d: int = 2, **kwargs):
let use_def = index.use_def_map(function_scope_id);
for name in ["a", "b", "c", "d"] {
let definition = use_def
.first_public_definition(
let binding = use_def
.first_public_binding(
function_table
.symbol_id_by_name(name)
.expect("symbol exists"),
)
.unwrap();
assert!(matches!(
definition.node(&db),
binding.kind(&db),
DefinitionKind::ParameterWithDefault(_)
));
}
for name in ["args", "kwargs"] {
let definition = use_def
.first_public_definition(
let binding = use_def
.first_public_binding(
function_table
.symbol_id_by_name(name)
.expect("symbol exists"),
)
.unwrap();
assert!(matches!(definition.node(&db), DefinitionKind::Parameter(_)));
assert!(matches!(binding.kind(&db), DefinitionKind::Parameter(_)));
}
}
@ -641,23 +629,19 @@ def f(a: str, /, b: str, c: int = 1, *args, d: int = 2, **kwargs):
let use_def = index.use_def_map(lambda_scope_id);
for name in ["a", "b", "c", "d"] {
let definition = use_def
.first_public_definition(
lambda_table.symbol_id_by_name(name).expect("symbol exists"),
)
let binding = use_def
.first_public_binding(lambda_table.symbol_id_by_name(name).expect("symbol exists"))
.unwrap();
assert!(matches!(
definition.node(&db),
binding.kind(&db),
DefinitionKind::ParameterWithDefault(_)
));
}
for name in ["args", "kwargs"] {
let definition = use_def
.first_public_definition(
lambda_table.symbol_id_by_name(name).expect("symbol exists"),
)
let binding = use_def
.first_public_binding(lambda_table.symbol_id_by_name(name).expect("symbol exists"))
.unwrap();
assert!(matches!(definition.node(&db), DefinitionKind::Parameter(_)));
assert!(matches!(binding.kind(&db), DefinitionKind::Parameter(_)));
}
}
@ -695,15 +679,15 @@ def f(a: str, /, b: str, c: int = 1, *args, d: int = 2, **kwargs):
let use_def = index.use_def_map(comprehension_scope_id);
for name in ["x", "y"] {
let definition = use_def
.first_public_definition(
let binding = use_def
.first_public_binding(
comprehension_symbol_table
.symbol_id_by_name(name)
.expect("symbol exists"),
)
.unwrap();
assert!(matches!(
definition.node(&db),
binding.kind(&db),
DefinitionKind::Comprehension(_)
));
}
@ -742,8 +726,8 @@ def f(a: str, /, b: str, c: int = 1, *args, d: int = 2, **kwargs):
let element_use_id =
element.scoped_use_id(&db, comprehension_scope_id.to_scope_id(&db, file));
let definition = use_def.first_use_definition(element_use_id).unwrap();
let DefinitionKind::Comprehension(comprehension) = definition.node(&db) else {
let binding = use_def.first_binding_at_use(element_use_id).unwrap();
let DefinitionKind::Comprehension(comprehension) = binding.kind(&db) else {
panic!("expected generator definition")
};
let target = comprehension.target();
@ -822,12 +806,10 @@ with item1 as x, item2 as y:
let use_def = index.use_def_map(FileScopeId::global());
for name in ["x", "y"] {
let Some(definition) = use_def.first_public_definition(
global_table.symbol_id_by_name(name).expect("symbol exists"),
) else {
panic!("Expected with item definition for {name}");
};
assert!(matches!(definition.node(&db), DefinitionKind::WithItem(_)));
let binding = use_def
.first_public_binding(global_table.symbol_id_by_name(name).expect("symbol exists"))
.expect("Expected with item definition for {name}");
assert!(matches!(binding.kind(&db), DefinitionKind::WithItem(_)));
}
}
@ -847,12 +829,10 @@ with context() as (x, y):
let use_def = index.use_def_map(FileScopeId::global());
for name in ["x", "y"] {
let Some(definition) = use_def.first_public_definition(
global_table.symbol_id_by_name(name).expect("symbol exists"),
) else {
panic!("Expected with item definition for {name}");
};
assert!(matches!(definition.node(&db), DefinitionKind::WithItem(_)));
let binding = use_def
.first_public_binding(global_table.symbol_id_by_name(name).expect("symbol exists"))
.expect("Expected with item definition for {name}");
assert!(matches!(binding.kind(&db), DefinitionKind::WithItem(_)));
}
}
@ -889,14 +869,14 @@ def func():
assert_eq!(names(&func2_table), vec!["y"]);
let use_def = index.use_def_map(FileScopeId::global());
let definition = use_def
.first_public_definition(
let binding = use_def
.first_public_binding(
global_table
.symbol_id_by_name("func")
.expect("symbol exists"),
)
.unwrap();
assert!(matches!(definition.node(&db), DefinitionKind::Function(_)));
assert!(matches!(binding.kind(&db), DefinitionKind::Function(_)));
}
#[test]
@ -964,7 +944,7 @@ class C[T]:
assert!(
ann_table
.symbol_by_name("T")
.is_some_and(|s| s.is_defined() && !s.is_used()),
.is_some_and(|s| s.is_bound() && !s.is_used()),
"type parameters are defined by the scope that introduces them"
);
@ -996,8 +976,8 @@ class C[T]:
};
let x_use_id = x_use_expr_name.scoped_use_id(&db, scope);
let use_def = use_def_map(&db, scope);
let definition = use_def.first_use_definition(x_use_id).unwrap();
let DefinitionKind::Assignment(assignment) = definition.node(&db) else {
let binding = use_def.first_binding_at_use(x_use_id).unwrap();
let DefinitionKind::Assignment(assignment) = binding.kind(&db) else {
panic!("should be an assignment definition")
};
let ast::Expr::NumberLiteral(ast::ExprNumberLiteral {
@ -1127,12 +1107,10 @@ match subject:
("k", 0),
("l", 1),
] {
let definition = use_def
.first_public_definition(
global_table.symbol_id_by_name(name).expect("symbol exists"),
)
let binding = use_def
.first_public_binding(global_table.symbol_id_by_name(name).expect("symbol exists"))
.expect("Expected with item definition for {name}");
if let DefinitionKind::MatchPattern(pattern) = definition.node(&db) {
if let DefinitionKind::MatchPattern(pattern) = binding.kind(&db) {
assert_eq!(pattern.index(), expected_index);
} else {
panic!("Expected match pattern definition for {name}");
@ -1159,12 +1137,10 @@ match 1:
let use_def = use_def_map(&db, global_scope_id);
for (name, expected_index) in [("first", 0), ("second", 0)] {
let definition = use_def
.first_public_definition(
global_table.symbol_id_by_name(name).expect("symbol exists"),
)
let binding = use_def
.first_public_binding(global_table.symbol_id_by_name(name).expect("symbol exists"))
.expect("Expected with item definition for {name}");
if let DefinitionKind::MatchPattern(pattern) = definition.node(&db) {
if let DefinitionKind::MatchPattern(pattern) = binding.kind(&db) {
assert_eq!(pattern.index(), expected_index);
} else {
panic!("Expected match pattern definition for {name}");
@ -1181,11 +1157,11 @@ match 1:
assert_eq!(&names(&global_table), &["a", "x"]);
let use_def = use_def_map(&db, scope);
let definition = use_def
.first_public_definition(global_table.symbol_id_by_name("x").unwrap())
let binding = use_def
.first_public_binding(global_table.symbol_id_by_name("x").unwrap())
.unwrap();
assert!(matches!(definition.node(&db), DefinitionKind::For(_)));
assert!(matches!(binding.kind(&db), DefinitionKind::For(_)));
}
#[test]
@ -1197,15 +1173,15 @@ match 1:
assert_eq!(&names(&global_table), &["a", "x", "y"]);
let use_def = use_def_map(&db, scope);
let x_definition = use_def
.first_public_definition(global_table.symbol_id_by_name("x").unwrap())
let x_binding = use_def
.first_public_binding(global_table.symbol_id_by_name("x").unwrap())
.unwrap();
let y_definition = use_def
.first_public_definition(global_table.symbol_id_by_name("y").unwrap())
let y_binding = use_def
.first_public_binding(global_table.symbol_id_by_name("y").unwrap())
.unwrap();
assert!(matches!(x_definition.node(&db), DefinitionKind::For(_)));
assert!(matches!(y_definition.node(&db), DefinitionKind::For(_)));
assert!(matches!(x_binding.kind(&db), DefinitionKind::For(_)));
assert!(matches!(y_binding.kind(&db), DefinitionKind::For(_)));
}
#[test]
@ -1217,10 +1193,10 @@ match 1:
assert_eq!(&names(&global_table), &["e", "a", "b", "c", "d"]);
let use_def = use_def_map(&db, scope);
let definition = use_def
.first_public_definition(global_table.symbol_id_by_name("a").unwrap())
let binding = use_def
.first_public_binding(global_table.symbol_id_by_name("a").unwrap())
.unwrap();
assert!(matches!(definition.node(&db), DefinitionKind::For(_)));
assert!(matches!(binding.kind(&db), DefinitionKind::For(_)));
}
}

View file

@ -19,7 +19,7 @@ use crate::semantic_index::definition::{
};
use crate::semantic_index::expression::Expression;
use crate::semantic_index::symbol::{
FileScopeId, NodeWithScopeKey, NodeWithScopeRef, Scope, ScopeId, ScopedSymbolId, SymbolFlags,
FileScopeId, NodeWithScopeKey, NodeWithScopeRef, Scope, ScopeId, ScopedSymbolId,
SymbolTableBuilder,
};
use crate::semantic_index::use_def::{FlowSnapshot, UseDefMapBuilder};
@ -28,7 +28,8 @@ use crate::Db;
use super::constraint::{Constraint, PatternConstraint};
use super::definition::{
ExceptHandlerDefinitionNodeRef, MatchPatternDefinitionNodeRef, WithItemDefinitionNodeRef,
DefinitionCategory, ExceptHandlerDefinitionNodeRef, MatchPatternDefinitionNodeRef,
WithItemDefinitionNodeRef,
};
pub(super) struct SemanticIndexBuilder<'db> {
@ -168,31 +169,38 @@ impl<'db> SemanticIndexBuilder<'db> {
self.current_use_def_map_mut().merge(state);
}
fn add_or_update_symbol(&mut self, name: Name, flags: SymbolFlags) -> ScopedSymbolId {
let symbol_table = self.current_symbol_table();
let (symbol_id, added) = symbol_table.add_or_update_symbol(name, flags);
fn add_symbol(&mut self, name: Name) -> ScopedSymbolId {
let (symbol_id, added) = self.current_symbol_table().add_symbol(name);
if added {
let use_def_map = self.current_use_def_map_mut();
use_def_map.add_symbol(symbol_id);
self.current_use_def_map_mut().add_symbol(symbol_id);
}
symbol_id
}
fn mark_symbol_bound(&mut self, id: ScopedSymbolId) {
self.current_symbol_table().mark_symbol_bound(id);
}
fn mark_symbol_used(&mut self, id: ScopedSymbolId) {
self.current_symbol_table().mark_symbol_used(id);
}
fn add_definition<'a>(
&mut self,
symbol: ScopedSymbolId,
definition_node: impl Into<DefinitionNodeRef<'a>>,
) -> Definition<'db> {
let definition_node: DefinitionNodeRef<'_> = definition_node.into();
#[allow(unsafe_code)]
// SAFETY: `definition_node` is guaranteed to be a child of `self.module`
let kind = unsafe { definition_node.into_owned(self.module.clone()) };
let category = kind.category();
let definition = Definition::new(
self.db,
self.file,
self.current_scope(),
symbol,
#[allow(unsafe_code)]
unsafe {
definition_node.into_owned(self.module.clone())
},
kind,
countme::Count::default(),
);
@ -201,8 +209,18 @@ impl<'db> SemanticIndexBuilder<'db> {
.insert(definition_node.key(), definition);
debug_assert_eq!(existing_definition, None);
self.current_use_def_map_mut()
.record_definition(symbol, definition);
if category.is_binding() {
self.mark_symbol_bound(symbol);
}
let use_def = self.current_use_def_map_mut();
match category {
DefinitionCategory::DeclarationAndBinding => {
use_def.record_declaration_and_binding(symbol, definition);
}
DefinitionCategory::Declaration => use_def.record_declaration(symbol, definition),
DefinitionCategory::Binding => use_def.record_binding(symbol, definition),
}
definition
}
@ -284,10 +302,13 @@ impl<'db> SemanticIndexBuilder<'db> {
..
}) => (name, &None, default),
};
// TODO create Definition for typevars
self.add_or_update_symbol(name.id.clone(), SymbolFlags::IS_DEFINED);
if let Some(bound) = bound {
self.visit_expr(bound);
let symbol = self.add_symbol(name.id.clone());
// TODO create Definition for PEP 695 typevars
// note that the "bound" on the typevar is a totally different thing than whether
// or not a name is "bound" by a typevar declaration; the latter is always true.
self.mark_symbol_bound(symbol);
if let Some(bounds) = bound {
self.visit_expr(bounds);
}
if let Some(default) = default {
self.visit_expr(default);
@ -350,8 +371,7 @@ impl<'db> SemanticIndexBuilder<'db> {
}
fn declare_parameter(&mut self, parameter: AnyParameterRef) {
let symbol =
self.add_or_update_symbol(parameter.name().id().clone(), SymbolFlags::IS_DEFINED);
let symbol = self.add_symbol(parameter.name().id().clone());
let definition = self.add_definition(symbol, parameter);
@ -462,8 +482,7 @@ where
// The symbol for the function name itself has to be evaluated
// at the end to match the runtime evaluation of parameter defaults
// and return-type annotations.
let symbol = self
.add_or_update_symbol(function_def.name.id.clone(), SymbolFlags::IS_DEFINED);
let symbol = self.add_symbol(function_def.name.id.clone());
self.add_definition(symbol, function_def);
}
ast::Stmt::ClassDef(class) => {
@ -471,8 +490,7 @@ where
self.visit_decorator(decorator);
}
let symbol =
self.add_or_update_symbol(class.name.id.clone(), SymbolFlags::IS_DEFINED);
let symbol = self.add_symbol(class.name.id.clone());
self.add_definition(symbol, class);
self.with_type_params(
@ -498,7 +516,7 @@ where
Name::new(alias.name.id.split('.').next().unwrap())
};
let symbol = self.add_or_update_symbol(symbol_name, SymbolFlags::IS_DEFINED);
let symbol = self.add_symbol(symbol_name);
self.add_definition(symbol, alias);
}
}
@ -510,8 +528,7 @@ where
&alias.name.id
};
let symbol =
self.add_or_update_symbol(symbol_name.clone(), SymbolFlags::IS_DEFINED);
let symbol = self.add_symbol(symbol_name.clone());
self.add_definition(symbol, ImportFromDefinitionNodeRef { node, alias_index });
}
}
@ -725,8 +742,7 @@ where
// which is invalid syntax. However, it's still pretty obvious here that the user
// *wanted* `e` to be bound, so we should still create a definition here nonetheless.
if let Some(symbol_name) = symbol_name {
let symbol = self
.add_or_update_symbol(symbol_name.id.clone(), SymbolFlags::IS_DEFINED);
let symbol = self.add_symbol(symbol_name.id.clone());
self.add_definition(
symbol,
@ -756,24 +772,18 @@ where
match expr {
ast::Expr::Name(name_node @ ast::ExprName { id, ctx, .. }) => {
let flags = match (ctx, self.current_assignment) {
let (is_use, is_definition) = match (ctx, self.current_assignment) {
(ast::ExprContext::Store, Some(CurrentAssignment::AugAssign(_))) => {
// For augmented assignment, the target expression is also used.
SymbolFlags::IS_DEFINED | SymbolFlags::IS_USED
(true, true)
}
(ast::ExprContext::Store, Some(CurrentAssignment::AnnAssign(ann_assign)))
if ann_assign.value.is_none() =>
{
// An annotated assignment that doesn't assign a value is not a Definition
SymbolFlags::empty()
}
(ast::ExprContext::Load, _) => SymbolFlags::IS_USED,
(ast::ExprContext::Store, _) => SymbolFlags::IS_DEFINED,
(ast::ExprContext::Del, _) => SymbolFlags::IS_DEFINED,
(ast::ExprContext::Invalid, _) => SymbolFlags::empty(),
(ast::ExprContext::Load, _) => (true, false),
(ast::ExprContext::Store, _) => (false, true),
(ast::ExprContext::Del, _) => (false, true),
(ast::ExprContext::Invalid, _) => (false, false),
};
let symbol = self.add_or_update_symbol(id.clone(), flags);
if flags.contains(SymbolFlags::IS_DEFINED) {
let symbol = self.add_symbol(id.clone());
if is_definition {
match self.current_assignment {
Some(CurrentAssignment::Assign(assignment)) => {
self.add_definition(
@ -830,7 +840,8 @@ where
}
}
if flags.contains(SymbolFlags::IS_USED) {
if is_use {
self.mark_symbol_used(symbol);
let use_id = self.current_ast_ids().record_use(expr);
self.current_use_def_map_mut().record_use(symbol, use_id);
}
@ -970,7 +981,7 @@ where
range: _,
}) = pattern
{
let symbol = self.add_or_update_symbol(name.id().clone(), SymbolFlags::IS_DEFINED);
let symbol = self.add_symbol(name.id().clone());
let state = self.current_match_case.as_ref().unwrap();
self.add_definition(
symbol,
@ -991,7 +1002,7 @@ where
rest: Some(name), ..
}) = pattern
{
let symbol = self.add_or_update_symbol(name.id().clone(), SymbolFlags::IS_DEFINED);
let symbol = self.add_symbol(name.id().clone());
let state = self.current_match_case.as_ref().unwrap();
self.add_definition(
symbol,

View file

@ -23,7 +23,7 @@ pub struct Definition<'db> {
#[no_eq]
#[return_ref]
pub(crate) node: DefinitionKind,
pub(crate) kind: DefinitionKind,
#[no_eq]
count: countme::Count<Definition<'static>>,
@ -33,6 +33,21 @@ impl<'db> Definition<'db> {
pub(crate) fn scope(self, db: &'db dyn Db) -> ScopeId<'db> {
self.file_scope(db).to_scope_id(db, self.file(db))
}
#[allow(unused)]
pub(crate) fn category(self, db: &'db dyn Db) -> DefinitionCategory {
self.kind(db).category()
}
#[allow(unused)]
pub(crate) fn is_declaration(self, db: &'db dyn Db) -> bool {
self.kind(db).category().is_declaration()
}
#[allow(unused)]
pub(crate) fn is_binding(self, db: &'db dyn Db) -> bool {
self.kind(db).category().is_binding()
}
}
#[derive(Copy, Clone, Debug)]
@ -302,6 +317,41 @@ impl DefinitionNodeRef<'_> {
}
}
#[derive(Clone, Copy, Debug)]
pub(crate) enum DefinitionCategory {
/// A Definition which binds a value to a name (e.g. `x = 1`).
Binding,
/// A Definition which declares the upper-bound of acceptable types for this name (`x: int`).
Declaration,
/// A Definition which both declares a type and binds a value (e.g. `x: int = 1`).
DeclarationAndBinding,
}
impl DefinitionCategory {
/// True if this definition establishes a "declared type" for the symbol.
///
/// If so, any assignments reached by this definition are in error if they assign a value of a
/// type not assignable to the declared type.
///
/// Annotations establish a declared type. So do function and class definition.
pub(crate) fn is_declaration(self) -> bool {
matches!(
self,
DefinitionCategory::Declaration | DefinitionCategory::DeclarationAndBinding
)
}
/// True if this definition assigns a value to the symbol.
///
/// False only for annotated assignments without a RHS.
pub(crate) fn is_binding(self) -> bool {
matches!(
self,
DefinitionCategory::Binding | DefinitionCategory::DeclarationAndBinding
)
}
}
#[derive(Clone, Debug)]
pub enum DefinitionKind {
Import(AstNodeRef<ast::Alias>),
@ -321,6 +371,52 @@ pub enum DefinitionKind {
ExceptHandler(ExceptHandlerDefinitionKind),
}
impl DefinitionKind {
pub(crate) fn category(&self) -> DefinitionCategory {
match self {
// functions and classes always bind a value, and we always consider them declarations
DefinitionKind::Function(_) | DefinitionKind::Class(_) => {
DefinitionCategory::DeclarationAndBinding
}
// a parameter always binds a value, but is only a declaration if annotated
DefinitionKind::Parameter(parameter) => {
if parameter.annotation.is_some() {
DefinitionCategory::DeclarationAndBinding
} else {
DefinitionCategory::Binding
}
}
// presence of a default is irrelevant, same logic as for a no-default parameter
DefinitionKind::ParameterWithDefault(parameter_with_default) => {
if parameter_with_default.parameter.annotation.is_some() {
DefinitionCategory::DeclarationAndBinding
} else {
DefinitionCategory::Binding
}
}
// annotated assignment is always a declaration, only a binding if there is a RHS
DefinitionKind::AnnotatedAssignment(ann_assign) => {
if ann_assign.value.is_some() {
DefinitionCategory::DeclarationAndBinding
} else {
DefinitionCategory::Declaration
}
}
// all of these bind values without declaring a type
DefinitionKind::Import(_)
| DefinitionKind::ImportFrom(_)
| DefinitionKind::NamedExpression(_)
| DefinitionKind::Assignment(_)
| DefinitionKind::AugmentedAssignment(_)
| DefinitionKind::For(_)
| DefinitionKind::Comprehension(_)
| DefinitionKind::WithItem(_)
| DefinitionKind::MatchPattern(_)
| DefinitionKind::ExceptHandler(_) => DefinitionCategory::Binding,
}
}
}
#[derive(Clone, Debug)]
#[allow(dead_code)]
pub struct MatchPatternDefinitionKind {
@ -441,8 +537,12 @@ pub struct ExceptHandlerDefinitionKind {
}
impl ExceptHandlerDefinitionKind {
pub(crate) fn node(&self) -> &ast::ExceptHandlerExceptHandler {
self.handler.node()
}
pub(crate) fn handled_exceptions(&self) -> Option<&ast::Expr> {
self.handler.node().type_.as_deref()
self.node().type_.as_deref()
}
pub(crate) fn is_star(&self) -> bool {

View file

@ -44,16 +44,16 @@ impl Symbol {
}
/// Is the symbol defined in its containing scope?
pub fn is_defined(&self) -> bool {
self.flags.contains(SymbolFlags::IS_DEFINED)
pub fn is_bound(&self) -> bool {
self.flags.contains(SymbolFlags::IS_BOUND)
}
}
bitflags! {
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub(super) struct SymbolFlags: u8 {
struct SymbolFlags: u8 {
const IS_USED = 1 << 0;
const IS_DEFINED = 1 << 1;
const IS_BOUND = 1 << 1;
/// TODO: This flag is not yet set by anything
const MARKED_GLOBAL = 1 << 2;
/// TODO: This flag is not yet set by anything
@ -272,11 +272,7 @@ impl SymbolTableBuilder {
}
}
pub(super) fn add_or_update_symbol(
&mut self,
name: Name,
flags: SymbolFlags,
) -> (ScopedSymbolId, bool) {
pub(super) fn add_symbol(&mut self, name: Name) -> (ScopedSymbolId, bool) {
let hash = SymbolTable::hash_name(&name);
let entry = self
.table
@ -285,15 +281,9 @@ impl SymbolTableBuilder {
.from_hash(hash, |id| self.table.symbols[*id].name() == &name);
match entry {
RawEntryMut::Occupied(entry) => {
let symbol = &mut self.table.symbols[*entry.key()];
symbol.insert_flags(flags);
(*entry.key(), false)
}
RawEntryMut::Occupied(entry) => (*entry.key(), false),
RawEntryMut::Vacant(entry) => {
let mut symbol = Symbol::new(name);
symbol.insert_flags(flags);
let symbol = Symbol::new(name);
let id = self.table.symbols.push(symbol);
entry.insert_with_hasher(hash, id, (), |id| {
@ -304,6 +294,14 @@ impl SymbolTableBuilder {
}
}
pub(super) fn mark_symbol_bound(&mut self, id: ScopedSymbolId) {
self.table.symbols[id].insert_flags(SymbolFlags::IS_BOUND);
}
pub(super) fn mark_symbol_used(&mut self, id: ScopedSymbolId) {
self.table.symbols[id].insert_flags(SymbolFlags::IS_USED);
}
pub(super) fn finish(mut self) -> SymbolTable {
self.table.shrink_to_fit();
self.table

View file

@ -1,5 +1,79 @@
//! Build a map from each use of a symbol to the definitions visible from that use, and the
//! type-narrowing constraints that apply to each definition.
//! First, some terminology:
//!
//! * A "binding" gives a new value to a variable. This includes many different Python statements
//! (assignment statements of course, but also imports, `def` and `class` statements, `as`
//! clauses in `with` and `except` statements, match patterns, and others) and even one
//! expression kind (named expressions). It notably does not include annotated assignment
//! statements without a right-hand side value; these do not assign any new value to the
//! variable. We consider function parameters to be bindings as well, since (from the perspective
//! of the function's internal scope), a function parameter begins the scope bound to a value.
//!
//! * A "declaration" establishes an upper bound type for the values that a variable may be
//! permitted to take on. Annotated assignment statements (with or without an RHS value) are
//! declarations; annotated function parameters are also declarations. We consider `def` and
//! `class` statements to also be declarations, so as to prohibit accidentally shadowing them.
//!
//! Annotated assignments with a right-hand side, and annotated function parameters, are both
//! bindings and declarations.
//!
//! We use [`Definition`] as the universal term (and Salsa tracked struct) encompassing both
//! bindings and declarations. (This sacrifices a bit of type safety in exchange for improved
//! performance via fewer Salsa tracked structs and queries, since most declarations -- typed
//! parameters and annotated assignments with RHS -- are both bindings and declarations.)
//!
//! At any given use of a variable, we can ask about both its "declared type" and its "inferred
//! type". These may be different, but the inferred type must always be assignable to the declared
//! type; that is, the declared type is always wider, and the inferred type may be more precise. If
//! we see an invalid assignment, we emit a diagnostic and abandon our inferred type, deferring to
//! the declared type (this allows an explicit annotation to override bad inference, without a
//! cast), maintaining the invariant.
//!
//! The **inferred type** represents the most precise type we believe encompasses all possible
//! values for the variable at a given use. It is based on a union of the bindings which can reach
//! that use through some control flow path, and the narrowing constraints that control flow must
//! have passed through between the binding and the use. For example, in this code:
//!
//! ```python
//! x = 1 if flag else None
//! if x is not None:
//! use(x)
//! ```
//!
//! For the use of `x` on the third line, the inferred type should be `Literal[1]`. This is based
//! on the binding on the first line, which assigns the type `Literal[1] | None`, and the narrowing
//! constraint on the second line, which rules out the type `None`, since control flow must pass
//! through this constraint to reach the use in question.
//!
//! The **declared type** represents the code author's declaration (usually through a type
//! annotation) that a given variable should not be assigned any type outside the declared type. In
//! our model, declared types are also control-flow-sensitive; we allow the code author to
//! explicitly re-declare the same variable with a different type. So for a given binding of a
//! variable, we will want to ask which declarations of that variable can reach that binding, in
//! order to determine whether the binding is permitted, or should be a type error. For example:
//!
//! ```python
//! from pathlib import Path
//! def f(path: str):
//! path: Path = Path(path)
//! ```
//!
//! In this function, the initial declared type of `path` is `str`, meaning that the assignment
//! `path = Path(path)` would be a type error, since it assigns to `path` a value whose type is not
//! assignable to `str`. This is the purpose of declared types: they prevent accidental assignment
//! of the wrong type to a variable.
//!
//! But in some cases it is useful to "shadow" or "re-declare" a variable with a new type, and we
//! permit this, as long as it is done with an explicit re-annotation. So `path: Path =
//! Path(path)`, with the explicit `: Path` annotation, is permitted.
//!
//! The general rule is that whatever declaration(s) can reach a given binding determine the
//! validity of that binding. If there is a path in which the symbol is not declared, that is a
//! declaration of `Unknown`. If multiple declarations can reach a binding, we union them, but by
//! default we also issue a type error, since this implicit union of declared types may hide an
//! error.
//!
//! To support type inference, we build a map from each use of a symbol to the bindings live at
//! that use, and the type narrowing constraints that apply to each binding.
//!
//! Let's take this code sample:
//!
@ -7,147 +81,155 @@
//! x = 1
//! x = 2
//! y = x
//! if y is not None:
//! if flag:
//! x = 3
//! else:
//! x = 4
//! z = x
//! ```
//!
//! In this snippet, we have four definitions of `x` (the statements assigning `1`, `2`, `3`,
//! and `4` to it), and two uses of `x` (the `y = x` and `z = x` assignments). The first
//! [`Definition`] of `x` is never visible to any use, because it's immediately replaced by the
//! second definition, before any use happens. (A linter could thus flag the statement `x = 1`
//! as likely superfluous.)
//! In this snippet, we have four bindings of `x` (the statements assigning `1`, `2`, `3`, and `4`
//! to it), and two uses of `x` (the `y = x` and `z = x` assignments). The first binding of `x`
//! does not reach any use, because it's immediately replaced by the second binding, before any use
//! happens. (A linter could thus flag the statement `x = 1` as likely superfluous.)
//!
//! The first use of `x` has one definition visible to it: the assignment `x = 2`.
//! The first use of `x` has one live binding: the assignment `x = 2`.
//!
//! Things get a bit more complex when we have branches. We will definitely take either the `if` or
//! the `else` branch. Thus, the second use of `x` has two definitions visible to it: `x = 3` and
//! `x = 4`. The `x = 2` definition is no longer visible, because it must be replaced by either `x
//! = 3` or `x = 4`, no matter which branch was taken. We don't know which branch was taken, so we
//! must consider both definitions as visible, which means eventually we would (in type inference)
//! look at these two definitions and infer a type of `Literal[3, 4]` -- the union of `Literal[3]`
//! and `Literal[4]` -- for the second use of `x`.
//! the `else` branch. Thus, the second use of `x` has two live bindings: `x = 3` and `x = 4`. The
//! `x = 2` assignment is no longer visible, because it must be replaced by either `x = 3` or `x =
//! 4`, no matter which branch was taken. We don't know which branch was taken, so we must consider
//! both bindings as live, which means eventually we would (in type inference) look at these two
//! bindings and infer a type of `Literal[3, 4]` -- the union of `Literal[3]` and `Literal[4]` --
//! for the second use of `x`.
//!
//! So that's one question our use-def map needs to answer: given a specific use of a symbol, which
//! definition(s) is/are visible from that use. In
//! [`AstIds`](crate::semantic_index::ast_ids::AstIds) we number all uses (that means a `Name` node
//! with `Load` context) so we have a `ScopedUseId` to efficiently represent each use.
//! binding(s) can reach that use. In [`AstIds`](crate::semantic_index::ast_ids::AstIds) we number
//! all uses (that means a `Name` node with `Load` context) so we have a `ScopedUseId` to
//! efficiently represent each use.
//!
//! Another case we need to handle is when a symbol is referenced from a different scope (the most
//! obvious example of this is an import). We call this "public" use of a symbol. So the other
//! question we need to be able to answer is, what are the publicly-visible definitions of each
//! symbol?
//!
//! Technically, public use of a symbol could also occur from any point in control flow of the
//! scope where the symbol is defined (via inline imports and import cycles, in the case of an
//! import, or via a function call partway through the local scope that ends up using a symbol from
//! the scope via a global or nonlocal reference.) But modeling this fully accurately requires
//! whole-program analysis that isn't tractable for an efficient incremental compiler, since it
//! means a given symbol could have a different type every place it's referenced throughout the
//! program, depending on the shape of arbitrarily-sized call/import graphs. So we follow other
//! Python type-checkers in making the simplifying assumption that usually the scope will finish
//! execution before its symbols are made visible to other scopes; for instance, most imports will
//! import from a complete module, not a partially-executed module. (We may want to get a little
//! smarter than this in the future, in particular for closures, but for now this is where we
//! start.)
//!
//! So this means that the publicly-visible definitions of a symbol are the definitions still
//! visible at the end of the scope; effectively we have an implicit "use" of every symbol at the
//! end of the scope.
//!
//! We also need to know, for a given definition of a symbol, what type-narrowing constraints apply
//! We also need to know, for a given definition of a symbol, what type narrowing constraints apply
//! to it. For instance, in this code sample:
//!
//! ```python
//! x = 1 if flag else None
//! if x is not None:
//! y = x
//! use(x)
//! ```
//!
//! At the use of `x` in `y = x`, the visible definition of `x` is `1 if flag else None`, which
//! would infer as the type `Literal[1] | None`. But the constraint `x is not None` dominates this
//! use, which means we can rule out the possibility that `x` is `None` here, which should give us
//! the type `Literal[1]` for this use.
//! At the use of `x`, the live binding of `x` is `1 if flag else None`, which would infer as the
//! type `Literal[1] | None`. But the constraint `x is not None` dominates this use, which means we
//! can rule out the possibility that `x` is `None` here, which should give us the type
//! `Literal[1]` for this use.
//!
//! For declared types, we need to be able to answer the question "given a binding to a symbol,
//! which declarations of that symbol can reach the binding?" This allows us to emit a diagnostic
//! if the binding is attempting to bind a value of a type that is not assignable to the declared
//! type for that symbol, at that point in control flow.
//!
//! We also need to know, given a declaration of a symbol, what the inferred type of that symbol is
//! at that point. This allows us to emit a diagnostic in a case like `x = "foo"; x: int`. The
//! binding `x = "foo"` occurs before the declaration `x: int`, so according to our
//! control-flow-sensitive interpretation of declarations, the assignment is not an error. But the
//! declaration is an error, since it would violate the "inferred type must be assignable to
//! declared type" rule.
//!
//! Another case we need to handle is when a symbol is referenced from a different scope (for
//! example, an import or a nonlocal reference). We call this "public" use of a symbol. For public
//! use of a symbol, we prefer the declared type, if there are any declarations of that symbol; if
//! not, we fall back to the inferred type. So we also need to know which declarations and bindings
//! can reach the end of the scope.
//!
//! Technically, public use of a symbol could occur from any point in control flow of the scope
//! where the symbol is defined (via inline imports and import cycles, in the case of an import, or
//! via a function call partway through the local scope that ends up using a symbol from the scope
//! via a global or nonlocal reference.) But modeling this fully accurately requires whole-program
//! analysis that isn't tractable for an efficient analysis, since it means a given symbol could
//! have a different type every place it's referenced throughout the program, depending on the
//! shape of arbitrarily-sized call/import graphs. So we follow other Python type checkers in
//! making the simplifying assumption that usually the scope will finish execution before its
//! symbols are made visible to other scopes; for instance, most imports will import from a
//! complete module, not a partially-executed module. (We may want to get a little smarter than
//! this in the future for some closures, but for now this is where we start.)
//!
//! The data structure we build to answer these questions is the `UseDefMap`. It has a
//! `definitions_by_use` vector indexed by [`ScopedUseId`] and a `public_definitions` vector
//! indexed by [`ScopedSymbolId`]. The values in each of these vectors are (in principle) a list of
//! visible definitions at that use, or at the end of the scope for that symbol, with a list of the
//! dominating constraints for each of those definitions.
//! `bindings_by_use` vector of [`SymbolBindings`] indexed by [`ScopedUseId`], a
//! `declarations_by_binding` vector of [`SymbolDeclarations`] indexed by [`ScopedDefinitionId`], a
//! `bindings_by_declaration` vector of [`SymbolBindings`] indexed by [`ScopedDefinitionId`], and
//! `public_bindings` and `public_definitions` vectors indexed by [`ScopedSymbolId`]. The values in
//! each of these vectors are (in principle) a list of live bindings at that use/definition, or at
//! the end of the scope for that symbol, with a list of the dominating constraints for each
//! binding.
//!
//! In order to avoid vectors-of-vectors-of-vectors and all the allocations that would entail, we
//! don't actually store these "list of visible definitions" as a vector of [`Definition`].
//! Instead, the values in `definitions_by_use` and `public_definitions` are a [`SymbolState`]
//! struct which uses bit-sets to track definitions and constraints in terms of
//! [`ScopedDefinitionId`] and [`ScopedConstraintId`], which are indices into the `all_definitions`
//! and `all_constraints` indexvecs in the [`UseDefMap`].
//! Instead, [`SymbolBindings`] and [`SymbolDeclarations`] are structs which use bit-sets to track
//! definitions (and constraints, in the case of bindings) in terms of [`ScopedDefinitionId`] and
//! [`ScopedConstraintId`], which are indices into the `all_definitions` and `all_constraints`
//! indexvecs in the [`UseDefMap`].
//!
//! There is another special kind of possible "definition" for a symbol: there might be a path from
//! the scope entry to a given use in which the symbol is never bound.
//!
//! The simplest way to model "unbound" would be as an actual [`Definition`] itself: the initial
//! visible [`Definition`] for each symbol in a scope. But actually modeling it this way would
//! unnecessarily increase the number of [`Definition`] that Salsa must track. Since "unbound" is a
//! special definition in that all symbols share it, and it doesn't have any additional per-symbol
//! state, and constraints are irrelevant to it, we can represent it more efficiently: we use the
//! `may_be_unbound` boolean on the [`SymbolState`] struct. If this flag is `true`, it means the
//! symbol/use really has one additional visible "definition", which is the unbound state. If this
//! flag is `false`, it means we've eliminated the possibility of unbound: every path we've
//! followed includes a definition for this symbol.
//! The simplest way to model "unbound" would be as a "binding" itself: the initial "binding" for
//! each symbol in a scope. But actually modeling it this way would unnecessarily increase the
//! number of [`Definition`]s that Salsa must track. Since "unbound" is special in that all symbols
//! share it, and it doesn't have any additional per-symbol state, and constraints are irrelevant
//! to it, we can represent it more efficiently: we use the `may_be_unbound` boolean on the
//! [`SymbolBindings`] struct. If this flag is `true` for a use of a symbol, it means the symbol
//! has a path to the use in which it is never bound. If this flag is `false`, it means we've
//! eliminated the possibility of unbound: every control flow path to the use includes a binding
//! for this symbol.
//!
//! To build a [`UseDefMap`], the [`UseDefMapBuilder`] is notified of each new use, definition, and
//! constraint as they are encountered by the
//! [`SemanticIndexBuilder`](crate::semantic_index::builder::SemanticIndexBuilder) AST visit. For
//! each symbol, the builder tracks the `SymbolState` for that symbol. When we hit a use of a
//! symbol, it records the current state for that symbol for that use. When we reach the end of the
//! scope, it records the state for each symbol as the public definitions of that symbol.
//! each symbol, the builder tracks the `SymbolState` (`SymbolBindings` and `SymbolDeclarations`)
//! for that symbol. When we hit a use or definition of a symbol, we record the necessary parts of
//! the current state for that symbol that we need for that use or definition. When we reach the
//! end of the scope, it records the state for each symbol as the public definitions of that
//! symbol.
//!
//! Let's walk through the above example. Initially we record for `x` that it has no visible
//! definitions, and may be unbound. When we see `x = 1`, we record that as the sole visible
//! definition of `x`, and flip `may_be_unbound` to `false`. Then we see `x = 2`, and it replaces
//! `x = 1` as the sole visible definition of `x`. When we get to `y = x`, we record that the
//! visible definitions for that use of `x` are just the `x = 2` definition.
//! Let's walk through the above example. Initially we record for `x` that it has no bindings, and
//! may be unbound. When we see `x = 1`, we record that as the sole live binding of `x`, and flip
//! `may_be_unbound` to `false`. Then we see `x = 2`, and we replace `x = 1` as the sole live
//! binding of `x`. When we get to `y = x`, we record that the live bindings for that use of `x`
//! are just the `x = 2` definition.
//!
//! Then we hit the `if` branch. We visit the `test` node (`flag` in this case), since that will
//! happen regardless. Then we take a pre-branch snapshot of the currently visible definitions for
//! all symbols, which we'll need later. Then we record `flag` as a possible constraint on the
//! currently visible definition (`x = 2`), and go ahead and visit the `if` body. When we see `x =
//! 3`, it replaces `x = 2` (constrained by `flag`) as the sole visible definition of `x`. At the
//! end of the `if` body, we take another snapshot of the currently-visible definitions; we'll call
//! this the post-if-body snapshot.
//! happen regardless. Then we take a pre-branch snapshot of the current state for all symbols,
//! which we'll need later. Then we record `flag` as a possible constraint on the current binding
//! (`x = 2`), and go ahead and visit the `if` body. When we see `x = 3`, it replaces `x = 2`
//! (constrained by `flag`) as the sole live binding of `x`. At the end of the `if` body, we take
//! another snapshot of the current symbol state; we'll call this the post-if-body snapshot.
//!
//! Now we need to visit the `else` clause. The conditions when entering the `else` clause should
//! be the pre-if conditions; if we are entering the `else` clause, we know that the `if` test
//! failed and we didn't execute the `if` body. So we first reset the builder to the pre-if state,
//! using the snapshot we took previously (meaning we now have `x = 2` as the sole visible
//! definition for `x` again), then visit the `else` clause, where `x = 4` replaces `x = 2` as the
//! sole visible definition of `x`.
//! using the snapshot we took previously (meaning we now have `x = 2` as the sole binding for `x`
//! again), then visit the `else` clause, where `x = 4` replaces `x = 2` as the sole live binding
//! of `x`.
//!
//! Now we reach the end of the if/else, and want to visit the following code. The state here needs
//! to reflect that we might have gone through the `if` branch, or we might have gone through the
//! `else` branch, and we don't know which. So we need to "merge" our current builder state
//! (reflecting the end-of-else state, with `x = 4` as the only visible definition) with our
//! post-if-body snapshot (which has `x = 3` as the only visible definition). The result of this
//! merge is that we now have two visible definitions of `x`: `x = 3` and `x = 4`.
//! (reflecting the end-of-else state, with `x = 4` as the only live binding) with our post-if-body
//! snapshot (which has `x = 3` as the only live binding). The result of this merge is that we now
//! have two live bindings of `x`: `x = 3` and `x = 4`.
//!
//! The [`UseDefMapBuilder`] itself just exposes methods for taking a snapshot, resetting to a
//! snapshot, and merging a snapshot into the current state. The logic using these methods lives in
//! [`SemanticIndexBuilder`](crate::semantic_index::builder::SemanticIndexBuilder), e.g. where it
//! visits a `StmtIf` node.
//!
//! (In the future we may have some other questions we want to answer as well, such as "is this
//! definition used?", which will require tracking a bit more info in our map, e.g. a "used" bit
//! for each [`Definition`] which is flipped to true when we record that definition for a use.)
use self::symbol_state::{
ConstraintIdIterator, DefinitionIdWithConstraintsIterator, ScopedConstraintId,
ScopedDefinitionId, SymbolState,
BindingIdWithConstraintsIterator, ConstraintIdIterator, DeclarationIdIterator,
ScopedConstraintId, ScopedDefinitionId, SymbolBindings, SymbolDeclarations, SymbolState,
};
use crate::semantic_index::ast_ids::ScopedUseId;
use crate::semantic_index::definition::Definition;
use crate::semantic_index::symbol::ScopedSymbolId;
use ruff_index::IndexVec;
use rustc_hash::FxHashMap;
use super::constraint::Constraint;
@ -163,60 +245,139 @@ pub(crate) struct UseDefMap<'db> {
/// Array of [`Constraint`] in this scope.
all_constraints: IndexVec<ScopedConstraintId, Constraint<'db>>,
/// [`SymbolState`] visible at a [`ScopedUseId`].
definitions_by_use: IndexVec<ScopedUseId, SymbolState>,
/// [`SymbolBindings`] reaching a [`ScopedUseId`].
bindings_by_use: IndexVec<ScopedUseId, SymbolBindings>,
/// [`SymbolBindings`] or [`SymbolDeclarations`] reaching a given [`Definition`].
///
/// If the definition is a binding (only) -- `x = 1` for example -- then we need
/// [`SymbolDeclarations`] to know whether this binding is permitted by the live declarations.
///
/// If the definition is a declaration (only) -- `x: int` for example -- then we need
/// [`SymbolBindings`] to know whether this declaration is consistent with the previously
/// inferred type.
///
/// If the definition is both a declaration and a binding -- `x: int = 1` for example -- then
/// we don't actually need anything here, all we'll need to validate is that our own RHS is a
/// valid assignment to our own annotation.
definitions_by_definition: FxHashMap<Definition<'db>, SymbolDefinitions>,
/// [`SymbolState`] visible at end of scope for each symbol.
public_definitions: IndexVec<ScopedSymbolId, SymbolState>,
public_symbols: IndexVec<ScopedSymbolId, SymbolState>,
}
impl<'db> UseDefMap<'db> {
pub(crate) fn use_definitions(
pub(crate) fn bindings_at_use(
&self,
use_id: ScopedUseId,
) -> DefinitionWithConstraintsIterator<'_, 'db> {
DefinitionWithConstraintsIterator {
all_definitions: &self.all_definitions,
all_constraints: &self.all_constraints,
inner: self.definitions_by_use[use_id].visible_definitions(),
}
) -> BindingWithConstraintsIterator<'_, 'db> {
self.bindings_iterator(&self.bindings_by_use[use_id])
}
pub(crate) fn use_may_be_unbound(&self, use_id: ScopedUseId) -> bool {
self.definitions_by_use[use_id].may_be_unbound()
self.bindings_by_use[use_id].may_be_unbound()
}
pub(crate) fn public_definitions(
pub(crate) fn public_bindings(
&self,
symbol: ScopedSymbolId,
) -> DefinitionWithConstraintsIterator<'_, 'db> {
DefinitionWithConstraintsIterator {
all_definitions: &self.all_definitions,
all_constraints: &self.all_constraints,
inner: self.public_definitions[symbol].visible_definitions(),
}
) -> BindingWithConstraintsIterator<'_, 'db> {
self.bindings_iterator(self.public_symbols[symbol].bindings())
}
pub(crate) fn public_may_be_unbound(&self, symbol: ScopedSymbolId) -> bool {
self.public_definitions[symbol].may_be_unbound()
self.public_symbols[symbol].may_be_unbound()
}
#[allow(unused)]
pub(crate) fn bindings_at_declaration(
&self,
declaration: Definition<'db>,
) -> BindingWithConstraintsIterator<'_, 'db> {
if let SymbolDefinitions::Bindings(bindings) = &self.definitions_by_definition[&declaration]
{
self.bindings_iterator(bindings)
} else {
unreachable!("Declaration has non-Bindings in definitions_by_definition");
}
}
#[allow(unused)]
pub(crate) fn declarations_at_binding(
&self,
binding: Definition<'db>,
) -> DeclarationsIterator<'_, 'db> {
if let SymbolDefinitions::Declarations(declarations) =
&self.definitions_by_definition[&binding]
{
self.declarations_iterator(declarations)
} else {
unreachable!("Binding has non-Declarations in definitions_by_definition");
}
}
#[allow(unused)]
pub(crate) fn public_declarations(
&self,
symbol: ScopedSymbolId,
) -> DeclarationsIterator<'_, 'db> {
self.declarations_iterator(self.public_symbols[symbol].declarations())
}
#[allow(unused)]
pub(crate) fn has_public_declarations(&self, symbol: ScopedSymbolId) -> bool {
!self.public_symbols[symbol].declarations().is_empty()
}
#[allow(unused)]
pub(crate) fn public_may_be_undeclared(&self, symbol: ScopedSymbolId) -> bool {
self.public_symbols[symbol].may_be_undeclared()
}
fn bindings_iterator<'a>(
&'a self,
bindings: &'a SymbolBindings,
) -> BindingWithConstraintsIterator<'a, 'db> {
BindingWithConstraintsIterator {
all_definitions: &self.all_definitions,
all_constraints: &self.all_constraints,
inner: bindings.iter(),
}
}
fn declarations_iterator<'a>(
&'a self,
declarations: &'a SymbolDeclarations,
) -> DeclarationsIterator<'a, 'db> {
DeclarationsIterator {
all_definitions: &self.all_definitions,
inner: declarations.iter(),
}
}
}
#[derive(Debug)]
pub(crate) struct DefinitionWithConstraintsIterator<'map, 'db> {
all_definitions: &'map IndexVec<ScopedDefinitionId, Definition<'db>>,
all_constraints: &'map IndexVec<ScopedConstraintId, Constraint<'db>>,
inner: DefinitionIdWithConstraintsIterator<'map>,
/// Either live bindings or live declarations for a symbol.
#[derive(Debug, PartialEq, Eq)]
enum SymbolDefinitions {
Bindings(SymbolBindings),
Declarations(SymbolDeclarations),
}
impl<'map, 'db> Iterator for DefinitionWithConstraintsIterator<'map, 'db> {
type Item = DefinitionWithConstraints<'map, 'db>;
#[derive(Debug)]
pub(crate) struct BindingWithConstraintsIterator<'map, 'db> {
all_definitions: &'map IndexVec<ScopedDefinitionId, Definition<'db>>,
all_constraints: &'map IndexVec<ScopedConstraintId, Constraint<'db>>,
inner: BindingIdWithConstraintsIterator<'map>,
}
impl<'map, 'db> Iterator for BindingWithConstraintsIterator<'map, 'db> {
type Item = BindingWithConstraints<'map, 'db>;
fn next(&mut self) -> Option<Self::Item> {
self.inner
.next()
.map(|def_id_with_constraints| DefinitionWithConstraints {
definition: self.all_definitions[def_id_with_constraints.definition],
.map(|def_id_with_constraints| BindingWithConstraints {
binding: self.all_definitions[def_id_with_constraints.definition],
constraints: ConstraintsIterator {
all_constraints: self.all_constraints,
constraint_ids: def_id_with_constraints.constraint_ids,
@ -225,10 +386,10 @@ impl<'map, 'db> Iterator for DefinitionWithConstraintsIterator<'map, 'db> {
}
}
impl std::iter::FusedIterator for DefinitionWithConstraintsIterator<'_, '_> {}
impl std::iter::FusedIterator for BindingWithConstraintsIterator<'_, '_> {}
pub(crate) struct DefinitionWithConstraints<'map, 'db> {
pub(crate) definition: Definition<'db>,
pub(crate) struct BindingWithConstraints<'map, 'db> {
pub(crate) binding: Definition<'db>,
pub(crate) constraints: ConstraintsIterator<'map, 'db>,
}
@ -249,25 +410,43 @@ impl<'map, 'db> Iterator for ConstraintsIterator<'map, 'db> {
impl std::iter::FusedIterator for ConstraintsIterator<'_, '_> {}
pub(crate) struct DeclarationsIterator<'map, 'db> {
all_definitions: &'map IndexVec<ScopedDefinitionId, Definition<'db>>,
inner: DeclarationIdIterator<'map>,
}
impl<'map, 'db> Iterator for DeclarationsIterator<'map, 'db> {
type Item = Definition<'db>;
fn next(&mut self) -> Option<Self::Item> {
self.inner.next().map(|def_id| self.all_definitions[def_id])
}
}
impl std::iter::FusedIterator for DeclarationsIterator<'_, '_> {}
/// A snapshot of the definitions and constraints state at a particular point in control flow.
#[derive(Clone, Debug)]
pub(super) struct FlowSnapshot {
definitions_by_symbol: IndexVec<ScopedSymbolId, SymbolState>,
symbol_states: IndexVec<ScopedSymbolId, SymbolState>,
}
#[derive(Debug, Default)]
pub(super) struct UseDefMapBuilder<'db> {
/// Append-only array of [`Definition`]; None is unbound.
/// Append-only array of [`Definition`].
all_definitions: IndexVec<ScopedDefinitionId, Definition<'db>>,
/// Append-only array of [`Constraint`].
all_constraints: IndexVec<ScopedConstraintId, Constraint<'db>>,
/// Visible definitions at each so-far-recorded use.
definitions_by_use: IndexVec<ScopedUseId, SymbolState>,
/// Live bindings at each so-far-recorded use.
bindings_by_use: IndexVec<ScopedUseId, SymbolBindings>,
/// Currently visible definitions for each symbol.
definitions_by_symbol: IndexVec<ScopedSymbolId, SymbolState>,
/// Live bindings or declarations for each so-far-recorded definition.
definitions_by_definition: FxHashMap<Definition<'db>, SymbolDefinitions>,
/// Currently live bindings and declarations for each symbol.
symbol_states: IndexVec<ScopedSymbolId, SymbolState>,
}
impl<'db> UseDefMapBuilder<'db> {
@ -276,86 +455,103 @@ impl<'db> UseDefMapBuilder<'db> {
}
pub(super) fn add_symbol(&mut self, symbol: ScopedSymbolId) {
let new_symbol = self.definitions_by_symbol.push(SymbolState::unbound());
let new_symbol = self.symbol_states.push(SymbolState::undefined());
debug_assert_eq!(symbol, new_symbol);
}
pub(super) fn record_definition(
&mut self,
symbol: ScopedSymbolId,
definition: Definition<'db>,
) {
// We have a new definition of a symbol; this replaces any previous definitions in this
// path.
let def_id = self.all_definitions.push(definition);
self.definitions_by_symbol[symbol] = SymbolState::with(def_id);
pub(super) fn record_binding(&mut self, symbol: ScopedSymbolId, binding: Definition<'db>) {
let def_id = self.all_definitions.push(binding);
let symbol_state = &mut self.symbol_states[symbol];
self.definitions_by_definition.insert(
binding,
SymbolDefinitions::Declarations(symbol_state.declarations().clone()),
);
symbol_state.record_binding(def_id);
}
pub(super) fn record_constraint(&mut self, constraint: Constraint<'db>) {
let constraint_id = self.all_constraints.push(constraint);
for definitions in &mut self.definitions_by_symbol {
definitions.add_constraint(constraint_id);
for state in &mut self.symbol_states {
state.record_constraint(constraint_id);
}
}
pub(super) fn record_declaration(
&mut self,
symbol: ScopedSymbolId,
declaration: Definition<'db>,
) {
let def_id = self.all_definitions.push(declaration);
let symbol_state = &mut self.symbol_states[symbol];
self.definitions_by_definition.insert(
declaration,
SymbolDefinitions::Bindings(symbol_state.bindings().clone()),
);
symbol_state.record_declaration(def_id);
}
pub(super) fn record_declaration_and_binding(
&mut self,
symbol: ScopedSymbolId,
definition: Definition<'db>,
) {
// We don't need to store anything in self.definitions_by_definition.
let def_id = self.all_definitions.push(definition);
let symbol_state = &mut self.symbol_states[symbol];
symbol_state.record_declaration(def_id);
symbol_state.record_binding(def_id);
}
pub(super) fn record_use(&mut self, symbol: ScopedSymbolId, use_id: ScopedUseId) {
// We have a use of a symbol; clone the currently visible definitions for that symbol, and
// record them as the visible definitions for this use.
// We have a use of a symbol; clone the current bindings for that symbol, and record them
// as the live bindings for this use.
let new_use = self
.definitions_by_use
.push(self.definitions_by_symbol[symbol].clone());
.bindings_by_use
.push(self.symbol_states[symbol].bindings().clone());
debug_assert_eq!(use_id, new_use);
}
/// Take a snapshot of the current visible-symbols state.
pub(super) fn snapshot(&self) -> FlowSnapshot {
FlowSnapshot {
definitions_by_symbol: self.definitions_by_symbol.clone(),
symbol_states: self.symbol_states.clone(),
}
}
/// Restore the current builder visible-definitions state to the given snapshot.
/// Restore the current builder symbols state to the given snapshot.
pub(super) fn restore(&mut self, snapshot: FlowSnapshot) {
// We never remove symbols from `definitions_by_symbol` (it's an IndexVec, and the symbol
// We never remove symbols from `symbol_states` (it's an IndexVec, and the symbol
// IDs must line up), so the current number of known symbols must always be equal to or
// greater than the number of known symbols in a previously-taken snapshot.
let num_symbols = self.definitions_by_symbol.len();
debug_assert!(num_symbols >= snapshot.definitions_by_symbol.len());
let num_symbols = self.symbol_states.len();
debug_assert!(num_symbols >= snapshot.symbol_states.len());
// Restore the current visible-definitions state to the given snapshot.
self.definitions_by_symbol = snapshot.definitions_by_symbol;
self.symbol_states = snapshot.symbol_states;
// If the snapshot we are restoring is missing some symbols we've recorded since, we need
// to fill them in so the symbol IDs continue to line up. Since they don't exist in the
// snapshot, the correct state to fill them in with is "unbound".
self.definitions_by_symbol
.resize(num_symbols, SymbolState::unbound());
// snapshot, the correct state to fill them in with is "undefined".
self.symbol_states
.resize(num_symbols, SymbolState::undefined());
}
/// Merge the given snapshot into the current state, reflecting that we might have taken either
/// path to get here. The new visible-definitions state for each symbol should include
/// definitions from both the prior state and the snapshot.
/// path to get here. The new state for each symbol should include definitions from both the
/// prior state and the snapshot.
pub(super) fn merge(&mut self, snapshot: FlowSnapshot) {
// The tricky thing about merging two Ranges pointing into `all_definitions` is that if the
// two Ranges aren't already adjacent in `all_definitions`, we will have to copy at least
// one or the other of the ranges to the end of `all_definitions` so as to make them
// adjacent. We can't ever move things around in `all_definitions` because previously
// recorded uses may still have ranges pointing to any part of it; all we can do is append.
// It's possible we may end up with some old entries in `all_definitions` that nobody is
// pointing to, but that's OK.
// We never remove symbols from `definitions_by_symbol` (it's an IndexVec, and the symbol
// We never remove symbols from `symbol_states` (it's an IndexVec, and the symbol
// IDs must line up), so the current number of known symbols must always be equal to or
// greater than the number of known symbols in a previously-taken snapshot.
debug_assert!(self.definitions_by_symbol.len() >= snapshot.definitions_by_symbol.len());
debug_assert!(self.symbol_states.len() >= snapshot.symbol_states.len());
let mut snapshot_definitions_iter = snapshot.definitions_by_symbol.into_iter();
for current in &mut self.definitions_by_symbol {
let mut snapshot_definitions_iter = snapshot.symbol_states.into_iter();
for current in &mut self.symbol_states {
if let Some(snapshot) = snapshot_definitions_iter.next() {
current.merge(snapshot);
} else {
// Symbol not present in snapshot, so it's unbound from that path.
current.add_unbound();
current.set_may_be_unbound();
}
}
}
@ -363,14 +559,16 @@ impl<'db> UseDefMapBuilder<'db> {
pub(super) fn finish(mut self) -> UseDefMap<'db> {
self.all_definitions.shrink_to_fit();
self.all_constraints.shrink_to_fit();
self.definitions_by_symbol.shrink_to_fit();
self.definitions_by_use.shrink_to_fit();
self.symbol_states.shrink_to_fit();
self.bindings_by_use.shrink_to_fit();
self.definitions_by_definition.shrink_to_fit();
UseDefMap {
all_definitions: self.all_definitions,
all_constraints: self.all_constraints,
definitions_by_use: self.definitions_by_use,
public_definitions: self.definitions_by_symbol,
bindings_by_use: self.bindings_by_use,
public_symbols: self.symbol_states,
definitions_by_definition: self.definitions_by_definition,
}
}
}

View file

@ -1,13 +1,13 @@
//! Track visible definitions of a symbol, and applicable constraints per definition.
//! Track live bindings per symbol, applicable constraints per binding, and live declarations.
//!
//! These data structures operate entirely on scope-local newtype-indices for definitions and
//! constraints, referring to their location in the `all_definitions` and `all_constraints`
//! indexvecs in [`super::UseDefMapBuilder`].
//!
//! We need to track arbitrary associations between definitions and constraints, not just a single
//! set of currently dominating constraints (where "dominating" means "control flow must have
//! passed through it to reach this point"), because we can have dominating constraints that apply
//! to some definitions but not others, as in this code:
//! We need to track arbitrary associations between bindings and constraints, not just a single set
//! of currently dominating constraints (where "dominating" means "control flow must have passed
//! through it to reach this point"), because we can have dominating constraints that apply to some
//! bindings but not others, as in this code:
//!
//! ```python
//! x = 1 if flag else None
@ -18,11 +18,11 @@
//! ```
//!
//! The `x is not None` constraint dominates the final use of `x`, but it applies only to the first
//! definition of `x`, not the second, so `None` is a possible value for `x`.
//! binding of `x`, not the second, so `None` is a possible value for `x`.
//!
//! And we can't just track, for each definition, an index into a list of dominating constraints,
//! either, because we can have definitions which are still visible, but subject to constraints
//! that are no longer dominating, as in this code:
//! And we can't just track, for each binding, an index into a list of dominating constraints,
//! either, because we can have bindings which are still visible, but subject to constraints that
//! are no longer dominating, as in this code:
//!
//! ```python
//! x = 0
@ -33,13 +33,16 @@
//! ```
//!
//! From the point of view of the final use of `x`, the `x is not None` constraint no longer
//! dominates, but it does dominate the `x = 1 if flag2 else None` definition, so we have to keep
//! dominates, but it does dominate the `x = 1 if flag2 else None` binding, so we have to keep
//! track of that.
//!
//! The data structures used here ([`BitSet`] and [`smallvec::SmallVec`]) optimize for keeping all
//! data inline (avoiding lots of scattered allocations) in small-to-medium cases, and falling back
//! to heap allocation to be able to scale to arbitrary numbers of definitions and constraints when
//! needed.
//! to heap allocation to be able to scale to arbitrary numbers of live bindings and constraints
//! when needed.
//!
//! Tracking live declarations is simpler, since constraints are not involved, but otherwise very
//! similar to tracking live bindings.
use super::bitset::{BitSet, BitSetIterator};
use ruff_index::newtype_index;
use smallvec::SmallVec;
@ -53,93 +56,192 @@ pub(super) struct ScopedDefinitionId;
pub(super) struct ScopedConstraintId;
/// Can reference this * 64 total definitions inline; more will fall back to the heap.
const INLINE_DEFINITION_BLOCKS: usize = 3;
const INLINE_BINDING_BLOCKS: usize = 3;
/// A [`BitSet`] of [`ScopedDefinitionId`], representing visible definitions of a symbol in a scope.
type Definitions = BitSet<INLINE_DEFINITION_BLOCKS>;
type DefinitionsIterator<'a> = BitSetIterator<'a, INLINE_DEFINITION_BLOCKS>;
/// A [`BitSet`] of [`ScopedDefinitionId`], representing live bindings of a symbol in a scope.
type Bindings = BitSet<INLINE_BINDING_BLOCKS>;
type BindingsIterator<'a> = BitSetIterator<'a, INLINE_BINDING_BLOCKS>;
/// Can reference this * 64 total declarations inline; more will fall back to the heap.
const INLINE_DECLARATION_BLOCKS: usize = 3;
/// A [`BitSet`] of [`ScopedDefinitionId`], representing live declarations of a symbol in a scope.
type Declarations = BitSet<INLINE_DECLARATION_BLOCKS>;
type DeclarationsIterator<'a> = BitSetIterator<'a, INLINE_DECLARATION_BLOCKS>;
/// Can reference this * 64 total constraints inline; more will fall back to the heap.
const INLINE_CONSTRAINT_BLOCKS: usize = 2;
/// Can keep inline this many visible definitions per symbol at a given time; more will go to heap.
const INLINE_VISIBLE_DEFINITIONS_PER_SYMBOL: usize = 4;
/// Can keep inline this many live bindings per symbol at a given time; more will go to heap.
const INLINE_BINDINGS_PER_SYMBOL: usize = 4;
/// One [`BitSet`] of applicable [`ScopedConstraintId`] per visible definition.
type InlineConstraintArray =
[BitSet<INLINE_CONSTRAINT_BLOCKS>; INLINE_VISIBLE_DEFINITIONS_PER_SYMBOL];
/// One [`BitSet`] of applicable [`ScopedConstraintId`] per live binding.
type InlineConstraintArray = [BitSet<INLINE_CONSTRAINT_BLOCKS>; INLINE_BINDINGS_PER_SYMBOL];
type Constraints = SmallVec<InlineConstraintArray>;
type ConstraintsIterator<'a> = std::slice::Iter<'a, BitSet<INLINE_CONSTRAINT_BLOCKS>>;
type ConstraintsIntoIterator = smallvec::IntoIter<InlineConstraintArray>;
/// Visible definitions and narrowing constraints for a single symbol at some point in control flow.
/// Live declarations for a single symbol at some point in control flow.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(super) struct SymbolState {
/// [`BitSet`]: which [`ScopedDefinitionId`] are visible for this symbol?
visible_definitions: Definitions,
pub(super) struct SymbolDeclarations {
/// [`BitSet`]: which declarations (as [`ScopedDefinitionId`]) can reach the current location?
live_declarations: Declarations,
/// For each definition, which [`ScopedConstraintId`] apply?
/// Could the symbol be un-declared at this point?
may_be_undeclared: bool,
}
impl SymbolDeclarations {
fn undeclared() -> Self {
Self {
live_declarations: Declarations::default(),
may_be_undeclared: true,
}
}
/// Record a newly-encountered declaration for this symbol.
fn record_declaration(&mut self, declaration_id: ScopedDefinitionId) {
self.live_declarations = Declarations::with(declaration_id.into());
self.may_be_undeclared = false;
}
/// Return an iterator over live declarations for this symbol.
#[allow(unused)]
pub(super) fn iter(&self) -> DeclarationIdIterator {
DeclarationIdIterator {
inner: self.live_declarations.iter(),
}
}
#[allow(unused)]
pub(super) fn is_empty(&self) -> bool {
self.live_declarations.is_empty()
}
pub(super) fn may_be_undeclared(&self) -> bool {
self.may_be_undeclared
}
}
/// Live bindings and narrowing constraints for a single symbol at some point in control flow.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(super) struct SymbolBindings {
/// [`BitSet`]: which bindings (as [`ScopedDefinitionId`]) can reach the current location?
live_bindings: Bindings,
/// For each live binding, which [`ScopedConstraintId`] apply?
///
/// This is a [`smallvec::SmallVec`] which should always have one [`BitSet`] of constraints per
/// definition in `visible_definitions`.
/// binding in `live_bindings`.
constraints: Constraints,
/// Could the symbol be unbound at this point?
may_be_unbound: bool,
}
/// A single [`ScopedDefinitionId`] with an iterator of its applicable [`ScopedConstraintId`].
#[derive(Debug)]
pub(super) struct DefinitionIdWithConstraints<'a> {
pub(super) definition: ScopedDefinitionId,
pub(super) constraint_ids: ConstraintIdIterator<'a>,
}
impl SymbolState {
/// Return a new [`SymbolState`] representing an unbound symbol.
pub(super) fn unbound() -> Self {
impl SymbolBindings {
fn unbound() -> Self {
Self {
visible_definitions: Definitions::default(),
live_bindings: Bindings::default(),
constraints: Constraints::default(),
may_be_unbound: true,
}
}
/// Return a new [`SymbolState`] representing a symbol with a single visible definition.
pub(super) fn with(definition_id: ScopedDefinitionId) -> Self {
let mut constraints = Constraints::with_capacity(1);
constraints.push(BitSet::default());
Self {
visible_definitions: Definitions::with(definition_id.into()),
constraints,
may_be_unbound: false,
}
}
/// Add Unbound as a possibility for this symbol.
pub(super) fn add_unbound(&mut self) {
fn set_may_be_unbound(&mut self) {
self.may_be_unbound = true;
}
/// Add given constraint to all currently-visible definitions.
pub(super) fn add_constraint(&mut self, constraint_id: ScopedConstraintId) {
/// Record a newly-encountered binding for this symbol.
pub(super) fn record_binding(&mut self, binding_id: ScopedDefinitionId) {
// The new binding replaces all previous live bindings in this path, and has no
// constraints.
self.live_bindings = Bindings::with(binding_id.into());
self.constraints = Constraints::with_capacity(1);
self.constraints.push(BitSet::default());
self.may_be_unbound = false;
}
/// Add given constraint to all live bindings.
pub(super) fn record_constraint(&mut self, constraint_id: ScopedConstraintId) {
for bitset in &mut self.constraints {
bitset.insert(constraint_id.into());
}
}
/// Iterate over currently live bindings for this symbol.
pub(super) fn iter(&self) -> BindingIdWithConstraintsIterator {
BindingIdWithConstraintsIterator {
definitions: self.live_bindings.iter(),
constraints: self.constraints.iter(),
}
}
pub(super) fn may_be_unbound(&self) -> bool {
self.may_be_unbound
}
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub(super) struct SymbolState {
declarations: SymbolDeclarations,
bindings: SymbolBindings,
}
impl SymbolState {
/// Return a new [`SymbolState`] representing an unbound, undeclared symbol.
pub(super) fn undefined() -> Self {
Self {
declarations: SymbolDeclarations::undeclared(),
bindings: SymbolBindings::unbound(),
}
}
/// Add Unbound as a possibility for this symbol.
pub(super) fn set_may_be_unbound(&mut self) {
self.bindings.set_may_be_unbound();
}
/// Record a newly-encountered binding for this symbol.
pub(super) fn record_binding(&mut self, binding_id: ScopedDefinitionId) {
self.bindings.record_binding(binding_id);
}
/// Add given constraint to all live bindings.
pub(super) fn record_constraint(&mut self, constraint_id: ScopedConstraintId) {
self.bindings.record_constraint(constraint_id);
}
/// Record a newly-encountered declaration of this symbol.
pub(super) fn record_declaration(&mut self, declaration_id: ScopedDefinitionId) {
self.declarations.record_declaration(declaration_id);
}
/// Merge another [`SymbolState`] into this one.
pub(super) fn merge(&mut self, b: SymbolState) {
let mut a = Self {
visible_definitions: Definitions::default(),
constraints: Constraints::default(),
may_be_unbound: self.may_be_unbound || b.may_be_unbound,
bindings: SymbolBindings {
live_bindings: Bindings::default(),
constraints: Constraints::default(),
may_be_unbound: self.bindings.may_be_unbound || b.bindings.may_be_unbound,
},
declarations: SymbolDeclarations {
live_declarations: self.declarations.live_declarations.clone(),
may_be_undeclared: self.declarations.may_be_undeclared
|| b.declarations.may_be_undeclared,
},
};
std::mem::swap(&mut a, self);
let mut a_defs_iter = a.visible_definitions.iter();
let mut b_defs_iter = b.visible_definitions.iter();
let mut a_constraints_iter = a.constraints.into_iter();
let mut b_constraints_iter = b.constraints.into_iter();
self.declarations
.live_declarations
.union(&b.declarations.live_declarations);
let mut a_defs_iter = a.bindings.live_bindings.iter();
let mut b_defs_iter = b.bindings.live_bindings.iter();
let mut a_constraints_iter = a.bindings.constraints.into_iter();
let mut b_constraints_iter = b.bindings.constraints.into_iter();
let mut opt_a_def: Option<u32> = a_defs_iter.next();
let mut opt_b_def: Option<u32> = b_defs_iter.next();
@ -152,7 +254,7 @@ impl SymbolState {
// Helper to push `def`, with constraints in `constraints_iter`, onto `self`.
let push = |def, constraints_iter: &mut ConstraintsIntoIterator, merged: &mut Self| {
merged.visible_definitions.insert(def);
merged.bindings.live_bindings.insert(def);
// SAFETY: we only ever create SymbolState with either no definitions and no constraint
// bitsets (`::unbound`) or one definition and one constraint bitset (`::with`), and
// `::merge` always pushes one definition and one constraint bitset together (just
@ -161,7 +263,7 @@ impl SymbolState {
let constraints = constraints_iter
.next()
.expect("definitions and constraints length mismatch");
merged.constraints.push(constraints);
merged.bindings.constraints.push(constraints);
};
loop {
@ -191,7 +293,8 @@ impl SymbolState {
// If the same definition is visible through both paths, any constraint
// that applies on only one path is irrelevant to the resulting type from
// unioning the two paths, so we intersect the constraints.
self.constraints
self.bindings
.constraints
.last_mut()
.unwrap()
.intersect(&a_constraints);
@ -214,40 +317,54 @@ impl SymbolState {
}
}
/// Get iterator over visible definitions with constraints.
pub(super) fn visible_definitions(&self) -> DefinitionIdWithConstraintsIterator {
DefinitionIdWithConstraintsIterator {
definitions: self.visible_definitions.iter(),
constraints: self.constraints.iter(),
}
pub(super) fn bindings(&self) -> &SymbolBindings {
&self.bindings
}
pub(super) fn declarations(&self) -> &SymbolDeclarations {
&self.declarations
}
/// Could the symbol be unbound?
pub(super) fn may_be_unbound(&self) -> bool {
self.may_be_unbound
self.bindings.may_be_unbound()
}
/// Could the symbol be undeclared?
pub(super) fn may_be_undeclared(&self) -> bool {
self.declarations.may_be_undeclared()
}
}
/// The default state of a symbol (if we've seen no definitions of it) is unbound.
/// The default state of a symbol, if we've seen no definitions of it, is undefined (that is,
/// both unbound and undeclared).
impl Default for SymbolState {
fn default() -> Self {
SymbolState::unbound()
SymbolState::undefined()
}
}
/// A single binding (as [`ScopedDefinitionId`]) with an iterator of its applicable
/// [`ScopedConstraintId`].
#[derive(Debug)]
pub(super) struct BindingIdWithConstraints<'a> {
pub(super) definition: ScopedDefinitionId,
pub(super) constraint_ids: ConstraintIdIterator<'a>,
}
#[derive(Debug)]
pub(super) struct DefinitionIdWithConstraintsIterator<'a> {
definitions: DefinitionsIterator<'a>,
pub(super) struct BindingIdWithConstraintsIterator<'a> {
definitions: BindingsIterator<'a>,
constraints: ConstraintsIterator<'a>,
}
impl<'a> Iterator for DefinitionIdWithConstraintsIterator<'a> {
type Item = DefinitionIdWithConstraints<'a>;
impl<'a> Iterator for BindingIdWithConstraintsIterator<'a> {
type Item = BindingIdWithConstraints<'a>;
fn next(&mut self) -> Option<Self::Item> {
match (self.definitions.next(), self.constraints.next()) {
(None, None) => None,
(Some(def), Some(constraints)) => Some(DefinitionIdWithConstraints {
(Some(def), Some(constraints)) => Some(BindingIdWithConstraints {
definition: ScopedDefinitionId::from_u32(def),
constraint_ids: ConstraintIdIterator {
wrapped: constraints.iter(),
@ -259,7 +376,7 @@ impl<'a> Iterator for DefinitionIdWithConstraintsIterator<'a> {
}
}
impl std::iter::FusedIterator for DefinitionIdWithConstraintsIterator<'_> {}
impl std::iter::FusedIterator for BindingIdWithConstraintsIterator<'_> {}
#[derive(Debug)]
pub(super) struct ConstraintIdIterator<'a> {
@ -276,15 +393,32 @@ impl Iterator for ConstraintIdIterator<'_> {
impl std::iter::FusedIterator for ConstraintIdIterator<'_> {}
#[allow(unused)]
#[derive(Debug)]
pub(super) struct DeclarationIdIterator<'a> {
inner: DeclarationsIterator<'a>,
}
impl<'a> Iterator for DeclarationIdIterator<'a> {
type Item = ScopedDefinitionId;
fn next(&mut self) -> Option<Self::Item> {
self.inner.next().map(ScopedDefinitionId::from_u32)
}
}
impl std::iter::FusedIterator for DeclarationIdIterator<'_> {}
#[cfg(test)]
mod tests {
use super::{ScopedConstraintId, ScopedDefinitionId, SymbolState};
impl SymbolState {
pub(crate) fn assert(&self, may_be_unbound: bool, expected: &[&str]) {
pub(crate) fn assert_bindings(&self, may_be_unbound: bool, expected: &[&str]) {
assert_eq!(self.may_be_unbound(), may_be_unbound);
let actual = self
.visible_definitions()
.bindings()
.iter()
.map(|def_id_with_constraints| {
format!(
"{}<{}>",
@ -300,75 +434,142 @@ mod tests {
.collect::<Vec<_>>();
assert_eq!(actual, expected);
}
pub(crate) fn assert_declarations(&self, may_be_undeclared: bool, expected: &[u32]) {
assert_eq!(self.may_be_undeclared(), may_be_undeclared);
let actual = self
.declarations()
.iter()
.map(ScopedDefinitionId::as_u32)
.collect::<Vec<_>>();
assert_eq!(actual, expected);
}
}
#[test]
fn unbound() {
let cd = SymbolState::unbound();
let sym = SymbolState::undefined();
cd.assert(true, &[]);
sym.assert_bindings(true, &[]);
}
#[test]
fn with() {
let cd = SymbolState::with(ScopedDefinitionId::from_u32(0));
let mut sym = SymbolState::undefined();
sym.record_binding(ScopedDefinitionId::from_u32(0));
cd.assert(false, &["0<>"]);
sym.assert_bindings(false, &["0<>"]);
}
#[test]
fn add_unbound() {
let mut cd = SymbolState::with(ScopedDefinitionId::from_u32(0));
cd.add_unbound();
fn set_may_be_unbound() {
let mut sym = SymbolState::undefined();
sym.record_binding(ScopedDefinitionId::from_u32(0));
sym.set_may_be_unbound();
cd.assert(true, &["0<>"]);
sym.assert_bindings(true, &["0<>"]);
}
#[test]
fn add_constraint() {
let mut cd = SymbolState::with(ScopedDefinitionId::from_u32(0));
cd.add_constraint(ScopedConstraintId::from_u32(0));
fn record_constraint() {
let mut sym = SymbolState::undefined();
sym.record_binding(ScopedDefinitionId::from_u32(0));
sym.record_constraint(ScopedConstraintId::from_u32(0));
cd.assert(false, &["0<0>"]);
sym.assert_bindings(false, &["0<0>"]);
}
#[test]
fn merge() {
// merging the same definition with the same constraint keeps the constraint
let mut cd0a = SymbolState::with(ScopedDefinitionId::from_u32(0));
cd0a.add_constraint(ScopedConstraintId::from_u32(0));
let mut sym0a = SymbolState::undefined();
sym0a.record_binding(ScopedDefinitionId::from_u32(0));
sym0a.record_constraint(ScopedConstraintId::from_u32(0));
let mut cd0b = SymbolState::with(ScopedDefinitionId::from_u32(0));
cd0b.add_constraint(ScopedConstraintId::from_u32(0));
let mut sym0b = SymbolState::undefined();
sym0b.record_binding(ScopedDefinitionId::from_u32(0));
sym0b.record_constraint(ScopedConstraintId::from_u32(0));
cd0a.merge(cd0b);
let mut cd0 = cd0a;
cd0.assert(false, &["0<0>"]);
sym0a.merge(sym0b);
let mut sym0 = sym0a;
sym0.assert_bindings(false, &["0<0>"]);
// merging the same definition with differing constraints drops all constraints
let mut cd1a = SymbolState::with(ScopedDefinitionId::from_u32(1));
cd1a.add_constraint(ScopedConstraintId::from_u32(1));
let mut sym1a = SymbolState::undefined();
sym1a.record_binding(ScopedDefinitionId::from_u32(1));
sym1a.record_constraint(ScopedConstraintId::from_u32(1));
let mut cd1b = SymbolState::with(ScopedDefinitionId::from_u32(1));
cd1b.add_constraint(ScopedConstraintId::from_u32(2));
let mut sym1b = SymbolState::undefined();
sym1b.record_binding(ScopedDefinitionId::from_u32(1));
sym1b.record_constraint(ScopedConstraintId::from_u32(2));
cd1a.merge(cd1b);
let cd1 = cd1a;
cd1.assert(false, &["1<>"]);
sym1a.merge(sym1b);
let sym1 = sym1a;
sym1.assert_bindings(false, &["1<>"]);
// merging a constrained definition with unbound keeps both
let mut cd2a = SymbolState::with(ScopedDefinitionId::from_u32(2));
cd2a.add_constraint(ScopedConstraintId::from_u32(3));
let mut sym2a = SymbolState::undefined();
sym2a.record_binding(ScopedDefinitionId::from_u32(2));
sym2a.record_constraint(ScopedConstraintId::from_u32(3));
let cd2b = SymbolState::unbound();
let sym2b = SymbolState::undefined();
cd2a.merge(cd2b);
let cd2 = cd2a;
cd2.assert(true, &["2<3>"]);
sym2a.merge(sym2b);
let sym2 = sym2a;
sym2.assert_bindings(true, &["2<3>"]);
// merging different definitions keeps them each with their existing constraints
cd0.merge(cd2);
let cd = cd0;
cd.assert(true, &["0<0>", "2<3>"]);
sym0.merge(sym2);
let sym = sym0;
sym.assert_bindings(true, &["0<0>", "2<3>"]);
}
#[test]
fn no_declaration() {
let sym = SymbolState::undefined();
sym.assert_declarations(true, &[]);
}
#[test]
fn record_declaration() {
let mut sym = SymbolState::undefined();
sym.record_declaration(ScopedDefinitionId::from_u32(1));
sym.assert_declarations(false, &[1]);
}
#[test]
fn record_declaration_override() {
let mut sym = SymbolState::undefined();
sym.record_declaration(ScopedDefinitionId::from_u32(1));
sym.record_declaration(ScopedDefinitionId::from_u32(2));
sym.assert_declarations(false, &[2]);
}
#[test]
fn record_declaration_merge() {
let mut sym = SymbolState::undefined();
sym.record_declaration(ScopedDefinitionId::from_u32(1));
let mut sym2 = SymbolState::undefined();
sym2.record_declaration(ScopedDefinitionId::from_u32(2));
sym.merge(sym2);
sym.assert_declarations(false, &[1, 2]);
}
#[test]
fn record_declaration_merge_partial_undeclared() {
let mut sym = SymbolState::undefined();
sym.record_declaration(ScopedDefinitionId::from_u32(1));
let sym2 = SymbolState::undefined();
sym.merge(sym2);
sym.assert_declarations(true, &[1]);
}
}

View file

@ -7,8 +7,8 @@ use crate::semantic_index::ast_ids::HasScopedAstId;
use crate::semantic_index::definition::{Definition, DefinitionKind};
use crate::semantic_index::symbol::{ScopeId, ScopedSymbolId};
use crate::semantic_index::{
global_scope, semantic_index, symbol_table, use_def_map, DefinitionWithConstraints,
DefinitionWithConstraintsIterator,
global_scope, semantic_index, symbol_table, use_def_map, BindingWithConstraints,
BindingWithConstraintsIterator,
};
use crate::stdlib::{builtins_symbol_ty, types_symbol_ty, typeshed_symbol_ty};
use crate::types::narrow::narrowing_constraint;
@ -51,7 +51,7 @@ pub(crate) fn symbol_ty_by_id<'db>(
let use_def = use_def_map(db, scope);
definitions_ty(
db,
use_def.public_definitions(symbol),
use_def.public_bindings(symbol),
use_def
.public_may_be_unbound(symbol)
.then_some(Type::Unbound),
@ -113,28 +113,28 @@ pub(crate) fn definition_expression_ty<'db>(
/// provide an `unbound_ty`.
pub(crate) fn definitions_ty<'db>(
db: &'db dyn Db,
definitions_with_constraints: DefinitionWithConstraintsIterator<'_, 'db>,
bindings_with_constraints: BindingWithConstraintsIterator<'_, 'db>,
unbound_ty: Option<Type<'db>>,
) -> Type<'db> {
let def_types = definitions_with_constraints.map(
|DefinitionWithConstraints {
definition,
let def_types = bindings_with_constraints.map(
|BindingWithConstraints {
binding,
constraints,
}| {
let mut constraint_tys = constraints
.filter_map(|constraint| narrowing_constraint(db, constraint, definition));
let definition_ty = definition_ty(db, definition);
let mut constraint_tys =
constraints.filter_map(|constraint| narrowing_constraint(db, constraint, binding));
let binding_ty = definition_ty(db, binding);
if let Some(first_constraint_ty) = constraint_tys.next() {
let mut builder = IntersectionBuilder::new(db);
builder = builder
.add_positive(definition_ty)
.add_positive(binding_ty)
.add_positive(first_constraint_ty);
for constraint_ty in constraint_tys {
builder = builder.add_positive(constraint_ty);
}
builder.build()
} else {
definition_ty
binding_ty
}
},
);
@ -589,7 +589,7 @@ impl<'db> FunctionType<'db> {
/// inferred return type for this function
pub fn return_type(&self, db: &'db dyn Db) -> Type<'db> {
let definition = self.definition(db);
let DefinitionKind::Function(function_stmt_node) = definition.node(db) else {
let DefinitionKind::Function(function_stmt_node) = definition.kind(db) else {
panic!("Function type definition must have `DefinitionKind::Function`")
};
@ -644,7 +644,7 @@ impl<'db> ClassType<'db> {
/// If `definition` is not a `DefinitionKind::Class`.
pub fn bases(&self, db: &'db dyn Db) -> impl Iterator<Item = Type<'db>> {
let definition = self.definition(db);
let DefinitionKind::Class(class_stmt_node) = definition.node(db) else {
let DefinitionKind::Class(class_stmt_node) = definition.kind(db) else {
panic!("Class type definition must have DefinitionKind::Class");
};
class_stmt_node

View file

@ -364,7 +364,7 @@ impl<'db> TypeInferenceBuilder<'db> {
}
fn infer_region_definition(&mut self, definition: Definition<'db>) {
match definition.node(self.db) {
match definition.kind(self.db) {
DefinitionKind::Function(function) => {
self.infer_function_definition(function.node(), definition);
}
@ -435,7 +435,7 @@ impl<'db> TypeInferenceBuilder<'db> {
}
fn infer_region_deferred(&mut self, definition: Definition<'db>) {
match definition.node(self.db) {
match definition.kind(self.db) {
DefinitionKind::Function(function) => self.infer_function_deferred(function.node()),
DefinitionKind::Class(class) => self.infer_class_deferred(class.node()),
DefinitionKind::AnnotatedAssignment(_annotated_assignment) => {
@ -1938,17 +1938,17 @@ impl<'db> TypeInferenceBuilder<'db> {
/// Look up a name reference that isn't bound in the local scope.
fn lookup_name(&self, name: &ast::name::Name) -> Type<'db> {
let file_scope_id = self.scope.file_scope_id(self.db);
let is_defined = self
let is_bound = self
.index
.symbol_table(file_scope_id)
.symbol_by_name(name)
.expect("Symbol table should create a symbol for every Name node")
.is_defined();
.is_bound();
// In function-like scopes, any local variable (symbol that is defined in this
// scope) can only have a definition in this scope, or be undefined; it never references
// another scope. (At runtime, it would use the `LOAD_FAST` opcode.)
if !is_defined || !self.scope.is_function_like(self.db) {
// In function-like scopes, any local variable (symbol that is bound in this scope) can
// only have a definition in this scope, or error; it never references another scope.
// (At runtime, it would use the `LOAD_FAST` opcode.)
if !is_bound || !self.scope.is_function_like(self.db) {
// Walk up parent scopes looking for a possible enclosing scope that may have a
// definition of this name visible to us (would be `LOAD_DEREF` at runtime.)
for (enclosing_scope_file_id, _) in self.index.ancestor_scopes(file_scope_id) {
@ -1963,7 +1963,7 @@ impl<'db> TypeInferenceBuilder<'db> {
let Some(enclosing_symbol) = enclosing_symbol_table.symbol_by_name(name) else {
continue;
};
if enclosing_symbol.is_defined() {
if enclosing_symbol.is_bound() {
// We can return early here, because the nearest function-like scope that
// defines a name must be the only source for the nonlocal reference (at
// runtime, it is the scope that creates the cell for our closure.) If the name
@ -2005,13 +2005,13 @@ impl<'db> TypeInferenceBuilder<'db> {
// if we're inferring types of deferred expressions, always treat them as public symbols
let (definitions, may_be_unbound) = if self.is_deferred() {
(
use_def.public_definitions(symbol),
use_def.public_bindings(symbol),
use_def.public_may_be_unbound(symbol),
)
} else {
let use_id = name.scoped_use_id(self.db, self.scope);
(
use_def.use_definitions(use_id),
use_def.bindings_at_use(use_id),
use_def.use_may_be_unbound(use_id),
)
};
@ -5087,13 +5087,13 @@ mod tests {
// Incremental inference tests
fn first_public_def<'db>(db: &'db TestDb, file: File, name: &str) -> Definition<'db> {
fn first_public_binding<'db>(db: &'db TestDb, file: File, name: &str) -> Definition<'db> {
let scope = global_scope(db, file);
use_def_map(db, scope)
.public_definitions(symbol_table(db, scope).symbol_id_by_name(name).unwrap())
.public_bindings(symbol_table(db, scope).symbol_id_by_name(name).unwrap())
.next()
.unwrap()
.definition
.binding
}
#[test]
@ -5151,7 +5151,7 @@ mod tests {
assert_function_query_was_not_run(
&db,
infer_definition_types,
first_public_def(&db, a, "x"),
first_public_binding(&db, a, "x"),
&events,
);
@ -5187,7 +5187,7 @@ mod tests {
assert_function_query_was_not_run(
&db,
infer_definition_types,
first_public_def(&db, a, "x"),
first_public_binding(&db, a, "x"),
&events,
);
Ok(())