[ty] improve lazy scope place lookup (#19321)

Co-authored-by: David Peter <sharkdp@users.noreply.github.com>
Co-authored-by: Carl Meyer <carl@oddbird.net>
Co-authored-by: Micha Reiser <micha@reiser.io>
This commit is contained in:
Shunsuke Shibayama 2025-07-25 16:11:11 +09:00 committed by GitHub
parent 57373a7e4d
commit b124e182ca
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 493 additions and 179 deletions

View file

@ -43,9 +43,9 @@ use crate::semantic_index::reachability_constraints::{
ReachabilityConstraintsBuilder, ScopedReachabilityConstraintId,
};
use crate::semantic_index::use_def::{
EagerSnapshotKey, FlowSnapshot, ScopedEagerSnapshotId, UseDefMapBuilder,
EnclosingSnapshotKey, FlowSnapshot, ScopedEnclosingSnapshotId, UseDefMapBuilder,
};
use crate::semantic_index::{ArcUseDefMap, ExpressionsScopeMap, SemanticIndex};
use crate::semantic_index::{ArcUseDefMap, ExpressionsScopeMap, ScopeLaziness, SemanticIndex};
use crate::semantic_model::HasTrackedScope;
use crate::unpack::{Unpack, UnpackKind, UnpackPosition, UnpackValue};
use crate::{Db, Program};
@ -113,7 +113,7 @@ pub(super) struct SemanticIndexBuilder<'db, 'ast> {
///
/// [generator functions]: https://docs.python.org/3/glossary.html#term-generator
generator_functions: FxHashSet<FileScopeId>,
eager_snapshots: FxHashMap<EagerSnapshotKey, ScopedEagerSnapshotId>,
enclosing_snapshots: FxHashMap<EnclosingSnapshotKey, ScopedEnclosingSnapshotId>,
/// Errors collected by the `semantic_checker`.
semantic_syntax_errors: RefCell<Vec<SemanticSyntaxError>>,
}
@ -148,7 +148,7 @@ impl<'db, 'ast> SemanticIndexBuilder<'db, 'ast> {
imported_modules: FxHashSet::default(),
generator_functions: FxHashSet::default(),
eager_snapshots: FxHashMap::default(),
enclosing_snapshots: FxHashMap::default(),
python_version: Program::get(db).python_version(db),
source_text: OnceCell::new(),
@ -276,25 +276,8 @@ impl<'db, 'ast> SemanticIndexBuilder<'db, 'ast> {
});
}
fn pop_scope(&mut self) -> FileScopeId {
self.try_node_context_stack_manager.exit_scope();
let ScopeInfo {
file_scope_id: popped_scope_id,
..
} = self
.scope_stack
.pop()
.expect("Root scope should be present");
let children_end = self.scopes.next_index();
let popped_scope = &mut self.scopes[popped_scope_id];
popped_scope.extend_descendants(children_end);
if !popped_scope.is_eager() {
return popped_scope_id;
}
// Records snapshots of the place states visible from the current eager scope.
fn record_eager_snapshots(&mut self, popped_scope_id: FileScopeId) {
// If the scope that we just popped off is an eager scope, we need to "lock" our view of
// which bindings reach each of the uses in the scope. Loop through each enclosing scope,
// looking for any that bind each place.
@ -327,27 +310,163 @@ impl<'db, 'ast> SemanticIndexBuilder<'db, 'ast> {
// Snapshot the state of this place that is visible at this point in this
// enclosing scope.
let key = EagerSnapshotKey {
let key = EnclosingSnapshotKey {
enclosing_scope: enclosing_scope_id,
enclosing_place: enclosing_place_id,
nested_scope: popped_scope_id,
nested_laziness: ScopeLaziness::Eager,
};
let eager_snapshot = self.use_def_maps[enclosing_scope_id].snapshot_eager_state(
let eager_snapshot = self.use_def_maps[enclosing_scope_id].snapshot_outer_state(
enclosing_place_id,
enclosing_scope_kind,
enclosing_place,
);
self.eager_snapshots.insert(key, eager_snapshot);
self.enclosing_snapshots.insert(key, eager_snapshot);
}
// Lazy scopes are "sticky": once we see a lazy scope we stop doing lookups
// eagerly, even if we would encounter another eager enclosing scope later on.
// Also, narrowing constraints outside a lazy scope are not applicable.
// TODO: If the place has never been rewritten, they are applicable.
if !enclosing_scope_kind.is_eager() {
break;
}
}
}
/// Finds the nearest scope, starting at `enclosing_scope` and walking down the scope
/// stack from there, in which `place_expr` is bound.
///
/// Scopes sitting above `enclosing_scope` on the stack are skipped; `enclosing_scope`
/// itself is the first candidate. Returns `None` when no remaining scope on the stack
/// has a bound entry for `place_expr` in its place table.
fn bound_scope(
    &self,
    enclosing_scope: FileScopeId,
    place_expr: &PlaceExpr,
) -> Option<FileScopeId> {
    let candidates = self
        .scope_stack
        .iter()
        .rev()
        .skip_while(|scope| scope.file_scope_id != enclosing_scope);

    for candidate in candidates {
        let candidate_id = candidate.file_scope_id;
        let table = &self.place_tables[candidate_id];
        // A scope that doesn't know the place at all cannot bind it.
        let Some(place_id) = table.place_id_by_expr(place_expr) else {
            continue;
        };
        if table.place_expr(place_id).is_bound() {
            return Some(candidate_id);
        }
    }
    None
}
/// Records snapshots of the place states visible from the lazy scope that was just popped.
///
/// For every scope still on the scope stack (walked from the top of the stack down) and
/// every name place of `popped_scope_id` that the enclosing scope's place table also
/// contains, the current state of that place in the enclosing scope is snapshotted and
/// registered in `enclosing_snapshots` under a key with `ScopeLaziness::Lazy`.
///
/// These snapshots are provisional: `sweep_lazy_snapshots` and
/// `sweep_nonlocal_lazy_snapshots` remove entries from `enclosing_snapshots` later on.
fn record_lazy_snapshots(&mut self, popped_scope_id: FileScopeId) {
    for enclosing_scope_info in self.scope_stack.iter().rev() {
        let enclosing_scope_id = enclosing_scope_info.file_scope_id;
        let enclosing_scope = &self.scopes[enclosing_scope_id];

        // If the enclosing scope allows its members to be modified from elsewhere, no
        // snapshot is recorded for any place. This does not depend on the nested place,
        // so it is decided once per enclosing scope rather than once per place.
        if enclosing_scope.visibility().is_public() {
            continue;
        }

        let enclosing_scope_kind = enclosing_scope.kind();
        let enclosing_place_table = &self.place_tables[enclosing_scope_id];

        for nested_place in self.place_tables[popped_scope_id].places() {
            // We don't record lazy snapshots of attributes or subscripts, because their
            // modifications are difficult to track.
            // Places rebound through `nonlocal` are not filtered out here; their
            // snapshots are removed afterwards by `sweep_nonlocal_lazy_snapshots`.
            if !nested_place.is_name() {
                continue;
            }

            // Skip this place if the enclosing scope's place table has no entry for it.
            // Note that even if this place is bound in the popped scope,
            // it may refer to the enclosing scope bindings,
            // so we also need to snapshot the bindings of the enclosing scope.
            let Some(enclosing_place_id) =
                enclosing_place_table.place_id_by_expr(&nested_place.expr)
            else {
                continue;
            };

            let enclosing_place = enclosing_place_table.place_expr(enclosing_place_id);

            // The place is known here but not bound here: look for the scope that does
            // bind it. If there is none, or if that scope can be modified from
            // elsewhere, the snapshot is not recorded.
            if !enclosing_place.is_bound()
                && self
                    .bound_scope(enclosing_scope_id, &nested_place.expr)
                    .is_none_or(|scope| self.scopes[scope].visibility().is_public())
            {
                continue;
            }

            // Snapshot the state of this place that is visible at this point in this
            // enclosing scope (this may later be invalidated and swept away).
            let key = EnclosingSnapshotKey {
                enclosing_scope: enclosing_scope_id,
                enclosing_place: enclosing_place_id,
                nested_scope: popped_scope_id,
                nested_laziness: ScopeLaziness::Lazy,
            };
            let lazy_snapshot = self.use_def_maps[enclosing_scope_id].snapshot_outer_state(
                enclosing_place_id,
                enclosing_scope_kind,
                enclosing_place,
            );
            self.enclosing_snapshots.insert(key, lazy_snapshot);
        }
    }
}
/// Drops lazy snapshots that were taken against `popped_scope_id` (as the enclosing
/// scope) for places that have since been reassigned, because such snapshots are no
/// longer valid.
///
/// Eager snapshots, and snapshots whose enclosing scope is a different scope, are
/// always kept.
fn sweep_lazy_snapshots(&mut self, popped_scope_id: FileScopeId) {
    let place_tables = &self.place_tables;
    self.enclosing_snapshots.retain(|key, _| {
        // Only lazy snapshots of the scope being popped are candidates for removal.
        if key.nested_laziness.is_eager() || key.enclosing_scope != popped_scope_id {
            return true;
        }
        !place_tables[key.enclosing_scope].is_place_reassigned(key.enclosing_place)
    });
}
/// Drops lazy snapshots of places that some scope both marks as `nonlocal` and binds.
///
/// For each lazy snapshot, every scope from the snapshot's enclosing scope onwards (in
/// `self.scopes` index order) is inspected; if any of them has a place for the same
/// expression that is marked nonlocal and bound, the snapshot is removed. Eager
/// snapshots are always kept.
fn sweep_nonlocal_lazy_snapshots(&mut self) {
    self.enclosing_snapshots.retain(|key, _| {
        if key.nested_laziness.is_eager() {
            return true;
        }

        let enclosing_table = &self.place_tables[key.enclosing_scope];
        let snapshotted_place = enclosing_table.place_expr(key.enclosing_place);

        let bound_via_nonlocal = self
            .scopes
            .iter_enumerated()
            .skip_while(|(scope_id, _)| *scope_id != key.enclosing_scope)
            .any(|(scope_id, _)| {
                let candidate_table = &self.place_tables[scope_id];
                candidate_table
                    .place_id_by_expr(&snapshotted_place.expr)
                    .is_some_and(|place_id| {
                        let candidate = candidate_table.place_expr(place_id);
                        candidate.is_marked_nonlocal() && candidate.is_bound()
                    })
            });

        !bound_via_nonlocal
    });
}
fn pop_scope(&mut self) -> FileScopeId {
self.try_node_context_stack_manager.exit_scope();
let ScopeInfo {
file_scope_id: popped_scope_id,
..
} = self
.scope_stack
.pop()
.expect("Root scope should be present");
self.sweep_lazy_snapshots(popped_scope_id);
let children_end = self.scopes.next_index();
let popped_scope = &mut self.scopes[popped_scope_id];
popped_scope.extend_descendants(children_end);
if popped_scope.is_eager() {
self.record_eager_snapshots(popped_scope_id);
} else {
self.record_lazy_snapshots(popped_scope_id);
}
popped_scope_id
}
@ -1037,6 +1156,7 @@ impl<'db, 'ast> SemanticIndexBuilder<'db, 'ast> {
// Pop the root scope
self.pop_scope();
self.sweep_nonlocal_lazy_snapshots();
assert!(self.scope_stack.is_empty());
assert_eq!(&self.current_assignments, &[]);
@ -1076,7 +1196,7 @@ impl<'db, 'ast> SemanticIndexBuilder<'db, 'ast> {
self.scope_ids_by_scope.shrink_to_fit();
self.scopes_by_node.shrink_to_fit();
self.generator_functions.shrink_to_fit();
self.eager_snapshots.shrink_to_fit();
self.enclosing_snapshots.shrink_to_fit();
SemanticIndex {
place_tables,
@ -1090,7 +1210,7 @@ impl<'db, 'ast> SemanticIndexBuilder<'db, 'ast> {
use_def_maps,
imported_modules: Arc::new(self.imported_modules),
has_future_annotations: self.has_future_annotations,
eager_snapshots: self.eager_snapshots,
enclosing_snapshots: self.enclosing_snapshots,
semantic_syntax_errors: self.semantic_syntax_errors.into_inner(),
generator_functions: self.generator_functions,
}

View file

@ -43,7 +43,7 @@ pub(crate) type ScopedNarrowingConstraint = List<ScopedNarrowingConstraintPredic
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) enum ConstraintKey {
NarrowingConstraint(ScopedNarrowingConstraint),
EagerNestedScope(FileScopeId),
NestedScope(FileScopeId),
UseId(ScopedUseId),
}

View file

@ -16,7 +16,9 @@ use crate::Db;
use crate::ast_node_ref::AstNodeRef;
use crate::node_key::NodeKey;
use crate::semantic_index::reachability_constraints::ScopedReachabilityConstraintId;
use crate::semantic_index::{PlaceSet, SemanticIndex, semantic_index};
use crate::semantic_index::{
PlaceSet, ScopeLaziness, ScopeVisibility, SemanticIndex, semantic_index,
};
#[derive(Debug, Clone, PartialEq, Eq, Hash, get_size2::GetSize)]
pub(crate) enum PlaceExprSubSegment {
@ -349,6 +351,10 @@ impl PlaceExprWithFlags {
self.flags.contains(PlaceFlags::MARKED_NONLOCAL)
}
/// Returns `true` if the `IS_REASSIGNED` flag is set on this place.
pub fn is_reassigned(&self) -> bool {
self.flags.contains(PlaceFlags::IS_REASSIGNED)
}
/// Delegates to `as_name` on the wrapped place expression.
///
/// NOTE(review): presumably `Some` only when the place is a plain name — confirm
/// against the wrapped expression's `as_name`.
pub(crate) fn as_name(&self) -> Option<&Name> {
self.expr.as_name()
}
@ -419,6 +425,7 @@ bitflags! {
const MARKED_GLOBAL = 1 << 3;
const MARKED_NONLOCAL = 1 << 4;
const IS_INSTANCE_ATTRIBUTE = 1 << 5;
const IS_REASSIGNED = 1 << 6;
}
}
@ -579,6 +586,10 @@ impl Scope {
self.node().scope_kind()
}
/// Returns the visibility of this scope, as determined by its scope kind.
pub(crate) fn visibility(&self) -> ScopeVisibility {
self.kind().visibility()
}
/// Returns a copy of the range of scope ids stored as this scope's descendants.
pub fn descendants(&self) -> Range<FileScopeId> {
self.descendants.clone()
}
@ -613,12 +624,27 @@ pub enum ScopeKind {
impl ScopeKind {
/// Returns `true` if the laziness of this scope kind is eager.
pub(crate) const fn is_eager(self) -> bool {
self.laziness().is_eager()
}
pub(crate) const fn laziness(self) -> ScopeLaziness {
match self {
ScopeKind::Module | ScopeKind::Class | ScopeKind::Comprehension => true,
ScopeKind::Module | ScopeKind::Class | ScopeKind::Comprehension => ScopeLaziness::Eager,
ScopeKind::Annotation
| ScopeKind::Function
| ScopeKind::Lambda
| ScopeKind::TypeAlias => false,
| ScopeKind::TypeAlias => ScopeLaziness::Lazy,
}
}
/// Classifies this scope kind as public or private.
///
/// Module and class scopes are `Public`; every other kind of scope is `Private`.
pub(crate) const fn visibility(self) -> ScopeVisibility {
    match self {
        ScopeKind::Function
        | ScopeKind::Lambda
        | ScopeKind::Comprehension
        | ScopeKind::Annotation
        | ScopeKind::TypeAlias => ScopeVisibility::Private,
        ScopeKind::Class | ScopeKind::Module => ScopeVisibility::Public,
    }
}
@ -842,6 +868,9 @@ impl PlaceTableBuilder {
}
/// Marks the place `id` as bound.
///
/// If the place was already bound before this call, it is additionally flagged as
/// reassigned.
pub(super) fn mark_place_bound(&mut self, id: ScopedPlaceId) {
    let place = &mut self.table.places[id];
    // A second (or later) binding of the same place counts as a reassignment.
    if place.is_bound() {
        place.insert_flags(PlaceFlags::IS_REASSIGNED);
    }
    place.insert_flags(PlaceFlags::IS_BOUND);
}
@ -873,6 +902,10 @@ impl PlaceTableBuilder {
self.table.place_expr(place_id)
}
/// Returns whether the place `place_id` in the table under construction reports itself
/// as reassigned.
pub(super) fn is_place_reassigned(&self, place_id: ScopedPlaceId) -> bool {
self.table.places[place_id].is_reassigned()
}
/// Returns the place IDs associated with the place (e.g. `x.y`, `x.y.z`, `x.y.z[0]` for `x`).
pub(super) fn associated_place_ids(
&self,

View file

@ -244,7 +244,7 @@ use ruff_index::{IndexVec, newtype_index};
use rustc_hash::FxHashMap;
use self::place_state::{
Bindings, Declarations, EagerSnapshot, LiveBindingsIterator, LiveDeclaration,
Bindings, Declarations, EnclosingSnapshot, LiveBindingsIterator, LiveDeclaration,
LiveDeclarationsIterator, PlaceState, ScopedDefinitionId,
};
use crate::node_key::NodeKey;
@ -264,7 +264,7 @@ use crate::semantic_index::reachability_constraints::{
ReachabilityConstraints, ReachabilityConstraintsBuilder, ScopedReachabilityConstraintId,
};
use crate::semantic_index::use_def::place_state::PreviousDefinitions;
use crate::semantic_index::{EagerSnapshotResult, SemanticIndex};
use crate::semantic_index::{EnclosingSnapshotResult, ScopeLaziness, SemanticIndex};
use crate::types::{IntersectionBuilder, Truthiness, Type, infer_narrowing_constraint};
mod place_state;
@ -318,8 +318,8 @@ pub(crate) struct UseDefMap<'db> {
reachable_definitions: IndexVec<ScopedPlaceId, ReachableDefinitions>,
/// Snapshot of bindings in this scope that can be used to resolve a reference in a nested
/// eager scope.
eager_snapshots: EagerSnapshots,
/// scope.
enclosing_snapshots: EnclosingSnapshots,
/// Whether or not the end of the scope is reachable.
///
@ -371,9 +371,9 @@ impl<'db> UseDefMap<'db> {
constraint_ids: self.narrowing_constraints.iter_predicates(constraint),
})
}
ConstraintKey::EagerNestedScope(nested_scope) => {
let EagerSnapshotResult::FoundBindings(bindings) =
index.eager_snapshot(enclosing_scope, expr, nested_scope)
ConstraintKey::NestedScope(nested_scope) => {
let EnclosingSnapshotResult::FoundBindings(bindings) =
index.enclosing_snapshot(enclosing_scope, expr, nested_scope)
else {
unreachable!(
"The result of `SemanticIndex::eager_snapshot` must be `FoundBindings`"
@ -436,18 +436,25 @@ impl<'db> UseDefMap<'db> {
)
}
pub(crate) fn eager_snapshot(
pub(crate) fn enclosing_snapshot(
&self,
eager_bindings: ScopedEagerSnapshotId,
) -> EagerSnapshotResult<'_, 'db> {
match self.eager_snapshots.get(eager_bindings) {
Some(EagerSnapshot::Constraint(constraint)) => {
EagerSnapshotResult::FoundConstraint(*constraint)
snapshot_id: ScopedEnclosingSnapshotId,
nested_laziness: ScopeLaziness,
) -> EnclosingSnapshotResult<'_, 'db> {
let boundness_analysis = if nested_laziness.is_eager() {
BoundnessAnalysis::BasedOnUnboundVisibility
} else {
// TODO: We haven't implemented proper boundness analysis for nonlocal symbols, so we assume the boundness is bound for now.
BoundnessAnalysis::AssumeBound
};
match self.enclosing_snapshots.get(snapshot_id) {
Some(EnclosingSnapshot::Constraint(constraint)) => {
EnclosingSnapshotResult::FoundConstraint(*constraint)
}
Some(EagerSnapshot::Bindings(bindings)) => EagerSnapshotResult::FoundBindings(
self.bindings_iterator(bindings, BoundnessAnalysis::BasedOnUnboundVisibility),
Some(EnclosingSnapshot::Bindings(bindings)) => EnclosingSnapshotResult::FoundBindings(
self.bindings_iterator(bindings, boundness_analysis),
),
None => EagerSnapshotResult::NotFound,
None => EnclosingSnapshotResult::NotFound,
}
}
@ -566,30 +573,37 @@ impl<'db> UseDefMap<'db> {
}
}
/// Uniquely identifies a snapshot of a place state that can be used to resolve a reference in a
/// nested eager scope.
/// Uniquely identifies a snapshot of an enclosing scope place state that can be used to resolve a reference in a
/// nested scope.
///
/// An eager scope has its entire body executed immediately at the location where it is defined.
/// For any free references in the nested scope, we use the bindings that are visible at the point
/// where the nested scope is defined, instead of using the public type of the place.
///
/// There is a unique ID for each distinct [`EagerSnapshotKey`] in the file.
/// There is a unique ID for each distinct [`EnclosingSnapshotKey`] in the file.
#[newtype_index]
#[derive(get_size2::GetSize)]
pub(crate) struct ScopedEagerSnapshotId;
pub(crate) struct ScopedEnclosingSnapshotId;
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, get_size2::GetSize)]
pub(crate) struct EagerSnapshotKey {
pub(crate) struct EnclosingSnapshotKey {
/// The enclosing scope containing the bindings
pub(crate) enclosing_scope: FileScopeId,
/// The referenced place (in the enclosing scope)
pub(crate) enclosing_place: ScopedPlaceId,
/// The nested eager scope containing the reference
/// The nested scope containing the reference
pub(crate) nested_scope: FileScopeId,
/// Laziness of the nested scope
pub(crate) nested_laziness: ScopeLaziness,
}
/// A snapshot of place states that can be used to resolve a reference in a nested eager scope.
type EagerSnapshots = IndexVec<ScopedEagerSnapshotId, EagerSnapshot>;
/// A snapshot of enclosing scope place states that can be used to resolve a reference in a nested scope.
/// Normally, if the current scope is lazily evaluated,
/// we do not snapshot the place states from the enclosing scope,
/// and infer the type of the place from its reachable definitions
/// (and any narrowing constraints introduced in the enclosing scope do not apply to the current scope).
/// The exception is if the symbol has never been reassigned, in which case it is snapshotted.
type EnclosingSnapshots = IndexVec<ScopedEnclosingSnapshotId, EnclosingSnapshot>;
#[derive(Debug)]
pub(crate) struct BindingWithConstraintsIterator<'map, 'db> {
@ -757,8 +771,8 @@ pub(super) struct UseDefMapBuilder<'db> {
reachable_definitions: IndexVec<ScopedPlaceId, ReachableDefinitions>,
/// Snapshots of place states in this scope that can be used to resolve a reference in a
/// nested eager scope.
eager_snapshots: EagerSnapshots,
/// nested scope.
enclosing_snapshots: EnclosingSnapshots,
/// Is this a class scope?
is_class_scope: bool,
@ -778,10 +792,11 @@ impl<'db> UseDefMapBuilder<'db> {
bindings_by_definition: FxHashMap::default(),
place_states: IndexVec::new(),
reachable_definitions: IndexVec::new(),
eager_snapshots: EagerSnapshots::default(),
enclosing_snapshots: EnclosingSnapshots::default(),
is_class_scope,
}
}
pub(super) fn mark_unreachable(&mut self) {
self.reachability = ScopedReachabilityConstraintId::ALWAYS_FALSE;
@ -1022,23 +1037,23 @@ impl<'db> UseDefMapBuilder<'db> {
self.node_reachability.insert(node_key, self.reachability);
}
pub(super) fn snapshot_eager_state(
pub(super) fn snapshot_outer_state(
&mut self,
enclosing_place: ScopedPlaceId,
scope: ScopeKind,
enclosing_place_expr: &PlaceExprWithFlags,
) -> ScopedEagerSnapshotId {
) -> ScopedEnclosingSnapshotId {
// Names bound in class scopes are never visible to nested scopes (but attributes/subscripts are visible),
// so we never need to save eager scope bindings in a class scope.
if (scope.is_class() && enclosing_place_expr.is_name()) || !enclosing_place_expr.is_bound()
{
self.eager_snapshots.push(EagerSnapshot::Constraint(
self.enclosing_snapshots.push(EnclosingSnapshot::Constraint(
self.place_states[enclosing_place]
.bindings()
.unbound_narrowing_constraint(),
))
} else {
self.eager_snapshots.push(EagerSnapshot::Bindings(
self.enclosing_snapshots.push(EnclosingSnapshot::Bindings(
self.place_states[enclosing_place].bindings().clone(),
))
}
@ -1144,7 +1159,7 @@ impl<'db> UseDefMapBuilder<'db> {
for bindings in self.bindings_by_definition.values_mut() {
bindings.finish(&mut self.reachability_constraints);
}
for eager_snapshot in &mut self.eager_snapshots {
for eager_snapshot in &mut self.enclosing_snapshots {
eager_snapshot.finish(&mut self.reachability_constraints);
}
self.reachability_constraints.mark_used(self.reachability);
@ -1160,7 +1175,7 @@ impl<'db> UseDefMapBuilder<'db> {
self.node_reachability.shrink_to_fit();
self.declarations_by_binding.shrink_to_fit();
self.bindings_by_definition.shrink_to_fit();
self.eager_snapshots.shrink_to_fit();
self.enclosing_snapshots.shrink_to_fit();
UseDefMap {
all_definitions: self.all_definitions,
@ -1173,7 +1188,7 @@ impl<'db> UseDefMapBuilder<'db> {
reachable_definitions: self.reachable_definitions,
declarations_by_binding: self.declarations_by_binding,
bindings_by_definition: self.bindings_by_definition,
eager_snapshots: self.eager_snapshots,
enclosing_snapshots: self.enclosing_snapshots,
end_of_scope_reachability: self.reachability,
}
}

View file

@ -181,22 +181,22 @@ impl Declarations {
}
}
/// A snapshot of a place state that can be used to resolve a reference in a nested eager scope.
/// If there are bindings in a (non-class) scope , they are stored in `Bindings`.
/// A snapshot of a place state that can be used to resolve a reference in a nested scope.
/// If there are bindings in a (non-class) scope, they are stored in `Bindings`.
/// Even if it's a class scope (class variables are not visible to nested scopes) or there are no
/// bindings, the current narrowing constraint is necessary for narrowing, so it's stored in
/// `Constraint`.
#[derive(Clone, Debug, PartialEq, Eq, salsa::Update, get_size2::GetSize)]
pub(super) enum EagerSnapshot {
pub(super) enum EnclosingSnapshot {
Constraint(ScopedNarrowingConstraint),
Bindings(Bindings),
}
impl EagerSnapshot {
impl EnclosingSnapshot {
pub(super) fn finish(&mut self, reachability_constraints: &mut ReachabilityConstraintsBuilder) {
match self {
EagerSnapshot::Constraint(_) => {}
EagerSnapshot::Bindings(bindings) => {
Self::Constraint(_) => {}
Self::Bindings(bindings) => {
bindings.finish(reachability_constraints);
}
}