[ty] Use an interval map for scopes by expression (#19025)

This commit is contained in:
Micha Reiser 2025-07-14 13:50:58 +02:00 committed by GitHub
parent f22da352db
commit 3560f86450
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 136 additions and 22 deletions

View file

@ -5,6 +5,7 @@ use ruff_db::files::File;
use ruff_db::parsed::parsed_module;
use ruff_index::{IndexSlice, IndexVec};
use ruff_python_ast::NodeIndex;
use ruff_python_parser::semantic_errors::SemanticSyntaxError;
use rustc_hash::{FxHashMap, FxHashSet};
use salsa::Update;
@ -24,6 +25,7 @@ use crate::semantic_index::place::{
ScopeKind, ScopedPlaceId,
};
use crate::semantic_index::use_def::{EagerSnapshotKey, ScopedEagerSnapshotId, UseDefMap};
use crate::semantic_model::HasTrackedScope;
use crate::util::get_size::untracked_arc_size;
pub mod ast_ids;
@ -203,7 +205,7 @@ pub(crate) struct SemanticIndex<'db> {
scopes: IndexVec<FileScopeId, Scope>,
/// Map expressions to their corresponding scope.
scopes_by_expression: FxHashMap<ExpressionNodeKey, FileScopeId>,
scopes_by_expression: ExpressionsScopeMap,
/// Map from a node creating a definition to its definition.
definitions_by_node: FxHashMap<DefinitionNodeKey, Definitions<'db>>,
@ -268,25 +270,26 @@ impl<'db> SemanticIndex<'db> {
/// Returns the ID of the `expression`'s enclosing scope.
#[track_caller]
pub(crate) fn expression_scope_id(
&self,
expression: impl Into<ExpressionNodeKey>,
) -> FileScopeId {
self.scopes_by_expression[&expression.into()]
pub(crate) fn expression_scope_id<E>(&self, expression: &E) -> FileScopeId
where
E: HasTrackedScope,
{
self.try_expression_scope_id(expression)
.expect("Expression to be part of a scope if it is from the same module")
}
/// Returns the ID of the `expression`'s enclosing scope.
pub(crate) fn try_expression_scope_id(
&self,
expression: impl Into<ExpressionNodeKey>,
) -> Option<FileScopeId> {
self.scopes_by_expression.get(&expression.into()).copied()
pub(crate) fn try_expression_scope_id<E>(&self, expression: &E) -> Option<FileScopeId>
where
E: HasTrackedScope,
{
self.scopes_by_expression.try_get(expression)
}
/// Returns the [`Scope`] of the `expression`'s enclosing scope.
#[allow(unused)]
#[track_caller]
pub(crate) fn expression_scope(&self, expression: impl Into<ExpressionNodeKey>) -> &Scope {
pub(crate) fn expression_scope(&self, expression: &impl HasTrackedScope) -> &Scope {
&self.scopes[self.expression_scope_id(expression)]
}
@ -614,6 +617,38 @@ impl<'a> Iterator for ChildrenIter<'a> {
impl FusedIterator for ChildrenIter<'_> {}
/// Interval map that maps a range of expression node ids to their corresponding scopes.
///
/// Lookups require `O(log n)` time, where `n` is roughly the number of scopes (roughly
/// because sub-scopes can be interleaved with expressions in the outer scope, e.g. function, some statements, a function).
#[derive(Eq, PartialEq, Debug, get_size2::GetSize, Default)]
struct ExpressionsScopeMap(Box<[(std::ops::RangeInclusive<NodeIndex>, FileScopeId)]>);
impl ExpressionsScopeMap {
fn try_get<E>(&self, node: &E) -> Option<FileScopeId>
where
E: HasTrackedScope,
{
let node_index = node.node_index().load();
let entry = self
.0
.binary_search_by_key(&node_index, |(range, _)| *range.start());
let index = match entry {
Ok(index) => index,
Err(index) => index.checked_sub(1)?,
};
let (range, scope) = &self.0[index];
if range.contains(&node_index) {
Some(*scope)
} else {
None
}
}
}
#[cfg(test)]
mod tests {
use ruff_db::files::{File, system_path_to_file};

View file

@ -10,7 +10,7 @@ use ruff_db::source::{SourceText, source_text};
use ruff_index::IndexVec;
use ruff_python_ast::name::Name;
use ruff_python_ast::visitor::{Visitor, walk_expr, walk_pattern, walk_stmt};
use ruff_python_ast::{self as ast, PySourceType, PythonVersion};
use ruff_python_ast::{self as ast, NodeIndex, PySourceType, PythonVersion};
use ruff_python_parser::semantic_errors::{
SemanticSyntaxChecker, SemanticSyntaxContext, SemanticSyntaxError, SemanticSyntaxErrorKind,
};
@ -45,7 +45,8 @@ use crate::semantic_index::reachability_constraints::{
use crate::semantic_index::use_def::{
EagerSnapshotKey, FlowSnapshot, ScopedEagerSnapshotId, UseDefMapBuilder,
};
use crate::semantic_index::{ArcUseDefMap, SemanticIndex};
use crate::semantic_index::{ArcUseDefMap, ExpressionsScopeMap, SemanticIndex};
use crate::semantic_model::HasTrackedScope;
use crate::unpack::{Unpack, UnpackKind, UnpackPosition, UnpackValue};
use crate::{Db, Program};
@ -102,7 +103,7 @@ pub(super) struct SemanticIndexBuilder<'db, 'ast> {
ast_ids: IndexVec<FileScopeId, AstIdsBuilder>,
use_def_maps: IndexVec<FileScopeId, UseDefMapBuilder<'db>>,
scopes_by_node: FxHashMap<NodeWithScopeKey, FileScopeId>,
scopes_by_expression: FxHashMap<ExpressionNodeKey, FileScopeId>,
scopes_by_expression: ExpressionsScopeMapBuilder,
definitions_by_node: FxHashMap<DefinitionNodeKey, Definitions<'db>>,
expressions_by_node: FxHashMap<ExpressionNodeKey, Expression<'db>>,
imported_modules: FxHashSet<ModuleName>,
@ -136,7 +137,7 @@ impl<'db, 'ast> SemanticIndexBuilder<'db, 'ast> {
scope_ids_by_scope: IndexVec::new(),
use_def_maps: IndexVec::new(),
scopes_by_expression: FxHashMap::default(),
scopes_by_expression: ExpressionsScopeMapBuilder::new(),
scopes_by_node: FxHashMap::default(),
definitions_by_node: FxHashMap::default(),
expressions_by_node: FxHashMap::default(),
@ -1038,7 +1039,6 @@ impl<'db, 'ast> SemanticIndexBuilder<'db, 'ast> {
place_tables.shrink_to_fit();
use_def_maps.shrink_to_fit();
ast_ids.shrink_to_fit();
self.scopes_by_expression.shrink_to_fit();
self.definitions_by_node.shrink_to_fit();
self.scope_ids_by_scope.shrink_to_fit();
@ -1053,7 +1053,7 @@ impl<'db, 'ast> SemanticIndexBuilder<'db, 'ast> {
expressions_by_node: self.expressions_by_node,
scope_ids_by_scope: self.scope_ids_by_scope,
ast_ids,
scopes_by_expression: self.scopes_by_expression,
scopes_by_expression: self.scopes_by_expression.build(),
scopes_by_node: self.scopes_by_node,
use_def_maps,
imported_modules: Arc::new(self.imported_modules),
@ -2016,7 +2016,7 @@ impl<'ast> Visitor<'ast> for SemanticIndexBuilder<'_, 'ast> {
self.with_semantic_checker(|semantic, context| semantic.visit_expr(expr, context));
self.scopes_by_expression
.insert(expr.into(), self.current_scope());
.record_expression(expr, self.current_scope());
let node_key = NodeKey::from_node(expr);
@ -2654,3 +2654,60 @@ fn dunder_all_extend_argument(value: &ast::Expr) -> Option<&ast::Expr> {
(attr == "__all__").then_some(value)
}
/// Builds an interval-map that matches expressions (by their node index) to their enclosing scopes.
///
/// The interval map is built in a two-step process because the expression ids are assigned in source order,
/// but we visit the expressions in semantic order. Few expressions are registered out of order.
///
/// 1. build a point vector that maps node indices to their corresponding file scopes.
/// 2. Sort the expressions by their starting id. Then condense the point vector into an interval map
/// by collapsing adjacent node indices with the same scope
/// into a single interval.
struct ExpressionsScopeMapBuilder {
expression_and_scope: Vec<(NodeIndex, FileScopeId)>,
}
impl ExpressionsScopeMapBuilder {
fn new() -> Self {
Self {
expression_and_scope: vec![],
}
}
fn record_expression(&mut self, expression: &impl HasTrackedScope, scope: FileScopeId) {
self.expression_and_scope
.push((expression.node_index().load(), scope));
}
fn build(mut self) -> ExpressionsScopeMap {
self.expression_and_scope
.sort_unstable_by_key(|(index, _)| *index);
let mut iter = self.expression_and_scope.into_iter();
let Some(first) = iter.next() else {
return ExpressionsScopeMap::default();
};
let mut interval_map = Vec::new();
let mut current_scope = first.1;
let mut range = first.0..=NodeIndex::from(first.0.as_u32() + 1);
for (index, scope) in iter {
if scope == current_scope {
range = *range.start()..=index;
continue;
}
interval_map.push((range, current_scope));
current_scope = scope;
range = index..=index;
}
interval_map.push((range, current_scope));
ExpressionsScopeMap(interval_map.into_boxed_slice())
}
}

View file

@ -1,7 +1,7 @@
use ruff_db::files::{File, FilePath};
use ruff_db::source::line_index;
use ruff_python_ast as ast;
use ruff_python_ast::{Expr, ExprRef, name::Name};
use ruff_python_ast::{Expr, ExprRef, HasNodeIndex, name::Name};
use ruff_source_file::LineIndex;
use crate::Db;
@ -126,7 +126,7 @@ impl<'db> SemanticModel<'db> {
// expression that we're in, then just
// fall back to the global scope.
None => Some(FileScopeId::global()),
Some(expr) => index.try_expression_scope_id(expr),
Some(expr) => index.try_expression_scope_id(&expr),
},
}) else {
return vec![];
@ -297,7 +297,7 @@ pub trait HasType {
impl HasType for ast::ExprRef<'_> {
fn inferred_type<'db>(&self, model: &SemanticModel<'db>) -> Type<'db> {
let index = semantic_index(model.db, model.file);
let file_scope = index.expression_scope_id(*self);
let file_scope = index.expression_scope_id(self);
let scope = file_scope.to_scope_id(model.db, model.file);
infer_scope_types(model.db, scope).expression_type(*self)
@ -419,6 +419,18 @@ impl HasType for ast::Alias {
}
}
/// Implemented by types for which the semantic index tracks their scope.
pub(crate) trait HasTrackedScope: HasNodeIndex {}
impl HasTrackedScope for ast::Expr {}
impl HasTrackedScope for ast::ExprRef<'_> {}
impl HasTrackedScope for &ast::ExprRef<'_> {}
// See https://github.com/astral-sh/ty/issues/572 why this implementation exists
// even when we never register identifiers during semantic index building.
impl HasTrackedScope for ast::Identifier {}
#[cfg(test)]
mod tests {
use ruff_db::files::system_path_to_file;