Use separate structs for expression and statement tracking (#6351)

## Summary

This PR fixes the performance degradation introduced in https://github.com/astral-sh/ruff/pull/6345. Instead of using the generic `Nodes` structs, we now use separate `Statement` and `Expression` structs. Importantly, we can avoid tracking a bunch of state for expressions that we need for parents: we don't need to track reference-to-ID pointers (we just have no use-case for this -- I'd actually like to remove this from statements too, but we need it for branch detection right now), we don't need to track depth, etc. In my testing, this entirely removes the regression on all-rules, and gets us down to 2ms slower on the default rules (in a crude hyperfine benchmark, so this is within the margin of error IMO). No behavioral changes.
2025-09-29 13:25:17 +00:00 · 2023-08-07 11:27:42 -04:00 · 2023-08-07 11:27:42 -04:00 · b21abe0a57
commit b21abe0a57
parent 61d3977f95
11 changed files with 223 additions and 157 deletions
--- a/crates/ruff_python_semantic/src/model.rs
+++ b/crates/ruff_python_semantic/src/model.rs
@ -17,13 +17,15 @@ use crate::binding::{
 };
 use crate::context::ExecutionContext;
 use crate::definition::{Definition, DefinitionId, Definitions, Member, Module};
+use crate::expressions::{ExpressionId, Expressions};
 use crate::globals::{Globals, GlobalsArena};
-use crate::node::{NodeId, Nodes};
 use crate::reference::{
-    ResolvedReference, ResolvedReferenceId, ResolvedReferences, UnresolvedReferences,
+    ResolvedReference, ResolvedReferenceId, ResolvedReferences, UnresolvedReference,
+    UnresolvedReferenceFlags, UnresolvedReferences,
 };
 use crate::scope::{Scope, ScopeId, ScopeKind, Scopes};
-use crate::{Imported, UnresolvedReference, UnresolvedReferenceFlags};
+use crate::statements::{StatementId, Statements};
+use crate::Imported;

 /// A semantic model for a Python module, to enable querying the module's semantic information.
 pub struct SemanticModel<'a> {
@ -31,16 +33,16 @@ pub struct SemanticModel<'a> {
    module_path: Option<&'a [String]>,

    /// Stack of all visited statements.
-    statements: Nodes<'a, Stmt>,
+    statements: Statements<'a>,

    /// The identifier of the current statement.
-    statement_id: Option<NodeId>,
+    statement_id: Option<StatementId>,

    /// Stack of all visited expressions.
-    expressions: Nodes<'a, Expr>,
+    expressions: Expressions<'a>,

    /// The identifier of the current expression.
-    expression_id: Option<NodeId>,
+    expression_id: Option<ExpressionId>,

    /// Stack of all scopes, along with the identifier of the current scope.
    pub scopes: Scopes<'a>,
@ -132,9 +134,9 @@ impl<'a> SemanticModel<'a> {
        Self {
            typing_modules,
            module_path: module.path(),
-            statements: Nodes::<Stmt>::default(),
+            statements: Statements::default(),
            statement_id: None,
-            expressions: Nodes::<Expr>::default(),
+            expressions: Expressions::default(),
            expression_id: None,
            scopes: Scopes::default(),
            scope_id: ScopeId::global(),
@ -919,20 +921,20 @@ impl<'a> SemanticModel<'a> {
        None
    }

-    /// Return the [`Nodes`] vector of all statements.
-    pub const fn statements(&self) -> &Nodes<'a, Stmt> {
+    /// Return the [`Statements`] vector of all statements.
+    pub const fn statements(&self) -> &Statements<'a> {
        &self.statements
    }

-    /// Return the [`NodeId`] corresponding to the given [`Stmt`].
+    /// Return the [`StatementId`] corresponding to the given [`Stmt`].
    #[inline]
-    pub fn statement_id(&self, statement: &Stmt) -> Option<NodeId> {
-        self.statements.node_id(statement)
+    pub fn statement_id(&self, statement: &Stmt) -> Option<StatementId> {
+        self.statements.statement_id(statement)
    }

-    /// Return the [`Stmt]` corresponding to the given [`NodeId`].
+    /// Return the [`Stmt`] corresponding to the given [`StatementId`].
    #[inline]
-    pub fn statement(&self, statement_id: NodeId) -> &'a Stmt {
+    pub fn statement(&self, statement_id: StatementId) -> &'a Stmt {
        self.statements[statement_id]
    }

@ -1519,8 +1521,8 @@ impl SemanticModelFlags {
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub struct Snapshot {
    scope_id: ScopeId,
-    stmt_id: Option<NodeId>,
-    expr_id: Option<NodeId>,
+    stmt_id: Option<StatementId>,
+    expr_id: Option<ExpressionId>,
    definition_id: DefinitionId,
    flags: SemanticModelFlags,
 }