mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-11 06:08:34 +00:00
Use separate structs for expression and statement tracking (#6351)
## Summary This PR fixes the performance degradation introduced in https://github.com/astral-sh/ruff/pull/6345. Instead of using the generic `Nodes` structs, we now use separate `Statement` and `Expression` structs. Importantly, we can avoid tracking a bunch of state for expressions that we need for parents: we don't need to track reference-to-ID pointers (we just have no use-case for this -- I'd actually like to remove this from statements too, but we need it for branch detection right now), we don't need to track depth, etc. In my testing, this entirely removes the regression on all-rules, and gets us down to 2ms slower on the default rules (as a crude hyperfine benchmark, so this is within margin of error IMO). No behavioral changes.
This commit is contained in:
parent
61d3977f95
commit
b21abe0a57
11 changed files with 223 additions and 157 deletions
58
crates/ruff_python_semantic/src/expressions.rs
Normal file
58
crates/ruff_python_semantic/src/expressions.rs
Normal file
|
@ -0,0 +1,58 @@
|
|||
use std::ops::Index;
|
||||
|
||||
use ruff_index::{newtype_index, IndexVec};
|
||||
use ruff_python_ast::Expr;
|
||||
|
||||
/// Id uniquely identifying an expression in a program.
|
||||
///
|
||||
/// Using a `u32` is sufficient because Ruff only supports parsing documents with a size of max
|
||||
/// `u32::max` and it is impossible to have more nodes than characters in the file. We use a
|
||||
/// `NonZeroU32` to take advantage of memory layout optimizations.
|
||||
#[newtype_index]
|
||||
#[derive(Ord, PartialOrd)]
|
||||
pub struct ExpressionId;
|
||||
|
||||
/// An [`Expr`] AST node in a program, along with a pointer to its parent expression (if any).
|
||||
#[derive(Debug)]
|
||||
struct ExpressionWithParent<'a> {
|
||||
/// A pointer to the AST node.
|
||||
node: &'a Expr,
|
||||
/// The ID of the parent of this node, if any.
|
||||
parent: Option<ExpressionId>,
|
||||
}
|
||||
|
||||
/// The nodes of a program indexed by [`ExpressionId`]
|
||||
#[derive(Debug, Default)]
|
||||
pub struct Expressions<'a> {
|
||||
nodes: IndexVec<ExpressionId, ExpressionWithParent<'a>>,
|
||||
}
|
||||
|
||||
impl<'a> Expressions<'a> {
|
||||
/// Inserts a new expression into the node tree and returns its unique id.
|
||||
pub(crate) fn insert(&mut self, node: &'a Expr, parent: Option<ExpressionId>) -> ExpressionId {
|
||||
self.nodes.push(ExpressionWithParent { node, parent })
|
||||
}
|
||||
|
||||
/// Return the [`ExpressionId`] of the parent node.
|
||||
#[inline]
|
||||
pub fn parent_id(&self, node_id: ExpressionId) -> Option<ExpressionId> {
|
||||
self.nodes[node_id].parent
|
||||
}
|
||||
|
||||
/// Returns an iterator over all [`ExpressionId`] ancestors, starting from the given [`ExpressionId`].
|
||||
pub(crate) fn ancestor_ids(
|
||||
&self,
|
||||
node_id: ExpressionId,
|
||||
) -> impl Iterator<Item = ExpressionId> + '_ {
|
||||
std::iter::successors(Some(node_id), |&node_id| self.nodes[node_id].parent)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Index<ExpressionId> for Expressions<'a> {
|
||||
type Output = &'a Expr;
|
||||
|
||||
#[inline]
|
||||
fn index(&self, index: ExpressionId) -> &Self::Output {
|
||||
&self.nodes[index].node
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue