Use separate structs for expression and statement tracking (#6351)

## Summary

This PR fixes the performance degradation introduced in
https://github.com/astral-sh/ruff/pull/6345. Instead of using the
generic `Nodes` structs, we now use separate `Statement` and
`Expression` structs. Importantly, we can avoid tracking a bunch of
state for expressions that we need for statements: we don't need to track
reference-to-ID pointers (we just have no use-case for this -- I'd
actually like to remove this from statements too, but we need it for
branch detection right now), we don't need to track depth, etc.

In my testing, this entirely removes the regression on all-rules, and
gets us down to 2ms slower on the default rules (as a crude hyperfine
benchmark, so this is within margin of error IMO).

No behavioral changes.
This commit is contained in:
Charlie Marsh 2023-08-07 11:27:42 -04:00 committed by GitHub
parent 61d3977f95
commit b21abe0a57
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 223 additions and 157 deletions

View file

@ -0,0 +1,58 @@
use std::ops::Index;
use ruff_index::{newtype_index, IndexVec};
use ruff_python_ast::Expr;
/// Id uniquely identifying an expression in a program.
///
/// Using a `u32` is sufficient because Ruff only supports parsing documents with a size of max
/// `u32::MAX` and it is impossible to have more nodes than characters in the file. We use a
/// `NonZeroU32` to take advantage of memory layout optimizations.
///
/// IDs are handed out by [`Expressions::insert`] and are used to index into [`Expressions`].
#[newtype_index]
#[derive(Ord, PartialOrd)]
pub struct ExpressionId;
/// An [`Expr`] AST node in a program, along with the ID of its parent expression (if any).
///
/// This is the element type stored inside [`Expressions`]; it is private to this module.
#[derive(Debug)]
struct ExpressionWithParent<'a> {
/// A reference to the AST node, borrowed for `'a`.
node: &'a Expr,
/// The ID of the parent of this node, or `None` if no parent was supplied on insertion.
parent: Option<ExpressionId>,
}
/// The expression nodes of a program, indexed by [`ExpressionId`].
///
/// Indexing with an [`ExpressionId`] yields the stored `&Expr`; the parent link recorded for
/// each node is available through [`Expressions::parent_id`].
#[derive(Debug, Default)]
pub struct Expressions<'a> {
/// Backing storage: one entry per inserted expression, keyed by the ID returned on insertion.
nodes: IndexVec<ExpressionId, ExpressionWithParent<'a>>,
}
impl<'a> Expressions<'a> {
    /// Records `node` together with its (optional) `parent` and returns the [`ExpressionId`]
    /// assigned to the new entry.
    pub(crate) fn insert(&mut self, node: &'a Expr, parent: Option<ExpressionId>) -> ExpressionId {
        let entry = ExpressionWithParent { node, parent };
        self.nodes.push(entry)
    }

    /// Looks up the parent recorded for `node_id`.
    ///
    /// Returns `None` when the expression was inserted without a parent.
    #[inline]
    pub fn parent_id(&self, node_id: ExpressionId) -> Option<ExpressionId> {
        let entry = &self.nodes[node_id];
        entry.parent
    }

    /// Walks the parent chain of `node_id`.
    ///
    /// The iterator yields `node_id` itself first, then each successive parent, and stops once
    /// an expression without a parent has been yielded.
    pub(crate) fn ancestor_ids(
        &self,
        node_id: ExpressionId,
    ) -> impl Iterator<Item = ExpressionId> + '_ {
        let start = Some(node_id);
        // `self` is a shared reference, so moving it into the closure only copies the reference.
        std::iter::successors(start, move |&current| self.parent_id(current))
    }
}
impl<'a> Index<ExpressionId> for Expressions<'a> {
    type Output = &'a Expr;

    /// Returns the AST node stored under `index`, ignoring its parent link.
    #[inline]
    fn index(&self, index: ExpressionId) -> &Self::Output {
        let entry = &self.nodes[index];
        &entry.node
    }
}