mirror of
https://github.com/astral-sh/ruff.git
synced 2025-09-04 01:20:57 +00:00
Use a single node hierarchy to track statements and expressions (#6709)
## Summary This PR is a follow-up to the suggestion in https://github.com/astral-sh/ruff/pull/6345#discussion_r1285470953 to use a single stack to store all statements and expressions, rather than using separate vectors for each, which gives us something closer to a full-fidelity chain. (We can then generalize this concept to include all other AST nodes too.) This is in part made possible by the removal of the hash map from `&Stmt` to `StatementId` (#6694), which makes it much cheaper to store these using a single interface (since doing so no longer introduces the requirement that we hash all expressions). I'll follow up with some profiling, but a few notes on how the data requirements have changed: - We now store a `BranchId` for every expression, not just every statement, so that's an extra `u32`. - We now store a single `NodeId` on every snapshot, rather than separate `StatementId` and `ExpressionId` IDs, so that's one fewer `u32` for each snapshot. - We're probably doing a few more lookups in general, since any calls to `current_statement()` etc. now have to iterate up the node hierarchy until they identify the first statement. ## Test Plan `cargo test`
This commit is contained in:
parent
abc5065fc7
commit
424b8d4ad2
12 changed files with 268 additions and 299 deletions
136
crates/ruff_python_semantic/src/nodes.rs
Normal file
136
crates/ruff_python_semantic/src/nodes.rs
Normal file
|
@ -0,0 +1,136 @@
|
|||
use std::ops::Index;
|
||||
|
||||
use ruff_index::{newtype_index, IndexVec};
|
||||
use ruff_python_ast::{Expr, Ranged, Stmt};
|
||||
use ruff_text_size::TextRange;
|
||||
|
||||
use crate::BranchId;
|
||||
|
||||
/// Id uniquely identifying an AST node in a program.
|
||||
///
|
||||
/// Using a `u32` is sufficient because Ruff only supports parsing documents with a size of max
|
||||
/// `u32::max` and it is impossible to have more nodes than characters in the file. We use a
|
||||
/// `NonZeroU32` to take advantage of memory layout optimizations.
|
||||
#[newtype_index]
|
||||
#[derive(Ord, PartialOrd)]
|
||||
pub struct NodeId;
|
||||
|
||||
/// An AST node in a program, along with a pointer to its parent node (if any).
|
||||
#[derive(Debug)]
|
||||
struct NodeWithParent<'a> {
|
||||
/// A pointer to the AST node.
|
||||
node: NodeRef<'a>,
|
||||
/// The ID of the parent of this node, if any.
|
||||
parent: Option<NodeId>,
|
||||
/// The branch ID of this node, if any.
|
||||
branch: Option<BranchId>,
|
||||
}
|
||||
|
||||
/// The nodes of a program indexed by [`NodeId`]
|
||||
#[derive(Debug, Default)]
|
||||
pub struct Nodes<'a> {
|
||||
nodes: IndexVec<NodeId, NodeWithParent<'a>>,
|
||||
}
|
||||
|
||||
impl<'a> Nodes<'a> {
|
||||
/// Inserts a new AST node into the tree and returns its unique ID.
|
||||
pub(crate) fn insert(
|
||||
&mut self,
|
||||
node: NodeRef<'a>,
|
||||
parent: Option<NodeId>,
|
||||
branch: Option<BranchId>,
|
||||
) -> NodeId {
|
||||
self.nodes.push(NodeWithParent {
|
||||
node,
|
||||
parent,
|
||||
branch,
|
||||
})
|
||||
}
|
||||
|
||||
/// Return the [`NodeId`] of the parent node.
|
||||
#[inline]
|
||||
pub fn parent_id(&self, node_id: NodeId) -> Option<NodeId> {
|
||||
self.nodes[node_id].parent
|
||||
}
|
||||
|
||||
/// Return the [`BranchId`] of the branch node.
|
||||
#[inline]
|
||||
pub(crate) fn branch_id(&self, node_id: NodeId) -> Option<BranchId> {
|
||||
self.nodes[node_id].branch
|
||||
}
|
||||
|
||||
/// Returns an iterator over all [`NodeId`] ancestors, starting from the given [`NodeId`].
|
||||
pub(crate) fn ancestor_ids(&self, node_id: NodeId) -> impl Iterator<Item = NodeId> + '_ {
|
||||
std::iter::successors(Some(node_id), |&node_id| self.nodes[node_id].parent)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Index<NodeId> for Nodes<'a> {
|
||||
type Output = NodeRef<'a>;
|
||||
|
||||
#[inline]
|
||||
fn index(&self, index: NodeId) -> &Self::Output {
|
||||
&self.nodes[index].node
|
||||
}
|
||||
}
|
||||
|
||||
/// A reference to an AST node. Like [`ruff_python_ast::node::AnyNodeRef`], but wraps the node
|
||||
/// itself (like [`Stmt`]) rather than the narrowed type (like [`ruff_python_ast::StmtAssign`]).
|
||||
///
|
||||
/// TODO(charlie): Replace with [`ruff_python_ast::node::AnyNodeRef`]. This requires migrating
|
||||
/// the rest of the codebase to use [`ruff_python_ast::node::AnyNodeRef`] and related abstractions,
|
||||
/// like [`ruff_python_ast::ExpressionRef`] instead of [`Expr`].
|
||||
#[derive(Copy, Clone, Debug, PartialEq)]
|
||||
pub enum NodeRef<'a> {
|
||||
Stmt(&'a Stmt),
|
||||
Expr(&'a Expr),
|
||||
}
|
||||
|
||||
impl<'a> NodeRef<'a> {
|
||||
/// Returns the [`Stmt`] if this is a statement, or `None` if the reference is to another
|
||||
/// kind of AST node.
|
||||
pub fn as_statement(&self) -> Option<&'a Stmt> {
|
||||
match self {
|
||||
NodeRef::Stmt(stmt) => Some(stmt),
|
||||
NodeRef::Expr(_) => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the [`Expr`] if this is a expression, or `None` if the reference is to another
|
||||
/// kind of AST node.
|
||||
pub fn as_expression(&self) -> Option<&'a Expr> {
|
||||
match self {
|
||||
NodeRef::Stmt(_) => None,
|
||||
NodeRef::Expr(expr) => Some(expr),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_statement(&self) -> bool {
|
||||
self.as_statement().is_some()
|
||||
}
|
||||
|
||||
pub fn is_expression(&self) -> bool {
|
||||
self.as_expression().is_some()
|
||||
}
|
||||
}
|
||||
|
||||
impl Ranged for NodeRef<'_> {
|
||||
fn range(&self) -> TextRange {
|
||||
match self {
|
||||
NodeRef::Stmt(stmt) => stmt.range(),
|
||||
NodeRef::Expr(expr) => expr.range(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<&'a Expr> for NodeRef<'a> {
    /// Wraps a borrowed expression in [`NodeRef::Expr`].
    fn from(expr: &'a Expr) -> Self {
        Self::Expr(expr)
    }
}
|
||||
|
||||
impl<'a> From<&'a Stmt> for NodeRef<'a> {
    /// Wraps a borrowed statement in [`NodeRef::Stmt`].
    fn from(stmt: &'a Stmt) -> Self {
        Self::Stmt(stmt)
    }
}
|
Loading…
Add table
Add a link
Reference in a new issue