Replace parents statement stack with a Nodes abstraction (#4233)

This commit is contained in:
Charlie Marsh 2023-05-06 12:12:41 -04:00 committed by GitHub
parent 2c91412321
commit c1f0661225
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
22 changed files with 362 additions and 279 deletions

View file

@ -0,0 +1,104 @@
use std::cmp::Ordering;
use ruff_python_ast::types::RefEquality;
use rustpython_parser::ast::ExcepthandlerKind::ExceptHandler;
use rustpython_parser::ast::{Stmt, StmtKind};
use crate::node::Nodes;
/// Find the closest statement that is an ancestor of (or identical to) both `left` and `right`.
///
/// The search walks upwards through `node_tree`, always lifting whichever side is deeper (or
/// both sides when they sit at the same depth) until the two sides meet.
///
/// Returns `None` when:
/// - either side reaches `stop` (if one was given) before the two sides meet,
/// - either statement is not registered in `node_tree`, or
/// - a side that needs to be lifted has no parent.
fn common_ancestor<'a>(
    left: &'a Stmt,
    right: &'a Stmt,
    stop: Option<&'a Stmt>,
    node_tree: &Nodes<'a>,
) -> Option<&'a Stmt> {
    let mut lhs = left;
    let mut rhs = right;
    loop {
        // Hitting the boundary on either side means there is no ancestor below `stop`.
        if let Some(boundary) = stop {
            if RefEquality(lhs) == RefEquality(boundary)
                || RefEquality(rhs) == RefEquality(boundary)
            {
                return None;
            }
        }

        // Both sides converged on the same statement.
        if RefEquality(lhs) == RefEquality(rhs) {
            return Some(lhs);
        }

        let lhs_id = node_tree.node_id(lhs)?;
        let rhs_id = node_tree.node_id(rhs)?;
        let lhs_depth = node_tree.depth(lhs_id);
        let rhs_depth = node_tree.depth(rhs_id);

        match lhs_depth.cmp(&rhs_depth) {
            // The right side is deeper: lift it one level.
            Ordering::Less => {
                let parent_id = node_tree.parent_id(rhs_id)?;
                rhs = node_tree[parent_id];
            }
            // Same depth: lift both sides together.
            Ordering::Equal => {
                let lhs_parent = node_tree.parent_id(lhs_id)?;
                let rhs_parent = node_tree.parent_id(rhs_id)?;
                lhs = node_tree[lhs_parent];
                rhs = node_tree[rhs_parent];
            }
            // The left side is deeper: lift it one level.
            Ordering::Greater => {
                let parent_id = node_tree.parent_id(lhs_id)?;
                lhs = node_tree[parent_id];
            }
        }
    }
}
/// Collect the mutually exclusive groups of statements ("branches") owned by `stmt`.
///
/// - `if`: a single branch holding the statements of the `if` body.
/// - `try` / `try*`: one branch holding the `try` body followed by the `else` body, then one
///   branch per exception handler body.
/// - `match`: one branch per case body.
/// - Any other statement: no branches.
fn alternatives(stmt: &Stmt) -> Vec<Vec<&Stmt>> {
    match &stmt.node {
        StmtKind::If { body, .. } => {
            let branch: Vec<&Stmt> = body.iter().collect();
            vec![branch]
        }
        StmtKind::Try {
            body,
            handlers,
            orelse,
            ..
        }
        | StmtKind::TryStar {
            body,
            handlers,
            orelse,
            ..
        } => {
            let mut branches: Vec<Vec<&Stmt>> = Vec::with_capacity(handlers.len() + 1);
            // The `try` body and its `else` clause form a single branch.
            branches.push(body.iter().chain(orelse.iter()).collect());
            for handler in handlers {
                let ExceptHandler { body, .. } = &handler.node;
                branches.push(body.iter().collect());
            }
            branches
        }
        StmtKind::Match { cases, .. } => {
            let mut branches: Vec<Vec<&Stmt>> = Vec::with_capacity(cases.len());
            for case in cases {
                branches.push(case.body.iter().collect());
            }
            branches
        }
        _ => Vec::new(),
    }
}
/// Return `true` if `stmt` is a descendant of any of the nodes in `ancestors`.
///
/// A candidate counts when `common_ancestor` finds a meeting point for `stmt` and that
/// candidate without either side reaching `stop` first.
fn descendant_of<'a>(
    stmt: &'a Stmt,
    ancestors: &[&'a Stmt],
    stop: &'a Stmt,
    node_tree: &Nodes<'a>,
) -> bool {
    for &candidate in ancestors {
        if common_ancestor(stmt, candidate, Some(stop), node_tree).is_some() {
            return true;
        }
    }
    false
}
/// Return `true` if `left` and `right` sit on different branches of their closest common
/// ancestor (the branches of an `if`, `try`, or `match` statement, as reported by
/// `alternatives`).
///
/// Returns `false` when the two statements have no common ancestor, or when no branch of that
/// ancestor contains exactly one of them.
pub fn different_forks<'a>(left: &'a Stmt, right: &'a Stmt, node_tree: &Nodes<'a>) -> bool {
    common_ancestor(left, right, None, node_tree).map_or(false, |ancestor| {
        alternatives(ancestor).iter().any(|branch| {
            let left_inside = descendant_of(left, branch, ancestor, node_tree);
            let right_inside = descendant_of(right, branch, ancestor, node_tree);
            // Exactly one of the two statements lives in this branch.
            left_inside != right_inside
        })
    })
}

View file

@ -1,3 +1,4 @@
pub mod branch_detection;
pub mod function_type;
pub mod logging;
pub mod typing;

View file

@ -5,8 +5,6 @@ use bitflags::bitflags;
use ruff_text_size::TextRange;
use rustpython_parser::ast::Stmt;
use ruff_python_ast::types::RefEquality;
use crate::scope::ScopeId;
#[derive(Debug, Clone)]
@ -16,7 +14,7 @@ pub struct Binding<'a> {
/// The context in which the binding was created.
pub context: ExecutionContext,
/// The statement in which the [`Binding`] was defined.
pub source: Option<RefEquality<'a, Stmt>>,
pub source: Option<&'a Stmt>,
/// Tuple of (scope index, range) indicating the scope and range at which
/// the binding was last used in a runtime context.
pub runtime_usage: Option<(ScopeId, TextRange)>,

View file

@ -1,7 +1,6 @@
use std::path::Path;
use nohash_hasher::{BuildNoHashHasher, IntMap};
use rustc_hash::FxHashMap;
use rustpython_parser::ast::{Expr, Stmt};
use smallvec::smallvec;
@ -17,26 +16,26 @@ use crate::binding::{
Binding, BindingId, BindingKind, Bindings, Exceptions, ExecutionContext, FromImportation,
Importation, SubmoduleImportation,
};
use crate::node::{NodeId, Nodes};
use crate::scope::{Scope, ScopeId, ScopeKind, Scopes};
#[allow(clippy::struct_excessive_bools)]
pub struct Context<'a> {
pub typing_modules: &'a [String],
pub module_path: Option<Vec<String>>,
// Retain all scopes and parent nodes, along with a stack of indices to track which are active
// at various points in time.
pub parents: Vec<RefEquality<'a, Stmt>>,
pub depths: FxHashMap<RefEquality<'a, Stmt>, usize>,
pub child_to_parent: FxHashMap<RefEquality<'a, Stmt>, RefEquality<'a, Stmt>>,
// Stack of all visited statements, along with the identifier of the current statement.
pub stmts: Nodes<'a>,
pub stmt_id: Option<NodeId>,
// Stack of all scopes, along with the identifier of the current scope.
pub scopes: Scopes<'a>,
pub scope_id: ScopeId,
pub dead_scopes: Vec<ScopeId>,
// A stack of all bindings created in any scope, at any point in execution.
pub bindings: Bindings<'a>,
// Map from binding index to indexes of bindings that shadow it in other scopes.
pub shadowed_bindings:
std::collections::HashMap<BindingId, Vec<BindingId>, BuildNoHashHasher<BindingId>>,
pub exprs: Vec<RefEquality<'a, Expr>>,
pub scopes: Scopes<'a>,
pub scope_id: ScopeId,
pub dead_scopes: Vec<ScopeId>,
// Body iteration; used to peek at siblings.
pub body: &'a [Stmt],
pub body_index: usize,
@ -68,15 +67,14 @@ impl<'a> Context<'a> {
Self {
typing_modules,
module_path,
parents: Vec::default(),
depths: FxHashMap::default(),
child_to_parent: FxHashMap::default(),
bindings: Bindings::default(),
shadowed_bindings: IntMap::default(),
exprs: Vec::default(),
stmts: Nodes::default(),
stmt_id: None,
scopes: Scopes::default(),
scope_id: ScopeId::global(),
dead_scopes: Vec::default(),
bindings: Bindings::default(),
shadowed_bindings: IntMap::default(),
exprs: Vec::default(),
body: &[],
body_index: 0,
visible_scope: VisibleScope {
@ -254,10 +252,7 @@ impl<'a> Context<'a> {
.take(scope_index)
.all(|scope| scope.get(name).is_none())
{
return Some((
binding.source.as_ref().unwrap().into(),
format!("{name}.{member}"),
));
return Some((binding.source.unwrap(), format!("{name}.{member}")));
}
}
}
@ -273,10 +268,7 @@ impl<'a> Context<'a> {
.take(scope_index)
.all(|scope| scope.get(name).is_none())
{
return Some((
binding.source.as_ref().unwrap().into(),
(*name).to_string(),
));
return Some((binding.source.unwrap(), (*name).to_string()));
}
}
}
@ -291,10 +283,7 @@ impl<'a> Context<'a> {
.take(scope_index)
.all(|scope| scope.get(name).is_none())
{
return Some((
binding.source.as_ref().unwrap().into(),
format!("{name}.{member}"),
));
return Some((binding.source.unwrap(), format!("{name}.{member}")));
}
}
}
@ -306,18 +295,15 @@ impl<'a> Context<'a> {
})
}
pub fn push_parent(&mut self, parent: &'a Stmt) {
let num_existing = self.parents.len();
self.parents.push(RefEquality(parent));
self.depths.insert(self.parents[num_existing], num_existing);
if num_existing > 0 {
self.child_to_parent
.insert(self.parents[num_existing], self.parents[num_existing - 1]);
}
/// Push a [`Stmt`] onto the stack.
pub fn push_stmt(&mut self, stmt: &'a Stmt) {
self.stmt_id = Some(self.stmts.insert(stmt, self.stmt_id));
}
pub fn pop_parent(&mut self) {
self.parents.pop().expect("Attempted to pop without parent");
/// Pop the current [`Stmt`] off the stack.
pub fn pop_stmt(&mut self) {
let node_id = self.stmt_id.expect("Attempted to pop without statement");
self.stmt_id = self.stmts.parent_id(node_id);
}
pub fn push_expr(&mut self, expr: &'a Expr) {
@ -345,13 +331,16 @@ impl<'a> Context<'a> {
}
/// Return the current `Stmt`.
pub fn current_stmt(&self) -> &RefEquality<'a, Stmt> {
self.parents.iter().rev().next().expect("No parent found")
pub fn current_stmt(&self) -> &'a Stmt {
let node_id = self.stmt_id.expect("No current statement");
self.stmts[node_id]
}
/// Return the parent `Stmt` of the current `Stmt`, if any.
pub fn current_stmt_parent(&self) -> Option<&RefEquality<'a, Stmt>> {
self.parents.iter().rev().nth(1)
pub fn current_stmt_parent(&self) -> Option<&'a Stmt> {
let node_id = self.stmt_id.expect("No current statement");
let parent_id = self.stmts.parent_id(node_id)?;
Some(self.stmts[parent_id])
}
/// Return the parent `Expr` of the current `Expr`.
@ -399,6 +388,11 @@ impl<'a> Context<'a> {
self.scopes.ancestors(self.scope_id)
}
/// Return an iterator over the current statement followed by each of its ancestors,
/// obtained by following parent links from `stmt_id`.
///
/// # Panics
///
/// Panics with "No current statement" if `stmt_id` is `None`.
pub fn parents(&self) -> impl Iterator<Item = &Stmt> + '_ {
let node_id = self.stmt_id.expect("No current statement");
self.stmts.ancestor_ids(node_id).map(|id| self.stmts[id])
}
/// Returns `true` if the context is in an exception handler.
pub const fn in_exception_handler(&self) -> bool {
self.in_exception_handler

View file

@ -1,4 +1,5 @@
pub mod analyze;
pub mod binding;
pub mod context;
pub mod node;
pub mod scope;

View file

@ -0,0 +1,112 @@
use std::num::{NonZeroU32, TryFromIntError};
use std::ops::{Index, IndexMut};
use rustc_hash::FxHashMap;
use rustpython_parser::ast::Stmt;
use ruff_python_ast::types::RefEquality;
/// Id uniquely identifying a statement in a program.
///
/// Using a `u32` is sufficient because Ruff only supports parsing documents with a size of at
/// most `u32::MAX`, and it is impossible to have more statements than characters in the file.
/// We use a `NonZeroU32` to take advantage of memory layout optimizations (for example,
/// `Option<NodeId>` is the same size as `NodeId`).
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
pub struct NodeId(NonZeroU32);

/// Convert a `usize` to a `NodeId` (by adding 1 to the value, and casting to `NonZeroU32`).
///
/// # Errors
///
/// Returns a [`TryFromIntError`] if `value` does not fit in a `u32`, or if `value + 1` does not
/// fit in a `u32` (i.e. `value == u32::MAX`).
impl TryFrom<usize> for NodeId {
    type Error = TryFromIntError;

    fn try_from(value: usize) -> Result<Self, Self::Error> {
        let index = u32::try_from(value)?;
        // A plain `index + 1` overflows for `u32::MAX` (a panic when overflow checks are
        // enabled). Wrapping instead turns that single case into `0`, which the `NonZeroU32`
        // conversion rejects with the expected error.
        NonZeroU32::try_from(index.wrapping_add(1)).map(Self)
    }
}

/// Convert a `NodeId` to a `usize` (by subtracting 1 from the value, and casting to `usize`).
impl From<NodeId> for usize {
    fn from(value: NodeId) -> Self {
        // The wrapped value is never zero, so the subtraction cannot underflow.
        value.0.get() as usize - 1
    }
}
/// A single entry of the statement tree: a statement, a link to its parent entry, and its
/// depth in the tree.
#[derive(Debug)]
struct Node<'a> {
/// The statement this node represents.
stmt: &'a Stmt,
/// The ID of the parent of this node, if any.
parent: Option<NodeId>,
/// The depth of this node in the tree: `0` for a node without a parent, otherwise the
/// parent's depth plus one.
depth: u32,
}
/// The nodes of a program indexed by [`NodeId`]
#[derive(Debug, Default)]
pub struct Nodes<'a> {
/// All nodes in insertion order; a [`NodeId`] converts to the position of its node here.
nodes: Vec<Node<'a>>,
/// Reverse lookup from a statement (wrapped in `RefEquality`, presumably compared by
/// pointer identity — confirm against `ruff_python_ast::types`) to its [`NodeId`].
node_to_id: FxHashMap<RefEquality<'a, Stmt>, NodeId>,
}
impl<'a> Nodes<'a> {
    /// Registers `stmt` as a child of `parent` (or as a root when `parent` is `None`) and
    /// returns the id assigned to it.
    ///
    /// # Panics
    ///
    /// Panics if a node with the same pointer already exists, if the number of nodes can no
    /// longer be represented as a [`NodeId`], or if `parent` does not refer to a stored node.
    pub fn insert(&mut self, stmt: &'a Stmt, parent: Option<NodeId>) -> NodeId {
        let next_id = NodeId::try_from(self.nodes.len()).unwrap();
        if let Some(existing_id) = self.node_to_id.insert(RefEquality(stmt), next_id) {
            panic!("Node already exists with id {existing_id:?}");
        }
        // Roots sit at depth zero; every other node is one level below its parent.
        let depth = match parent {
            Some(parent_id) => self.depth(parent_id) + 1,
            None => 0,
        };
        self.nodes.push(Node {
            stmt,
            parent,
            depth,
        });
        next_id
    }

    /// Looks up the [`NodeId`] assigned to `node`, if it has been inserted.
    #[inline]
    pub fn node_id(&self, node: &'a Stmt) -> Option<NodeId> {
        let key = RefEquality(node);
        self.node_to_id.get(&key).copied()
    }

    /// Returns the [`NodeId`] of the parent of `node_id`, or `None` for a node without a
    /// parent.
    #[inline]
    pub fn parent_id(&self, node_id: NodeId) -> Option<NodeId> {
        let position = usize::from(node_id);
        self.nodes[position].parent
    }

    /// Returns how many levels below a parentless node `node_id` sits.
    #[inline]
    pub fn depth(&self, node_id: NodeId) -> u32 {
        let position = usize::from(node_id);
        self.nodes[position].depth
    }

    /// Returns an iterator that yields `node_id` itself and then each successive parent id,
    /// ending after the first node without a parent.
    pub fn ancestor_ids(&self, node_id: NodeId) -> impl Iterator<Item = NodeId> + '_ {
        std::iter::successors(Some(node_id), move |&current| self.parent_id(current))
    }

    /// Returns the parent statement of `node`, or `None` if `node` is unknown or has no
    /// parent.
    pub fn parent(&self, node: &'a Stmt) -> Option<&'a Stmt> {
        let node_id = self.node_id(node)?;
        let parent_id = self.parent_id(node_id)?;
        Some(self[parent_id])
    }
}
/// Look up the statement stored for a [`NodeId`].
///
/// Indexing panics if the id does not refer to a stored node.
impl<'a> Index<NodeId> for Nodes<'a> {
    type Output = &'a Stmt;

    fn index(&self, index: NodeId) -> &Self::Output {
        let position = usize::from(index);
        &self.nodes[position].stmt
    }
}
/// Mutable access to the statement reference stored for a [`NodeId`].
///
/// Indexing panics if the id does not refer to a stored node.
impl<'a> IndexMut<NodeId> for Nodes<'a> {
    fn index_mut(&mut self, index: NodeId) -> &mut Self::Output {
        let position = usize::from(index);
        let node = &mut self.nodes[position];
        &mut node.stmt
    }
}