Store expression hierarchy in semantic model snapshots (#6345)

## Summary

When we iterate over the AST for analysis, we often process nodes in a
"deferred" manner. For example, if we're analyzing a function, we push
the function body onto a deferred stack, along with a snapshot of the
current semantic model state. Later, when we analyze the body, we
restore the semantic model state from the snapshot. This ensures that we
know the correct scope, hierarchy of statement parents, etc., when we go
to analyze the function body.

Historically, we _haven't_ included the _expression_ hierarchy in the
model snapshot -- so we track the current expression parents in the
visitor, but we never save and restore them when processing deferred
nodes. This can lead to subtle bugs, in that methods like
`expr_parent()` aren't guaranteed to be correct, if you're in a deferred
visitor.

This PR migrates expression tracking to mirror statement tracking
exactly. So we push all expressions onto an `IndexVec`, and include the
current expression on the snapshot. This ensures that `expr_parent()`
and related methods are "always correct" rather than "sometimes
correct".

There's a performance cost here, both at runtime and in terms of memory
consumption (we now store an additional pointer for every expression).
In my hyperfine testing, it's about a 1% performance decrease for
all-rules on CPython (up to 533.8ms, from 528.3ms) and a 4% performance
decrease for default-rules on CPython (up to 212ms, from 204ms).
However... I think this is worth it given the incorrectness of our
current approach. In the future, we may want to reconsider how we do
these upward traversals (e.g., with something like a red-green tree).
(**Note**: in https://github.com/astral-sh/ruff/pull/6351, the slowdown
seems to be entirely removed.)
This commit is contained in:
Charlie Marsh 2023-08-07 09:42:04 -04:00 committed by GitHub
parent 5d2a4ebc99
commit 89e4e038b0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 34 additions and 27 deletions

View file

@ -1696,11 +1696,7 @@ impl<'a> Checker<'a> {
return;
}
if self
.semantic
.expr_ancestors()
.any(|expr| expr.is_named_expr_expr())
{
if self.semantic.expr_ancestors().any(Expr::is_named_expr_expr) {
self.add_binding(
id,
expr.range(),

View file

@ -248,7 +248,6 @@ pub(crate) fn unittest_assertion(
// the assertion is part of a larger expression.
if checker.semantic().stmt().is_expr_stmt()
&& checker.semantic().expr_parent().is_none()
&& !checker.semantic().scope().kind.is_lambda()
&& !checker.indexer().comment_ranges().intersects(expr.range())
{
if let Ok(stmt) = unittest_assert.generate_assert(args, keywords) {

View file

@ -84,14 +84,9 @@ pub(crate) fn inplace_argument(checker: &mut Checker, call: &ast::ExprCall) {
// the star argument _doesn't_ contain an override).
// 2. The call is part of a larger expression (we're converting an expression to a
// statement, and expressions can't contain statements).
// 3. The call is in a lambda (we can't assign to a variable in a lambda). This
// should be unnecessary, as lambdas are expressions, and so (2) should apply,
// but we don't currently restore expression stacks when parsing deferred nodes,
// and so the parent is lost.
if !seen_star
&& checker.semantic().stmt().is_expr_stmt()
&& checker.semantic().expr_parent().is_none()
&& !checker.semantic().scope().kind.is_lambda()
{
if let Some(fix) = convert_inplace_argument_to_assignment(
call,

View file

@ -68,7 +68,7 @@ pub(crate) fn compare_to_empty_string(
if checker
.semantic()
.expr_ancestors()
.any(|parent| parent.is_subscript_expr())
.any(Expr::is_subscript_expr)
{
return;
}

View file

@ -36,8 +36,11 @@ pub struct SemanticModel<'a> {
/// The identifier of the current statement.
stmt_id: Option<NodeId>,
/// Stack of current expressions.
exprs: Vec<&'a Expr>,
/// Stack of all visited expressions.
exprs: Nodes<'a, Expr>,
/// The identifier of the current expression.
expr_id: Option<NodeId>,
/// Stack of all scopes, along with the identifier of the current scope.
pub scopes: Scopes<'a>,
@ -131,7 +134,8 @@ impl<'a> SemanticModel<'a> {
module_path: module.path(),
stmts: Nodes::<Stmt>::default(),
stmt_id: None,
exprs: Vec::default(),
exprs: Nodes::<Expr>::default(),
expr_id: None,
scopes: Scopes::default(),
scope_id: ScopeId::global(),
definitions: Definitions::for_module(module),
@ -769,16 +773,15 @@ impl<'a> SemanticModel<'a> {
self.stmt_id = self.stmts.parent_id(node_id);
}
/// Push an [`Expr`] onto the stack.
/// Push a [`Expr`] onto the stack.
pub fn push_expr(&mut self, expr: &'a Expr) {
self.exprs.push(expr);
self.expr_id = Some(self.exprs.insert(expr, self.expr_id));
}
/// Pop the current [`Expr`] off the stack.
pub fn pop_expr(&mut self) {
self.exprs
.pop()
.expect("Attempted to pop without expression");
let node_id = self.expr_id.expect("Attempted to pop without expression");
self.expr_id = self.exprs.parent_id(node_id);
}
/// Push a [`Scope`] with the given [`ScopeKind`] onto the stack.
@ -822,22 +825,32 @@ impl<'a> SemanticModel<'a> {
/// Return the current `Expr`.
pub fn expr(&self) -> Option<&'a Expr> {
self.exprs.iter().last().copied()
let node_id = self.expr_id?;
Some(self.exprs[node_id])
}
/// Return the parent `Expr` of the current `Expr`.
/// Return the parent `Expr` of the current `Expr`, if any.
pub fn expr_parent(&self) -> Option<&'a Expr> {
self.exprs.iter().rev().nth(1).copied()
self.expr_ancestors().next()
}
/// Return the grandparent `Expr` of the current `Expr`.
/// Return the grandparent `Expr` of the current `Expr`, if any.
pub fn expr_grandparent(&self) -> Option<&'a Expr> {
self.exprs.iter().rev().nth(2).copied()
self.expr_ancestors().nth(1)
}
/// Return an [`Iterator`] over the current `Expr` parents.
pub fn expr_ancestors(&self) -> impl Iterator<Item = &&Expr> {
self.exprs.iter().rev().skip(1)
pub fn expr_ancestors(&self) -> impl Iterator<Item = &'a Expr> + '_ {
self.expr_id
.iter()
.copied()
.flat_map(|id| {
self.exprs
.ancestor_ids(id)
.skip(1)
.map(|id| &self.exprs[id])
})
.copied()
}
/// Returns a reference to the global scope
@ -1036,6 +1049,7 @@ impl<'a> SemanticModel<'a> {
Snapshot {
scope_id: self.scope_id,
stmt_id: self.stmt_id,
expr_id: self.expr_id,
definition_id: self.definition_id,
flags: self.flags,
}
@ -1046,11 +1060,13 @@ impl<'a> SemanticModel<'a> {
let Snapshot {
scope_id,
stmt_id,
expr_id,
definition_id,
flags,
} = snapshot;
self.scope_id = scope_id;
self.stmt_id = stmt_id;
self.expr_id = expr_id;
self.definition_id = definition_id;
self.flags = flags;
}
@ -1464,6 +1480,7 @@ impl SemanticModelFlags {
pub struct Snapshot {
scope_id: ScopeId,
stmt_id: Option<NodeId>,
expr_id: Option<NodeId>,
definition_id: DefinitionId,
flags: SemanticModelFlags,
}