Recursively visit deferred AST nodes (#9541)

## Summary

This PR is a more holistic fix for
https://github.com/astral-sh/ruff/issues/9534 and
https://github.com/astral-sh/ruff/issues/9159.

When we visit the AST, we track nodes that we need to visit _later_
(deferred nodes). For example, when visiting a function, we defer the
function body, since we don't want to visit the body until we've visited
the rest of the statements in the containing scope.

However, deferred nodes can themselves contain deferred nodes... For
example, a function body can contain a lambda (which contains a deferred
body). And then there are rarer cases, like a lambda inside of a type
annotation.

The aforementioned issues were fixed by reordering the deferral visits
to catch common cases. But even with those fixes, we still fail on cases
like:

```python
from __future__ import annotations

import re
from typing import cast

cast(lambda: re.match, 1)
```

Since we don't expect lambdas to appear inside of type definitions.

This PR modifies the `Checker` to keep visiting until all the deferred
stacks are empty. We _already_ do this for any one kind of deferred
node; now, we do it for _all_ of them at a level above.
This commit is contained in:
Charlie Marsh 2024-01-15 20:34:38 -05:00 committed by GitHub
parent da275b8572
commit f9331c7683
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 92 additions and 48 deletions

View file

@ -1,3 +1,5 @@
"""Test: ensure that we visit type definitions and lambdas recursively."""
from __future__ import annotations
from collections import Counter

View file

@ -0,0 +1,11 @@
"""Test: ensure that we visit type definitions and lambdas recursively."""
from __future__ import annotations
import re
import os
from typing import cast
cast(lambda: re.match, 1)
cast(lambda: cast(lambda: os.path, 1), 1)

View file

@ -6,8 +6,8 @@ use crate::rules::{flake8_bugbear, flake8_simplify, perflint, pyupgrade, refurb}
/// Run lint rules over all deferred for-loops in the [`SemanticModel`].
pub(crate) fn deferred_for_loops(checker: &mut Checker) {
while !checker.deferred.for_loops.is_empty() {
let for_loops = std::mem::take(&mut checker.deferred.for_loops);
while !checker.analyze.for_loops.is_empty() {
let for_loops = std::mem::take(&mut checker.analyze.for_loops);
for snapshot in for_loops {
checker.semantic.restore(snapshot);

View file

@ -6,8 +6,8 @@ use crate::rules::{flake8_pie, pylint, refurb};
/// Run lint rules over all deferred lambdas in the [`SemanticModel`].
pub(crate) fn deferred_lambdas(checker: &mut Checker) {
while !checker.deferred.lambdas.is_empty() {
let lambdas = std::mem::take(&mut checker.deferred.lambdas);
while !checker.analyze.lambdas.is_empty() {
let lambdas = std::mem::take(&mut checker.analyze.lambdas);
for snapshot in lambdas {
checker.semantic.restore(snapshot);

View file

@ -77,7 +77,7 @@ pub(crate) fn deferred_scopes(checker: &mut Checker) {
};
let mut diagnostics: Vec<Diagnostic> = vec![];
for scope_id in checker.deferred.scopes.iter().rev().copied() {
for scope_id in checker.analyze.scopes.iter().rev().copied() {
let scope = &checker.semantic.scopes[scope_id];
if checker.enabled(Rule::UndefinedLocal) {

View file

@ -1264,7 +1264,7 @@ pub(crate) fn statement(stmt: &Stmt, checker: &mut Checker) {
Rule::UnusedLoopControlVariable,
Rule::YieldInForLoop,
]) {
checker.deferred.for_loops.push(checker.semantic.snapshot());
checker.analyze.for_loops.push(checker.semantic.snapshot());
}
if checker.enabled(Rule::LoopVariableOverridesIterator) {
flake8_bugbear::rules::loop_variable_overrides_iterator(checker, target, iter);

View file

@ -2,16 +2,34 @@ use ruff_python_ast::Expr;
use ruff_python_semantic::{ScopeId, Snapshot};
use ruff_text_size::TextRange;
/// A collection of AST nodes that are deferred for later analysis.
/// Used to, e.g., store functions, whose bodies shouldn't be analyzed until all
/// module-level definitions have been analyzed.
/// A collection of AST nodes that are deferred for later visitation. Used to, e.g., store
/// functions, whose bodies shouldn't be visited until all module-level definitions have been
/// visited.
#[derive(Debug, Default)]
pub(crate) struct Deferred<'a> {
pub(crate) scopes: Vec<ScopeId>,
pub(crate) struct Visit<'a> {
pub(crate) string_type_definitions: Vec<(TextRange, &'a str, Snapshot)>,
pub(crate) future_type_definitions: Vec<(&'a Expr, Snapshot)>,
pub(crate) type_param_definitions: Vec<(&'a Expr, Snapshot)>,
pub(crate) functions: Vec<Snapshot>,
pub(crate) lambdas: Vec<Snapshot>,
}
impl Visit<'_> {
/// Returns `true` if there are no deferred nodes.
pub(crate) fn is_empty(&self) -> bool {
self.string_type_definitions.is_empty()
&& self.future_type_definitions.is_empty()
&& self.type_param_definitions.is_empty()
&& self.functions.is_empty()
&& self.lambdas.is_empty()
}
}
/// A collection of AST nodes to be analyzed after the AST traversal. Used to, e.g., store
/// all `for` loops, so that they can be analyzed after the entire AST has been visited.
#[derive(Debug, Default)]
pub(crate) struct Analyze {
pub(crate) scopes: Vec<ScopeId>,
pub(crate) lambdas: Vec<Snapshot>,
pub(crate) for_loops: Vec<Snapshot>,
}

View file

@ -52,14 +52,13 @@ use ruff_python_parser::typing::{parse_type_annotation, AnnotationKind};
use ruff_python_semantic::analyze::{imports, typing, visibility};
use ruff_python_semantic::{
BindingFlags, BindingId, BindingKind, Exceptions, Export, FromImport, Globals, Import, Module,
ModuleKind, NodeId, ScopeId, ScopeKind, SemanticModel, SemanticModelFlags, Snapshot,
StarImport, SubmoduleImport,
ModuleKind, NodeId, ScopeId, ScopeKind, SemanticModel, SemanticModelFlags, StarImport,
SubmoduleImport,
};
use ruff_python_stdlib::builtins::{IPYTHON_BUILTINS, MAGIC_GLOBALS, PYTHON_BUILTINS};
use ruff_source_file::{Locator, OneIndexed, SourceRow};
use crate::checkers::ast::annotation::AnnotationContext;
use crate::checkers::ast::deferred::Deferred;
use crate::docstrings::extraction::ExtractionTarget;
use crate::importer::Importer;
use crate::noqa::NoqaMapping;
@ -105,8 +104,10 @@ pub(crate) struct Checker<'a> {
importer: Importer<'a>,
/// The [`SemanticModel`], built up over the course of the AST traversal.
semantic: SemanticModel<'a>,
/// A set of deferred nodes to be processed after the current traversal (e.g., function bodies).
deferred: Deferred<'a>,
/// A set of deferred nodes to be visited after the current traversal (e.g., function bodies).
visit: deferred::Visit<'a>,
/// A set of deferred nodes to be analyzed after the AST traversal (e.g., `for` loops).
analyze: deferred::Analyze,
/// The cumulative set of diagnostics computed across all lint rules.
pub(crate) diagnostics: Vec<Diagnostic>,
/// The list of names already seen by flake8-bugbear diagnostics, to avoid duplicate violations..
@ -145,7 +146,8 @@ impl<'a> Checker<'a> {
indexer,
importer,
semantic: SemanticModel::new(&settings.typing_modules, path, module),
deferred: Deferred::default(),
visit: deferred::Visit::default(),
analyze: deferred::Analyze::default(),
diagnostics: Vec::default(),
flake8_bugbear_seen: Vec::default(),
cell_offsets,
@ -596,7 +598,7 @@ where
self.semantic.push_scope(ScopeKind::Function(function_def));
self.semantic.flags -= SemanticModelFlags::EXCEPTION_HANDLER;
self.deferred.functions.push(self.semantic.snapshot());
self.visit.functions.push(self.semantic.snapshot());
// Extract any global bindings from the function body.
if let Some(globals) = Globals::from_body(body) {
@ -652,7 +654,7 @@ where
if let Some(type_params) = type_params {
self.visit_type_params(type_params);
}
self.deferred
self.visit
.type_param_definitions
.push((value, self.semantic.snapshot()));
self.semantic.pop_scope();
@ -785,7 +787,7 @@ where
match stmt {
Stmt::FunctionDef(ast::StmtFunctionDef { name, .. }) => {
let scope_id = self.semantic.scope_id;
self.deferred.scopes.push(scope_id);
self.analyze.scopes.push(scope_id);
self.semantic.pop_scope(); // Function scope
self.semantic.pop_definition();
self.semantic.pop_scope(); // Type parameter scope
@ -798,7 +800,7 @@ where
}
Stmt::ClassDef(ast::StmtClassDef { name, .. }) => {
let scope_id = self.semantic.scope_id;
self.deferred.scopes.push(scope_id);
self.analyze.scopes.push(scope_id);
self.semantic.pop_scope(); // Class scope
self.semantic.pop_definition();
self.semantic.pop_scope(); // Type parameter scope
@ -835,13 +837,13 @@ where
&& self.semantic.future_annotations()
{
if let Expr::StringLiteral(ast::ExprStringLiteral { value, .. }) = expr {
self.deferred.string_type_definitions.push((
self.visit.string_type_definitions.push((
expr.range(),
value.to_str(),
self.semantic.snapshot(),
));
} else {
self.deferred
self.visit
.future_type_definitions
.push((expr, self.semantic.snapshot()));
}
@ -945,7 +947,8 @@ where
}
self.semantic.push_scope(ScopeKind::Lambda(lambda));
self.deferred.lambdas.push(self.semantic.snapshot());
self.visit.lambdas.push(self.semantic.snapshot());
self.analyze.lambdas.push(self.semantic.snapshot());
}
Expr::IfExp(ast::ExprIfExp {
test,
@ -1252,7 +1255,7 @@ where
}
Expr::StringLiteral(ast::ExprStringLiteral { value, .. }) => {
if self.semantic.in_type_definition() && !self.semantic.in_typing_literal() {
self.deferred.string_type_definitions.push((
self.visit.string_type_definitions.push((
expr.range(),
value.to_str(),
self.semantic.snapshot(),
@ -1273,7 +1276,7 @@ where
| Expr::ListComp(_)
| Expr::DictComp(_)
| Expr::SetComp(_) => {
self.deferred.scopes.push(self.semantic.scope_id);
self.analyze.scopes.push(self.semantic.scope_id);
self.semantic.pop_scope();
}
_ => {}
@ -1447,7 +1450,7 @@ where
bound: Some(bound), ..
}) = type_param
{
self.deferred
self.visit
.type_param_definitions
.push((bound, self.semantic.snapshot()));
}
@ -1809,8 +1812,8 @@ impl<'a> Checker<'a> {
fn visit_deferred_future_type_definitions(&mut self) {
let snapshot = self.semantic.snapshot();
while !self.deferred.future_type_definitions.is_empty() {
let type_definitions = std::mem::take(&mut self.deferred.future_type_definitions);
while !self.visit.future_type_definitions.is_empty() {
let type_definitions = std::mem::take(&mut self.visit.future_type_definitions);
for (expr, snapshot) in type_definitions {
self.semantic.restore(snapshot);
@ -1824,8 +1827,8 @@ impl<'a> Checker<'a> {
fn visit_deferred_type_param_definitions(&mut self) {
let snapshot = self.semantic.snapshot();
while !self.deferred.type_param_definitions.is_empty() {
let type_params = std::mem::take(&mut self.deferred.type_param_definitions);
while !self.visit.type_param_definitions.is_empty() {
let type_params = std::mem::take(&mut self.visit.type_param_definitions);
for (type_param, snapshot) in type_params {
self.semantic.restore(snapshot);
@ -1839,8 +1842,8 @@ impl<'a> Checker<'a> {
fn visit_deferred_string_type_definitions(&mut self, allocator: &'a typed_arena::Arena<Expr>) {
let snapshot = self.semantic.snapshot();
while !self.deferred.string_type_definitions.is_empty() {
let type_definitions = std::mem::take(&mut self.deferred.string_type_definitions);
while !self.visit.string_type_definitions.is_empty() {
let type_definitions = std::mem::take(&mut self.visit.string_type_definitions);
for (range, value, snapshot) in type_definitions {
if let Ok((expr, kind)) =
parse_type_annotation(value, range, self.locator.contents())
@ -1887,8 +1890,8 @@ impl<'a> Checker<'a> {
fn visit_deferred_functions(&mut self) {
let snapshot = self.semantic.snapshot();
while !self.deferred.functions.is_empty() {
let deferred_functions = std::mem::take(&mut self.deferred.functions);
while !self.visit.functions.is_empty() {
let deferred_functions = std::mem::take(&mut self.visit.functions);
for snapshot in deferred_functions {
self.semantic.restore(snapshot);
@ -1906,11 +1909,12 @@ impl<'a> Checker<'a> {
self.semantic.restore(snapshot);
}
/// Visit all deferred lambdas. Returns a list of snapshots, such that the caller can restore
/// the semantic model to the state it was in before visiting the deferred lambdas.
fn visit_deferred_lambdas(&mut self) {
let snapshot = self.semantic.snapshot();
let mut deferred: Vec<Snapshot> = Vec::with_capacity(self.deferred.lambdas.len());
while !self.deferred.lambdas.is_empty() {
let lambdas = std::mem::take(&mut self.deferred.lambdas);
while !self.visit.lambdas.is_empty() {
let lambdas = std::mem::take(&mut self.visit.lambdas);
for snapshot in lambdas {
self.semantic.restore(snapshot);
@ -1927,15 +1931,23 @@ impl<'a> Checker<'a> {
self.visit_parameters(parameters);
}
self.visit_expr(body);
deferred.push(snapshot);
}
}
// Reset the deferred lambdas, so we can analyze them later on.
self.deferred.lambdas = deferred;
self.semantic.restore(snapshot);
}
/// Recursively visit all deferred AST nodes, including lambdas, functions, and type
/// annotations.
fn visit_deferred(&mut self, allocator: &'a typed_arena::Arena<Expr>) {
while !self.visit.is_empty() {
self.visit_deferred_functions();
self.visit_deferred_type_param_definitions();
self.visit_deferred_lambdas();
self.visit_deferred_future_type_definitions();
self.visit_deferred_string_type_definitions(allocator);
}
}
/// Run any lint rules that operate over the module exports (i.e., members of `__all__`).
fn visit_exports(&mut self) {
let snapshot = self.semantic.snapshot();
@ -2043,12 +2055,8 @@ pub(crate) fn check_ast(
// Visit any deferred syntax nodes. Take care to visit in order, such that we avoid adding
// new deferred nodes after visiting nodes of that kind. For example, visiting a deferred
// function can add a deferred lambda, but the opposite is not true.
checker.visit_deferred_functions();
checker.visit_deferred_type_param_definitions();
checker.visit_deferred_lambdas();
checker.visit_deferred_future_type_definitions();
let allocator = typed_arena::Arena::new();
checker.visit_deferred_string_type_definitions(&allocator);
checker.visit_deferred(&allocator);
checker.visit_exports();
// Check docstrings, bindings, and unresolved references.
@ -2060,7 +2068,7 @@ pub(crate) fn check_ast(
// Reset the scope to module-level, and check all consumed scopes.
checker.semantic.scope_id = ScopeId::global();
checker.deferred.scopes.push(ScopeId::global());
checker.analyze.scopes.push(ScopeId::global());
analyze::deferred_scopes(&mut checker);
checker.diagnostics

View file

@ -54,6 +54,7 @@ mod tests {
#[test_case(Rule::UnusedImport, Path::new("F401_19.py"))]
#[test_case(Rule::UnusedImport, Path::new("F401_20.py"))]
#[test_case(Rule::UnusedImport, Path::new("F401_21.py"))]
#[test_case(Rule::UnusedImport, Path::new("F401_22.py"))]
#[test_case(Rule::ImportShadowedByLoopVar, Path::new("F402.py"))]
#[test_case(Rule::ImportShadowedByLoopVar, Path::new("F402.ipynb"))]
#[test_case(Rule::UndefinedLocalWithImportStar, Path::new("F403.py"))]

View file

@ -0,0 +1,4 @@
---
source: crates/ruff_linter/src/rules/pyflakes/mod.rs
---