[pylint] redefined-loop-name (W2901) (#3022)

Slightly broadens W2901 to cover `with` statements too.

Closes #2972.
This commit is contained in:
Matthew Lloyd 2023-02-21 22:23:47 -05:00 committed by GitHub
parent 9645790a8b
commit 97338e4cd6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 809 additions and 5 deletions

View file

@ -0,0 +1,155 @@
# For -> for, variable reused
for i in []:
for i in []: # error
pass
# With -> for, variable reused
with None as i:
for i in []: # error
pass
# For -> with, variable reused
for i in []:
with None as i: # error
pass
# With -> with, variable reused
with None as i:
with None as i: # error
pass
# For -> for, different variable
for i in []:
for j in []: # ok
pass
# With -> with, different variable
with None as i:
with None as j: # ok
pass
# For -> for -> for, doubly nested variable reuse
for i in []:
for j in []:
for i in []: # error
pass
# For -> for -> for -> for, doubly nested variable reuse x2
for i in []:
for j in []:
for i in []: # error
for j in []: # error
pass
# For -> assignment
for i in []:
i = 5 # error
# For -> augmented assignment
for i in []:
i += 5 # error
# For -> annotated assignment
for i in []:
i: int = 5 # error
# Async for -> for, variable reused
async for i in []:
for i in []: # error
pass
# For -> async for, variable reused
for i in []:
async for i in []: # error
pass
# For -> for, outer loop unpacks tuple
for i, j in enumerate([]):
for i in []: # error
pass
# For -> for, inner loop unpacks tuple
for i in []:
for i, j in enumerate([]): # error
pass
# For -> for, both loops unpack tuple
for (i, (j, k)) in []:
for i, j in enumerate([]): # two errors
pass
# For else -> for, variable reused in else
for i in []:
pass
else:
for i in []: # no error
pass
# For -> for, ignore dummy variables
for _ in []:
for _ in []: # no error
pass
# For -> for, outer loop unpacks with asterisk
for i, *j in []:
for j in []: # error
pass
# For -> function definition
for i in []:
def f():
i = 2 # no error
# For -> class definition
for i in []:
class A:
i = 2 # no error
# For -> function definition -> for -> assignment
for i in []:
def f():
for i in []: # no error
i = 2 # error
# For -> class definition -> for -> for
for i in []:
class A:
for i in []: # no error
for i in []: # error
pass
# For -> use in assignment target without actual assignment; subscript
for i in []:
a[i] = 2 # no error
i[a] = 2 # no error
# For -> use in assignment target without actual assignment; attribute
for i in []:
a.i = 2 # no error
i.a = 2 # no error
# For target with subscript -> assignment
for a[0] in []:
a[0] = 2 # error
a[1] = 2 # no error
# For target with subscript -> assignment
for a['i'] in []:
a['i'] = 2 # error
a['j'] = 2 # no error
# For target with attribute -> assignment
for a.i in []:
a.i = 2 # error
a.j = 2 # no error
# For target with double nested attribute -> assignment
for a.i.j in []:
a.i.j = 2 # error
a.j.i = 2 # no error
# For target with attribute -> assignment with different spacing
for a.i in []:
a. i = 2 # error
for a. i in []:
a.i = 2 # error

View file

@ -928,7 +928,10 @@ pub fn binding_range(binding: &Binding, locator: &Locator) -> Range {
}
// Return the ranges of `Name` tokens within a specified node.
pub fn find_names<T>(located: &Located<T>, locator: &Locator) -> Vec<Range> {
pub fn find_names<'a, T, U>(
located: &'a Located<T, U>,
locator: &'a Locator,
) -> impl Iterator<Item = Range> + 'a {
let contents = locator.slice(&Range::from_located(located));
lexer::make_tokenizer_located(contents, located.location)
.flatten()
@ -937,7 +940,6 @@ pub fn find_names<T>(located: &Located<T>, locator: &Locator) -> Vec<Range> {
location: start,
end_location: end,
})
.collect()
}
/// Return the `Range` of `name` in `Excepthandler`.

View file

@ -29,7 +29,7 @@ impl Range {
}
}
pub fn from_located<T>(located: &Located<T>) -> Self {
pub fn from_located<T, U>(located: &Located<T, U>) -> Self {
Range::new(located.location, located.end_location.unwrap())
}

View file

@ -342,7 +342,7 @@ where
match &stmt.node {
StmtKind::Global { names } => {
let scope_index = *self.scope_stack.last().expect("No current scope found");
let ranges = helpers::find_names(stmt, self.locator);
let ranges: Vec<Range> = helpers::find_names(stmt, self.locator).collect();
if scope_index != GLOBAL_SCOPE_INDEX {
// Add the binding to the current scope.
let context = self.execution_context();
@ -372,7 +372,7 @@ where
}
StmtKind::Nonlocal { names } => {
let scope_index = *self.scope_stack.last().expect("No current scope found");
let ranges = helpers::find_names(stmt, self.locator);
let ranges: Vec<Range> = helpers::find_names(stmt, self.locator).collect();
if scope_index != GLOBAL_SCOPE_INDEX {
let context = self.execution_context();
let scope = &mut self.scopes[scope_index];
@ -1651,6 +1651,9 @@ where
self.current_stmt_parent().map(Into::into),
);
}
if self.settings.rules.enabled(&Rule::RedefinedLoopName) {
pylint::rules::redefined_loop_name(self, &Node::Stmt(stmt));
}
}
StmtKind::While { body, orelse, .. } => {
if self.settings.rules.enabled(&Rule::FunctionUsesLoopVariable) {
@ -1695,6 +1698,9 @@ where
if self.settings.rules.enabled(&Rule::UselessElseOnLoop) {
pylint::rules::useless_else_on_loop(self, stmt, body, orelse);
}
if self.settings.rules.enabled(&Rule::RedefinedLoopName) {
pylint::rules::redefined_loop_name(self, &Node::Stmt(stmt));
}
if matches!(stmt.node, StmtKind::For { .. }) {
if self.settings.rules.enabled(&Rule::ReimplementedBuiltin) {
flake8_simplify::rules::convert_for_loop_to_any_all(

View file

@ -146,6 +146,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<Rule> {
(Pylint, "R0913") => Rule::TooManyArguments,
(Pylint, "R0912") => Rule::TooManyBranches,
(Pylint, "R0915") => Rule::TooManyStatements,
(Pylint, "W2901") => Rule::RedefinedLoopName,
// flake8-builtins
(Flake8Builtins, "001") => Rule::BuiltinVariableShadowing,

View file

@ -149,6 +149,7 @@ ruff_macros::register_rules!(
rules::pylint::rules::TooManyArguments,
rules::pylint::rules::TooManyBranches,
rules::pylint::rules::TooManyStatements,
rules::pylint::rules::RedefinedLoopName,
// flake8-builtins
rules::flake8_builtins::rules::BuiltinVariableShadowing,
rules::flake8_builtins::rules::BuiltinArgumentShadowing,

View file

@ -47,6 +47,7 @@ mod tests {
#[test_case(Rule::BidirectionalUnicode, Path::new("bidirectional_unicode.py"); "PLE2502")]
#[test_case(Rule::BadStrStripCall, Path::new("bad_str_strip_call.py"); "PLE01310")]
#[test_case(Rule::YieldInInit, Path::new("yield_in_init.py"); "PLE0100")]
#[test_case(Rule::RedefinedLoopName, Path::new("redefined_loop_name.py"); "PLW2901")]
fn rules(rule_code: Rule, path: &Path) -> Result<()> {
let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());
let diagnostics = test_path(

View file

@ -11,6 +11,7 @@ pub use magic_value_comparison::{magic_value_comparison, MagicValueComparison};
pub use merge_isinstance::{merge_isinstance, ConsiderMergingIsinstance};
pub use nonlocal_without_binding::NonlocalWithoutBinding;
pub use property_with_parameters::{property_with_parameters, PropertyWithParameters};
pub use redefined_loop_name::{redefined_loop_name, RedefinedLoopName};
pub use return_in_init::{return_in_init, ReturnInInit};
pub use too_many_arguments::{too_many_arguments, TooManyArguments};
pub use too_many_branches::{too_many_branches, TooManyBranches};
@ -40,6 +41,7 @@ mod magic_value_comparison;
mod merge_isinstance;
mod nonlocal_without_binding;
mod property_with_parameters;
mod redefined_loop_name;
mod return_in_init;
mod too_many_arguments;
mod too_many_branches;

View file

@ -0,0 +1,302 @@
use crate::ast::comparable::ComparableExpr;
use crate::ast::helpers::unparse_expr;
use crate::ast::types::{Node, Range};
use crate::ast::visitor;
use crate::ast::visitor::Visitor;
use crate::checkers::ast::Checker;
use crate::registry::Diagnostic;
use crate::settings::hashable::HashableRegex;
use crate::violation::Violation;
use ruff_macros::{define_violation, derive_message_formats};
use rustpython_parser::ast::{Expr, ExprContext, ExprKind, Stmt, StmtKind, Withitem};
use serde::{Deserialize, Serialize};
use std::{fmt, iter};
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Clone, Copy)]
pub enum BindingKind {
For,
With,
Assignment,
}
impl fmt::Display for BindingKind {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
match self {
BindingKind::For => fmt.write_str("for loop"),
BindingKind::With => fmt.write_str("with statement"),
BindingKind::Assignment => fmt.write_str("assignment"),
}
}
}
define_violation!(
/// ## What it does
/// Checks for variables defined in `for` loops and `with` statements that
/// get overwritten within the body, for example by another `for` loop or
/// `with` statement or by direct assignment.
///
/// ## Why is this bad?
/// Redefinition of a loop variable inside the loop's body causes its value
/// to differ from the original loop iteration for the remainder of the
/// block, in a way that will likely cause bugs.
///
/// In Python, unlike many other languages, `for` loops and `with`
/// statements don't define their own scopes. Therefore, a nested loop that
/// uses the same target variable name as an outer loop will reuse the same
/// actual variable, and the value from the last iteration will "leak out"
/// into the remainder of the enclosing loop.
///
/// While this mistake is easy to spot in small examples, it can be hidden
/// in larger blocks of code where the definition and redefinition of the
/// variable may not be visible at the same time.
///
/// ## Example
/// ```python
/// for i in range(10):
/// i = 9
/// print(i) # prints 9 every iteration
///
/// for i in range(10):
/// for i in range(10): # original value overwritten
/// pass
/// print(i) # also prints 9 every iteration
///
/// with path1.open() as f:
/// with path2.open() as f:
/// f = path2.open()
/// print(f.readline()) # prints a line from path2
/// ```
pub struct RedefinedLoopName {
pub name: String,
pub outer_kind: BindingKind,
pub inner_kind: BindingKind,
}
);
impl Violation for RedefinedLoopName {
#[derive_message_formats]
fn message(&self) -> String {
let RedefinedLoopName {
name,
outer_kind,
inner_kind,
} = self;
format!("Outer {outer_kind} variable `{name}` overwritten by inner {inner_kind} target")
}
}
struct ExprWithBindingKind<'a> {
expr: &'a Expr,
binding_kind: BindingKind,
}
struct InnerForWithAssignTargetsVisitor<'a> {
dummy_variable_rgx: &'a HashableRegex,
assignment_targets: Vec<ExprWithBindingKind<'a>>,
}
impl<'a, 'b> Visitor<'b> for InnerForWithAssignTargetsVisitor<'a>
where
'b: 'a,
{
fn visit_stmt(&mut self, stmt: &'b Stmt) {
// Collect target expressions.
match &stmt.node {
// For and async for.
StmtKind::For { target, .. } | StmtKind::AsyncFor { target, .. } => {
self.assignment_targets.extend(
assignment_targets_from_expr(target, self.dummy_variable_rgx).map(|expr| {
ExprWithBindingKind {
expr,
binding_kind: BindingKind::For,
}
}),
);
}
// With.
StmtKind::With { items, .. } => {
self.assignment_targets.extend(
assignment_targets_from_with_items(items, self.dummy_variable_rgx).map(
|expr| ExprWithBindingKind {
expr,
binding_kind: BindingKind::With,
},
),
);
}
// Assignment, augmented assignment, and annotated assignment.
StmtKind::Assign { targets, .. } => {
self.assignment_targets.extend(
assignment_targets_from_assign_targets(targets, self.dummy_variable_rgx).map(
|expr| ExprWithBindingKind {
expr,
binding_kind: BindingKind::Assignment,
},
),
);
}
StmtKind::AugAssign { target, .. } | StmtKind::AnnAssign { target, .. } => {
self.assignment_targets.extend(
assignment_targets_from_expr(target, self.dummy_variable_rgx).map(|expr| {
ExprWithBindingKind {
expr,
binding_kind: BindingKind::Assignment,
}
}),
);
}
_ => {}
}
// Decide whether to recurse.
match &stmt.node {
// Don't recurse into blocks that create a new scope.
StmtKind::ClassDef { .. } => {}
StmtKind::FunctionDef { .. } => {}
// Otherwise, do recurse.
_ => {
visitor::walk_stmt(self, stmt);
}
}
}
}
fn assignment_targets_from_expr<'a, U>(
expr: &'a Expr<U>,
dummy_variable_rgx: &'a HashableRegex,
) -> Box<dyn Iterator<Item = &'a Expr<U>> + 'a> {
// The Box is necessary to ensure the match arms have the same return type - we can't use
// a cast to "impl Iterator", since at the time of writing that is only allowed for
// return types and argument types.
match &expr.node {
ExprKind::Attribute {
ctx: ExprContext::Store,
..
} => Box::new(iter::once(expr)),
ExprKind::Subscript {
ctx: ExprContext::Store,
..
} => Box::new(iter::once(expr)),
ExprKind::Starred {
ctx: ExprContext::Store,
value,
..
} => Box::new(iter::once(&**value)),
ExprKind::Name {
ctx: ExprContext::Store,
id,
..
} => {
// Ignore dummy variables.
if dummy_variable_rgx.is_match(id) {
Box::new(iter::empty())
} else {
Box::new(iter::once(expr))
}
}
ExprKind::List {
ctx: ExprContext::Store,
elts,
..
} => Box::new(
elts.iter()
.flat_map(|elt| assignment_targets_from_expr(elt, dummy_variable_rgx)),
),
ExprKind::Tuple {
ctx: ExprContext::Store,
elts,
..
} => Box::new(
elts.iter()
.flat_map(|elt| assignment_targets_from_expr(elt, dummy_variable_rgx)),
),
_ => Box::new(iter::empty()),
}
}
fn assignment_targets_from_with_items<'a, U>(
items: &'a [Withitem<U>],
dummy_variable_rgx: &'a HashableRegex,
) -> impl Iterator<Item = &'a Expr<U>> + 'a {
items
.iter()
.filter_map(|item| {
item.optional_vars
.as_ref()
.map(|expr| assignment_targets_from_expr(&**expr, dummy_variable_rgx))
})
.flatten()
}
fn assignment_targets_from_assign_targets<'a, U>(
targets: &'a [Expr<U>],
dummy_variable_rgx: &'a HashableRegex,
) -> impl Iterator<Item = &'a Expr<U>> + 'a {
targets
.iter()
.flat_map(|target| assignment_targets_from_expr(target, dummy_variable_rgx))
}
/// PLW2901
pub fn redefined_loop_name<'a, 'b>(checker: &'a mut Checker<'b>, node: &Node<'b>) {
let (outer_assignment_targets, inner_assignment_targets) = match node {
Node::Stmt(stmt) => match &stmt.node {
// With.
StmtKind::With { items, body, .. } => {
let outer_assignment_targets: Vec<ExprWithBindingKind<'a>> =
assignment_targets_from_with_items(items, &checker.settings.dummy_variable_rgx)
.map(|expr| ExprWithBindingKind {
expr,
binding_kind: BindingKind::With,
})
.collect();
let mut visitor = InnerForWithAssignTargetsVisitor {
dummy_variable_rgx: &checker.settings.dummy_variable_rgx,
assignment_targets: vec![],
};
for stmt in body {
visitor.visit_stmt(stmt);
}
(outer_assignment_targets, visitor.assignment_targets)
}
// For and async for.
StmtKind::For { target, body, .. } | StmtKind::AsyncFor { target, body, .. } => {
let outer_assignment_targets: Vec<ExprWithBindingKind<'a>> =
assignment_targets_from_expr(target, &checker.settings.dummy_variable_rgx)
.map(|expr| ExprWithBindingKind {
expr,
binding_kind: BindingKind::For,
})
.collect();
let mut visitor = InnerForWithAssignTargetsVisitor {
dummy_variable_rgx: &checker.settings.dummy_variable_rgx,
assignment_targets: vec![],
};
for stmt in body {
visitor.visit_stmt(stmt);
}
(outer_assignment_targets, visitor.assignment_targets)
}
_ => panic!(
"redefined_loop_name called on Statement that is not a With, For, or AsyncFor"
),
},
Node::Expr(_) => panic!("redefined_loop_name called on Node that is not a Statement"),
};
for outer_assignment_target in &outer_assignment_targets {
for inner_assignment_target in &inner_assignment_targets {
// Compare the targets structurally.
if ComparableExpr::from(outer_assignment_target.expr)
.eq(&(ComparableExpr::from(inner_assignment_target.expr)))
{
checker.diagnostics.push(Diagnostic::new(
RedefinedLoopName {
name: unparse_expr(outer_assignment_target.expr, checker.stylist),
outer_kind: outer_assignment_target.binding_kind,
inner_kind: inner_assignment_target.binding_kind,
},
Range::from_located(inner_assignment_target.expr),
));
}
}
}
}

View file

@ -0,0 +1,330 @@
---
source: crates/ruff/src/rules/pylint/mod.rs
expression: diagnostics
---
- kind:
RedefinedLoopName:
name: i
outer_kind: For
inner_kind: For
location:
row: 3
column: 8
end_location:
row: 3
column: 9
fix: ~
parent: ~
- kind:
RedefinedLoopName:
name: i
outer_kind: With
inner_kind: For
location:
row: 8
column: 8
end_location:
row: 8
column: 9
fix: ~
parent: ~
- kind:
RedefinedLoopName:
name: i
outer_kind: For
inner_kind: With
location:
row: 13
column: 17
end_location:
row: 13
column: 18
fix: ~
parent: ~
- kind:
RedefinedLoopName:
name: i
outer_kind: With
inner_kind: With
location:
row: 18
column: 17
end_location:
row: 18
column: 18
fix: ~
parent: ~
- kind:
RedefinedLoopName:
name: i
outer_kind: For
inner_kind: For
location:
row: 34
column: 12
end_location:
row: 34
column: 13
fix: ~
parent: ~
- kind:
RedefinedLoopName:
name: i
outer_kind: For
inner_kind: For
location:
row: 40
column: 12
end_location:
row: 40
column: 13
fix: ~
parent: ~
- kind:
RedefinedLoopName:
name: j
outer_kind: For
inner_kind: For
location:
row: 41
column: 16
end_location:
row: 41
column: 17
fix: ~
parent: ~
- kind:
RedefinedLoopName:
name: i
outer_kind: For
inner_kind: Assignment
location:
row: 46
column: 4
end_location:
row: 46
column: 5
fix: ~
parent: ~
- kind:
RedefinedLoopName:
name: i
outer_kind: For
inner_kind: Assignment
location:
row: 50
column: 4
end_location:
row: 50
column: 5
fix: ~
parent: ~
- kind:
RedefinedLoopName:
name: i
outer_kind: For
inner_kind: Assignment
location:
row: 54
column: 4
end_location:
row: 54
column: 5
fix: ~
parent: ~
- kind:
RedefinedLoopName:
name: i
outer_kind: For
inner_kind: For
location:
row: 58
column: 8
end_location:
row: 58
column: 9
fix: ~
parent: ~
- kind:
RedefinedLoopName:
name: i
outer_kind: For
inner_kind: For
location:
row: 63
column: 14
end_location:
row: 63
column: 15
fix: ~
parent: ~
- kind:
RedefinedLoopName:
name: i
outer_kind: For
inner_kind: For
location:
row: 68
column: 8
end_location:
row: 68
column: 9
fix: ~
parent: ~
- kind:
RedefinedLoopName:
name: i
outer_kind: For
inner_kind: For
location:
row: 73
column: 8
end_location:
row: 73
column: 9
fix: ~
parent: ~
- kind:
RedefinedLoopName:
name: i
outer_kind: For
inner_kind: For
location:
row: 78
column: 8
end_location:
row: 78
column: 9
fix: ~
parent: ~
- kind:
RedefinedLoopName:
name: j
outer_kind: For
inner_kind: For
location:
row: 78
column: 11
end_location:
row: 78
column: 12
fix: ~
parent: ~
- kind:
RedefinedLoopName:
name: j
outer_kind: For
inner_kind: For
location:
row: 95
column: 8
end_location:
row: 95
column: 9
fix: ~
parent: ~
- kind:
RedefinedLoopName:
name: i
outer_kind: For
inner_kind: Assignment
location:
row: 112
column: 12
end_location:
row: 112
column: 13
fix: ~
parent: ~
- kind:
RedefinedLoopName:
name: i
outer_kind: For
inner_kind: For
location:
row: 118
column: 16
end_location:
row: 118
column: 17
fix: ~
parent: ~
- kind:
RedefinedLoopName:
name: "a[0]"
outer_kind: For
inner_kind: Assignment
location:
row: 133
column: 4
end_location:
row: 133
column: 8
fix: ~
parent: ~
- kind:
RedefinedLoopName:
name: "a['i']"
outer_kind: For
inner_kind: Assignment
location:
row: 138
column: 4
end_location:
row: 138
column: 10
fix: ~
parent: ~
- kind:
RedefinedLoopName:
name: a.i
outer_kind: For
inner_kind: Assignment
location:
row: 143
column: 4
end_location:
row: 143
column: 7
fix: ~
parent: ~
- kind:
RedefinedLoopName:
name: a.i.j
outer_kind: For
inner_kind: Assignment
location:
row: 148
column: 4
end_location:
row: 148
column: 9
fix: ~
parent: ~
- kind:
RedefinedLoopName:
name: a.i
outer_kind: For
inner_kind: Assignment
location:
row: 153
column: 4
end_location:
row: 153
column: 8
fix: ~
parent: ~
- kind:
RedefinedLoopName:
name: a.i
outer_kind: For
inner_kind: Assignment
location:
row: 155
column: 4
end_location:
row: 155
column: 7
fix: ~
parent: ~