[flake8-bugbear] Implement return-in-generator (B901) (#11644)

## Summary

This PR implements the rule B901, which is part of the opinionated rules
of `flake8-bugbear`.

This rule seems to be desired in `ruff` as per
https://github.com/astral-sh/ruff/issues/3758 and
https://github.com/astral-sh/ruff/issues/2954#issuecomment-1441162976.

## Test Plan

As this PR was made closely following the
[CONTRIBUTING.md](8a25531a71/CONTRIBUTING.md),
it tests using the snapshot approach, that is described there.

## Sources

The implementation is inspired by [the original implementation in the
`flake8-bugbear`
repository](d1aec4cbef/bugbear.py (L1092)).
The error message and [test
file](d1aec4cbef/tests/b901.py)
where also copied from there.

The documentation I came up with on my own and needs improvement. Maybe
the example given in
https://github.com/astral-sh/ruff/issues/2954#issuecomment-1441162976
could be used, but maybe they are too complex, I'm not sure.

## Open Questions

- [ ] Documentation. (See above.)

- [x] Can I access the parent in a visitor?

The [original
implementation](d1aec4cbef/bugbear.py (L1100))
references the `yield` statement's parent to check if it is an
expression statement. I didn't find a way to do this in `ruff` and used
the `is_expresssion_statement` field on the visitor instead. What are
your thoughts on this? Is it possible and / or desired to access the
parent node here?

- [x] Is `Option::is_some(...)` -> `...unwrap()` the right thing to do?

Referring to [this piece of
code](9d5a280f71/crates/ruff_linter/src/rules/flake8_bugbear/rules/return_x_in_generator.rs (L91-L96)).
From my understanding, the `.unwrap()` is safe, because it is checked
that `return_` is not `None`. However, I feel like I missed a more
elegant solution that does both in one.

## Other

I don't know a lot about this rule, I just implemented it because I
found it in a
https://github.com/astral-sh/ruff/labels/good%20first%20issue.

I'm new to Rust, so any constructive critisism is appreciated.

---------

Co-authored-by: Charlie Marsh <charlie.r.marsh@gmail.com>
This commit is contained in:
Tobias Fischer 2024-05-31 23:48:36 +02:00 committed by GitHub
parent 91a5fdee7a
commit 312f6640b8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 244 additions and 0 deletions

View file

@ -0,0 +1,78 @@
"""
Should emit:
B901 - on lines 9, 36
"""
def broken():
if True:
return [1, 2, 3]
yield 3
yield 2
yield 1
def not_broken():
if True:
return
yield 3
yield 2
yield 1
def not_broken2():
return not_broken()
def not_broken3():
return
yield from not_broken()
def broken2():
return [3, 2, 1]
yield from not_broken()
async def not_broken4():
import asyncio
await asyncio.sleep(1)
return 1
def not_broken5():
def inner():
return 2
yield inner()
def not_broken6():
return (yield from [])
def not_broken7():
x = yield from []
return x
def not_broken8():
x = None
def inner(ex):
nonlocal x
x = ex
inner((yield from []))
return x
class NotBroken9(object):
def __await__(self):
yield from function()
return 42

View file

@ -207,6 +207,9 @@ pub(crate) fn statement(stmt: &Stmt, checker: &mut Checker) {
if checker.enabled(Rule::MutableArgumentDefault) { if checker.enabled(Rule::MutableArgumentDefault) {
flake8_bugbear::rules::mutable_argument_default(checker, function_def); flake8_bugbear::rules::mutable_argument_default(checker, function_def);
} }
if checker.enabled(Rule::ReturnInGenerator) {
flake8_bugbear::rules::return_in_generator(checker, function_def);
}
if checker.any_enabled(&[ if checker.any_enabled(&[
Rule::UnnecessaryReturnNone, Rule::UnnecessaryReturnNone,
Rule::ImplicitReturnValue, Rule::ImplicitReturnValue,

View file

@ -383,6 +383,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {
(Flake8Bugbear, "033") => (RuleGroup::Stable, rules::flake8_bugbear::rules::DuplicateValue), (Flake8Bugbear, "033") => (RuleGroup::Stable, rules::flake8_bugbear::rules::DuplicateValue),
(Flake8Bugbear, "034") => (RuleGroup::Stable, rules::flake8_bugbear::rules::ReSubPositionalArgs), (Flake8Bugbear, "034") => (RuleGroup::Stable, rules::flake8_bugbear::rules::ReSubPositionalArgs),
(Flake8Bugbear, "035") => (RuleGroup::Stable, rules::flake8_bugbear::rules::StaticKeyDictComprehension), (Flake8Bugbear, "035") => (RuleGroup::Stable, rules::flake8_bugbear::rules::StaticKeyDictComprehension),
(Flake8Bugbear, "901") => (RuleGroup::Preview, rules::flake8_bugbear::rules::ReturnInGenerator),
(Flake8Bugbear, "904") => (RuleGroup::Stable, rules::flake8_bugbear::rules::RaiseWithoutFromInsideExcept), (Flake8Bugbear, "904") => (RuleGroup::Stable, rules::flake8_bugbear::rules::RaiseWithoutFromInsideExcept),
(Flake8Bugbear, "905") => (RuleGroup::Stable, rules::flake8_bugbear::rules::ZipWithoutExplicitStrict), (Flake8Bugbear, "905") => (RuleGroup::Stable, rules::flake8_bugbear::rules::ZipWithoutExplicitStrict),
(Flake8Bugbear, "909") => (RuleGroup::Preview, rules::flake8_bugbear::rules::LoopIteratorMutation), (Flake8Bugbear, "909") => (RuleGroup::Preview, rules::flake8_bugbear::rules::LoopIteratorMutation),

View file

@ -62,6 +62,7 @@ mod tests {
#[test_case(Rule::UselessContextlibSuppress, Path::new("B022.py"))] #[test_case(Rule::UselessContextlibSuppress, Path::new("B022.py"))]
#[test_case(Rule::UselessExpression, Path::new("B018.ipynb"))] #[test_case(Rule::UselessExpression, Path::new("B018.ipynb"))]
#[test_case(Rule::UselessExpression, Path::new("B018.py"))] #[test_case(Rule::UselessExpression, Path::new("B018.py"))]
#[test_case(Rule::ReturnInGenerator, Path::new("B901.py"))]
#[test_case(Rule::LoopIteratorMutation, Path::new("B909.py"))] #[test_case(Rule::LoopIteratorMutation, Path::new("B909.py"))]
fn rules(rule_code: Rule, path: &Path) -> Result<()> { fn rules(rule_code: Rule, path: &Path) -> Result<()> {
let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy()); let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());

View file

@ -20,6 +20,7 @@ pub(crate) use raise_literal::*;
pub(crate) use raise_without_from_inside_except::*; pub(crate) use raise_without_from_inside_except::*;
pub(crate) use re_sub_positional_args::*; pub(crate) use re_sub_positional_args::*;
pub(crate) use redundant_tuple_in_exception_handler::*; pub(crate) use redundant_tuple_in_exception_handler::*;
pub(crate) use return_in_generator::*;
pub(crate) use reuse_of_groupby_generator::*; pub(crate) use reuse_of_groupby_generator::*;
pub(crate) use setattr_with_constant::*; pub(crate) use setattr_with_constant::*;
pub(crate) use star_arg_unpacking_after_keyword_arg::*; pub(crate) use star_arg_unpacking_after_keyword_arg::*;
@ -56,6 +57,7 @@ mod raise_literal;
mod raise_without_from_inside_except; mod raise_without_from_inside_except;
mod re_sub_positional_args; mod re_sub_positional_args;
mod redundant_tuple_in_exception_handler; mod redundant_tuple_in_exception_handler;
mod return_in_generator;
mod reuse_of_groupby_generator; mod reuse_of_groupby_generator;
mod setattr_with_constant; mod setattr_with_constant;
mod star_arg_unpacking_after_keyword_arg; mod star_arg_unpacking_after_keyword_arg;

View file

@ -0,0 +1,137 @@
use ruff_diagnostics::Diagnostic;
use ruff_diagnostics::Violation;
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::statement_visitor;
use ruff_python_ast::statement_visitor::StatementVisitor;
use ruff_python_ast::{self as ast, Expr, Stmt, StmtFunctionDef};
use ruff_text_size::TextRange;
use crate::checkers::ast::Checker;
/// ## What it does
/// Checks for `return {value}` statements in functions that also contain `yield`
/// or `yield from` statements.
///
/// ## Why is this bad?
/// Using `return {value}` in a generator function was syntactically invalid in
/// Python 2. In Python 3 `return {value}` _can_ be used in a generator; however,
/// the combination of `yield` and `return` can lead to confusing behavior, as
/// the `return` statement will cause the generator to raise `StopIteration`
/// with the value provided, rather than returning the value to the caller.
///
/// For example, given:
/// ```python
/// from collections.abc import Iterable
/// from pathlib import Path
///
///
/// def get_file_paths(file_types: Iterable[str] | None = None) -> Iterable[Path]:
/// dir_path = Path(".")
/// if file_types is None:
/// return dir_path.glob("*")
///
/// for file_type in file_types:
/// yield from dir_path.glob(f"*.{file_type}")
/// ```
///
/// Readers might assume that `get_file_paths()` would return an iterable of
/// `Path` objects in the directory; in reality, though, `list(get_file_paths())`
/// evaluates to `[]`, since the `return` statement causes the generator to raise
/// `StopIteration` with the value `dir_path.glob("*")`:
///
/// ```shell
/// >>> list(get_file_paths(file_types=["cfg", "toml"]))
/// [PosixPath('setup.cfg'), PosixPath('pyproject.toml')]
/// >>> list(get_file_paths())
/// []
/// ```
///
/// For intentional uses of `return` in a generator, consider suppressing this
/// diagnostic.
///
/// ## Example
/// ```python
/// from collections.abc import Iterable
/// from pathlib import Path
///
///
/// def get_file_paths(file_types: Iterable[str] | None = None) -> Iterable[Path]:
/// dir_path = Path(".")
/// if file_types is None:
/// return dir_path.glob("*")
///
/// for file_type in file_types:
/// yield from dir_path.glob(f"*.{file_type}")
/// ```
///
/// Use instead:
///
/// ```python
/// from collections.abc import Iterable
/// from pathlib import Path
///
///
/// def get_file_paths(file_types: Iterable[str] | None = None) -> Iterable[Path]:
/// dir_path = Path(".")
/// if file_types is None:
/// yield from dir_path.glob("*")
/// else:
/// for file_type in file_types:
/// yield from dir_path.glob(f"*.{file_type}")
/// ```
#[violation]
pub struct ReturnInGenerator;
impl Violation for ReturnInGenerator {
#[derive_message_formats]
fn message(&self) -> String {
format!("Using `yield` and `return {{value}}` in a generator function can lead to confusing behavior")
}
}
/// B901
pub(crate) fn return_in_generator(checker: &mut Checker, function_def: &StmtFunctionDef) {
if function_def.name.id == "__await__" {
return;
}
let mut visitor = ReturnInGeneratorVisitor::default();
visitor.visit_body(&function_def.body);
if visitor.has_yield {
if let Some(return_) = visitor.return_ {
checker
.diagnostics
.push(Diagnostic::new(ReturnInGenerator, return_));
}
}
}
#[derive(Default)]
struct ReturnInGeneratorVisitor {
return_: Option<TextRange>,
has_yield: bool,
}
impl StatementVisitor<'_> for ReturnInGeneratorVisitor {
fn visit_stmt(&mut self, stmt: &Stmt) {
match stmt {
Stmt::Expr(ast::StmtExpr { value, .. }) => match **value {
Expr::Yield(_) | Expr::YieldFrom(_) => {
self.has_yield = true;
}
_ => {}
},
Stmt::FunctionDef(_) => {
// Do not recurse into nested functions; they're evaluated separately.
}
Stmt::Return(ast::StmtReturn {
value: Some(_),
range,
}) => {
self.return_ = Some(*range);
}
_ => statement_visitor::walk_stmt(self, stmt),
}
}
}

View file

@ -0,0 +1,21 @@
---
source: crates/ruff_linter/src/rules/flake8_bugbear/mod.rs
---
B901.py:9:9: B901 Using `yield` and `return {value}` in a generator function can lead to confusing behavior
|
7 | def broken():
8 | if True:
9 | return [1, 2, 3]
| ^^^^^^^^^^^^^^^^ B901
10 |
11 | yield 3
|
B901.py:36:5: B901 Using `yield` and `return {value}` in a generator function can lead to confusing behavior
|
35 | def broken2():
36 | return [3, 2, 1]
| ^^^^^^^^^^^^^^^^ B901
37 |
38 | yield from not_broken()
|

1
ruff.schema.json generated
View file

@ -2729,6 +2729,7 @@
"B035", "B035",
"B9", "B9",
"B90", "B90",
"B901",
"B904", "B904",
"B905", "B905",
"B909", "B909",