Avoid B015,B018 for last expression in a cell (#8815)

## Summary This PR updates `B015` and `B018` to ignore last top-level expressions in each cell of a Jupyter Notebook. Part of #8669 ## Test Plan Add test cases for both rules and update the snapshots.
2025-10-02 22:54:42 +00:00 · 2023-11-22 09:33:23 -06:00 · 2023-11-22 09:33:23 -06:00 · 5b726f70f4
commit 5b726f70f4
parent 727e389cac
11 changed files with 454 additions and 1 deletions
--- a/crates/ruff_linter/resources/test/fixtures/flake8_bugbear/B015.ipynb
+++ b/crates/ruff_linter/resources/test/fixtures/flake8_bugbear/B015.ipynb
@ -0,0 +1,149 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "33faf7ad-a3fd-4ac4-a0c3-52e507ed49df",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "x = 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "481fb4bf-c1b9-47da-927f-3cfdfe4b49ec",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Simple case\n",
+    "x == 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "2f0c65a5-0a0e-4080-afce-5a8ed0d706df",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Only skip the last expression\n",
+    "x == 1  # B018\n",
+    "x == 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "5a3fd75d-26d9-44f7-b013-1684aabfd0ae",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Nested expressions isn't relevant\n",
+    "if True:\n",
+    "    x == 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "e00e1afa-b76c-4774-be2a-7223641579e4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Semicolons shouldn't affect the output\n",
+    "x == 1;"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "05eab5b9-e2ba-4954-8ef3-b035a79573fe",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Semicolons with multiple expressions\n",
+    "x == 1; x == 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "9cbbddc5-83fc-4fdb-81ab-53a3912ae898",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Comments, newlines and whitespace\n",
+    "x == 1 # comment\n",
+    "\n",
+    "# another comment"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python (ruff-playground)",
+   "language": "python",
+   "name": "ruff-playground"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/crates/ruff_linter/resources/test/fixtures/flake8_bugbear/B018.ipynb
+++ b/crates/ruff_linter/resources/test/fixtures/flake8_bugbear/B018.ipynb
@ -0,0 +1,149 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "33faf7ad-a3fd-4ac4-a0c3-52e507ed49df",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "x = 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "481fb4bf-c1b9-47da-927f-3cfdfe4b49ec",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Simple case\n",
+    "x"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "2f0c65a5-0a0e-4080-afce-5a8ed0d706df",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Only skip the last expression\n",
+    "x  # B018\n",
+    "x"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "5a3fd75d-26d9-44f7-b013-1684aabfd0ae",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Nested expressions isn't relevant\n",
+    "if True:\n",
+    "    x"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "e00e1afa-b76c-4774-be2a-7223641579e4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Semicolons shouldn't affect the output\n",
+    "x;"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "05eab5b9-e2ba-4954-8ef3-b035a79573fe",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Semicolons with multiple expressions\n",
+    "x; x"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "9cbbddc5-83fc-4fdb-81ab-53a3912ae898",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Comments, newlines and whitespace\n",
+    "x  # comment\n",
+    "\n",
+    "# another comment"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python (ruff-playground)",
+   "language": "python",
+   "name": "ruff-playground"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/crates/ruff_linter/src/checkers/ast/mod.rs
+++ b/crates/ruff_linter/src/checkers/ast/mod.rs
@ -37,6 +37,7 @@ use ruff_python_ast::{
 use ruff_text_size::{Ranged, TextRange, TextSize};

 use ruff_diagnostics::{Diagnostic, IsolationLevel};
+use ruff_notebook::CellOffsets;
 use ruff_python_ast::all::{extract_all_names, DunderAllFlags};
 use ruff_python_ast::helpers::{
    collect_import_from_member, extract_handled_exceptions, to_module_path,
@ -78,6 +79,8 @@ pub(crate) struct Checker<'a> {
    module_path: Option<&'a [String]>,
    /// The [`PySourceType`] of the current file.
    pub(crate) source_type: PySourceType,
+    /// The [`CellOffsets`] for the current file, if it's a Jupyter notebook.
+    cell_offsets: Option<&'a CellOffsets>,
    /// The [`flags::Noqa`] for the current analysis (i.e., whether to respect suppression
    /// comments).
    noqa: flags::Noqa,
@ -120,6 +123,7 @@ impl<'a> Checker<'a> {
        indexer: &'a Indexer,
        importer: Importer<'a>,
        source_type: PySourceType,
+        cell_offsets: Option<&'a CellOffsets>,
    ) -> Checker<'a> {
        Checker {
            settings,
@ -137,6 +141,7 @@ impl<'a> Checker<'a> {
            deferred: Deferred::default(),
            diagnostics: Vec::default(),
            flake8_bugbear_seen: Vec::default(),
+            cell_offsets,
        }
    }
 }
@ -225,6 +230,11 @@ impl<'a> Checker<'a> {
        self.package
    }

+    /// The [`CellOffsets`] for the current file, if it's a Jupyter notebook.
+    pub(crate) const fn cell_offsets(&self) -> Option<&'a CellOffsets> {
+        self.cell_offsets
+    }
+
    /// Returns whether the given rule should be checked.
    #[inline]
    pub(crate) const fn enabled(&self, rule: Rule) -> bool {
@ -1942,6 +1952,7 @@ pub(crate) fn check_ast(
    path: &Path,
    package: Option<&Path>,
    source_type: PySourceType,
+    cell_offsets: Option<&CellOffsets>,
 ) -> Vec<Diagnostic> {
    let module_path = package.and_then(|package| to_module_path(package, path));
    let module = Module {
@ -1970,6 +1981,7 @@ pub(crate) fn check_ast(
        indexer,
        Importer::new(python_ast, locator, stylist),
        source_type,
+        cell_offsets,
    );
    checker.bind_builtins();

--- a/crates/ruff_linter/src/linter.rs
+++ b/crates/ruff_linter/src/linter.rs
@ -163,6 +163,7 @@ pub fn check_path(
                        path,
                        package,
                        source_type,
+                        cell_offsets,
                    ));
                }
                if use_imports {
--- a/crates/ruff_linter/src/rules/flake8_bugbear/helpers.rs
+++ b/crates/ruff_linter/src/rules/flake8_bugbear/helpers.rs
@ -0,0 +1,28 @@
+use ruff_notebook::CellOffsets;
+use ruff_python_semantic::SemanticModel;
+use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer};
+use ruff_source_file::Locator;
+use ruff_text_size::{Ranged, TextRange};
+
+/// Return `true` if the statement containing the current expression is the last
+/// top-level expression in the cell. This assumes that the source is a Jupyter
+/// Notebook.
+pub(super) fn at_last_top_level_expression_in_cell(
+    semantic: &SemanticModel,
+    locator: &Locator,
+    cell_offsets: Option<&CellOffsets>,
+) -> bool {
+    if !semantic.at_top_level() {
+        return false;
+    }
+    let current_statement_end = semantic.current_statement().end();
+    cell_offsets
+        .and_then(|cell_offsets| cell_offsets.containing_range(current_statement_end))
+        .is_some_and(|cell_range| {
+            SimpleTokenizer::new(
+                locator.contents(),
+                TextRange::new(current_statement_end, cell_range.end()),
+            )
+            .all(|token| token.kind() == SimpleTokenKind::Semi || token.kind().is_trivia())
+        })
+}
--- a/crates/ruff_linter/src/rules/flake8_bugbear/mod.rs
+++ b/crates/ruff_linter/src/rules/flake8_bugbear/mod.rs
@ -1,4 +1,5 @@
 //! Rules from [flake8-bugbear](https://pypi.org/project/flake8-bugbear/).
+pub(crate) mod helpers;
 pub(crate) mod rules;
 pub mod settings;

@ -54,8 +55,10 @@ mod tests {
    #[test_case(Rule::UnreliableCallableCheck, Path::new("B004.py"))]
    #[test_case(Rule::UnusedLoopControlVariable, Path::new("B007.py"))]
    #[test_case(Rule::UselessComparison, Path::new("B015.py"))]
+    #[test_case(Rule::UselessComparison, Path::new("B015.ipynb"))]
    #[test_case(Rule::UselessContextlibSuppress, Path::new("B022.py"))]
    #[test_case(Rule::UselessExpression, Path::new("B018.py"))]
+    #[test_case(Rule::UselessExpression, Path::new("B018.ipynb"))]
    fn rules(rule_code: Rule, path: &Path) -> Result<()> {
        let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());
        let diagnostics = test_path(
--- a/crates/ruff_linter/src/rules/flake8_bugbear/rules/useless_comparison.rs
+++ b/crates/ruff_linter/src/rules/flake8_bugbear/rules/useless_comparison.rs
@ -5,6 +5,8 @@ use ruff_text_size::Ranged;

 use crate::checkers::ast::Checker;

+use super::super::helpers::at_last_top_level_expression_in_cell;
+
 /// ## What it does
 /// Checks for useless comparisons.
 ///
@ -41,6 +43,19 @@ impl Violation for UselessComparison {
 /// B015
 pub(crate) fn useless_comparison(checker: &mut Checker, expr: &Expr) {
    if expr.is_compare_expr() {
+        // For Jupyter Notebooks, ignore the last top-level expression for each cell.
+        // This is because it's common to have a cell that ends with an expression
+        // to display it's value.
+        if checker.source_type.is_ipynb()
+            && at_last_top_level_expression_in_cell(
+                checker.semantic(),
+                checker.locator(),
+                checker.cell_offsets(),
+            )
+        {
+            return;
+        }
+
        checker
            .diagnostics
            .push(Diagnostic::new(UselessComparison, expr.range()));
--- a/crates/ruff_linter/src/rules/flake8_bugbear/rules/useless_expression.rs
+++ b/crates/ruff_linter/src/rules/flake8_bugbear/rules/useless_expression.rs
@ -6,6 +6,8 @@ use ruff_text_size::Ranged;

 use crate::checkers::ast::Checker;

+use super::super::helpers::at_last_top_level_expression_in_cell;
+
 /// ## What it does
 /// Checks for useless expressions.
 ///
@ -59,6 +61,19 @@ pub(crate) fn useless_expression(checker: &mut Checker, value: &Expr) {
        return;
    }

+    // For Jupyter Notebooks, ignore the last top-level expression for each cell.
+    // This is because it's common to have a cell that ends with an expression
+    // to display it's value.
+    if checker.source_type.is_ipynb()
+        && at_last_top_level_expression_in_cell(
+            checker.semantic(),
+            checker.locator(),
+            checker.cell_offsets(),
+        )
+    {
+        return;
+    }
+
    // Ignore statements that have side effects.
    if contains_effect(value, |id| checker.semantic().is_builtin(id)) {
        // Flag attributes as useless expressions, even if they're attached to calls or other
--- a/crates/ruff_linter/src/rules/flake8_bugbear/snapshots/ruff_linterrulesflake8_bugbeartestsB015_B015.ipynb.snap
+++ b/crates/ruff_linter/src/rules/flake8_bugbear/snapshots/ruff_linterrulesflake8_bugbeartestsB015_B015.ipynb.snap
@ -0,0 +1,34 @@
+---
+source: crates/ruff_linter/src/rules/flake8_bugbear/mod.rs
+---
+B015.ipynb:5:1: B015 Pointless comparison. Did you mean to assign a value? Otherwise, prepend `assert` or remove it.
+  |
+3 | x == 1
+4 | # Only skip the last expression
+5 | x == 1  # B018
+  | ^^^^^^ B015
+6 | x == 1
+7 | # Nested expressions isn't relevant
+  |
+
+B015.ipynb:9:5: B015 Pointless comparison. Did you mean to assign a value? Otherwise, prepend `assert` or remove it.
+   |
+ 7 | # Nested expressions isn't relevant
+ 8 | if True:
+ 9 |     x == 1
+   |     ^^^^^^ B015
+10 | # Semicolons shouldn't affect the output
+11 | x == 1;
+   |
+
+B015.ipynb:13:1: B015 Pointless comparison. Did you mean to assign a value? Otherwise, prepend `assert` or remove it.
+   |
+11 | x == 1;
+12 | # Semicolons with multiple expressions
+13 | x == 1; x == 1
+   | ^^^^^^ B015
+14 | # Comments, newlines and whitespace
+15 | x == 1 # comment
+   |
+
+
--- a/crates/ruff_linter/src/rules/flake8_bugbear/snapshots/ruff_linterrulesflake8_bugbeartestsB018_B018.ipynb.snap
+++ b/crates/ruff_linter/src/rules/flake8_bugbear/snapshots/ruff_linterrulesflake8_bugbeartestsB018_B018.ipynb.snap
@ -0,0 +1,34 @@
+---
+source: crates/ruff_linter/src/rules/flake8_bugbear/mod.rs
+---
+B018.ipynb:5:1: B018 Found useless expression. Either assign it to a variable or remove it.
+  |
+3 | x
+4 | # Only skip the last expression
+5 | x  # B018
+  | ^ B018
+6 | x
+7 | # Nested expressions isn't relevant
+  |
+
+B018.ipynb:9:5: B018 Found useless expression. Either assign it to a variable or remove it.
+   |
+ 7 | # Nested expressions isn't relevant
+ 8 | if True:
+ 9 |     x
+   |     ^ B018
+10 | # Semicolons shouldn't affect the output
+11 | x;
+   |
+
+B018.ipynb:13:1: B018 Found useless expression. Either assign it to a variable or remove it.
+   |
+11 | x;
+12 | # Semicolons with multiple expressions
+13 | x; x
+   | ^ B018
+14 | # Comments, newlines and whitespace
+15 | x  # comment
+   |
+
+
--- a/crates/ruff_notebook/src/cell.rs
+++ b/crates/ruff_notebook/src/cell.rs
@ -1,7 +1,9 @@
 use std::fmt;
 use std::ops::{Deref, DerefMut};

-use ruff_text_size::TextSize;
+use itertools::Itertools;
+
+use ruff_text_size::{TextRange, TextSize};

 use crate::schema::{Cell, SourceValue};

@ -187,6 +189,17 @@ impl CellOffsets {
    pub(crate) fn push(&mut self, offset: TextSize) {
        self.0.push(offset);
    }
+
+    /// Returns the range of the cell containing the given offset, if any.
+    pub fn containing_range(&self, offset: TextSize) -> Option<TextRange> {
+        self.iter().tuple_windows().find_map(|(start, end)| {
+            if *start <= offset && offset < *end {
+                Some(TextRange::new(*start, *end))
+            } else {
+                None
+            }
+        })
+    }
 }

 impl Deref for CellOffsets {