[pylint] Implement empty-comment (PLR2044) (#9174)

## Summary Part of #970. This adds Pylint's [R0244 empty_comment](https://pylint.pycqa.org/en/latest/user_guide/messages/refactor/empty-comment.html) lint as well as an always-safe fix. ## Test Plan The included snapshot verifies the following: - A line containing only a non-empty comment is not changed - A line containing leading whitespace before a non-empty comment is not changed - A line containing only an empty comment has its content deleted - A line containing only leading whitespace before an empty comment has its content deleted - A line containing only leading and trailing whitespace on an empty comment has its content deleted - A line containing trailing whitespace after a non-empty comment is not changed - A line containing only a single newline character (i.e. a blank line) is not changed - A line containing code followed by a non-empty comment is not changed - A line containing code followed by an empty comment has its content deleted after the last non-whitespace character - Lines containing code and no comments are not changed - Empty comment lines within block comments are ignored - Empty comments within triple-quoted sections are ignored ## Comparison to Pylint Running Ruff and Pylint 3.0.3 with Python 3.12.0 against the `empty_comment.py` file added in this PR, we see the following: * Identical behavior: * empty_comment.py:3:0: R2044: Line with empty comment (empty-comment) * empty_comment.py:4:0: R2044: Line with empty comment (empty-comment) * empty_comment.py:5:0: R2044: Line with empty comment (empty-comment) * empty_comment.py:18:0: R2044: Line with empty comment (empty-comment) * Differing behavior: * Pylint doesn't ignore empty comments in block comments commonly used for visual spacing; I decided these were fine in this implementation since many projects use these and likely do not want them removed. * empty_comment.py:28:0: R2044: Line with empty comment (empty-comment) * Pylint detects "empty comments" within the triple-quoted section at the bottom of the file, which is arguably a bug in the Pylint implementation since these are not truly comments. These are ignored by this implementation. * empty_comment.py:37:0: R2044: Line with empty comment (empty-comment) * empty_comment.py:38:0: R2044: Line with empty comment (empty-comment) * empty_comment.py:39:0: R2044: Line with empty comment (empty-comment)
2025-08-03 10:23:11 +00:00 · 2023-12-28 21:53:56 -05:00 · 2023-12-28 21:53:56 -05:00 · 375c175d53
commit 375c175d53
parent 57b6a8cedd
13 changed files with 581 additions and 1 deletions
--- a/crates/ruff_python_trivia/Cargo.toml
+++ b/crates/ruff_python_trivia/Cargo.toml
@ -23,6 +23,7 @@ unicode-ident = { workspace = true }
 insta = { workspace = true }
 ruff_python_ast = { path = "../ruff_python_ast" }
 ruff_python_parser = { path = "../ruff_python_parser" }
+ruff_python_index = { path = "../ruff_python_index" }

 [lints]
 workspace = true
--- a/crates/ruff_python_trivia/src/comment_ranges.rs
+++ b/crates/ruff_python_trivia/src/comment_ranges.rs
@ -2,8 +2,11 @@ use std::fmt::{Debug, Formatter};
 use std::ops::Deref;

 use itertools::Itertools;
+use ruff_source_file::Locator;

-use ruff_text_size::{Ranged, TextRange};
+use ruff_text_size::{Ranged, TextRange, TextSize};
+
+use crate::is_python_whitespace;

 /// Stores the ranges of comments sorted by [`TextRange::start`] in increasing order. No two ranges are overlapping.
 #[derive(Clone, Default)]
@ -45,6 +48,137 @@ impl CommentRanges {
            None => &self.raw[start..],
        }
    }
+
+    /// Given a [`CommentRanges`], determine which comments are grouped together
+    /// in "comment blocks". A "comment block" is a sequence of consecutive
+    /// own-line comments in which the comment hash (`#`) appears in the same
+    /// column in each line, and at least one comment is non-empty.
+    ///
+    /// Returns a vector containing the offset of the leading hash (`#`) for
+    /// each comment in any block comment.
+    ///
+    /// ## Examples
+    /// ```python
+    /// # This is a block comment
+    /// # because it spans multiple lines
+    ///
+    ///     # This is also a block comment
+    ///     # even though it is indented
+    ///
+    /// # this is not a block comment
+    ///
+    /// x = 1  # this is not a block comment because
+    /// y = 2  # the lines do not *only* contain comments
+    ///
+    /// # This is not a block comment because
+    ///     # not all consecutive lines have the
+    /// # first `#` character in the same column
+    ///
+    /// """
+    /// # This is not a block comment because it is
+    /// # contained within a multi-line string/comment
+    /// """
+    /// ```
+    pub fn block_comments(&self, locator: &Locator) -> Vec<TextSize> {
+        let mut block_comments: Vec<TextSize> = Vec::new();
+
+        let mut current_block: Vec<TextSize> = Vec::new();
+        let mut current_block_column: Option<TextSize> = None;
+        let mut current_block_non_empty = false;
+
+        let mut prev_line_end = None;
+
+        for comment_range in &self.raw {
+            let offset = comment_range.start();
+            let line_start = locator.line_start(offset);
+            let line_end = locator.full_line_end(offset);
+            let column = offset - line_start;
+
+            // If this is an end-of-line comment, reset the current block.
+            if !Self::is_own_line(offset, locator) {
+                // Push the current block, and reset.
+                if current_block.len() > 1 && current_block_non_empty {
+                    block_comments.extend(current_block);
+                }
+                current_block = vec![];
+                current_block_column = None;
+                current_block_non_empty = false;
+                prev_line_end = Some(line_end);
+                continue;
+            }
+
+            // If there's a blank line between this comment and the previous
+            // comment, reset the current block.
+            if prev_line_end.is_some_and(|prev_line_end| {
+                locator.contains_line_break(TextRange::new(prev_line_end, line_start))
+            }) {
+                // Push the current block.
+                if current_block.len() > 1 && current_block_non_empty {
+                    block_comments.extend(current_block);
+                }
+
+                // Reset the block state.
+                current_block = vec![offset];
+                current_block_column = Some(column);
+                current_block_non_empty = !Self::is_empty(*comment_range, locator);
+                prev_line_end = Some(line_end);
+                continue;
+            }
+
+            if let Some(current_column) = current_block_column {
+                if column == current_column {
+                    // Add the comment to the current block.
+                    current_block.push(offset);
+                    current_block_non_empty |= !Self::is_empty(*comment_range, locator);
+                    prev_line_end = Some(line_end);
+                } else {
+                    // Push the current block.
+                    if current_block.len() > 1 && current_block_non_empty {
+                        block_comments.extend(current_block);
+                    }
+
+                    // Reset the block state.
+                    current_block = vec![offset];
+                    current_block_column = Some(column);
+                    current_block_non_empty = !Self::is_empty(*comment_range, locator);
+                    prev_line_end = Some(line_end);
+                }
+            } else {
+                // Push the current block.
+                if current_block.len() > 1 && current_block_non_empty {
+                    block_comments.extend(current_block);
+                }
+
+                // Reset the block state.
+                current_block = vec![offset];
+                current_block_column = Some(column);
+                current_block_non_empty = !Self::is_empty(*comment_range, locator);
+                prev_line_end = Some(line_end);
+            }
+        }
+
+        // Push any lingering blocks.
+        if current_block.len() > 1 && current_block_non_empty {
+            block_comments.extend(current_block);
+        }
+
+        block_comments
+    }
+
+    /// Returns `true` if the given range is an empty comment.
+    fn is_empty(range: TextRange, locator: &Locator) -> bool {
+        locator
+            .slice(range)
+            .chars()
+            .skip(1)
+            .all(is_python_whitespace)
+    }
+
+    /// Returns `true` if a comment is an own-line comment (as opposed to an end-of-line comment).
+    fn is_own_line(offset: TextSize, locator: &Locator) -> bool {
+        let range = TextRange::new(locator.line_start(offset), offset);
+        locator.slice(range).chars().all(is_python_whitespace)
+    }
 }

 impl Deref for CommentRanges {
@ -69,3 +203,158 @@ impl<'a> IntoIterator for &'a CommentRanges {
        self.raw.iter()
    }
 }
+
+#[cfg(test)]
+mod tests {
+    use ruff_python_index::Indexer;
+    use ruff_python_parser::lexer::LexResult;
+    use ruff_python_parser::{tokenize, Mode};
+    use ruff_source_file::Locator;
+    use ruff_text_size::TextSize;
+
+    #[test]
+    fn block_comments_two_line_block_at_start() {
+        // arrange
+        let source = "# line 1\n# line 2\n";
+        let tokens: Vec<LexResult> = tokenize(source, Mode::Module);
+        let locator = Locator::new(source);
+        let indexer = Indexer::from_tokens(&tokens, &locator);
+
+        // act
+        let block_comments = indexer.comment_ranges().block_comments(&locator);
+
+        // assert
+        assert_eq!(block_comments, vec![TextSize::new(0), TextSize::new(9)]);
+    }
+
+    #[test]
+    fn block_comments_indented_block() {
+        // arrange
+        let source = "    # line 1\n    # line 2\n";
+        let tokens: Vec<LexResult> = tokenize(source, Mode::Module);
+        let locator = Locator::new(source);
+        let indexer = Indexer::from_tokens(&tokens, &locator);
+
+        // act
+        let block_comments = indexer.comment_ranges().block_comments(&locator);
+
+        // assert
+        assert_eq!(block_comments, vec![TextSize::new(4), TextSize::new(17)]);
+    }
+
+    #[test]
+    fn block_comments_single_line_is_not_a_block() {
+        // arrange
+        let source = "\n";
+        let tokens: Vec<LexResult> = tokenize(source, Mode::Module);
+        let locator = Locator::new(source);
+        let indexer = Indexer::from_tokens(&tokens, &locator);
+
+        // act
+        let block_comments = indexer.comment_ranges().block_comments(&locator);
+
+        // assert
+        assert_eq!(block_comments, Vec::<TextSize>::new());
+    }
+
+    #[test]
+    fn block_comments_lines_with_code_not_a_block() {
+        // arrange
+        let source = "x = 1  # line 1\ny = 2  # line 2\n";
+        let tokens: Vec<LexResult> = tokenize(source, Mode::Module);
+        let locator = Locator::new(source);
+        let indexer = Indexer::from_tokens(&tokens, &locator);
+
+        // act
+        let block_comments = indexer.comment_ranges().block_comments(&locator);
+
+        // assert
+        assert_eq!(block_comments, Vec::<TextSize>::new());
+    }
+
+    #[test]
+    fn block_comments_sequential_lines_not_in_block() {
+        // arrange
+        let source = "    # line 1\n        # line 2\n";
+        let tokens: Vec<LexResult> = tokenize(source, Mode::Module);
+        let locator = Locator::new(source);
+        let indexer = Indexer::from_tokens(&tokens, &locator);
+
+        // act
+        let block_comments = indexer.comment_ranges().block_comments(&locator);
+
+        // assert
+        assert_eq!(block_comments, Vec::<TextSize>::new());
+    }
+
+    #[test]
+    fn block_comments_lines_in_triple_quotes_not_a_block() {
+        // arrange
+        let source = r#"
+        """
+        # line 1
+        # line 2
+        """
+        "#;
+        let tokens: Vec<LexResult> = tokenize(source, Mode::Module);
+        let locator = Locator::new(source);
+        let indexer = Indexer::from_tokens(&tokens, &locator);
+
+        // act
+        let block_comments = indexer.comment_ranges().block_comments(&locator);
+
+        // assert
+        assert_eq!(block_comments, Vec::<TextSize>::new());
+    }
+
+    #[test]
+    fn block_comments_stress_test() {
+        // arrange
+        let source = r#"
+# block comment 1 line 1
+# block comment 2 line 2
+
+# these lines
+    # do not form
+# a block comment
+
+x = 1  # these lines also do not
+y = 2  # do not form a block comment
+
+# these lines do form a block comment
+#
+
+    #
+    # and so do these
+    #
+
+"""
+# these lines are in triple quotes and
+# therefore do not form a block comment
+"""
+        "#;
+        let tokens: Vec<LexResult> = tokenize(source, Mode::Module);
+        let locator = Locator::new(source);
+        let indexer = Indexer::from_tokens(&tokens, &locator);
+
+        // act
+        let block_comments = indexer.comment_ranges().block_comments(&locator);
+
+        // assert
+        assert_eq!(
+            block_comments,
+            vec![
+                // Block #1
+                TextSize::new(1),
+                TextSize::new(26),
+                // Block #2
+                TextSize::new(174),
+                TextSize::new(212),
+                // Block #3
+                TextSize::new(219),
+                TextSize::new(225),
+                TextSize::new(247)
+            ]
+        );
+    }
+}