[pylint] Implement empty-comment (PLR2044) (#9174)

## Summary

Part of #970.

This adds Pylint's [R0244
empty_comment](https://pylint.pycqa.org/en/latest/user_guide/messages/refactor/empty-comment.html)
lint as well as an always-safe fix.

## Test Plan

The included snapshot verifies the following:

- A line containing only a non-empty comment is not changed
- A line containing leading whitespace before a non-empty comment is not
changed
- A line containing only an empty comment has its content deleted
- A line containing only leading whitespace before an empty comment has
its content deleted
- A line containing only leading and trailing whitespace on an empty
comment has its content deleted
- A line containing trailing whitespace after a non-empty comment is not
changed
- A line containing only a single newline character (i.e. a blank line)
is not changed
- A line containing code followed by a non-empty comment is not changed
- A line containing code followed by an empty comment has its content
deleted after the last non-whitespace character
- Lines containing code and no comments are not changed
- Empty comment lines within block comments are ignored
- Empty comments within triple-quoted sections are ignored

## Comparison to Pylint

Running Ruff and Pylint 3.0.3 with Python 3.12.0 against the
`empty_comment.py` file added in this PR, we see the following:

* Identical behavior:
* empty_comment.py:3:0: R2044: Line with empty comment (empty-comment)
* empty_comment.py:4:0: R2044: Line with empty comment (empty-comment)
* empty_comment.py:5:0: R2044: Line with empty comment (empty-comment)
* empty_comment.py:18:0: R2044: Line with empty comment (empty-comment)

* Differing behavior:
* Pylint doesn't ignore empty comments in block comments commonly used
for visual spacing; I decided these were fine in this implementation
since many projects use these and likely do not want them removed.
* empty_comment.py:28:0: R2044: Line with empty comment (empty-comment)
* Pylint detects "empty comments" within the triple-quoted section at
the bottom of the file, which is arguably a bug in the Pylint
implementation since these are not truly comments. These are ignored by
this implementation.
* empty_comment.py:37:0: R2044: Line with empty comment (empty-comment)
* empty_comment.py:38:0: R2044: Line with empty comment (empty-comment)
* empty_comment.py:39:0: R2044: Line with empty comment (empty-comment)
This commit is contained in:
Mike Bernard 2023-12-28 21:53:56 -05:00 committed by GitHub
parent 57b6a8cedd
commit 375c175d53
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 581 additions and 1 deletions

1
Cargo.lock generated
View file

@ -2454,6 +2454,7 @@ dependencies = [
"insta",
"itertools 0.11.0",
"ruff_python_ast",
"ruff_python_index",
"ruff_python_parser",
"ruff_source_file",
"ruff_text_size",

View file

@ -0,0 +1,53 @@
# this line has a non-empty comment and is OK
# this line is also OK, but the three following lines are not
#
#
#
# this non-empty comment has trailing whitespace and is OK
# Many codebases use multiple `#` characters on a single line to visually
# separate sections of code, so we don't consider these empty comments.
##########################################
# trailing `#` characters are not considered empty comments ###
def foo(): # this comment is OK, the one below is not
pass #
# the lines below have no comments and are OK
def bar():
pass
# "Empty comments" are common in block comments
# to add legibility. For example:
#
# The above line's "empty comment" is likely
# intentional and is considered OK.
# lines in multi-line strings/comments whose last non-whitespace character is a `#`
# do not count as empty comments
"""
The following lines are all fine:
#
#
#
"""
# These should be removed, despite being an empty "block comment".
#
#
# These should also be removed.
x = 1
#
##
#

View file

@ -1,4 +1,5 @@
//! Lint rules based on checking physical lines.
use ruff_diagnostics::Diagnostic;
use ruff_python_codegen::Stylist;
use ruff_python_index::Indexer;

View file

@ -40,6 +40,10 @@ pub(crate) fn check_tokens(
pygrep_hooks::rules::blanket_type_ignore(&mut diagnostics, indexer, locator);
}
if settings.rules.enabled(Rule::EmptyComment) {
pylint::rules::empty_comments(&mut diagnostics, indexer, locator);
}
if settings.rules.any_enabled(&[
Rule::AmbiguousUnicodeCharacterString,
Rule::AmbiguousUnicodeCharacterDocstring,

View file

@ -265,6 +265,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {
(Pylint, "R1733") => (RuleGroup::Preview, rules::pylint::rules::UnnecessaryDictIndexLookup),
(Pylint, "R1736") => (RuleGroup::Preview, rules::pylint::rules::UnnecessaryListIndexLookup),
(Pylint, "R2004") => (RuleGroup::Stable, rules::pylint::rules::MagicValueComparison),
(Pylint, "R2044") => (RuleGroup::Preview, rules::pylint::rules::EmptyComment),
(Pylint, "R5501") => (RuleGroup::Stable, rules::pylint::rules::CollapsibleElseIf),
(Pylint, "R6201") => (RuleGroup::Preview, rules::pylint::rules::LiteralMembership),
#[allow(deprecated)]

View file

@ -265,6 +265,7 @@ impl Rule {
| Rule::BlanketNOQA
| Rule::BlanketTypeIgnore
| Rule::CommentedOutCode
| Rule::EmptyComment
| Rule::ExtraneousParentheses
| Rule::InvalidCharacterBackspace
| Rule::InvalidCharacterEsc

View file

@ -40,6 +40,7 @@ mod tests {
)]
#[test_case(Rule::ComparisonWithItself, Path::new("comparison_with_itself.py"))]
#[test_case(Rule::EqWithoutHash, Path::new("eq_without_hash.py"))]
#[test_case(Rule::EmptyComment, Path::new("empty_comment.py"))]
#[test_case(Rule::ManualFromImport, Path::new("import_aliasing.py"))]
#[test_case(Rule::SingleStringSlots, Path::new("single_string_slots.py"))]
#[test_case(Rule::SysExitAlias, Path::new("sys_exit_alias_0.py"))]

View file

@ -0,0 +1,106 @@
use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_index::Indexer;
use ruff_python_trivia::is_python_whitespace;
use ruff_source_file::Locator;
use ruff_text_size::{TextRange, TextSize};
use rustc_hash::FxHashSet;
/// ## What it does
/// Checks for a # symbol appearing on a line not followed by an actual comment.
///
/// ## Why is this bad?
/// Empty comments don't provide any clarity to the code, and just add clutter.
/// Either add a comment or delete the empty comment.
///
/// ## Example
/// ```python
/// class Foo: #
/// pass
/// ```
///
/// Use instead:
/// ```python
/// class Foo:
/// pass
/// ```
///
/// ## References
/// - [Pylint documentation](https://pylint.pycqa.org/en/latest/user_guide/messages/refactor/empty-comment.html)
#[violation]
pub struct EmptyComment;
impl Violation for EmptyComment {
const FIX_AVAILABILITY: FixAvailability = FixAvailability::Always;
#[derive_message_formats]
fn message(&self) -> String {
format!("Line with empty comment")
}
fn fix_title(&self) -> Option<String> {
Some(format!("Delete the empty comment"))
}
}
/// PLR2044
pub(crate) fn empty_comments(
diagnostics: &mut Vec<Diagnostic>,
indexer: &Indexer,
locator: &Locator,
) {
let block_comments = FxHashSet::from_iter(indexer.comment_ranges().block_comments(locator));
for range in indexer.comment_ranges() {
// Ignore comments that are part of multi-line "comment blocks".
if block_comments.contains(&range.start()) {
continue;
}
// If the line contains an empty comment, add a diagnostic.
if let Some(diagnostic) = empty_comment(*range, locator) {
diagnostics.push(diagnostic);
}
}
}
/// Return a [`Diagnostic`] if the comment at the given [`TextRange`] is empty.
fn empty_comment(range: TextRange, locator: &Locator) -> Option<Diagnostic> {
// Check: is the comment empty?
if !locator
.slice(range)
.chars()
.skip(1)
.all(is_python_whitespace)
{
return None;
}
// Find the location of the `#`.
let first_hash_col = range.start();
// Find the start of the line.
let line = locator.line_range(first_hash_col);
// Find the last character in the line that precedes the comment, if any.
let deletion_start_col = locator
.slice(TextRange::new(line.start(), first_hash_col))
.char_indices()
.rev()
.find(|&(_, c)| !is_python_whitespace(c) && c != '#')
.map(|(last_non_whitespace_non_comment_col, _)| {
// SAFETY: (last_non_whitespace_non_comment_col + 1) <= first_hash_col
TextSize::try_from(last_non_whitespace_non_comment_col).unwrap() + TextSize::new(1)
});
Some(
Diagnostic::new(EmptyComment, TextRange::new(first_hash_col, line.end())).with_fix(
Fix::safe_edit(if let Some(deletion_start_col) = deletion_start_col {
Edit::deletion(line.start() + deletion_start_col, line.end())
} else {
Edit::range_deletion(locator.full_line_range(first_hash_col))
}),
),
)
}

View file

@ -14,6 +14,7 @@ pub(crate) use comparison_of_constant::*;
pub(crate) use comparison_with_itself::*;
pub(crate) use continue_in_finally::*;
pub(crate) use duplicate_bases::*;
pub(crate) use empty_comment::*;
pub(crate) use eq_without_hash::*;
pub(crate) use global_at_module_level::*;
pub(crate) use global_statement::*;
@ -92,6 +93,7 @@ mod comparison_of_constant;
mod comparison_with_itself;
mod continue_in_finally;
mod duplicate_bases;
mod empty_comment;
mod eq_without_hash;
mod global_at_module_level;
mod global_statement;

View file

@ -0,0 +1,118 @@
---
source: crates/ruff_linter/src/rules/pylint/mod.rs
---
empty_comment.py:3:1: PLR2044 [*] Line with empty comment
|
1 | # this line has a non-empty comment and is OK
2 | # this line is also OK, but the three following lines are not
3 | #
| ^ PLR2044
4 | #
5 | #
|
= help: Delete the empty comment
Safe fix
1 1 | # this line has a non-empty comment and is OK
2 2 | # this line is also OK, but the three following lines are not
3 |-#
4 3 | #
5 4 | #
6 5 |
empty_comment.py:4:5: PLR2044 [*] Line with empty comment
|
2 | # this line is also OK, but the three following lines are not
3 | #
4 | #
| ^ PLR2044
5 | #
|
= help: Delete the empty comment
Safe fix
1 1 | # this line has a non-empty comment and is OK
2 2 | # this line is also OK, but the three following lines are not
3 3 | #
4 |- #
5 4 | #
6 5 |
7 6 | # this non-empty comment has trailing whitespace and is OK
empty_comment.py:5:9: PLR2044 [*] Line with empty comment
|
3 | #
4 | #
5 | #
| ^ PLR2044
6 |
7 | # this non-empty comment has trailing whitespace and is OK
|
= help: Delete the empty comment
Safe fix
2 2 | # this line is also OK, but the three following lines are not
3 3 | #
4 4 | #
5 |- #
6 5 |
7 6 | # this non-empty comment has trailing whitespace and is OK
8 7 |
empty_comment.py:18:11: PLR2044 [*] Line with empty comment
|
17 | def foo(): # this comment is OK, the one below is not
18 | pass #
| ^ PLR2044
|
= help: Delete the empty comment
Safe fix
15 15 |
16 16 |
17 17 | def foo(): # this comment is OK, the one below is not
18 |- pass #
18 |+ pass
19 19 |
20 20 |
21 21 | # the lines below have no comments and are OK
empty_comment.py:44:1: PLR2044 [*] Line with empty comment
|
42 | # These should be removed, despite being an empty "block comment".
43 |
44 | #
| ^ PLR2044
45 | #
|
= help: Delete the empty comment
Safe fix
42 42 | # These should be removed, despite being an empty "block comment".
43 43 |
44 44 | #
45 |-#
46 45 |
47 46 | # These should also be removed.
48 47 |
empty_comment.py:45:1: PLR2044 [*] Line with empty comment
|
44 | #
45 | #
| ^ PLR2044
46 |
47 | # These should also be removed.
|
= help: Delete the empty comment
Safe fix
42 42 | # These should be removed, despite being an empty "block comment".
43 43 |
44 44 | #
45 |-#
46 45 |
47 46 | # These should also be removed.
48 47 |

View file

@ -23,6 +23,7 @@ unicode-ident = { workspace = true }
insta = { workspace = true }
ruff_python_ast = { path = "../ruff_python_ast" }
ruff_python_parser = { path = "../ruff_python_parser" }
ruff_python_index = { path = "../ruff_python_index" }
[lints]
workspace = true

View file

@ -2,8 +2,11 @@ use std::fmt::{Debug, Formatter};
use std::ops::Deref;
use itertools::Itertools;
use ruff_source_file::Locator;
use ruff_text_size::{Ranged, TextRange};
use ruff_text_size::{Ranged, TextRange, TextSize};
use crate::is_python_whitespace;
/// Stores the ranges of comments sorted by [`TextRange::start`] in increasing order. No two ranges are overlapping.
#[derive(Clone, Default)]
@ -45,6 +48,137 @@ impl CommentRanges {
None => &self.raw[start..],
}
}
/// Given a [`CommentRanges`], determine which comments are grouped together
/// in "comment blocks". A "comment block" is a sequence of consecutive
/// own-line comments in which the comment hash (`#`) appears in the same
/// column in each line, and at least one comment is non-empty.
///
/// Returns a vector containing the offset of the leading hash (`#`) for
/// each comment in any block comment.
///
/// ## Examples
/// ```python
/// # This is a block comment
/// # because it spans multiple lines
///
/// # This is also a block comment
/// # even though it is indented
///
/// # this is not a block comment
///
/// x = 1 # this is not a block comment because
/// y = 2 # the lines do not *only* contain comments
///
/// # This is not a block comment because
/// # not all consecutive lines have the
/// # first `#` character in the same column
///
/// """
/// # This is not a block comment because it is
/// # contained within a multi-line string/comment
/// """
/// ```
pub fn block_comments(&self, locator: &Locator) -> Vec<TextSize> {
let mut block_comments: Vec<TextSize> = Vec::new();
let mut current_block: Vec<TextSize> = Vec::new();
let mut current_block_column: Option<TextSize> = None;
let mut current_block_non_empty = false;
let mut prev_line_end = None;
for comment_range in &self.raw {
let offset = comment_range.start();
let line_start = locator.line_start(offset);
let line_end = locator.full_line_end(offset);
let column = offset - line_start;
// If this is an end-of-line comment, reset the current block.
if !Self::is_own_line(offset, locator) {
// Push the current block, and reset.
if current_block.len() > 1 && current_block_non_empty {
block_comments.extend(current_block);
}
current_block = vec![];
current_block_column = None;
current_block_non_empty = false;
prev_line_end = Some(line_end);
continue;
}
// If there's a blank line between this comment and the previous
// comment, reset the current block.
if prev_line_end.is_some_and(|prev_line_end| {
locator.contains_line_break(TextRange::new(prev_line_end, line_start))
}) {
// Push the current block.
if current_block.len() > 1 && current_block_non_empty {
block_comments.extend(current_block);
}
// Reset the block state.
current_block = vec![offset];
current_block_column = Some(column);
current_block_non_empty = !Self::is_empty(*comment_range, locator);
prev_line_end = Some(line_end);
continue;
}
if let Some(current_column) = current_block_column {
if column == current_column {
// Add the comment to the current block.
current_block.push(offset);
current_block_non_empty |= !Self::is_empty(*comment_range, locator);
prev_line_end = Some(line_end);
} else {
// Push the current block.
if current_block.len() > 1 && current_block_non_empty {
block_comments.extend(current_block);
}
// Reset the block state.
current_block = vec![offset];
current_block_column = Some(column);
current_block_non_empty = !Self::is_empty(*comment_range, locator);
prev_line_end = Some(line_end);
}
} else {
// Push the current block.
if current_block.len() > 1 && current_block_non_empty {
block_comments.extend(current_block);
}
// Reset the block state.
current_block = vec![offset];
current_block_column = Some(column);
current_block_non_empty = !Self::is_empty(*comment_range, locator);
prev_line_end = Some(line_end);
}
}
// Push any lingering blocks.
if current_block.len() > 1 && current_block_non_empty {
block_comments.extend(current_block);
}
block_comments
}
/// Returns `true` if the given range is an empty comment.
fn is_empty(range: TextRange, locator: &Locator) -> bool {
locator
.slice(range)
.chars()
.skip(1)
.all(is_python_whitespace)
}
/// Returns `true` if a comment is an own-line comment (as opposed to an end-of-line comment).
fn is_own_line(offset: TextSize, locator: &Locator) -> bool {
let range = TextRange::new(locator.line_start(offset), offset);
locator.slice(range).chars().all(is_python_whitespace)
}
}
impl Deref for CommentRanges {
@ -69,3 +203,158 @@ impl<'a> IntoIterator for &'a CommentRanges {
self.raw.iter()
}
}
#[cfg(test)]
mod tests {
use ruff_python_index::Indexer;
use ruff_python_parser::lexer::LexResult;
use ruff_python_parser::{tokenize, Mode};
use ruff_source_file::Locator;
use ruff_text_size::TextSize;
#[test]
fn block_comments_two_line_block_at_start() {
// arrange
let source = "# line 1\n# line 2\n";
let tokens: Vec<LexResult> = tokenize(source, Mode::Module);
let locator = Locator::new(source);
let indexer = Indexer::from_tokens(&tokens, &locator);
// act
let block_comments = indexer.comment_ranges().block_comments(&locator);
// assert
assert_eq!(block_comments, vec![TextSize::new(0), TextSize::new(9)]);
}
#[test]
fn block_comments_indented_block() {
// arrange
let source = " # line 1\n # line 2\n";
let tokens: Vec<LexResult> = tokenize(source, Mode::Module);
let locator = Locator::new(source);
let indexer = Indexer::from_tokens(&tokens, &locator);
// act
let block_comments = indexer.comment_ranges().block_comments(&locator);
// assert
assert_eq!(block_comments, vec![TextSize::new(4), TextSize::new(17)]);
}
#[test]
fn block_comments_single_line_is_not_a_block() {
// arrange
let source = "\n";
let tokens: Vec<LexResult> = tokenize(source, Mode::Module);
let locator = Locator::new(source);
let indexer = Indexer::from_tokens(&tokens, &locator);
// act
let block_comments = indexer.comment_ranges().block_comments(&locator);
// assert
assert_eq!(block_comments, Vec::<TextSize>::new());
}
#[test]
fn block_comments_lines_with_code_not_a_block() {
// arrange
let source = "x = 1 # line 1\ny = 2 # line 2\n";
let tokens: Vec<LexResult> = tokenize(source, Mode::Module);
let locator = Locator::new(source);
let indexer = Indexer::from_tokens(&tokens, &locator);
// act
let block_comments = indexer.comment_ranges().block_comments(&locator);
// assert
assert_eq!(block_comments, Vec::<TextSize>::new());
}
#[test]
fn block_comments_sequential_lines_not_in_block() {
// arrange
let source = " # line 1\n # line 2\n";
let tokens: Vec<LexResult> = tokenize(source, Mode::Module);
let locator = Locator::new(source);
let indexer = Indexer::from_tokens(&tokens, &locator);
// act
let block_comments = indexer.comment_ranges().block_comments(&locator);
// assert
assert_eq!(block_comments, Vec::<TextSize>::new());
}
#[test]
fn block_comments_lines_in_triple_quotes_not_a_block() {
// arrange
let source = r#"
"""
# line 1
# line 2
"""
"#;
let tokens: Vec<LexResult> = tokenize(source, Mode::Module);
let locator = Locator::new(source);
let indexer = Indexer::from_tokens(&tokens, &locator);
// act
let block_comments = indexer.comment_ranges().block_comments(&locator);
// assert
assert_eq!(block_comments, Vec::<TextSize>::new());
}
#[test]
fn block_comments_stress_test() {
// arrange
let source = r#"
# block comment 1 line 1
# block comment 2 line 2
# these lines
# do not form
# a block comment
x = 1 # these lines also do not
y = 2 # do not form a block comment
# these lines do form a block comment
#
#
# and so do these
#
"""
# these lines are in triple quotes and
# therefore do not form a block comment
"""
"#;
let tokens: Vec<LexResult> = tokenize(source, Mode::Module);
let locator = Locator::new(source);
let indexer = Indexer::from_tokens(&tokens, &locator);
// act
let block_comments = indexer.comment_ranges().block_comments(&locator);
// assert
assert_eq!(
block_comments,
vec![
// Block #1
TextSize::new(1),
TextSize::new(26),
// Block #2
TextSize::new(174),
TextSize::new(212),
// Block #3
TextSize::new(219),
TextSize::new(225),
TextSize::new(247)
]
);
}
}

2
ruff.schema.json generated
View file

@ -3201,6 +3201,8 @@
"PLR20",
"PLR200",
"PLR2004",
"PLR204",
"PLR2044",
"PLR5",
"PLR55",
"PLR550",