mirror of
https://github.com/astral-sh/ruff.git
synced 2025-07-24 13:33:50 +00:00
Remove lexer-based comment range detection (#5785)
## Summary I'm doing some unrelated profiling, and I noticed that this method is actually measurable on the CPython benchmark -- it's > 1% of execution time. We don't need to lex here, we already know the ranges of all comments, so we can just do a simple binary search for overlap, which brings the method down to 0%. ## Test Plan `cargo test`
This commit is contained in:
parent
f2e995f78d
commit
4782675bf9
10 changed files with 44 additions and 44 deletions
|
@ -11,7 +11,7 @@ use rustpython_parser::ast::{self, BoolOp, ExceptHandler, Expr, Keyword, Ranged,
|
|||
|
||||
use ruff_diagnostics::{AutofixKind, Diagnostic, Edit, Fix, Violation};
|
||||
use ruff_macros::{derive_message_formats, violation};
|
||||
use ruff_python_ast::helpers::{has_comments_in, Truthiness};
|
||||
use ruff_python_ast::helpers::Truthiness;
|
||||
use ruff_python_ast::source_code::{Locator, Stylist};
|
||||
use ruff_python_ast::visitor::Visitor;
|
||||
use ruff_python_ast::{visitor, whitespace};
|
||||
|
@ -197,7 +197,7 @@ pub(crate) fn unittest_assertion(
|
|||
if checker.semantic().stmt().is_expr_stmt()
|
||||
&& checker.semantic().expr_parent().is_none()
|
||||
&& !checker.semantic().scope().kind.is_lambda()
|
||||
&& !has_comments_in(expr.range(), checker.locator)
|
||||
&& !checker.indexer.comment_ranges().intersects(expr.range())
|
||||
{
|
||||
if let Ok(stmt) = unittest_assert.generate_assert(args, keywords) {
|
||||
diagnostic.set_fix(Fix::suggested(Edit::range_replacement(
|
||||
|
@ -483,7 +483,7 @@ pub(crate) fn composite_condition(
|
|||
if checker.patch(diagnostic.kind.rule()) {
|
||||
if matches!(composite, CompositionKind::Simple)
|
||||
&& msg.is_none()
|
||||
&& !has_comments_in(stmt.range(), checker.locator)
|
||||
&& !checker.indexer.comment_ranges().intersects(stmt.range())
|
||||
{
|
||||
#[allow(deprecated)]
|
||||
diagnostic.try_set_fix_from_edit(|| {
|
||||
|
|
|
@ -505,7 +505,7 @@ pub(crate) fn compare_with_tuple(checker: &mut Checker, expr: &Expr) {
|
|||
}
|
||||
|
||||
// Avoid removing comments.
|
||||
if has_comments(expr, checker.locator) {
|
||||
if has_comments(expr, checker.locator, checker.indexer) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
|
@ -6,9 +6,7 @@ use rustpython_parser::ast::{self, CmpOp, Constant, Expr, ExprContext, Identifie
|
|||
use ruff_diagnostics::{AutofixKind, Diagnostic, Edit, Fix, Violation};
|
||||
use ruff_macros::{derive_message_formats, violation};
|
||||
use ruff_python_ast::comparable::{ComparableConstant, ComparableExpr, ComparableStmt};
|
||||
use ruff_python_ast::helpers::{
|
||||
any_over_expr, contains_effect, first_colon_range, has_comments, has_comments_in,
|
||||
};
|
||||
use ruff_python_ast::helpers::{any_over_expr, contains_effect, first_colon_range, has_comments};
|
||||
use ruff_python_semantic::SemanticModel;
|
||||
use ruff_python_whitespace::UniversalNewlines;
|
||||
|
||||
|
@ -378,10 +376,11 @@ pub(crate) fn nested_if_statements(
|
|||
// The fixer preserves comments in the nested body, but removes comments between
|
||||
// the outer and inner if statements.
|
||||
let nested_if = &body[0];
|
||||
if !has_comments_in(
|
||||
TextRange::new(stmt.start(), nested_if.start()),
|
||||
checker.locator,
|
||||
) {
|
||||
if !checker
|
||||
.indexer
|
||||
.comment_ranges()
|
||||
.intersects(TextRange::new(stmt.start(), nested_if.start()))
|
||||
{
|
||||
match fix_if::fix_nested_if_statements(checker.locator, checker.stylist, stmt) {
|
||||
Ok(edit) => {
|
||||
if edit
|
||||
|
@ -464,7 +463,7 @@ pub(crate) fn needless_bool(checker: &mut Checker, stmt: &Stmt) {
|
|||
if checker.patch(diagnostic.kind.rule()) {
|
||||
if matches!(if_return, Bool::True)
|
||||
&& matches!(else_return, Bool::False)
|
||||
&& !has_comments(stmt, checker.locator)
|
||||
&& !has_comments(stmt, checker.locator, checker.indexer)
|
||||
&& (test.is_compare_expr() || checker.semantic().is_builtin("bool"))
|
||||
{
|
||||
if test.is_compare_expr() {
|
||||
|
@ -650,7 +649,7 @@ pub(crate) fn use_ternary_operator(checker: &mut Checker, stmt: &Stmt, parent: O
|
|||
stmt.range(),
|
||||
);
|
||||
if checker.patch(diagnostic.kind.rule()) {
|
||||
if !has_comments(stmt, checker.locator) {
|
||||
if !has_comments(stmt, checker.locator, checker.indexer) {
|
||||
diagnostic.set_fix(Fix::suggested(Edit::range_replacement(
|
||||
contents,
|
||||
stmt.range(),
|
||||
|
@ -1050,7 +1049,7 @@ pub(crate) fn use_dict_get_with_default(
|
|||
stmt.range(),
|
||||
);
|
||||
if checker.patch(diagnostic.kind.rule()) {
|
||||
if !has_comments(stmt, checker.locator) {
|
||||
if !has_comments(stmt, checker.locator, checker.indexer) {
|
||||
diagnostic.set_fix(Fix::suggested(Edit::range_replacement(
|
||||
contents,
|
||||
stmt.range(),
|
||||
|
|
|
@ -5,7 +5,7 @@ use rustpython_parser::ast::{self, Ranged, Stmt, WithItem};
|
|||
use ruff_diagnostics::{AutofixKind, Violation};
|
||||
use ruff_diagnostics::{Diagnostic, Fix};
|
||||
use ruff_macros::{derive_message_formats, violation};
|
||||
use ruff_python_ast::helpers::{first_colon_range, has_comments_in};
|
||||
use ruff_python_ast::helpers::first_colon_range;
|
||||
use ruff_python_whitespace::UniversalNewlines;
|
||||
|
||||
use crate::checkers::ast::Checker;
|
||||
|
@ -129,10 +129,11 @@ pub(crate) fn multiple_with_statements(
|
|||
),
|
||||
);
|
||||
if checker.patch(diagnostic.kind.rule()) {
|
||||
if !has_comments_in(
|
||||
TextRange::new(with_stmt.start(), with_body[0].start()),
|
||||
checker.locator,
|
||||
) {
|
||||
if !checker
|
||||
.indexer
|
||||
.comment_ranges()
|
||||
.intersects(TextRange::new(with_stmt.start(), with_body[0].start()))
|
||||
{
|
||||
match fix_with::fix_multiple_with_statements(
|
||||
checker.locator,
|
||||
checker.stylist,
|
||||
|
|
|
@ -117,7 +117,7 @@ pub(crate) fn suppressible_exception(
|
|||
stmt.range(),
|
||||
);
|
||||
if checker.patch(diagnostic.kind.rule()) {
|
||||
if !has_comments(stmt, checker.locator) {
|
||||
if !has_comments(stmt, checker.locator, checker.indexer) {
|
||||
diagnostic.try_set_fix(|| {
|
||||
let (import_edit, binding) = checker.importer.get_or_import_symbol(
|
||||
&ImportRequest::import("contextlib", "suppress"),
|
||||
|
|
|
@ -150,7 +150,7 @@ pub(crate) fn nested_min_max(
|
|||
}) {
|
||||
let mut diagnostic = Diagnostic::new(NestedMinMax { func: min_max }, expr.range());
|
||||
if checker.patch(diagnostic.kind.rule()) {
|
||||
if !has_comments(expr, checker.locator) {
|
||||
if !has_comments(expr, checker.locator, checker.indexer) {
|
||||
let flattened_expr = Expr::Call(ast::ExprCall {
|
||||
func: Box::new(func.clone()),
|
||||
args: collect_nested_args(min_max, args, checker.semantic()),
|
||||
|
|
|
@ -192,7 +192,7 @@ pub(crate) fn collection_literal_concatenation(checker: &mut Checker, expr: &Exp
|
|||
expr.range(),
|
||||
);
|
||||
if checker.patch(diagnostic.kind.rule()) {
|
||||
if !has_comments(expr, checker.locator) {
|
||||
if !has_comments(expr, checker.locator, checker.indexer) {
|
||||
// This suggestion could be unsafe if the non-literal expression in the
|
||||
// expression has overridden the `__add__` (or `__radd__`) magic methods.
|
||||
diagnostic.set_fix(Fix::suggested(Edit::range_replacement(
|
||||
|
|
|
@ -717,7 +717,7 @@ pub fn map_subscript(expr: &Expr) -> &Expr {
|
|||
}
|
||||
|
||||
/// Returns `true` if a statement or expression includes at least one comment.
|
||||
pub fn has_comments<T>(node: &T, locator: &Locator) -> bool
|
||||
pub fn has_comments<T>(node: &T, locator: &Locator, indexer: &Indexer) -> bool
|
||||
where
|
||||
T: Ranged,
|
||||
{
|
||||
|
@ -732,26 +732,9 @@ where
|
|||
locator.line_end(node.end())
|
||||
};
|
||||
|
||||
has_comments_in(TextRange::new(start, end), locator)
|
||||
}
|
||||
|
||||
/// Returns `true` if a [`TextRange`] includes at least one comment.
|
||||
pub fn has_comments_in(range: TextRange, locator: &Locator) -> bool {
|
||||
let source = &locator.contents()[range];
|
||||
|
||||
for tok in lexer::lex_starts_at(source, Mode::Module, range.start()) {
|
||||
match tok {
|
||||
Ok((tok, _)) => {
|
||||
if matches!(tok, Tok::Comment(..)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
Err(_) => {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
false
|
||||
indexer
|
||||
.comment_ranges()
|
||||
.intersects(TextRange::new(start, end))
|
||||
}
|
||||
|
||||
/// Return `true` if the body uses `locals()`, `globals()`, `vars()`, `eval()`.
|
||||
|
|
|
@ -10,6 +10,23 @@ pub struct CommentRanges {
|
|||
raw: Vec<TextRange>,
|
||||
}
|
||||
|
||||
impl CommentRanges {
|
||||
/// Returns `true` if the given range includes a comment.
|
||||
pub fn intersects(&self, target: TextRange) -> bool {
|
||||
self.raw
|
||||
.binary_search_by(|range| {
|
||||
if target.contains_range(*range) {
|
||||
std::cmp::Ordering::Equal
|
||||
} else if range.end() < target.start() {
|
||||
std::cmp::Ordering::Less
|
||||
} else {
|
||||
std::cmp::Ordering::Greater
|
||||
}
|
||||
})
|
||||
.is_ok()
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for CommentRanges {
|
||||
type Target = [TextRange];
|
||||
|
||||
|
|
|
@ -93,7 +93,7 @@ impl Indexer {
|
|||
}
|
||||
|
||||
/// Returns the byte offset ranges of comments
|
||||
pub fn comment_ranges(&self) -> &CommentRanges {
|
||||
pub const fn comment_ranges(&self) -> &CommentRanges {
|
||||
&self.comment_ranges
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue