Remove lexer-based comment range detection (#5785)

## Summary

I'm doing some unrelated profiling, and I noticed that this method is
actually measurable on the CPython benchmark -- it's > 1% of execution
time. We don't need to lex here, we already know the ranges of all
comments, so we can just do a simple binary search for overlap, which
brings the method down to 0%.

## Test Plan

`cargo test`
This commit is contained in:
Charlie Marsh 2023-07-15 21:03:27 -04:00 committed by GitHub
parent f2e995f78d
commit 4782675bf9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 44 additions and 44 deletions

View file

@ -11,7 +11,7 @@ use rustpython_parser::ast::{self, BoolOp, ExceptHandler, Expr, Keyword, Ranged,
use ruff_diagnostics::{AutofixKind, Diagnostic, Edit, Fix, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::helpers::{has_comments_in, Truthiness};
use ruff_python_ast::helpers::Truthiness;
use ruff_python_ast::source_code::{Locator, Stylist};
use ruff_python_ast::visitor::Visitor;
use ruff_python_ast::{visitor, whitespace};
@ -197,7 +197,7 @@ pub(crate) fn unittest_assertion(
if checker.semantic().stmt().is_expr_stmt()
&& checker.semantic().expr_parent().is_none()
&& !checker.semantic().scope().kind.is_lambda()
&& !has_comments_in(expr.range(), checker.locator)
&& !checker.indexer.comment_ranges().intersects(expr.range())
{
if let Ok(stmt) = unittest_assert.generate_assert(args, keywords) {
diagnostic.set_fix(Fix::suggested(Edit::range_replacement(
@ -483,7 +483,7 @@ pub(crate) fn composite_condition(
if checker.patch(diagnostic.kind.rule()) {
if matches!(composite, CompositionKind::Simple)
&& msg.is_none()
&& !has_comments_in(stmt.range(), checker.locator)
&& !checker.indexer.comment_ranges().intersects(stmt.range())
{
#[allow(deprecated)]
diagnostic.try_set_fix_from_edit(|| {

View file

@ -505,7 +505,7 @@ pub(crate) fn compare_with_tuple(checker: &mut Checker, expr: &Expr) {
}
// Avoid removing comments.
if has_comments(expr, checker.locator) {
if has_comments(expr, checker.locator, checker.indexer) {
continue;
}

View file

@ -6,9 +6,7 @@ use rustpython_parser::ast::{self, CmpOp, Constant, Expr, ExprContext, Identifie
use ruff_diagnostics::{AutofixKind, Diagnostic, Edit, Fix, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::comparable::{ComparableConstant, ComparableExpr, ComparableStmt};
use ruff_python_ast::helpers::{
any_over_expr, contains_effect, first_colon_range, has_comments, has_comments_in,
};
use ruff_python_ast::helpers::{any_over_expr, contains_effect, first_colon_range, has_comments};
use ruff_python_semantic::SemanticModel;
use ruff_python_whitespace::UniversalNewlines;
@ -378,10 +376,11 @@ pub(crate) fn nested_if_statements(
// The fixer preserves comments in the nested body, but removes comments between
// the outer and inner if statements.
let nested_if = &body[0];
if !has_comments_in(
TextRange::new(stmt.start(), nested_if.start()),
checker.locator,
) {
if !checker
.indexer
.comment_ranges()
.intersects(TextRange::new(stmt.start(), nested_if.start()))
{
match fix_if::fix_nested_if_statements(checker.locator, checker.stylist, stmt) {
Ok(edit) => {
if edit
@ -464,7 +463,7 @@ pub(crate) fn needless_bool(checker: &mut Checker, stmt: &Stmt) {
if checker.patch(diagnostic.kind.rule()) {
if matches!(if_return, Bool::True)
&& matches!(else_return, Bool::False)
&& !has_comments(stmt, checker.locator)
&& !has_comments(stmt, checker.locator, checker.indexer)
&& (test.is_compare_expr() || checker.semantic().is_builtin("bool"))
{
if test.is_compare_expr() {
@ -650,7 +649,7 @@ pub(crate) fn use_ternary_operator(checker: &mut Checker, stmt: &Stmt, parent: O
stmt.range(),
);
if checker.patch(diagnostic.kind.rule()) {
if !has_comments(stmt, checker.locator) {
if !has_comments(stmt, checker.locator, checker.indexer) {
diagnostic.set_fix(Fix::suggested(Edit::range_replacement(
contents,
stmt.range(),
@ -1050,7 +1049,7 @@ pub(crate) fn use_dict_get_with_default(
stmt.range(),
);
if checker.patch(diagnostic.kind.rule()) {
if !has_comments(stmt, checker.locator) {
if !has_comments(stmt, checker.locator, checker.indexer) {
diagnostic.set_fix(Fix::suggested(Edit::range_replacement(
contents,
stmt.range(),

View file

@ -5,7 +5,7 @@ use rustpython_parser::ast::{self, Ranged, Stmt, WithItem};
use ruff_diagnostics::{AutofixKind, Violation};
use ruff_diagnostics::{Diagnostic, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::helpers::{first_colon_range, has_comments_in};
use ruff_python_ast::helpers::first_colon_range;
use ruff_python_whitespace::UniversalNewlines;
use crate::checkers::ast::Checker;
@ -129,10 +129,11 @@ pub(crate) fn multiple_with_statements(
),
);
if checker.patch(diagnostic.kind.rule()) {
if !has_comments_in(
TextRange::new(with_stmt.start(), with_body[0].start()),
checker.locator,
) {
if !checker
.indexer
.comment_ranges()
.intersects(TextRange::new(with_stmt.start(), with_body[0].start()))
{
match fix_with::fix_multiple_with_statements(
checker.locator,
checker.stylist,

View file

@ -117,7 +117,7 @@ pub(crate) fn suppressible_exception(
stmt.range(),
);
if checker.patch(diagnostic.kind.rule()) {
if !has_comments(stmt, checker.locator) {
if !has_comments(stmt, checker.locator, checker.indexer) {
diagnostic.try_set_fix(|| {
let (import_edit, binding) = checker.importer.get_or_import_symbol(
&ImportRequest::import("contextlib", "suppress"),

View file

@ -150,7 +150,7 @@ pub(crate) fn nested_min_max(
}) {
let mut diagnostic = Diagnostic::new(NestedMinMax { func: min_max }, expr.range());
if checker.patch(diagnostic.kind.rule()) {
if !has_comments(expr, checker.locator) {
if !has_comments(expr, checker.locator, checker.indexer) {
let flattened_expr = Expr::Call(ast::ExprCall {
func: Box::new(func.clone()),
args: collect_nested_args(min_max, args, checker.semantic()),

View file

@ -192,7 +192,7 @@ pub(crate) fn collection_literal_concatenation(checker: &mut Checker, expr: &Exp
expr.range(),
);
if checker.patch(diagnostic.kind.rule()) {
if !has_comments(expr, checker.locator) {
if !has_comments(expr, checker.locator, checker.indexer) {
// This suggestion could be unsafe if the non-literal expression in the
// expression has overridden the `__add__` (or `__radd__`) magic methods.
diagnostic.set_fix(Fix::suggested(Edit::range_replacement(

View file

@ -717,7 +717,7 @@ pub fn map_subscript(expr: &Expr) -> &Expr {
}
/// Returns `true` if a statement or expression includes at least one comment.
pub fn has_comments<T>(node: &T, locator: &Locator) -> bool
pub fn has_comments<T>(node: &T, locator: &Locator, indexer: &Indexer) -> bool
where
T: Ranged,
{
@ -732,26 +732,9 @@ where
locator.line_end(node.end())
};
has_comments_in(TextRange::new(start, end), locator)
}
/// Returns `true` if a [`TextRange`] includes at least one comment.
pub fn has_comments_in(range: TextRange, locator: &Locator) -> bool {
let source = &locator.contents()[range];
for tok in lexer::lex_starts_at(source, Mode::Module, range.start()) {
match tok {
Ok((tok, _)) => {
if matches!(tok, Tok::Comment(..)) {
return true;
}
}
Err(_) => {
return false;
}
}
}
false
indexer
.comment_ranges()
.intersects(TextRange::new(start, end))
}
/// Return `true` if the body uses `locals()`, `globals()`, `vars()`, `eval()`.

View file

@ -10,6 +10,23 @@ pub struct CommentRanges {
raw: Vec<TextRange>,
}
impl CommentRanges {
/// Returns `true` if the given range includes a comment.
pub fn intersects(&self, target: TextRange) -> bool {
self.raw
.binary_search_by(|range| {
if target.contains_range(*range) {
std::cmp::Ordering::Equal
} else if range.end() < target.start() {
std::cmp::Ordering::Less
} else {
std::cmp::Ordering::Greater
}
})
.is_ok()
}
}
impl Deref for CommentRanges {
type Target = [TextRange];

View file

@ -93,7 +93,7 @@ impl Indexer {
}
/// Returns the byte offset ranges of comments
pub fn comment_ranges(&self) -> &CommentRanges {
pub const fn comment_ranges(&self) -> &CommentRanges {
&self.comment_ranges
}