Mirror of https://github.com/astral-sh/ruff.git (synced 2025-08-02 18:02:23 +00:00)
Remove lexing for colon-matching use cases (#6803)
It's much simpler to just search ahead for the first colon.
parent 4bc5eddf91
commit d08f697a04

3 changed files with 27 additions and 67 deletions
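The diff below replaces the lexer-based `first_colon_range` helper with a forward scan using `SimpleTokenizer` from `ruff_python_trivia`. As a rough, self-contained illustration of the idea (not ruff code; `first_colon_offset` is a hypothetical toy that skips comments but, unlike the real tokenizer, ignores strings and other trivia):

    // Toy illustration of "search ahead for the first colon" (hypothetical
    // helper, not part of this commit).
    fn first_colon_offset(source: &str, start: usize) -> Option<usize> {
        let mut in_comment = false;
        for (i, c) in source[start..].char_indices() {
            match c {
                '#' => in_comment = true,   // a comment runs to the end of the line
                '\n' => in_comment = false,
                ':' if !in_comment => return Some(start + i),
                _ => {}
            }
        }
        None
    }

    fn main() {
        let contents = "with a: pass";
        assert_eq!(first_colon_offset(contents, 0), Some(6));
    }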
@@ -11,8 +11,8 @@ use ruff_macros::{derive_message_formats, violation};
 use ruff_python_ast::comparable::{ComparableConstant, ComparableExpr, ComparableStmt};
 use ruff_python_ast::helpers::{any_over_expr, contains_effect};
 use ruff_python_ast::stmt_if::{if_elif_branches, IfElifBranch};
-use ruff_python_parser::first_colon_range;
 use ruff_python_semantic::SemanticModel;
+use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer};
 use ruff_source_file::{Locator, UniversalNewlines};
 
 use crate::checkers::ast::Checker;
@@ -369,16 +369,10 @@ pub(crate) fn nested_if_statements(
     };
 
     // Find the deepest nested if-statement, to inform the range.
-    let Some((test, first_stmt)) = find_last_nested_if(body) else {
+    let Some((test, _first_stmt)) = find_last_nested_if(body) else {
         return;
     };
 
-    let colon = first_colon_range(
-        TextRange::new(test.end(), first_stmt.start()),
-        checker.locator().contents(),
-        checker.source_type.is_jupyter(),
-    );
-
     // Check if the parent is already emitting a larger diagnostic including this if statement
     if let Some(Stmt::If(stmt_if)) = parent {
         if let Some((body, _range, _is_elif)) = nested_if_body(stmt_if) {
@@ -392,10 +386,14 @@ pub(crate) fn nested_if_statements(
         }
     }
 
-    let mut diagnostic = Diagnostic::new(
-        CollapsibleIf,
-        colon.map_or(range, |colon| TextRange::new(range.start(), colon.end())),
-    );
+    let Some(colon) = SimpleTokenizer::starts_at(test.end(), checker.locator().contents())
+        .skip_trivia()
+        .find(|token| token.kind == SimpleTokenKind::Colon)
+    else {
+        return;
+    };
+
+    let mut diagnostic = Diagnostic::new(CollapsibleIf, TextRange::new(range.start(), colon.end()));
     if checker.patch(diagnostic.kind.rule()) {
         // The fixer preserves comments in the nested body, but removes comments between
         // the outer and inner if statements.
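The updated `CollapsibleIf` code above, and the `MultipleWithStatements` code below, locate the colon by scanning forward from the end of the relevant node instead of re-lexing the span in between. A minimal sketch of that shared pattern, factored into a free function, assuming the `ruff_python_trivia`, `ruff_python_ast`, and `ruff_text_size` APIs that appear in these hunks; the helper name `colon_end_after` is hypothetical and not part of this commit:

    use ruff_python_ast::Ranged;
    use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer};
    use ruff_text_size::TextSize;

    /// End offset of the first non-trivia `:` at or after `offset`, if any.
    /// Mirrors the calls used by the updated rule code in this commit.
    fn colon_end_after(source: &str, offset: TextSize) -> Option<TextSize> {
        SimpleTokenizer::starts_at(offset, source)
            .skip_trivia()
            .find(|token| token.kind == SimpleTokenKind::Colon)
            .map(|token| token.end())
    }

The diagnostic range now always ends at the colon; when no colon is found, the rule returns early instead of falling back to a statement-sized range.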
@@ -1,12 +1,12 @@
 use log::error;
-use ruff_python_ast::{self as ast, Ranged, Stmt, WithItem};
-use ruff_text_size::TextRange;
 
 use ruff_diagnostics::{AutofixKind, Violation};
 use ruff_diagnostics::{Diagnostic, Fix};
 use ruff_macros::{derive_message_formats, violation};
-use ruff_python_parser::first_colon_range;
+use ruff_python_ast::{self as ast, Ranged, Stmt, WithItem};
+use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer};
 use ruff_source_file::UniversalNewlines;
+use ruff_text_size::TextRange;
 
 use crate::checkers::ast::Checker;
 use crate::line_width::LineWidth;
@@ -106,32 +106,24 @@ pub(crate) fn multiple_with_statements(
         }
     }
 
-    if let Some((is_async, items, body)) = next_with(&with_stmt.body) {
+    if let Some((is_async, items, _body)) = next_with(&with_stmt.body) {
         if is_async != with_stmt.is_async {
             // One of the statements is an async with, while the other is not,
             // we can't merge those statements.
             return;
         }
 
-        let last_item = items.last().expect("Expected items to be non-empty");
-        let colon = first_colon_range(
-            TextRange::new(
-                last_item
-                    .optional_vars
-                    .as_ref()
-                    .map_or(last_item.context_expr.end(), |v| v.end()),
-                body.first().expect("Expected body to be non-empty").start(),
-            ),
-            checker.locator().contents(),
-            checker.source_type.is_jupyter(),
-        );
+        let Some(colon) = items.last().and_then(|item| {
+            SimpleTokenizer::starts_at(item.end(), checker.locator().contents())
+                .skip_trivia()
+                .find(|token| token.kind == SimpleTokenKind::Colon)
+        }) else {
+            return;
+        };
 
         let mut diagnostic = Diagnostic::new(
             MultipleWithStatements,
-            colon.map_or_else(
-                || with_stmt.range(),
-                |colon| TextRange::new(with_stmt.start(), colon.end()),
-            ),
+            TextRange::new(with_stmt.start(), colon.end()),
         );
         if checker.patch(diagnostic.kind.rule()) {
            if !checker
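In `MultipleWithStatements`, the scan starts at the end of the last item returned by `next_with`, and the `let ... else` binding turns a missing colon into an early return where the old code used `.expect(...)` and a `map_or_else` fallback to the statement range. A plain-Rust sketch of that control-flow shape (the `find_colon` stand-in below is hypothetical, not the ruff API):

    // Sketch of the `Option` chaining + `let ... else` shape used above;
    // `find_colon` stands in for the SimpleTokenizer scan.
    fn find_colon(item: &str) -> Option<usize> {
        item.find(':')
    }

    fn diagnostic_end(items: &[&str]) -> Option<usize> {
        // A missing item or a missing colon means "no diagnostic", not a panic.
        let Some(colon) = items.last().and_then(|item| find_colon(item)) else {
            return None;
        };
        Some(colon)
    }

    fn main() {
        assert_eq!(diagnostic_end(&["with a: pass"]), Some(6));
        assert_eq!(diagnostic_end(&[]), None);
    }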
@@ -109,7 +109,6 @@
 //! [parsing]: https://en.wikipedia.org/wiki/Parsing
 //! [lexer]: crate::lexer
 
-use crate::lexer::LexResult;
 pub use parser::{
     parse, parse_expression, parse_expression_starts_at, parse_program, parse_starts_at,
     parse_suite, parse_tokens, ParseError, ParseErrorType,
@@ -119,6 +118,8 @@ use ruff_text_size::{TextRange, TextSize};
 pub use string::FStringErrorType;
 pub use token::{StringKind, Tok, TokenKind};
 
+use crate::lexer::LexResult;
+
 mod function;
 // Skip flattening lexer to distinguish from full ruff_python_parser
 mod context;
@@ -159,25 +160,6 @@ pub fn parse_program_tokens(
     }
 }
 
-/// Return the `Range` of the first `Tok::Colon` token in a `Range`.
-pub fn first_colon_range(
-    range: TextRange,
-    source: &str,
-    is_jupyter_notebook: bool,
-) -> Option<TextRange> {
-    let contents = &source[range];
-    let mode = if is_jupyter_notebook {
-        Mode::Jupyter
-    } else {
-        Mode::Module
-    };
-    let range = lexer::lex_starts_at(contents, mode, range.start())
-        .flatten()
-        .find(|(tok, _)| tok.is_colon())
-        .map(|(_, range)| range);
-    range
-}
-
 /// Extract all [`CmpOp`] operators from an expression snippet, with appropriate
 /// ranges.
 ///
@@ -373,24 +355,12 @@ mod python {
 
 #[cfg(test)]
 mod tests {
-    use crate::{first_colon_range, locate_cmp_ops, parse_expression, LocatedCmpOp};
     use anyhow::Result;
 
     use ruff_python_ast::CmpOp;
-    use ruff_text_size::{TextLen, TextRange, TextSize};
+    use ruff_text_size::TextSize;
+
+    use crate::{locate_cmp_ops, parse_expression, LocatedCmpOp};
 
-    #[test]
-    fn extract_first_colon_range() {
-        let contents = "with a: pass";
-        let range = first_colon_range(
-            TextRange::new(TextSize::from(0), contents.text_len()),
-            contents,
-            false,
-        )
-        .unwrap();
-        assert_eq!(&contents[range], ":");
-        assert_eq!(range, TextRange::new(TextSize::from(6), TextSize::from(7)));
-    }
-
     #[test]
     fn extract_cmp_op_location() -> Result<()> {
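With no callers left, `first_colon_range` and its test are removed from `ruff_python_parser`, which also drops the `Mode::Jupyter`/`Mode::Module` selection that re-lexing required. The deleted `extract_first_colon_range` assertion has no direct replacement in this commit; purely for illustration, it could be restated against the tokenizer as in the sketch below, which assumes a crate depending on `ruff_python_trivia`, `ruff_python_ast`, and `ruff_text_size`:

    use ruff_python_ast::Ranged;
    use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer};
    use ruff_text_size::TextSize;

    #[test]
    fn first_colon_via_simple_tokenizer() {
        let contents = "with a: pass";
        // Start the scan after the last `with` item (`a` ends at offset 6),
        // the same way the updated rules above do.
        let colon = SimpleTokenizer::starts_at(TextSize::from(6), contents)
            .skip_trivia()
            .find(|token| token.kind == SimpleTokenKind::Colon)
            .expect("expected a colon token");
        // Same offsets as the deleted test: the colon spans 6..7.
        assert_eq!(colon.start(), TextSize::from(6));
        assert_eq!(colon.end(), TextSize::from(7));
    }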