From bb5fbb1b5c9dc9cc67940d79ec42212f50ac469c Mon Sep 17 00:00:00 2001
From: Charlie Marsh <charlie.r.marsh@gmail.com>
Date: Mon, 21 Aug 2023 00:16:29 -0400
Subject: [PATCH] Use simple lexer for argument removal (#6710)

---
 crates/ruff/src/autofix/edits.rs              | 119 +++++++-----------
 .../rules/unnecessary_range_start.rs          |   3 +-
 .../flake8_pytest_style/rules/fixture.rs      |   3 +-
 .../pandas_vet/rules/inplace_argument.rs      |  15 +--
 .../pyupgrade/rules/replace_stdout_stderr.rs  |  14 +--
 .../rules/unnecessary_encode_utf8.rs          |  12 +-
 .../rules/useless_object_inheritance.rs       |   3 +-
 7 files changed, 62 insertions(+), 107 deletions(-)
diff --git a/crates/ruff/src/autofix/edits.rs b/crates/ruff/src/autofix/edits.rs
index 1ab80106af..f55381b1e6 100644
--- a/crates/ruff/src/autofix/edits.rs
+++ b/crates/ruff/src/autofix/edits.rs
@@ -1,15 +1,15 @@
 //! Interface for generating autofix edits from higher-level actions (e.g., "remove an argument").
 
-use anyhow::{bail, Result};
+use anyhow::{Context, Result};
 
 use ruff_diagnostics::Edit;
-use ruff_python_ast::{
-    self as ast, Arguments, ExceptHandler, Expr, Keyword, PySourceType, Ranged, Stmt,
-};
+use ruff_python_ast::{self as ast, Arguments, ExceptHandler, Expr, Keyword, Ranged, Stmt};
 use ruff_python_codegen::Stylist;
 use ruff_python_index::Indexer;
-use ruff_python_parser::{lexer, AsMode};
-use ruff_python_trivia::{has_leading_content, is_python_whitespace, PythonWhitespace};
+
+use ruff_python_trivia::{
+    has_leading_content, is_python_whitespace, PythonWhitespace, SimpleTokenKind, SimpleTokenizer,
+};
 use ruff_source_file::{Locator, NewlineWithTrailingNewline};
 use ruff_text_size::{TextLen, TextRange, TextSize};
 
@@ -89,78 +89,49 @@ pub(crate) fn remove_argument<T: Ranged>(
     argument: &T,
     arguments: &Arguments,
     parentheses: Parentheses,
-    locator: &Locator,
-    source_type: PySourceType,
+    source: &str,
 ) -> Result<Edit> {
-    // TODO(sbrugman): Preserve trailing comments.
-    if arguments.keywords.len() + arguments.args.len() > 1 {
-        let mut fix_start = None;
-        let mut fix_end = None;
+    // Partition into arguments before and after the argument to remove.
+    let (before, after): (Vec<_>, Vec<_>) = arguments
+        .args
+        .iter()
+        .map(Expr::range)
+        .chain(arguments.keywords.iter().map(Keyword::range))
+        .filter(|range| argument.range() != *range)
+        .partition(|range| range.start() < argument.start());
 
-        if arguments
-            .args
-            .iter()
-            .map(Expr::start)
-            .chain(arguments.keywords.iter().map(Keyword::start))
-            .any(|location| location > argument.start())
-        {
-            // Case 1: argument or keyword is _not_ the last node, so delete from the start of the
-            // argument to the end of the subsequent comma.
-            let mut seen_comma = false;
-            for (tok, range) in lexer::lex_starts_at(
-                locator.slice(arguments.range()),
-                source_type.as_mode(),
-                arguments.start(),
-            )
-            .flatten()
-            {
-                if seen_comma {
-                    if tok.is_non_logical_newline() {
-                        // Also delete any non-logical newlines after the comma.
-                        continue;
-                    }
-                    fix_end = Some(if tok.is_newline() {
-                        range.end()
-                    } else {
-                        range.start()
-                    });
-                    break;
-                }
-                if range.start() == argument.start() {
-                    fix_start = Some(range.start());
-                }
-                if fix_start.is_some() && tok.is_comma() {
-                    seen_comma = true;
-                }
-            }
-        } else {
-            // Case 2: argument or keyword is the last node, so delete from the start of the
-            // previous comma to the end of the argument.
-            for (tok, range) in lexer::lex_starts_at(
-                locator.slice(arguments.range()),
-                source_type.as_mode(),
-                arguments.start(),
-            )
-            .flatten()
-            {
-                if range.start() == argument.start() {
-                    fix_end = Some(argument.end());
-                    break;
-                }
-                if tok.is_comma() {
-                    fix_start = Some(range.start());
-                }
-            }
-        }
+    if !after.is_empty() {
+        // Case 1: argument or keyword is _not_ the last node, so delete from the start of the
+        // argument to the end of the subsequent comma.
+        let mut tokenizer = SimpleTokenizer::starts_at(argument.end(), source);
 
-        match (fix_start, fix_end) {
-            (Some(start), Some(end)) => Ok(Edit::deletion(start, end)),
-            _ => {
-                bail!("No fix could be constructed")
-            }
-        }
+        // Find the trailing comma.
+        tokenizer
+            .find(|token| token.kind == SimpleTokenKind::Comma)
+            .context("Unable to find trailing comma")?;
+
+        // Find the next non-whitespace token.
+        let next = tokenizer
+            .find(|token| {
+                token.kind != SimpleTokenKind::Whitespace && token.kind != SimpleTokenKind::Newline
+            })
+            .context("Unable to find next token")?;
+
+        Ok(Edit::deletion(argument.start(), next.start()))
+    } else if let Some(previous) = before.iter().map(Ranged::end).max() {
+        // Case 2: argument or keyword is the last node, so delete from the start of the
+        // previous comma to the end of the argument.
+        let mut tokenizer = SimpleTokenizer::starts_at(previous, source);
+
+        // Find the trailing comma.
+        let comma = tokenizer
+            .find(|token| token.kind == SimpleTokenKind::Comma)
+            .context("Unable to find trailing comma")?;
+
+        Ok(Edit::deletion(comma.start(), argument.end()))
     } else {
-        // Only one argument; remove it (but preserve parentheses, if needed).
+        // Case 3: argument or keyword is the only node, so delete the arguments (but preserve
+        // parentheses, if needed).
         Ok(match parentheses {
             Parentheses::Remove => Edit::deletion(arguments.start(), arguments.end()),
             Parentheses::Preserve => {
diff --git a/crates/ruff/src/rules/flake8_pie/rules/unnecessary_range_start.rs b/crates/ruff/src/rules/flake8_pie/rules/unnecessary_range_start.rs
index 07067d6f75..a6d619a432 100644
--- a/crates/ruff/src/rules/flake8_pie/rules/unnecessary_range_start.rs
+++ b/crates/ruff/src/rules/flake8_pie/rules/unnecessary_range_start.rs
@@ -85,8 +85,7 @@ pub(crate) fn unnecessary_range_start(checker: &mut Checker, call: &ast::ExprCal
                 &start,
                 &call.arguments,
                 Parentheses::Preserve,
-                checker.locator(),
-                checker.source_type,
+                checker.locator().contents(),
             )
             .map(Fix::automatic)
         });
diff --git a/crates/ruff/src/rules/flake8_pytest_style/rules/fixture.rs b/crates/ruff/src/rules/flake8_pytest_style/rules/fixture.rs
index ddcd51b388..0e2ec919a8 100644
--- a/crates/ruff/src/rules/flake8_pytest_style/rules/fixture.rs
+++ b/crates/ruff/src/rules/flake8_pytest_style/rules/fixture.rs
@@ -732,8 +732,7 @@ fn check_fixture_decorator(checker: &mut Checker, func_name: &str, decorator: &D
                                     keyword,
                                     arguments,
                                     edits::Parentheses::Preserve,
-                                    checker.locator(),
-                                    checker.source_type,
+                                    checker.locator().contents(),
                                 )
                                 .map(Fix::suggested)
                             });
diff --git a/crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs b/crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs
index 7cc11c2025..cecd169ad0 100644
--- a/crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs
+++ b/crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs
@@ -1,7 +1,7 @@
 use ruff_diagnostics::{AutofixKind, Diagnostic, Edit, Fix, Violation};
 use ruff_macros::{derive_message_formats, violation};
 use ruff_python_ast::helpers::is_const_true;
-use ruff_python_ast::{self as ast, Keyword, PySourceType, Ranged};
+use ruff_python_ast::{self as ast, Keyword, Ranged};
 use ruff_source_file::Locator;
 
 use crate::autofix::edits::{remove_argument, Parentheses};
@@ -78,12 +78,9 @@ pub(crate) fn inplace_argument(checker: &mut Checker, call: &ast::ExprCall) {
                         && checker.semantic().current_statement().is_expr_stmt()
                         && checker.semantic().current_expression_parent().is_none()
                     {
-                        if let Some(fix) = convert_inplace_argument_to_assignment(
-                            call,
-                            keyword,
-                            checker.source_type,
-                            checker.locator(),
-                        ) {
+                        if let Some(fix) =
+                            convert_inplace_argument_to_assignment(call, keyword, checker.locator())
+                        {
                             diagnostic.set_fix(fix);
                         }
                     }
@@ -103,7 +100,6 @@ pub(crate) fn inplace_argument(checker: &mut Checker, call: &ast::ExprCall) {
 fn convert_inplace_argument_to_assignment(
     call: &ast::ExprCall,
     keyword: &Keyword,
-    source_type: PySourceType,
     locator: &Locator,
 ) -> Option<Fix> {
     // Add the assignment.
@@ -118,8 +114,7 @@ fn convert_inplace_argument_to_assignment(
         keyword,
         &call.arguments,
         Parentheses::Preserve,
-        locator,
-        source_type,
+        locator.contents(),
     )
     .ok()?;
 
diff --git a/crates/ruff/src/rules/pyupgrade/rules/replace_stdout_stderr.rs b/crates/ruff/src/rules/pyupgrade/rules/replace_stdout_stderr.rs
index 3482e99006..df120210f4 100644
--- a/crates/ruff/src/rules/pyupgrade/rules/replace_stdout_stderr.rs
+++ b/crates/ruff/src/rules/pyupgrade/rules/replace_stdout_stderr.rs
@@ -2,8 +2,7 @@ use anyhow::Result;
 
 use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix};
 use ruff_macros::{derive_message_formats, violation};
-use ruff_python_ast::{self as ast, Keyword, PySourceType, Ranged};
-use ruff_source_file::Locator;
+use ruff_python_ast::{self as ast, Keyword, Ranged};
 
 use crate::autofix::edits::{remove_argument, Parentheses};
 use crate::checkers::ast::Checker;
@@ -55,8 +54,7 @@ fn generate_fix(
     stdout: &Keyword,
     stderr: &Keyword,
     call: &ast::ExprCall,
-    locator: &Locator,
-    source_type: PySourceType,
+    source: &str,
 ) -> Result<Fix> {
     let (first, second) = if stdout.start() < stderr.start() {
         (stdout, stderr)
@@ -69,8 +67,7 @@ fn generate_fix(
             second,
             &call.arguments,
             Parentheses::Preserve,
-            locator,
-            source_type,
+            source,
         )?],
     ))
 }
@@ -105,9 +102,8 @@ pub(crate) fn replace_stdout_stderr(checker: &mut Checker, call: &ast::ExprCall)
 
         let mut diagnostic = Diagnostic::new(ReplaceStdoutStderr, call.range());
         if checker.patch(diagnostic.kind.rule()) {
-            diagnostic.try_set_fix(|| {
-                generate_fix(stdout, stderr, call, checker.locator(), checker.source_type)
-            });
+            diagnostic
+                .try_set_fix(|| generate_fix(stdout, stderr, call, checker.locator().contents()));
         }
         checker.diagnostics.push(diagnostic);
     }
diff --git a/crates/ruff/src/rules/pyupgrade/rules/unnecessary_encode_utf8.rs b/crates/ruff/src/rules/pyupgrade/rules/unnecessary_encode_utf8.rs
index d6e3aa5e36..0c48fdcc55 100644
--- a/crates/ruff/src/rules/pyupgrade/rules/unnecessary_encode_utf8.rs
+++ b/crates/ruff/src/rules/pyupgrade/rules/unnecessary_encode_utf8.rs
@@ -194,8 +194,7 @@ pub(crate) fn unnecessary_encode_utf8(checker: &mut Checker, call: &ast::ExprCal
                                 kwarg,
                                 &call.arguments,
                                 Parentheses::Preserve,
-                                checker.locator(),
-                                checker.source_type,
+                                checker.locator().contents(),
                             )
                             .map(Fix::automatic)
                         });
@@ -215,8 +214,7 @@ pub(crate) fn unnecessary_encode_utf8(checker: &mut Checker, call: &ast::ExprCal
                                 arg,
                                 &call.arguments,
                                 Parentheses::Preserve,
-                                checker.locator(),
-                                checker.source_type,
+                                checker.locator().contents(),
                             )
                             .map(Fix::automatic)
                         });
@@ -243,8 +241,7 @@ pub(crate) fn unnecessary_encode_utf8(checker: &mut Checker, call: &ast::ExprCal
                                 kwarg,
                                 &call.arguments,
                                 Parentheses::Preserve,
-                                checker.locator(),
-                                checker.source_type,
+                                checker.locator().contents(),
                             )
                             .map(Fix::automatic)
                         });
@@ -264,8 +261,7 @@ pub(crate) fn unnecessary_encode_utf8(checker: &mut Checker, call: &ast::ExprCal
                                 arg,
                                 &call.arguments,
                                 Parentheses::Preserve,
-                                checker.locator(),
-                                checker.source_type,
+                                checker.locator().contents(),
                             )
                             .map(Fix::automatic)
                         });
diff --git a/crates/ruff/src/rules/pyupgrade/rules/useless_object_inheritance.rs b/crates/ruff/src/rules/pyupgrade/rules/useless_object_inheritance.rs
index 3736942d84..2e2de98529 100644
--- a/crates/ruff/src/rules/pyupgrade/rules/useless_object_inheritance.rs
+++ b/crates/ruff/src/rules/pyupgrade/rules/useless_object_inheritance.rs
@@ -73,8 +73,7 @@ pub(crate) fn useless_object_inheritance(checker: &mut Checker, class_def: &ast:
                     base,
                     arguments,
                     Parentheses::Remove,
-                    checker.locator(),
-                    checker.source_type,
+                    checker.locator().contents(),
                 )
                 .map(Fix::automatic)
             });