[flake8-simplify] Fix incorrect fix for positive maxsplit without separator (SIM905) (#20056)

## Summary Resolves #20033 ## Test Plan unit tests added to the new split function, existing snapshot test updated. Co-authored-by: Brent Westbrook <brentrwestbrook@gmail.com>
2025-09-29 13:24:57 +00:00 · 2025-09-18 22:56:34 +02:00 · 2025-09-18 22:56:34 +02:00 · bc89d0394c
commit bc89d0394c
parent 706be0a6e7
4 changed files with 141 additions and 51 deletions
--- a/crates/ruff_linter/resources/test/fixtures/flake8_simplify/SIM905.py
+++ b/crates/ruff_linter/resources/test/fixtures/flake8_simplify/SIM905.py
@ -170,3 +170,4 @@ print("<\x1c\x1d\x1e\x1f".rsplit(maxsplit=0))
 # leading/trailing whitespace should not count towards maxsplit
 " a b c d ".split(maxsplit=2)  # ["a", "b", "c d "]
 " a b c d ".rsplit(maxsplit=2)  # [" a b", "c", "d"]
 "a  b".split(maxsplit=1)  # ["a", "b"]
--- a/crates/ruff_linter/src/rules/flake8_simplify/rules/split_static_string.rs
+++ b/crates/ruff_linter/src/rules/flake8_simplify/rules/split_static_string.rs
@ -58,8 +58,8 @@ enum Method {
 }
 impl Method {
-    fn is_split(self) -> bool {
+    fn is_rsplit(self) -> bool {
-        matches!(self, Method::Split)
+        matches!(self, Method::RSplit)
    }
 }
@ -201,65 +201,19 @@ fn split_default(
    method: Method,
    settings: &LinterSettings,
 ) -> Option<Expr> {
    // From the Python documentation:
    // > If sep is not specified or is None, a different splitting algorithm is applied: runs of
    // > consecutive whitespace are regarded as a single separator, and the result will contain
    // > no empty strings at the start or end if the string has leading or trailing whitespace.
    // > Consequently, splitting an empty string or a string consisting of just whitespace with
    // > a None separator returns [].
    // https://docs.python.org/3/library/stdtypes.html#str.split
    let string_val = str_value.to_str();
    match max_split.cmp(&0) {
-        Ordering::Greater => {
+        Ordering::Greater if !is_maxsplit_without_separator_fix_enabled(settings) => None,
-            if !is_maxsplit_without_separator_fix_enabled(settings) {
+        Ordering::Greater | Ordering::Equal => {
                return None;
            }
            let Ok(max_split) = usize::try_from(max_split) else {
                return None;
            };
-            let list_items: Vec<&str> = if method.is_split() {
+            let list_items = split_whitespace_with_maxsplit(string_val, max_split, method);
                string_val
                    .trim_start_matches(py_unicode_is_whitespace)
                    .splitn(max_split + 1, py_unicode_is_whitespace)
                    .filter(|s| !s.is_empty())
                    .collect()
            } else {
                let mut items: Vec<&str> = string_val
                    .trim_end_matches(py_unicode_is_whitespace)
                    .rsplitn(max_split + 1, py_unicode_is_whitespace)
                    .filter(|s| !s.is_empty())
                    .collect();
                items.reverse();
                items
            };
            Some(construct_replacement(
                &list_items,
                str_value.first_literal_flags(),
            ))
        }
        Ordering::Equal => {
            // Behavior for maxsplit = 0 when sep is None:
            // - If the string is empty or all whitespace, result is [].
            // - Otherwise:
            //   - " x ".split(maxsplit=0)  -> ['x ']
            //   - " x ".rsplit(maxsplit=0) -> [' x']
            //   - "".split(maxsplit=0) -> []
            //   - " ".split(maxsplit=0) -> []
            let processed_str = if method.is_split() {
                string_val.trim_start_matches(py_unicode_is_whitespace)
            } else {
                string_val.trim_end_matches(py_unicode_is_whitespace)
            };
            let list_items: &[_] = if processed_str.is_empty() {
                &[]
            } else {
                &[processed_str]
            };
            Some(construct_replacement(
                list_items,
                str_value.first_literal_flags(),
            ))
        }
        Ordering::Less => {
            let list_items: Vec<&str> = string_val
                .split(py_unicode_is_whitespace)
@ -367,3 +321,107 @@ const fn py_unicode_is_whitespace(ch: char) -> bool {
        | '\u{3000}'
    )
 }
 struct WhitespaceMaxSplitIterator<'a> {
    remaining: &'a str,
    max_split: usize,
    splits: usize,
    method: Method,
 }
 impl<'a> WhitespaceMaxSplitIterator<'a> {
    fn new(s: &'a str, max_split: usize, method: Method) -> Self {
        let remaining = match method {
            Method::Split => s.trim_start_matches(py_unicode_is_whitespace),
            Method::RSplit => s.trim_end_matches(py_unicode_is_whitespace),
        };
        Self {
            remaining,
            max_split,
            splits: 0,
            method,
        }
    }
 }
 impl<'a> Iterator for WhitespaceMaxSplitIterator<'a> {
    type Item = &'a str;
    fn next(&mut self) -> Option<Self::Item> {
        if self.remaining.is_empty() {
            return None;
        }
        if self.splits >= self.max_split {
            let result = self.remaining;
            self.remaining = "";
            return Some(result);
        }
        self.splits += 1;
        match self.method {
            Method::Split => match self.remaining.split_once(py_unicode_is_whitespace) {
                Some((s, remaining)) => {
                    self.remaining = remaining.trim_start_matches(py_unicode_is_whitespace);
                    Some(s)
                }
                None => Some(std::mem::take(&mut self.remaining)),
            },
            Method::RSplit => match self.remaining.rsplit_once(py_unicode_is_whitespace) {
                Some((remaining, s)) => {
                    self.remaining = remaining.trim_end_matches(py_unicode_is_whitespace);
                    Some(s)
                }
                None => Some(std::mem::take(&mut self.remaining)),
            },
        }
    }
 }
 // From the Python documentation:
 // > If sep is not specified or is None, a different splitting algorithm is applied: runs of
 // > consecutive whitespace are regarded as a single separator, and the result will contain
 // > no empty strings at the start or end if the string has leading or trailing whitespace.
 // > Consequently, splitting an empty string or a string consisting of just whitespace with
 // > a None separator returns [].
 // https://docs.python.org/3/library/stdtypes.html#str.split
 fn split_whitespace_with_maxsplit(s: &str, max_split: usize, method: Method) -> Vec<&str> {
    let mut result: Vec<_> = WhitespaceMaxSplitIterator::new(s, max_split, method).collect();
    if method.is_rsplit() {
        result.reverse();
    }
    result
 }
 #[cfg(test)]
 mod tests {
    use super::{Method, split_whitespace_with_maxsplit};
    use test_case::test_case;
    #[test_case("  ", 1, &[])]
    #[test_case("a  b", 1, &["a", "b"])]
    #[test_case("a  b", 2, &["a", "b"])]
    #[test_case(" a b c d ", 2, &["a", "b", "c d "])]
    #[test_case("  a  b  c  ", 1, &["a", "b  c  "])]
    #[test_case(" x ", 0, &["x "])]
    #[test_case(" ", 0, &[])]
    #[test_case("a\u{3000}b", 1, &["a", "b"])]
    fn test_split_whitespace_with_maxsplit(s: &str, max_split: usize, expected: &[&str]) {
        let parts = split_whitespace_with_maxsplit(s, max_split, Method::Split);
        assert_eq!(parts, expected);
    }
    #[test_case("  ", 1, &[])]
    #[test_case("a  b", 1, &["a", "b"])]
    #[test_case("a  b", 2, &["a", "b"])]
    #[test_case(" a b c d ", 2, &[" a b", "c", "d"])]
    #[test_case("  a  b  c  ", 1, &["  a  b", "c"])]
    #[test_case(" x ", 0, &[" x"])]
    #[test_case(" ", 0, &[])]
    #[test_case("a\u{3000}b", 1, &["a", "b"])]
    fn test_rsplit_whitespace_with_maxsplit(s: &str, max_split: usize, expected: &[&str]) {
        let parts = split_whitespace_with_maxsplit(s, max_split, Method::RSplit);
        assert_eq!(parts, expected);
    }
 }
--- a/crates/ruff_linter/src/rules/flake8_simplify/snapshots/ruff_linterrulesflake8_simplifytestsSIM905_SIM905.py.snap
+++ b/crates/ruff_linter/src/rules/flake8_simplify/snapshots/ruff_linterrulesflake8_simplifytestsSIM905_SIM905.py.snap
@ -1372,6 +1372,7 @@ SIM905 Consider using a list literal instead of `str.split`
 171 | " a b c d ".split(maxsplit=2)  # ["a", "b", "c d "]
    | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 172 | " a b c d ".rsplit(maxsplit=2)  # [" a b", "c", "d"]
 173 | "a  b".split(maxsplit=1)  # ["a", "b"]
    |
 help: Replace with list literal
@ -1382,5 +1383,16 @@ SIM905 Consider using a list literal instead of `str.rsplit`
 171 | " a b c d ".split(maxsplit=2)  # ["a", "b", "c d "]
 172 | " a b c d ".rsplit(maxsplit=2)  # [" a b", "c", "d"]
    | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 173 | "a  b".split(maxsplit=1)  # ["a", "b"]
    |
 help: Replace with list literal
 SIM905 Consider using a list literal instead of `str.split`
   --> SIM905.py:173:1
    |
 171 | " a b c d ".split(maxsplit=2)  # ["a", "b", "c d "]
 172 | " a b c d ".rsplit(maxsplit=2)  # [" a b", "c", "d"]
 173 | "a  b".split(maxsplit=1)  # ["a", "b"]
    | ^^^^^^^^^^^^^^^^^^^^^^^^
    |
 help: Replace with list literal
--- a/crates/ruff_linter/src/rules/flake8_simplify/snapshots/ruff_linterrulesflake8_simplifytestspreview__SIM905_SIM905.py.snap
+++ b/crates/ruff_linter/src/rules/flake8_simplify/snapshots/ruff_linterrulesflake8_simplifytestspreview__SIM905_SIM905.py.snap
@ -1420,6 +1420,7 @@ SIM905 [*] Consider using a list literal instead of `str.split`
 171 | " a b c d ".split(maxsplit=2)  # ["a", "b", "c d "]
    | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 172 | " a b c d ".rsplit(maxsplit=2)  # [" a b", "c", "d"]
 173 | "a  b".split(maxsplit=1)  # ["a", "b"]
    |
 help: Replace with list literal
 168 | print("<\x1c\x1d\x1e\x1f".rsplit(maxsplit=0))
@ -1428,6 +1429,7 @@ help: Replace with list literal
    - " a b c d ".split(maxsplit=2)  # ["a", "b", "c d "]
 171 + ["a", "b", "c d "]  # ["a", "b", "c d "]
 172 | " a b c d ".rsplit(maxsplit=2)  # [" a b", "c", "d"]
 173 | "a  b".split(maxsplit=1)  # ["a", "b"]
 SIM905 [*] Consider using a list literal instead of `str.rsplit`
   --> SIM905.py:172:1
@ -1436,6 +1438,7 @@ SIM905 [*] Consider using a list literal instead of `str.rsplit`
 171 | " a b c d ".split(maxsplit=2)  # ["a", "b", "c d "]
 172 | " a b c d ".rsplit(maxsplit=2)  # [" a b", "c", "d"]
    | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 173 | "a  b".split(maxsplit=1)  # ["a", "b"]
    |
 help: Replace with list literal
 169 | 
@ -1443,3 +1446,19 @@ help: Replace with list literal
 171 | " a b c d ".split(maxsplit=2)  # ["a", "b", "c d "]
    - " a b c d ".rsplit(maxsplit=2)  # [" a b", "c", "d"]
 172 + [" a b", "c", "d"]  # [" a b", "c", "d"]
 173 | "a  b".split(maxsplit=1)  # ["a", "b"]
 SIM905 [*] Consider using a list literal instead of `str.split`
   --> SIM905.py:173:1
    |
 171 | " a b c d ".split(maxsplit=2)  # ["a", "b", "c d "]
 172 | " a b c d ".rsplit(maxsplit=2)  # [" a b", "c", "d"]
 173 | "a  b".split(maxsplit=1)  # ["a", "b"]
    | ^^^^^^^^^^^^^^^^^^^^^^^^
    |
 help: Replace with list literal
 170 | # leading/trailing whitespace should not count towards maxsplit
 171 | " a b c d ".split(maxsplit=2)  # ["a", "b", "c d "]
 172 | " a b c d ".rsplit(maxsplit=2)  # [" a b", "c", "d"]
    - "a  b".split(maxsplit=1)  # ["a", "b"]
 173 + ["a", "b"]  # ["a", "b"]