[flake8-simplify] Fix incorrect fix for positive maxsplit without separator (SIM905) (#20056)
Some checks are pending
CI / Determine changes (push) Waiting to run
CI / cargo fmt (push) Waiting to run
CI / cargo clippy (push) Blocked by required conditions
CI / cargo test (linux) (push) Blocked by required conditions
CI / cargo test (linux, release) (push) Blocked by required conditions
CI / cargo test (windows) (push) Blocked by required conditions
CI / cargo test (wasm) (push) Blocked by required conditions
CI / cargo build (release) (push) Waiting to run
CI / cargo build (msrv) (push) Blocked by required conditions
CI / cargo fuzz build (push) Blocked by required conditions
CI / fuzz parser (push) Blocked by required conditions
CI / test scripts (push) Blocked by required conditions
CI / ecosystem (push) Blocked by required conditions
CI / Fuzz for new ty panics (push) Blocked by required conditions
CI / cargo shear (push) Blocked by required conditions
CI / python package (push) Waiting to run
CI / pre-commit (push) Waiting to run
CI / mkdocs (push) Waiting to run
CI / formatter instabilities and black similarity (push) Blocked by required conditions
CI / test ruff-lsp (push) Blocked by required conditions
CI / check playground (push) Blocked by required conditions
CI / benchmarks-instrumented (push) Blocked by required conditions
CI / benchmarks-walltime (push) Blocked by required conditions

<!--
Thank you for contributing to Ruff/ty! To help us out with reviewing,
please consider the following:

- Does this pull request include a summary of the change? (See below.)
- Does this pull request include a descriptive title? (Please prefix
with `[ty]` for ty pull
  requests.)
- Does this pull request include references to any relevant issues?
-->

## Summary

Resolves #20033

## Test Plan

unit tests added to the new split function, existing snapshot test
updated.

Co-authored-by: Brent Westbrook <brentrwestbrook@gmail.com>
This commit is contained in:
Frazer McLean 2025-09-18 22:56:34 +02:00 committed by GitHub
parent 706be0a6e7
commit bc89d0394c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 141 additions and 51 deletions

View file

@ -170,3 +170,4 @@ print("<\x1c\x1d\x1e\x1f".rsplit(maxsplit=0))
# leading/trailing whitespace should not count towards maxsplit # leading/trailing whitespace should not count towards maxsplit
" a b c d ".split(maxsplit=2) # ["a", "b", "c d "] " a b c d ".split(maxsplit=2) # ["a", "b", "c d "]
" a b c d ".rsplit(maxsplit=2) # [" a b", "c", "d"] " a b c d ".rsplit(maxsplit=2) # [" a b", "c", "d"]
"a b".split(maxsplit=1) # ["a", "b"]

View file

@ -58,8 +58,8 @@ enum Method {
} }
impl Method { impl Method {
fn is_split(self) -> bool { fn is_rsplit(self) -> bool {
matches!(self, Method::Split) matches!(self, Method::RSplit)
} }
} }
@ -201,65 +201,19 @@ fn split_default(
method: Method, method: Method,
settings: &LinterSettings, settings: &LinterSettings,
) -> Option<Expr> { ) -> Option<Expr> {
// From the Python documentation:
// > If sep is not specified or is None, a different splitting algorithm is applied: runs of
// > consecutive whitespace are regarded as a single separator, and the result will contain
// > no empty strings at the start or end if the string has leading or trailing whitespace.
// > Consequently, splitting an empty string or a string consisting of just whitespace with
// > a None separator returns [].
// https://docs.python.org/3/library/stdtypes.html#str.split
let string_val = str_value.to_str(); let string_val = str_value.to_str();
match max_split.cmp(&0) { match max_split.cmp(&0) {
Ordering::Greater => { Ordering::Greater if !is_maxsplit_without_separator_fix_enabled(settings) => None,
if !is_maxsplit_without_separator_fix_enabled(settings) { Ordering::Greater | Ordering::Equal => {
return None;
}
let Ok(max_split) = usize::try_from(max_split) else { let Ok(max_split) = usize::try_from(max_split) else {
return None; return None;
}; };
let list_items: Vec<&str> = if method.is_split() { let list_items = split_whitespace_with_maxsplit(string_val, max_split, method);
string_val
.trim_start_matches(py_unicode_is_whitespace)
.splitn(max_split + 1, py_unicode_is_whitespace)
.filter(|s| !s.is_empty())
.collect()
} else {
let mut items: Vec<&str> = string_val
.trim_end_matches(py_unicode_is_whitespace)
.rsplitn(max_split + 1, py_unicode_is_whitespace)
.filter(|s| !s.is_empty())
.collect();
items.reverse();
items
};
Some(construct_replacement( Some(construct_replacement(
&list_items, &list_items,
str_value.first_literal_flags(), str_value.first_literal_flags(),
)) ))
} }
Ordering::Equal => {
// Behavior for maxsplit = 0 when sep is None:
// - If the string is empty or all whitespace, result is [].
// - Otherwise:
// - " x ".split(maxsplit=0) -> ['x ']
// - " x ".rsplit(maxsplit=0) -> [' x']
// - "".split(maxsplit=0) -> []
// - " ".split(maxsplit=0) -> []
let processed_str = if method.is_split() {
string_val.trim_start_matches(py_unicode_is_whitespace)
} else {
string_val.trim_end_matches(py_unicode_is_whitespace)
};
let list_items: &[_] = if processed_str.is_empty() {
&[]
} else {
&[processed_str]
};
Some(construct_replacement(
list_items,
str_value.first_literal_flags(),
))
}
Ordering::Less => { Ordering::Less => {
let list_items: Vec<&str> = string_val let list_items: Vec<&str> = string_val
.split(py_unicode_is_whitespace) .split(py_unicode_is_whitespace)
@ -367,3 +321,107 @@ const fn py_unicode_is_whitespace(ch: char) -> bool {
| '\u{3000}' | '\u{3000}'
) )
} }
struct WhitespaceMaxSplitIterator<'a> {
remaining: &'a str,
max_split: usize,
splits: usize,
method: Method,
}
impl<'a> WhitespaceMaxSplitIterator<'a> {
fn new(s: &'a str, max_split: usize, method: Method) -> Self {
let remaining = match method {
Method::Split => s.trim_start_matches(py_unicode_is_whitespace),
Method::RSplit => s.trim_end_matches(py_unicode_is_whitespace),
};
Self {
remaining,
max_split,
splits: 0,
method,
}
}
}
impl<'a> Iterator for WhitespaceMaxSplitIterator<'a> {
type Item = &'a str;
fn next(&mut self) -> Option<Self::Item> {
if self.remaining.is_empty() {
return None;
}
if self.splits >= self.max_split {
let result = self.remaining;
self.remaining = "";
return Some(result);
}
self.splits += 1;
match self.method {
Method::Split => match self.remaining.split_once(py_unicode_is_whitespace) {
Some((s, remaining)) => {
self.remaining = remaining.trim_start_matches(py_unicode_is_whitespace);
Some(s)
}
None => Some(std::mem::take(&mut self.remaining)),
},
Method::RSplit => match self.remaining.rsplit_once(py_unicode_is_whitespace) {
Some((remaining, s)) => {
self.remaining = remaining.trim_end_matches(py_unicode_is_whitespace);
Some(s)
}
None => Some(std::mem::take(&mut self.remaining)),
},
}
}
}
// From the Python documentation:
// > If sep is not specified or is None, a different splitting algorithm is applied: runs of
// > consecutive whitespace are regarded as a single separator, and the result will contain
// > no empty strings at the start or end if the string has leading or trailing whitespace.
// > Consequently, splitting an empty string or a string consisting of just whitespace with
// > a None separator returns [].
// https://docs.python.org/3/library/stdtypes.html#str.split
fn split_whitespace_with_maxsplit(s: &str, max_split: usize, method: Method) -> Vec<&str> {
let mut result: Vec<_> = WhitespaceMaxSplitIterator::new(s, max_split, method).collect();
if method.is_rsplit() {
result.reverse();
}
result
}
#[cfg(test)]
mod tests {
use super::{Method, split_whitespace_with_maxsplit};
use test_case::test_case;
#[test_case(" ", 1, &[])]
#[test_case("a b", 1, &["a", "b"])]
#[test_case("a b", 2, &["a", "b"])]
#[test_case(" a b c d ", 2, &["a", "b", "c d "])]
#[test_case(" a b c ", 1, &["a", "b c "])]
#[test_case(" x ", 0, &["x "])]
#[test_case(" ", 0, &[])]
#[test_case("a\u{3000}b", 1, &["a", "b"])]
fn test_split_whitespace_with_maxsplit(s: &str, max_split: usize, expected: &[&str]) {
let parts = split_whitespace_with_maxsplit(s, max_split, Method::Split);
assert_eq!(parts, expected);
}
#[test_case(" ", 1, &[])]
#[test_case("a b", 1, &["a", "b"])]
#[test_case("a b", 2, &["a", "b"])]
#[test_case(" a b c d ", 2, &[" a b", "c", "d"])]
#[test_case(" a b c ", 1, &[" a b", "c"])]
#[test_case(" x ", 0, &[" x"])]
#[test_case(" ", 0, &[])]
#[test_case("a\u{3000}b", 1, &["a", "b"])]
fn test_rsplit_whitespace_with_maxsplit(s: &str, max_split: usize, expected: &[&str]) {
let parts = split_whitespace_with_maxsplit(s, max_split, Method::RSplit);
assert_eq!(parts, expected);
}
}

View file

@ -1372,6 +1372,7 @@ SIM905 Consider using a list literal instead of `str.split`
171 | " a b c d ".split(maxsplit=2) # ["a", "b", "c d "] 171 | " a b c d ".split(maxsplit=2) # ["a", "b", "c d "]
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
172 | " a b c d ".rsplit(maxsplit=2) # [" a b", "c", "d"] 172 | " a b c d ".rsplit(maxsplit=2) # [" a b", "c", "d"]
173 | "a b".split(maxsplit=1) # ["a", "b"]
| |
help: Replace with list literal help: Replace with list literal
@ -1382,5 +1383,16 @@ SIM905 Consider using a list literal instead of `str.rsplit`
171 | " a b c d ".split(maxsplit=2) # ["a", "b", "c d "] 171 | " a b c d ".split(maxsplit=2) # ["a", "b", "c d "]
172 | " a b c d ".rsplit(maxsplit=2) # [" a b", "c", "d"] 172 | " a b c d ".rsplit(maxsplit=2) # [" a b", "c", "d"]
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
173 | "a b".split(maxsplit=1) # ["a", "b"]
|
help: Replace with list literal
SIM905 Consider using a list literal instead of `str.split`
--> SIM905.py:173:1
|
171 | " a b c d ".split(maxsplit=2) # ["a", "b", "c d "]
172 | " a b c d ".rsplit(maxsplit=2) # [" a b", "c", "d"]
173 | "a b".split(maxsplit=1) # ["a", "b"]
| ^^^^^^^^^^^^^^^^^^^^^^^^
| |
help: Replace with list literal help: Replace with list literal

View file

@ -1420,6 +1420,7 @@ SIM905 [*] Consider using a list literal instead of `str.split`
171 | " a b c d ".split(maxsplit=2) # ["a", "b", "c d "] 171 | " a b c d ".split(maxsplit=2) # ["a", "b", "c d "]
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
172 | " a b c d ".rsplit(maxsplit=2) # [" a b", "c", "d"] 172 | " a b c d ".rsplit(maxsplit=2) # [" a b", "c", "d"]
173 | "a b".split(maxsplit=1) # ["a", "b"]
| |
help: Replace with list literal help: Replace with list literal
168 | print("<\x1c\x1d\x1e\x1f".rsplit(maxsplit=0)) 168 | print("<\x1c\x1d\x1e\x1f".rsplit(maxsplit=0))
@ -1428,6 +1429,7 @@ help: Replace with list literal
- " a b c d ".split(maxsplit=2) # ["a", "b", "c d "] - " a b c d ".split(maxsplit=2) # ["a", "b", "c d "]
171 + ["a", "b", "c d "] # ["a", "b", "c d "] 171 + ["a", "b", "c d "] # ["a", "b", "c d "]
172 | " a b c d ".rsplit(maxsplit=2) # [" a b", "c", "d"] 172 | " a b c d ".rsplit(maxsplit=2) # [" a b", "c", "d"]
173 | "a b".split(maxsplit=1) # ["a", "b"]
SIM905 [*] Consider using a list literal instead of `str.rsplit` SIM905 [*] Consider using a list literal instead of `str.rsplit`
--> SIM905.py:172:1 --> SIM905.py:172:1
@ -1436,6 +1438,7 @@ SIM905 [*] Consider using a list literal instead of `str.rsplit`
171 | " a b c d ".split(maxsplit=2) # ["a", "b", "c d "] 171 | " a b c d ".split(maxsplit=2) # ["a", "b", "c d "]
172 | " a b c d ".rsplit(maxsplit=2) # [" a b", "c", "d"] 172 | " a b c d ".rsplit(maxsplit=2) # [" a b", "c", "d"]
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
173 | "a b".split(maxsplit=1) # ["a", "b"]
| |
help: Replace with list literal help: Replace with list literal
169 | 169 |
@ -1443,3 +1446,19 @@ help: Replace with list literal
171 | " a b c d ".split(maxsplit=2) # ["a", "b", "c d "] 171 | " a b c d ".split(maxsplit=2) # ["a", "b", "c d "]
- " a b c d ".rsplit(maxsplit=2) # [" a b", "c", "d"] - " a b c d ".rsplit(maxsplit=2) # [" a b", "c", "d"]
172 + [" a b", "c", "d"] # [" a b", "c", "d"] 172 + [" a b", "c", "d"] # [" a b", "c", "d"]
173 | "a b".split(maxsplit=1) # ["a", "b"]
SIM905 [*] Consider using a list literal instead of `str.split`
--> SIM905.py:173:1
|
171 | " a b c d ".split(maxsplit=2) # ["a", "b", "c d "]
172 | " a b c d ".rsplit(maxsplit=2) # [" a b", "c", "d"]
173 | "a b".split(maxsplit=1) # ["a", "b"]
| ^^^^^^^^^^^^^^^^^^^^^^^^
|
help: Replace with list literal
170 | # leading/trailing whitespace should not count towards maxsplit
171 | " a b c d ".split(maxsplit=2) # ["a", "b", "c d "]
172 | " a b c d ".rsplit(maxsplit=2) # [" a b", "c", "d"]
- "a b".split(maxsplit=1) # ["a", "b"]
173 + ["a", "b"] # ["a", "b"]