mirror of
https://github.com/astral-sh/ruff.git
synced 2025-09-29 21:34:57 +00:00
[flake8-simplify
] Fix incorrect fix for positive maxsplit
without separator (SIM905
) (#20056)
Some checks are pending
CI / Determine changes (push) Waiting to run
CI / cargo fmt (push) Waiting to run
CI / cargo clippy (push) Blocked by required conditions
CI / cargo test (linux) (push) Blocked by required conditions
CI / cargo test (linux, release) (push) Blocked by required conditions
CI / cargo test (windows) (push) Blocked by required conditions
CI / cargo test (wasm) (push) Blocked by required conditions
CI / cargo build (release) (push) Waiting to run
CI / cargo build (msrv) (push) Blocked by required conditions
CI / cargo fuzz build (push) Blocked by required conditions
CI / fuzz parser (push) Blocked by required conditions
CI / test scripts (push) Blocked by required conditions
CI / ecosystem (push) Blocked by required conditions
CI / Fuzz for new ty panics (push) Blocked by required conditions
CI / cargo shear (push) Blocked by required conditions
CI / python package (push) Waiting to run
CI / pre-commit (push) Waiting to run
CI / mkdocs (push) Waiting to run
CI / formatter instabilities and black similarity (push) Blocked by required conditions
CI / test ruff-lsp (push) Blocked by required conditions
CI / check playground (push) Blocked by required conditions
CI / benchmarks-instrumented (push) Blocked by required conditions
CI / benchmarks-walltime (push) Blocked by required conditions
Some checks are pending
CI / Determine changes (push) Waiting to run
CI / cargo fmt (push) Waiting to run
CI / cargo clippy (push) Blocked by required conditions
CI / cargo test (linux) (push) Blocked by required conditions
CI / cargo test (linux, release) (push) Blocked by required conditions
CI / cargo test (windows) (push) Blocked by required conditions
CI / cargo test (wasm) (push) Blocked by required conditions
CI / cargo build (release) (push) Waiting to run
CI / cargo build (msrv) (push) Blocked by required conditions
CI / cargo fuzz build (push) Blocked by required conditions
CI / fuzz parser (push) Blocked by required conditions
CI / test scripts (push) Blocked by required conditions
CI / ecosystem (push) Blocked by required conditions
CI / Fuzz for new ty panics (push) Blocked by required conditions
CI / cargo shear (push) Blocked by required conditions
CI / python package (push) Waiting to run
CI / pre-commit (push) Waiting to run
CI / mkdocs (push) Waiting to run
CI / formatter instabilities and black similarity (push) Blocked by required conditions
CI / test ruff-lsp (push) Blocked by required conditions
CI / check playground (push) Blocked by required conditions
CI / benchmarks-instrumented (push) Blocked by required conditions
CI / benchmarks-walltime (push) Blocked by required conditions
<!-- Thank you for contributing to Ruff/ty! To help us out with reviewing, please consider the following: - Does this pull request include a summary of the change? (See below.) - Does this pull request include a descriptive title? (Please prefix with `[ty]` for ty pull requests.) - Does this pull request include references to any relevant issues? --> ## Summary Resolves #20033 ## Test Plan unit tests added to the new split function, existing snapshot test updated. Co-authored-by: Brent Westbrook <brentrwestbrook@gmail.com>
This commit is contained in:
parent
706be0a6e7
commit
bc89d0394c
4 changed files with 141 additions and 51 deletions
|
@ -170,3 +170,4 @@ print("<\x1c\x1d\x1e\x1f".rsplit(maxsplit=0))
|
|||
# leading/trailing whitespace should not count towards maxsplit
|
||||
" a b c d ".split(maxsplit=2) # ["a", "b", "c d "]
|
||||
" a b c d ".rsplit(maxsplit=2) # [" a b", "c", "d"]
|
||||
"a b".split(maxsplit=1) # ["a", "b"]
|
||||
|
|
|
@ -58,8 +58,8 @@ enum Method {
|
|||
}
|
||||
|
||||
impl Method {
|
||||
fn is_split(self) -> bool {
|
||||
matches!(self, Method::Split)
|
||||
fn is_rsplit(self) -> bool {
|
||||
matches!(self, Method::RSplit)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -201,65 +201,19 @@ fn split_default(
|
|||
method: Method,
|
||||
settings: &LinterSettings,
|
||||
) -> Option<Expr> {
|
||||
// From the Python documentation:
|
||||
// > If sep is not specified or is None, a different splitting algorithm is applied: runs of
|
||||
// > consecutive whitespace are regarded as a single separator, and the result will contain
|
||||
// > no empty strings at the start or end if the string has leading or trailing whitespace.
|
||||
// > Consequently, splitting an empty string or a string consisting of just whitespace with
|
||||
// > a None separator returns [].
|
||||
// https://docs.python.org/3/library/stdtypes.html#str.split
|
||||
let string_val = str_value.to_str();
|
||||
match max_split.cmp(&0) {
|
||||
Ordering::Greater => {
|
||||
if !is_maxsplit_without_separator_fix_enabled(settings) {
|
||||
return None;
|
||||
}
|
||||
Ordering::Greater if !is_maxsplit_without_separator_fix_enabled(settings) => None,
|
||||
Ordering::Greater | Ordering::Equal => {
|
||||
let Ok(max_split) = usize::try_from(max_split) else {
|
||||
return None;
|
||||
};
|
||||
let list_items: Vec<&str> = if method.is_split() {
|
||||
string_val
|
||||
.trim_start_matches(py_unicode_is_whitespace)
|
||||
.splitn(max_split + 1, py_unicode_is_whitespace)
|
||||
.filter(|s| !s.is_empty())
|
||||
.collect()
|
||||
} else {
|
||||
let mut items: Vec<&str> = string_val
|
||||
.trim_end_matches(py_unicode_is_whitespace)
|
||||
.rsplitn(max_split + 1, py_unicode_is_whitespace)
|
||||
.filter(|s| !s.is_empty())
|
||||
.collect();
|
||||
items.reverse();
|
||||
items
|
||||
};
|
||||
let list_items = split_whitespace_with_maxsplit(string_val, max_split, method);
|
||||
Some(construct_replacement(
|
||||
&list_items,
|
||||
str_value.first_literal_flags(),
|
||||
))
|
||||
}
|
||||
Ordering::Equal => {
|
||||
// Behavior for maxsplit = 0 when sep is None:
|
||||
// - If the string is empty or all whitespace, result is [].
|
||||
// - Otherwise:
|
||||
// - " x ".split(maxsplit=0) -> ['x ']
|
||||
// - " x ".rsplit(maxsplit=0) -> [' x']
|
||||
// - "".split(maxsplit=0) -> []
|
||||
// - " ".split(maxsplit=0) -> []
|
||||
let processed_str = if method.is_split() {
|
||||
string_val.trim_start_matches(py_unicode_is_whitespace)
|
||||
} else {
|
||||
string_val.trim_end_matches(py_unicode_is_whitespace)
|
||||
};
|
||||
let list_items: &[_] = if processed_str.is_empty() {
|
||||
&[]
|
||||
} else {
|
||||
&[processed_str]
|
||||
};
|
||||
Some(construct_replacement(
|
||||
list_items,
|
||||
str_value.first_literal_flags(),
|
||||
))
|
||||
}
|
||||
Ordering::Less => {
|
||||
let list_items: Vec<&str> = string_val
|
||||
.split(py_unicode_is_whitespace)
|
||||
|
@ -367,3 +321,107 @@ const fn py_unicode_is_whitespace(ch: char) -> bool {
|
|||
| '\u{3000}'
|
||||
)
|
||||
}
|
||||
|
||||
struct WhitespaceMaxSplitIterator<'a> {
|
||||
remaining: &'a str,
|
||||
max_split: usize,
|
||||
splits: usize,
|
||||
method: Method,
|
||||
}
|
||||
|
||||
impl<'a> WhitespaceMaxSplitIterator<'a> {
|
||||
fn new(s: &'a str, max_split: usize, method: Method) -> Self {
|
||||
let remaining = match method {
|
||||
Method::Split => s.trim_start_matches(py_unicode_is_whitespace),
|
||||
Method::RSplit => s.trim_end_matches(py_unicode_is_whitespace),
|
||||
};
|
||||
|
||||
Self {
|
||||
remaining,
|
||||
max_split,
|
||||
splits: 0,
|
||||
method,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for WhitespaceMaxSplitIterator<'a> {
|
||||
type Item = &'a str;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if self.remaining.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
if self.splits >= self.max_split {
|
||||
let result = self.remaining;
|
||||
self.remaining = "";
|
||||
return Some(result);
|
||||
}
|
||||
|
||||
self.splits += 1;
|
||||
match self.method {
|
||||
Method::Split => match self.remaining.split_once(py_unicode_is_whitespace) {
|
||||
Some((s, remaining)) => {
|
||||
self.remaining = remaining.trim_start_matches(py_unicode_is_whitespace);
|
||||
Some(s)
|
||||
}
|
||||
None => Some(std::mem::take(&mut self.remaining)),
|
||||
},
|
||||
Method::RSplit => match self.remaining.rsplit_once(py_unicode_is_whitespace) {
|
||||
Some((remaining, s)) => {
|
||||
self.remaining = remaining.trim_end_matches(py_unicode_is_whitespace);
|
||||
Some(s)
|
||||
}
|
||||
None => Some(std::mem::take(&mut self.remaining)),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// From the Python documentation:
|
||||
// > If sep is not specified or is None, a different splitting algorithm is applied: runs of
|
||||
// > consecutive whitespace are regarded as a single separator, and the result will contain
|
||||
// > no empty strings at the start or end if the string has leading or trailing whitespace.
|
||||
// > Consequently, splitting an empty string or a string consisting of just whitespace with
|
||||
// > a None separator returns [].
|
||||
// https://docs.python.org/3/library/stdtypes.html#str.split
|
||||
fn split_whitespace_with_maxsplit(s: &str, max_split: usize, method: Method) -> Vec<&str> {
|
||||
let mut result: Vec<_> = WhitespaceMaxSplitIterator::new(s, max_split, method).collect();
|
||||
if method.is_rsplit() {
|
||||
result.reverse();
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{Method, split_whitespace_with_maxsplit};
|
||||
use test_case::test_case;
|
||||
|
||||
#[test_case(" ", 1, &[])]
|
||||
#[test_case("a b", 1, &["a", "b"])]
|
||||
#[test_case("a b", 2, &["a", "b"])]
|
||||
#[test_case(" a b c d ", 2, &["a", "b", "c d "])]
|
||||
#[test_case(" a b c ", 1, &["a", "b c "])]
|
||||
#[test_case(" x ", 0, &["x "])]
|
||||
#[test_case(" ", 0, &[])]
|
||||
#[test_case("a\u{3000}b", 1, &["a", "b"])]
|
||||
fn test_split_whitespace_with_maxsplit(s: &str, max_split: usize, expected: &[&str]) {
|
||||
let parts = split_whitespace_with_maxsplit(s, max_split, Method::Split);
|
||||
assert_eq!(parts, expected);
|
||||
}
|
||||
|
||||
#[test_case(" ", 1, &[])]
|
||||
#[test_case("a b", 1, &["a", "b"])]
|
||||
#[test_case("a b", 2, &["a", "b"])]
|
||||
#[test_case(" a b c d ", 2, &[" a b", "c", "d"])]
|
||||
#[test_case(" a b c ", 1, &[" a b", "c"])]
|
||||
#[test_case(" x ", 0, &[" x"])]
|
||||
#[test_case(" ", 0, &[])]
|
||||
#[test_case("a\u{3000}b", 1, &["a", "b"])]
|
||||
fn test_rsplit_whitespace_with_maxsplit(s: &str, max_split: usize, expected: &[&str]) {
|
||||
let parts = split_whitespace_with_maxsplit(s, max_split, Method::RSplit);
|
||||
assert_eq!(parts, expected);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1372,6 +1372,7 @@ SIM905 Consider using a list literal instead of `str.split`
|
|||
171 | " a b c d ".split(maxsplit=2) # ["a", "b", "c d "]
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
172 | " a b c d ".rsplit(maxsplit=2) # [" a b", "c", "d"]
|
||||
173 | "a b".split(maxsplit=1) # ["a", "b"]
|
||||
|
|
||||
help: Replace with list literal
|
||||
|
||||
|
@ -1382,5 +1383,16 @@ SIM905 Consider using a list literal instead of `str.rsplit`
|
|||
171 | " a b c d ".split(maxsplit=2) # ["a", "b", "c d "]
|
||||
172 | " a b c d ".rsplit(maxsplit=2) # [" a b", "c", "d"]
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
173 | "a b".split(maxsplit=1) # ["a", "b"]
|
||||
|
|
||||
help: Replace with list literal
|
||||
|
||||
SIM905 Consider using a list literal instead of `str.split`
|
||||
--> SIM905.py:173:1
|
||||
|
|
||||
171 | " a b c d ".split(maxsplit=2) # ["a", "b", "c d "]
|
||||
172 | " a b c d ".rsplit(maxsplit=2) # [" a b", "c", "d"]
|
||||
173 | "a b".split(maxsplit=1) # ["a", "b"]
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
|
||||
help: Replace with list literal
|
||||
|
|
|
@ -1420,6 +1420,7 @@ SIM905 [*] Consider using a list literal instead of `str.split`
|
|||
171 | " a b c d ".split(maxsplit=2) # ["a", "b", "c d "]
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
172 | " a b c d ".rsplit(maxsplit=2) # [" a b", "c", "d"]
|
||||
173 | "a b".split(maxsplit=1) # ["a", "b"]
|
||||
|
|
||||
help: Replace with list literal
|
||||
168 | print("<\x1c\x1d\x1e\x1f".rsplit(maxsplit=0))
|
||||
|
@ -1428,6 +1429,7 @@ help: Replace with list literal
|
|||
- " a b c d ".split(maxsplit=2) # ["a", "b", "c d "]
|
||||
171 + ["a", "b", "c d "] # ["a", "b", "c d "]
|
||||
172 | " a b c d ".rsplit(maxsplit=2) # [" a b", "c", "d"]
|
||||
173 | "a b".split(maxsplit=1) # ["a", "b"]
|
||||
|
||||
SIM905 [*] Consider using a list literal instead of `str.rsplit`
|
||||
--> SIM905.py:172:1
|
||||
|
@ -1436,6 +1438,7 @@ SIM905 [*] Consider using a list literal instead of `str.rsplit`
|
|||
171 | " a b c d ".split(maxsplit=2) # ["a", "b", "c d "]
|
||||
172 | " a b c d ".rsplit(maxsplit=2) # [" a b", "c", "d"]
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
173 | "a b".split(maxsplit=1) # ["a", "b"]
|
||||
|
|
||||
help: Replace with list literal
|
||||
169 |
|
||||
|
@ -1443,3 +1446,19 @@ help: Replace with list literal
|
|||
171 | " a b c d ".split(maxsplit=2) # ["a", "b", "c d "]
|
||||
- " a b c d ".rsplit(maxsplit=2) # [" a b", "c", "d"]
|
||||
172 + [" a b", "c", "d"] # [" a b", "c", "d"]
|
||||
173 | "a b".split(maxsplit=1) # ["a", "b"]
|
||||
|
||||
SIM905 [*] Consider using a list literal instead of `str.split`
|
||||
--> SIM905.py:173:1
|
||||
|
|
||||
171 | " a b c d ".split(maxsplit=2) # ["a", "b", "c d "]
|
||||
172 | " a b c d ".rsplit(maxsplit=2) # [" a b", "c", "d"]
|
||||
173 | "a b".split(maxsplit=1) # ["a", "b"]
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
|
||||
help: Replace with list literal
|
||||
170 | # leading/trailing whitespace should not count towards maxsplit
|
||||
171 | " a b c d ".split(maxsplit=2) # ["a", "b", "c d "]
|
||||
172 | " a b c d ".rsplit(maxsplit=2) # [" a b", "c", "d"]
|
||||
- "a b".split(maxsplit=1) # ["a", "b"]
|
||||
173 + ["a", "b"] # ["a", "b"]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue