diff --git a/crates/ruff_linter/resources/test/fixtures/flake8_simplify/SIM905.py b/crates/ruff_linter/resources/test/fixtures/flake8_simplify/SIM905.py index 6b954b530b..1545b07ca8 100644 --- a/crates/ruff_linter/resources/test/fixtures/flake8_simplify/SIM905.py +++ b/crates/ruff_linter/resources/test/fixtures/flake8_simplify/SIM905.py @@ -170,3 +170,4 @@ print("<\x1c\x1d\x1e\x1f".rsplit(maxsplit=0)) # leading/trailing whitespace should not count towards maxsplit " a b c d ".split(maxsplit=2) # ["a", "b", "c d "] " a b c d ".rsplit(maxsplit=2) # [" a b", "c", "d"] +"a b".split(maxsplit=1) # ["a", "b"] diff --git a/crates/ruff_linter/src/rules/flake8_simplify/rules/split_static_string.rs b/crates/ruff_linter/src/rules/flake8_simplify/rules/split_static_string.rs index 0c08e53819..05e5eddf4f 100644 --- a/crates/ruff_linter/src/rules/flake8_simplify/rules/split_static_string.rs +++ b/crates/ruff_linter/src/rules/flake8_simplify/rules/split_static_string.rs @@ -58,8 +58,8 @@ enum Method { } impl Method { - fn is_split(self) -> bool { - matches!(self, Method::Split) + fn is_rsplit(self) -> bool { + matches!(self, Method::RSplit) } } @@ -201,65 +201,19 @@ fn split_default( method: Method, settings: &LinterSettings, ) -> Option { - // From the Python documentation: - // > If sep is not specified or is None, a different splitting algorithm is applied: runs of - // > consecutive whitespace are regarded as a single separator, and the result will contain - // > no empty strings at the start or end if the string has leading or trailing whitespace. - // > Consequently, splitting an empty string or a string consisting of just whitespace with - // > a None separator returns []. - // https://docs.python.org/3/library/stdtypes.html#str.split let string_val = str_value.to_str(); match max_split.cmp(&0) { - Ordering::Greater => { - if !is_maxsplit_without_separator_fix_enabled(settings) { - return None; - } + Ordering::Greater if !is_maxsplit_without_separator_fix_enabled(settings) => None, + Ordering::Greater | Ordering::Equal => { let Ok(max_split) = usize::try_from(max_split) else { return None; }; - let list_items: Vec<&str> = if method.is_split() { - string_val - .trim_start_matches(py_unicode_is_whitespace) - .splitn(max_split + 1, py_unicode_is_whitespace) - .filter(|s| !s.is_empty()) - .collect() - } else { - let mut items: Vec<&str> = string_val - .trim_end_matches(py_unicode_is_whitespace) - .rsplitn(max_split + 1, py_unicode_is_whitespace) - .filter(|s| !s.is_empty()) - .collect(); - items.reverse(); - items - }; + let list_items = split_whitespace_with_maxsplit(string_val, max_split, method); Some(construct_replacement( &list_items, str_value.first_literal_flags(), )) } - Ordering::Equal => { - // Behavior for maxsplit = 0 when sep is None: - // - If the string is empty or all whitespace, result is []. - // - Otherwise: - // - " x ".split(maxsplit=0) -> ['x '] - // - " x ".rsplit(maxsplit=0) -> [' x'] - // - "".split(maxsplit=0) -> [] - // - " ".split(maxsplit=0) -> [] - let processed_str = if method.is_split() { - string_val.trim_start_matches(py_unicode_is_whitespace) - } else { - string_val.trim_end_matches(py_unicode_is_whitespace) - }; - let list_items: &[_] = if processed_str.is_empty() { - &[] - } else { - &[processed_str] - }; - Some(construct_replacement( - list_items, - str_value.first_literal_flags(), - )) - } Ordering::Less => { let list_items: Vec<&str> = string_val .split(py_unicode_is_whitespace) @@ -367,3 +321,107 @@ const fn py_unicode_is_whitespace(ch: char) -> bool { | '\u{3000}' ) } + +struct WhitespaceMaxSplitIterator<'a> { + remaining: &'a str, + max_split: usize, + splits: usize, + method: Method, +} + +impl<'a> WhitespaceMaxSplitIterator<'a> { + fn new(s: &'a str, max_split: usize, method: Method) -> Self { + let remaining = match method { + Method::Split => s.trim_start_matches(py_unicode_is_whitespace), + Method::RSplit => s.trim_end_matches(py_unicode_is_whitespace), + }; + + Self { + remaining, + max_split, + splits: 0, + method, + } + } +} + +impl<'a> Iterator for WhitespaceMaxSplitIterator<'a> { + type Item = &'a str; + + fn next(&mut self) -> Option { + if self.remaining.is_empty() { + return None; + } + + if self.splits >= self.max_split { + let result = self.remaining; + self.remaining = ""; + return Some(result); + } + + self.splits += 1; + match self.method { + Method::Split => match self.remaining.split_once(py_unicode_is_whitespace) { + Some((s, remaining)) => { + self.remaining = remaining.trim_start_matches(py_unicode_is_whitespace); + Some(s) + } + None => Some(std::mem::take(&mut self.remaining)), + }, + Method::RSplit => match self.remaining.rsplit_once(py_unicode_is_whitespace) { + Some((remaining, s)) => { + self.remaining = remaining.trim_end_matches(py_unicode_is_whitespace); + Some(s) + } + None => Some(std::mem::take(&mut self.remaining)), + }, + } + } +} + +// From the Python documentation: +// > If sep is not specified or is None, a different splitting algorithm is applied: runs of +// > consecutive whitespace are regarded as a single separator, and the result will contain +// > no empty strings at the start or end if the string has leading or trailing whitespace. +// > Consequently, splitting an empty string or a string consisting of just whitespace with +// > a None separator returns []. +// https://docs.python.org/3/library/stdtypes.html#str.split +fn split_whitespace_with_maxsplit(s: &str, max_split: usize, method: Method) -> Vec<&str> { + let mut result: Vec<_> = WhitespaceMaxSplitIterator::new(s, max_split, method).collect(); + if method.is_rsplit() { + result.reverse(); + } + result +} + +#[cfg(test)] +mod tests { + use super::{Method, split_whitespace_with_maxsplit}; + use test_case::test_case; + + #[test_case(" ", 1, &[])] + #[test_case("a b", 1, &["a", "b"])] + #[test_case("a b", 2, &["a", "b"])] + #[test_case(" a b c d ", 2, &["a", "b", "c d "])] + #[test_case(" a b c ", 1, &["a", "b c "])] + #[test_case(" x ", 0, &["x "])] + #[test_case(" ", 0, &[])] + #[test_case("a\u{3000}b", 1, &["a", "b"])] + fn test_split_whitespace_with_maxsplit(s: &str, max_split: usize, expected: &[&str]) { + let parts = split_whitespace_with_maxsplit(s, max_split, Method::Split); + assert_eq!(parts, expected); + } + + #[test_case(" ", 1, &[])] + #[test_case("a b", 1, &["a", "b"])] + #[test_case("a b", 2, &["a", "b"])] + #[test_case(" a b c d ", 2, &[" a b", "c", "d"])] + #[test_case(" a b c ", 1, &[" a b", "c"])] + #[test_case(" x ", 0, &[" x"])] + #[test_case(" ", 0, &[])] + #[test_case("a\u{3000}b", 1, &["a", "b"])] + fn test_rsplit_whitespace_with_maxsplit(s: &str, max_split: usize, expected: &[&str]) { + let parts = split_whitespace_with_maxsplit(s, max_split, Method::RSplit); + assert_eq!(parts, expected); + } +} diff --git a/crates/ruff_linter/src/rules/flake8_simplify/snapshots/ruff_linter__rules__flake8_simplify__tests__SIM905_SIM905.py.snap b/crates/ruff_linter/src/rules/flake8_simplify/snapshots/ruff_linter__rules__flake8_simplify__tests__SIM905_SIM905.py.snap index 8815373ad3..2cadf0ceb9 100644 --- a/crates/ruff_linter/src/rules/flake8_simplify/snapshots/ruff_linter__rules__flake8_simplify__tests__SIM905_SIM905.py.snap +++ b/crates/ruff_linter/src/rules/flake8_simplify/snapshots/ruff_linter__rules__flake8_simplify__tests__SIM905_SIM905.py.snap @@ -1372,6 +1372,7 @@ SIM905 Consider using a list literal instead of `str.split` 171 | " a b c d ".split(maxsplit=2) # ["a", "b", "c d "] | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 172 | " a b c d ".rsplit(maxsplit=2) # [" a b", "c", "d"] +173 | "a b".split(maxsplit=1) # ["a", "b"] | help: Replace with list literal @@ -1382,5 +1383,16 @@ SIM905 Consider using a list literal instead of `str.rsplit` 171 | " a b c d ".split(maxsplit=2) # ["a", "b", "c d "] 172 | " a b c d ".rsplit(maxsplit=2) # [" a b", "c", "d"] | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +173 | "a b".split(maxsplit=1) # ["a", "b"] + | +help: Replace with list literal + +SIM905 Consider using a list literal instead of `str.split` + --> SIM905.py:173:1 + | +171 | " a b c d ".split(maxsplit=2) # ["a", "b", "c d "] +172 | " a b c d ".rsplit(maxsplit=2) # [" a b", "c", "d"] +173 | "a b".split(maxsplit=1) # ["a", "b"] + | ^^^^^^^^^^^^^^^^^^^^^^^^ | help: Replace with list literal diff --git a/crates/ruff_linter/src/rules/flake8_simplify/snapshots/ruff_linter__rules__flake8_simplify__tests__preview__SIM905_SIM905.py.snap b/crates/ruff_linter/src/rules/flake8_simplify/snapshots/ruff_linter__rules__flake8_simplify__tests__preview__SIM905_SIM905.py.snap index f0d4d20a08..d477367205 100644 --- a/crates/ruff_linter/src/rules/flake8_simplify/snapshots/ruff_linter__rules__flake8_simplify__tests__preview__SIM905_SIM905.py.snap +++ b/crates/ruff_linter/src/rules/flake8_simplify/snapshots/ruff_linter__rules__flake8_simplify__tests__preview__SIM905_SIM905.py.snap @@ -1420,6 +1420,7 @@ SIM905 [*] Consider using a list literal instead of `str.split` 171 | " a b c d ".split(maxsplit=2) # ["a", "b", "c d "] | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 172 | " a b c d ".rsplit(maxsplit=2) # [" a b", "c", "d"] +173 | "a b".split(maxsplit=1) # ["a", "b"] | help: Replace with list literal 168 | print("<\x1c\x1d\x1e\x1f".rsplit(maxsplit=0)) @@ -1428,6 +1429,7 @@ help: Replace with list literal - " a b c d ".split(maxsplit=2) # ["a", "b", "c d "] 171 + ["a", "b", "c d "] # ["a", "b", "c d "] 172 | " a b c d ".rsplit(maxsplit=2) # [" a b", "c", "d"] +173 | "a b".split(maxsplit=1) # ["a", "b"] SIM905 [*] Consider using a list literal instead of `str.rsplit` --> SIM905.py:172:1 @@ -1436,6 +1438,7 @@ SIM905 [*] Consider using a list literal instead of `str.rsplit` 171 | " a b c d ".split(maxsplit=2) # ["a", "b", "c d "] 172 | " a b c d ".rsplit(maxsplit=2) # [" a b", "c", "d"] | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +173 | "a b".split(maxsplit=1) # ["a", "b"] | help: Replace with list literal 169 | @@ -1443,3 +1446,19 @@ help: Replace with list literal 171 | " a b c d ".split(maxsplit=2) # ["a", "b", "c d "] - " a b c d ".rsplit(maxsplit=2) # [" a b", "c", "d"] 172 + [" a b", "c", "d"] # [" a b", "c", "d"] +173 | "a b".split(maxsplit=1) # ["a", "b"] + +SIM905 [*] Consider using a list literal instead of `str.split` + --> SIM905.py:173:1 + | +171 | " a b c d ".split(maxsplit=2) # ["a", "b", "c d "] +172 | " a b c d ".rsplit(maxsplit=2) # [" a b", "c", "d"] +173 | "a b".split(maxsplit=1) # ["a", "b"] + | ^^^^^^^^^^^^^^^^^^^^^^^^ + | +help: Replace with list literal +170 | # leading/trailing whitespace should not count towards maxsplit +171 | " a b c d ".split(maxsplit=2) # ["a", "b", "c d "] +172 | " a b c d ".rsplit(maxsplit=2) # [" a b", "c", "d"] + - "a b".split(maxsplit=1) # ["a", "b"] +173 + ["a", "b"] # ["a", "b"]