mirror of
https://github.com/astral-sh/ruff.git
synced 2025-11-03 05:13:00 +00:00
SIM905: Fix handling of U+001C..U+001F whitespace (#19849)
Some checks are pending
CI / Determine changes (push) Waiting to run
CI / cargo fmt (push) Waiting to run
CI / cargo clippy (push) Blocked by required conditions
CI / cargo test (linux) (push) Blocked by required conditions
CI / cargo test (linux, release) (push) Blocked by required conditions
CI / cargo test (windows) (push) Blocked by required conditions
CI / cargo test (wasm) (push) Blocked by required conditions
CI / cargo build (release) (push) Waiting to run
CI / cargo build (msrv) (push) Blocked by required conditions
CI / cargo fuzz build (push) Blocked by required conditions
CI / fuzz parser (push) Blocked by required conditions
CI / test scripts (push) Blocked by required conditions
CI / mkdocs (push) Waiting to run
CI / ecosystem (push) Blocked by required conditions
CI / Fuzz for new ty panics (push) Blocked by required conditions
CI / cargo shear (push) Blocked by required conditions
CI / python package (push) Waiting to run
CI / pre-commit (push) Waiting to run
CI / formatter instabilities and black similarity (push) Blocked by required conditions
CI / test ruff-lsp (push) Blocked by required conditions
CI / check playground (push) Blocked by required conditions
CI / benchmarks-instrumented (push) Blocked by required conditions
CI / benchmarks-walltime (push) Blocked by required conditions
Some checks are pending
CI / Determine changes (push) Waiting to run
CI / cargo fmt (push) Waiting to run
CI / cargo clippy (push) Blocked by required conditions
CI / cargo test (linux) (push) Blocked by required conditions
CI / cargo test (linux, release) (push) Blocked by required conditions
CI / cargo test (windows) (push) Blocked by required conditions
CI / cargo test (wasm) (push) Blocked by required conditions
CI / cargo build (release) (push) Waiting to run
CI / cargo build (msrv) (push) Blocked by required conditions
CI / cargo fuzz build (push) Blocked by required conditions
CI / fuzz parser (push) Blocked by required conditions
CI / test scripts (push) Blocked by required conditions
CI / mkdocs (push) Waiting to run
CI / ecosystem (push) Blocked by required conditions
CI / Fuzz for new ty panics (push) Blocked by required conditions
CI / cargo shear (push) Blocked by required conditions
CI / python package (push) Waiting to run
CI / pre-commit (push) Waiting to run
CI / formatter instabilities and black similarity (push) Blocked by required conditions
CI / test ruff-lsp (push) Blocked by required conditions
CI / check playground (push) Blocked by required conditions
CI / benchmarks-instrumented (push) Blocked by required conditions
CI / benchmarks-walltime (push) Blocked by required conditions
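Fixes #19845

## Summary

The linked issue explains it well: Rust and Python do not agree on what counts as whitespace for the purposes of `str.split`. Python treats U+001C..U+001F as whitespace, while Rust's `char::is_whitespace` does not, so the fix now uses a Python-compatible whitespace predicate.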
This commit is contained in:
parent
4d8ccb6125
commit
b8a9b1994b
3 changed files with 103 additions and 3 deletions
|
|
@ -161,3 +161,8 @@ r"""first
|
||||||
'no need' to escape
|
'no need' to escape
|
||||||
"swap" quote style
|
"swap" quote style
|
||||||
"use' ugly triple quotes""".split("\n")
|
"use' ugly triple quotes""".split("\n")
|
||||||
|
|
||||||
|
# https://github.com/astral-sh/ruff/issues/19845
|
||||||
|
print("S\x1cP\x1dL\x1eI\x1fT".split())
|
||||||
|
print("\x1c\x1d\x1e\x1f>".split(maxsplit=0))
|
||||||
|
print("<\x1c\x1d\x1e\x1f".rsplit(maxsplit=0))
|
||||||
|
|
|
||||||
|
|
@ -199,9 +199,9 @@ fn split_default(
|
||||||
// - "".split(maxsplit=0) -> []
|
// - "".split(maxsplit=0) -> []
|
||||||
// - " ".split(maxsplit=0) -> []
|
// - " ".split(maxsplit=0) -> []
|
||||||
let processed_str = if direction == Direction::Left {
|
let processed_str = if direction == Direction::Left {
|
||||||
string_val.trim_start()
|
string_val.trim_start_matches(py_unicode_is_whitespace)
|
||||||
} else {
|
} else {
|
||||||
string_val.trim_end()
|
string_val.trim_end_matches(py_unicode_is_whitespace)
|
||||||
};
|
};
|
||||||
let list_items: &[_] = if processed_str.is_empty() {
|
let list_items: &[_] = if processed_str.is_empty() {
|
||||||
&[]
|
&[]
|
||||||
|
|
@ -214,7 +214,10 @@ fn split_default(
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
Ordering::Less => {
|
Ordering::Less => {
|
||||||
let list_items: Vec<&str> = string_val.split_whitespace().collect();
|
let list_items: Vec<&str> = string_val
|
||||||
|
.split(py_unicode_is_whitespace)
|
||||||
|
.filter(|s| !s.is_empty())
|
||||||
|
.collect();
|
||||||
Some(construct_replacement(
|
Some(construct_replacement(
|
||||||
&list_items,
|
&list_items,
|
||||||
str_value.first_literal_flags(),
|
str_value.first_literal_flags(),
|
||||||
|
|
@ -292,3 +295,34 @@ enum Direction {
|
||||||
Left,
|
Left,
|
||||||
Right,
|
Right,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Like [`char::is_whitespace`] but with Python's notion of whitespace.
|
||||||
|
///
|
||||||
|
/// <https://github.com/astral-sh/ruff/issues/19845>
|
||||||
|
/// <https://github.com/python/cpython/blob/v3.14.0rc1/Objects/unicodetype_db.h#L6673-L6711>
|
||||||
|
#[rustfmt::skip]
|
||||||
|
#[inline]
|
||||||
|
const fn py_unicode_is_whitespace(ch: char) -> bool {
|
||||||
|
matches!(
|
||||||
|
ch,
|
||||||
|
| '\u{0009}'
|
||||||
|
| '\u{000A}'
|
||||||
|
| '\u{000B}'
|
||||||
|
| '\u{000C}'
|
||||||
|
| '\u{000D}'
|
||||||
|
| '\u{001C}'
|
||||||
|
| '\u{001D}'
|
||||||
|
| '\u{001E}'
|
||||||
|
| '\u{001F}'
|
||||||
|
| '\u{0020}'
|
||||||
|
| '\u{0085}'
|
||||||
|
| '\u{00A0}'
|
||||||
|
| '\u{1680}'
|
||||||
|
| '\u{2000}'..='\u{200A}'
|
||||||
|
| '\u{2028}'
|
||||||
|
| '\u{2029}'
|
||||||
|
| '\u{202F}'
|
||||||
|
| '\u{205F}'
|
||||||
|
| '\u{3000}'
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -1402,6 +1402,8 @@ SIM905 [*] Consider using a list literal instead of `str.split`
|
||||||
162 | | "swap" quote style
|
162 | | "swap" quote style
|
||||||
163 | | "use' ugly triple quotes""".split("\n")
|
163 | | "use' ugly triple quotes""".split("\n")
|
||||||
| |_______________________________________^
|
| |_______________________________________^
|
||||||
|
164 |
|
||||||
|
165 | # https://github.com/astral-sh/ruff/issues/19845
|
||||||
|
|
|
|
||||||
help: Replace with list literal
|
help: Replace with list literal
|
||||||
|
|
||||||
|
|
@ -1414,3 +1416,62 @@ help: Replace with list literal
|
||||||
162 |-"swap" quote style
|
162 |-"swap" quote style
|
||||||
163 |-"use' ugly triple quotes""".split("\n")
|
163 |-"use' ugly triple quotes""".split("\n")
|
||||||
160 |+[r"first", r"'no need' to escape", r'"swap" quote style', r""""use' ugly triple quotes"""]
|
160 |+[r"first", r"'no need' to escape", r'"swap" quote style', r""""use' ugly triple quotes"""]
|
||||||
|
164 161 |
|
||||||
|
165 162 | # https://github.com/astral-sh/ruff/issues/19845
|
||||||
|
166 163 | print("S\x1cP\x1dL\x1eI\x1fT".split())
|
||||||
|
|
||||||
|
SIM905 [*] Consider using a list literal instead of `str.split`
|
||||||
|
--> SIM905.py:166:7
|
||||||
|
|
|
||||||
|
165 | # https://github.com/astral-sh/ruff/issues/19845
|
||||||
|
166 | print("S\x1cP\x1dL\x1eI\x1fT".split())
|
||||||
|
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
167 | print("\x1c\x1d\x1e\x1f>".split(maxsplit=0))
|
||||||
|
168 | print("<\x1c\x1d\x1e\x1f".rsplit(maxsplit=0))
|
||||||
|
|
|
||||||
|
help: Replace with list literal
|
||||||
|
|
||||||
|
ℹ Safe fix
|
||||||
|
163 163 | "use' ugly triple quotes""".split("\n")
|
||||||
|
164 164 |
|
||||||
|
165 165 | # https://github.com/astral-sh/ruff/issues/19845
|
||||||
|
166 |-print("S\x1cP\x1dL\x1eI\x1fT".split())
|
||||||
|
166 |+print(["S", "P", "L", "I", "T"])
|
||||||
|
167 167 | print("\x1c\x1d\x1e\x1f>".split(maxsplit=0))
|
||||||
|
168 168 | print("<\x1c\x1d\x1e\x1f".rsplit(maxsplit=0))
|
||||||
|
|
||||||
|
SIM905 [*] Consider using a list literal instead of `str.split`
|
||||||
|
--> SIM905.py:167:7
|
||||||
|
|
|
||||||
|
165 | # https://github.com/astral-sh/ruff/issues/19845
|
||||||
|
166 | print("S\x1cP\x1dL\x1eI\x1fT".split())
|
||||||
|
167 | print("\x1c\x1d\x1e\x1f>".split(maxsplit=0))
|
||||||
|
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
168 | print("<\x1c\x1d\x1e\x1f".rsplit(maxsplit=0))
|
||||||
|
|
|
||||||
|
help: Replace with list literal
|
||||||
|
|
||||||
|
ℹ Safe fix
|
||||||
|
164 164 |
|
||||||
|
165 165 | # https://github.com/astral-sh/ruff/issues/19845
|
||||||
|
166 166 | print("S\x1cP\x1dL\x1eI\x1fT".split())
|
||||||
|
167 |-print("\x1c\x1d\x1e\x1f>".split(maxsplit=0))
|
||||||
|
167 |+print([">"])
|
||||||
|
168 168 | print("<\x1c\x1d\x1e\x1f".rsplit(maxsplit=0))
|
||||||
|
|
||||||
|
SIM905 [*] Consider using a list literal instead of `str.split`
|
||||||
|
--> SIM905.py:168:7
|
||||||
|
|
|
||||||
|
166 | print("S\x1cP\x1dL\x1eI\x1fT".split())
|
||||||
|
167 | print("\x1c\x1d\x1e\x1f>".split(maxsplit=0))
|
||||||
|
168 | print("<\x1c\x1d\x1e\x1f".rsplit(maxsplit=0))
|
||||||
|
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
|
||||||
|
help: Replace with list literal
|
||||||
|
|
||||||
|
ℹ Safe fix
|
||||||
|
165 165 | # https://github.com/astral-sh/ruff/issues/19845
|
||||||
|
166 166 | print("S\x1cP\x1dL\x1eI\x1fT".split())
|
||||||
|
167 167 | print("\x1c\x1d\x1e\x1f>".split(maxsplit=0))
|
||||||
|
168 |-print("<\x1c\x1d\x1e\x1f".rsplit(maxsplit=0))
|
||||||
|
168 |+print(["<"])
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue