mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-01 09:22:19 +00:00
Update identifier Unicode character validation to match Python spec (#7209)
Co-authored-by: Micha Reiser <micha@reiser.io>
This commit is contained in:
parent
fda48afc23
commit
041cdb95e0
6 changed files with 45 additions and 5 deletions
3
Cargo.lock
generated
3
Cargo.lock
generated
|
@ -2467,6 +2467,9 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "ruff_python_stdlib"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"unic-ucd-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ruff_python_trivia"
|
||||
|
|
|
@ -49,6 +49,7 @@ toml = { version = "0.7.2" }
|
|||
tracing = "0.1.37"
|
||||
tracing-indicatif = "0.3.4"
|
||||
tracing-subscriber = { version = "0.3.17", features = ["env-filter"] }
|
||||
unic-ucd-ident = "0.9.0"
|
||||
unicode-width = "0.1.10"
|
||||
uuid = { version = "1.4.1", features = ["v4", "fast-rng", "macro-diagnostics", "js"] }
|
||||
wsl = { version = "0.1.0" }
|
||||
|
|
|
@ -24,7 +24,7 @@ lalrpop-util = { version = "0.20.0", default-features = false }
|
|||
num-bigint = { workspace = true }
|
||||
num-traits = { workspace = true }
|
||||
unic-emoji-char = "0.9.0"
|
||||
unic-ucd-ident = "0.9.0"
|
||||
unic-ucd-ident = { workspace = true }
|
||||
unicode_names2 = { version = "0.6.0", git = "https://github.com/youknowone/unicode_names2.git", rev = "4ce16aa85cbcdd9cc830410f1a72ef9a235f2fde" }
|
||||
rustc-hash = { workspace = true }
|
||||
static_assertions = "1.1.0"
|
||||
|
|
|
@ -13,3 +13,4 @@ license = { workspace = true }
|
|||
[lib]
|
||||
|
||||
[dependencies]
|
||||
unic-ucd-ident = { workspace = true }
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
use unic_ucd_ident::{is_xid_continue, is_xid_start};
|
||||
|
||||
use crate::keyword::is_keyword;
|
||||
|
||||
/// Returns `true` if a string is a valid Python identifier (e.g., variable
|
||||
|
@ -5,12 +7,12 @@ use crate::keyword::is_keyword;
|
|||
pub fn is_identifier(name: &str) -> bool {
|
||||
// Is the first character a letter or underscore?
|
||||
let mut chars = name.chars();
|
||||
if !chars.next().is_some_and(|c| c.is_alphabetic() || c == '_') {
|
||||
if !chars.next().is_some_and(is_identifier_start) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Are the rest of the characters letters, digits, or underscores?
|
||||
if !chars.all(|c| c.is_alphanumeric() || c == '_') {
|
||||
if !chars.all(is_identifier_continuation) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -22,6 +24,21 @@ pub fn is_identifier(name: &str) -> bool {
|
|||
true
|
||||
}
|
||||
|
||||
// Checks if the character c is a valid starting character as described
|
||||
// in https://docs.python.org/3/reference/lexical_analysis.html#identifiers
|
||||
fn is_identifier_start(c: char) -> bool {
|
||||
matches!(c, 'a'..='z' | 'A'..='Z' | '_') || is_xid_start(c)
|
||||
}
|
||||
|
||||
// Checks if the character c is a valid continuation character as described
|
||||
// in https://docs.python.org/3/reference/lexical_analysis.html#identifiers
|
||||
fn is_identifier_continuation(c: char) -> bool {
|
||||
match c {
|
||||
'a'..='z' | 'A'..='Z' | '_' | '0'..='9' => true,
|
||||
c => is_xid_continue(c),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` if a string is a private identifier, such that, when the
|
||||
/// identifier is defined in a class definition, it will be mangled prior to
|
||||
/// code generation.
|
||||
|
@ -76,7 +93,25 @@ pub fn is_migration_name(name: &str) -> bool {
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::identifiers::{is_migration_name, is_module_name};
|
||||
use crate::identifiers::{is_identifier, is_migration_name, is_module_name};
|
||||
|
||||
#[test]
fn valid_identifiers() {
    // Names that `is_identifier` must accept: ASCII names with underscores and digits,
    // followed by names written in non-ASCII scripts.
    const ACCEPTED: [&str; 11] = [
        "_abc",
        "abc",
        "_",
        "a_b_c",
        "abc123",
        "abc_123",
        "漢字",
        "ひらがな",
        "العربية",
        "кириллица",
        "πr",
    ];

    // Names that `is_identifier` must reject: the empty string, a name containing a
    // superscript digit, and a name ending in an emoji.
    const REJECTED: [&str; 3] = ["", "percentile_co³t", "HelloWorld❤️"];

    for name in ACCEPTED {
        assert!(is_identifier(name), "expected {name:?} to be accepted");
    }

    for name in REJECTED {
        assert!(!is_identifier(name), "expected {name:?} to be rejected");
    }
}
|
||||
|
||||
#[test]
|
||||
fn module_name() {
|
||||
|
|
|
@ -18,7 +18,7 @@ ruff_source_file = { path = "../ruff_source_file" }
|
|||
|
||||
memchr = { workspace = true }
|
||||
smallvec = { workspace = true }
|
||||
unic-ucd-ident = "0.9.0"
|
||||
unic-ucd-ident = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
insta = { workspace = true }
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue