mirror of
				https://github.com/astral-sh/ruff.git
				synced 2025-11-03 21:24:29 +00:00 
			
		
		
		
	Update identifier Unicode character validation to match Python spec (#7209)
Co-authored-by: Micha Reiser <micha@reiser.io>
This commit is contained in:
		
							parent
							
								
									fda48afc23
								
							
						
					
					
						commit
						041cdb95e0
					
				
					 6 changed files with 45 additions and 5 deletions
				
			
		
							
								
								
									
										3
									
								
								Cargo.lock
									
										
									
										generated
									
									
									
								
							
							
						
						
									
										3
									
								
								Cargo.lock
									
										
									
										generated
									
									
									
								
							| 
						 | 
					@ -2467,6 +2467,9 @@ dependencies = [
 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
name = "ruff_python_stdlib"
 | 
					name = "ruff_python_stdlib"
 | 
				
			||||||
version = "0.0.0"
 | 
					version = "0.0.0"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "unic-ucd-ident",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
name = "ruff_python_trivia"
 | 
					name = "ruff_python_trivia"
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -49,6 +49,7 @@ toml = { version = "0.7.2" }
 | 
				
			||||||
tracing = "0.1.37"
 | 
					tracing = "0.1.37"
 | 
				
			||||||
tracing-indicatif = "0.3.4"
 | 
					tracing-indicatif = "0.3.4"
 | 
				
			||||||
tracing-subscriber = { version = "0.3.17", features = ["env-filter"] }
 | 
					tracing-subscriber = { version = "0.3.17", features = ["env-filter"] }
 | 
				
			||||||
 | 
					unic-ucd-ident = "0.9.0"
 | 
				
			||||||
unicode-width = "0.1.10"
 | 
					unicode-width = "0.1.10"
 | 
				
			||||||
uuid = { version = "1.4.1", features = ["v4", "fast-rng", "macro-diagnostics", "js"] }
 | 
					uuid = { version = "1.4.1", features = ["v4", "fast-rng", "macro-diagnostics", "js"] }
 | 
				
			||||||
wsl = { version = "0.1.0" }
 | 
					wsl = { version = "0.1.0" }
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -24,7 +24,7 @@ lalrpop-util = { version = "0.20.0", default-features = false }
 | 
				
			||||||
num-bigint = { workspace = true }
 | 
					num-bigint = { workspace = true }
 | 
				
			||||||
num-traits = { workspace = true }
 | 
					num-traits = { workspace = true }
 | 
				
			||||||
unic-emoji-char = "0.9.0"
 | 
					unic-emoji-char = "0.9.0"
 | 
				
			||||||
unic-ucd-ident = "0.9.0"
 | 
					unic-ucd-ident = {  workspace = true }
 | 
				
			||||||
unicode_names2 = { version = "0.6.0", git = "https://github.com/youknowone/unicode_names2.git", rev = "4ce16aa85cbcdd9cc830410f1a72ef9a235f2fde" }
 | 
					unicode_names2 = { version = "0.6.0", git = "https://github.com/youknowone/unicode_names2.git", rev = "4ce16aa85cbcdd9cc830410f1a72ef9a235f2fde" }
 | 
				
			||||||
rustc-hash = { workspace = true }
 | 
					rustc-hash = { workspace = true }
 | 
				
			||||||
static_assertions = "1.1.0"
 | 
					static_assertions = "1.1.0"
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -13,3 +13,4 @@ license = { workspace = true }
 | 
				
			||||||
[lib]
 | 
					[lib]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[dependencies]
 | 
					[dependencies]
 | 
				
			||||||
 | 
					unic-ucd-ident = { workspace = true }
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,3 +1,5 @@
 | 
				
			||||||
 | 
					use unic_ucd_ident::{is_xid_continue, is_xid_start};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
use crate::keyword::is_keyword;
 | 
					use crate::keyword::is_keyword;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/// Returns `true` if a string is a valid Python identifier (e.g., variable
 | 
					/// Returns `true` if a string is a valid Python identifier (e.g., variable
 | 
				
			||||||
| 
						 | 
					@ -5,12 +7,12 @@ use crate::keyword::is_keyword;
 | 
				
			||||||
pub fn is_identifier(name: &str) -> bool {
 | 
					pub fn is_identifier(name: &str) -> bool {
 | 
				
			||||||
    // Is the first character a letter or underscore?
 | 
					    // Is the first character a letter or underscore?
 | 
				
			||||||
    let mut chars = name.chars();
 | 
					    let mut chars = name.chars();
 | 
				
			||||||
    if !chars.next().is_some_and(|c| c.is_alphabetic() || c == '_') {
 | 
					    if !chars.next().is_some_and(is_identifier_start) {
 | 
				
			||||||
        return false;
 | 
					        return false;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // Are the rest of the characters letters, digits, or underscores?
 | 
					    // Are the rest of the characters letters, digits, or underscores?
 | 
				
			||||||
    if !chars.all(|c| c.is_alphanumeric() || c == '_') {
 | 
					    if !chars.all(is_identifier_continuation) {
 | 
				
			||||||
        return false;
 | 
					        return false;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -22,6 +24,21 @@ pub fn is_identifier(name: &str) -> bool {
 | 
				
			||||||
    true
 | 
					    true
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Checks if the character c is a valid starting character as described
 | 
				
			||||||
 | 
					// in https://docs.python.org/3/reference/lexical_analysis.html#identifiers
 | 
				
			||||||
 | 
					fn is_identifier_start(c: char) -> bool {
 | 
				
			||||||
 | 
					    matches!(c, 'a'..='z' | 'A'..='Z' | '_') || is_xid_start(c)
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Checks if the character c is a valid continuation character as described
 | 
				
			||||||
 | 
					// in https://docs.python.org/3/reference/lexical_analysis.html#identifiers
 | 
				
			||||||
 | 
					fn is_identifier_continuation(c: char) -> bool {
 | 
				
			||||||
 | 
					    match c {
 | 
				
			||||||
 | 
					        'a'..='z' | 'A'..='Z' | '_' | '0'..='9' => true,
 | 
				
			||||||
 | 
					        c => is_xid_continue(c),
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/// Returns `true` if a string is a private identifier, such that, when the
 | 
					/// Returns `true` if a string is a private identifier, such that, when the
 | 
				
			||||||
/// identifier is defined in a class definition, it will be mangled prior to
 | 
					/// identifier is defined in a class definition, it will be mangled prior to
 | 
				
			||||||
/// code generation.
 | 
					/// code generation.
 | 
				
			||||||
| 
						 | 
					@ -76,7 +93,25 @@ pub fn is_migration_name(name: &str) -> bool {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#[cfg(test)]
 | 
					#[cfg(test)]
 | 
				
			||||||
mod tests {
 | 
					mod tests {
 | 
				
			||||||
    use crate::identifiers::{is_migration_name, is_module_name};
 | 
					    use crate::identifiers::{is_identifier, is_migration_name, is_module_name};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    #[test]
 | 
				
			||||||
 | 
					    fn valid_identifiers() {
 | 
				
			||||||
 | 
					        assert!(is_identifier("_abc"));
 | 
				
			||||||
 | 
					        assert!(is_identifier("abc"));
 | 
				
			||||||
 | 
					        assert!(is_identifier("_"));
 | 
				
			||||||
 | 
					        assert!(is_identifier("a_b_c"));
 | 
				
			||||||
 | 
					        assert!(is_identifier("abc123"));
 | 
				
			||||||
 | 
					        assert!(is_identifier("abc_123"));
 | 
				
			||||||
 | 
					        assert!(is_identifier("漢字"));
 | 
				
			||||||
 | 
					        assert!(is_identifier("ひらがな"));
 | 
				
			||||||
 | 
					        assert!(is_identifier("العربية"));
 | 
				
			||||||
 | 
					        assert!(is_identifier("кириллица"));
 | 
				
			||||||
 | 
					        assert!(is_identifier("πr"));
 | 
				
			||||||
 | 
					        assert!(!is_identifier(""));
 | 
				
			||||||
 | 
					        assert!(!is_identifier("percentile_co³t"));
 | 
				
			||||||
 | 
					        assert!(!is_identifier("HelloWorld❤️"));
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    #[test]
 | 
					    #[test]
 | 
				
			||||||
    fn module_name() {
 | 
					    fn module_name() {
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -18,7 +18,7 @@ ruff_source_file = { path = "../ruff_source_file" }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
memchr = { workspace = true }
 | 
					memchr = { workspace = true }
 | 
				
			||||||
smallvec = { workspace = true }
 | 
					smallvec = { workspace = true }
 | 
				
			||||||
unic-ucd-ident = "0.9.0"
 | 
					unic-ucd-ident = { workspace = true }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[dev-dependencies]
 | 
					[dev-dependencies]
 | 
				
			||||||
insta = { workspace = true }
 | 
					insta = { workspace = true }
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue