Apply NFKC normalization to unicode identifiers in the lexer (#10412)

This commit is contained in:
Alex Waygood 2024-03-18 11:56:56 +00:00 committed by GitHub
parent bb540718c2
commit 92e6026446
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 68 additions and 15 deletions

View file

@ -16,6 +16,9 @@ pub enum Tok {
/// Token value for a name, commonly known as an identifier.
Name {
/// The name value.
///
/// Unicode names are NFKC-normalized by the lexer,
/// matching [the behaviour of Python's lexer](https://docs.python.org/3/reference/lexical_analysis.html#identifiers).
name: Box<str>,
},
/// Token value for an integer.