Add identifier start Unicode support for Postgres, MySQL and Redshift (#1944)

This commit is contained in:
etgarperets 2025-07-15 10:26:11 +03:00 committed by GitHub
parent c5e6ba5e7d
commit ecd5d88638
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 18 additions and 8 deletions

View file

@ -43,11 +43,13 @@ impl Dialect for MySqlDialect {
// See https://dev.mysql.com/doc/refman/8.0/en/identifiers.html. // See https://dev.mysql.com/doc/refman/8.0/en/identifiers.html.
// Identifiers which begin with a digit are recognized while tokenizing numbers, // Identifiers which begin with a digit are recognized while tokenizing numbers,
// so they can be distinguished from exponent numeric literals. // so they can be distinguished from exponent numeric literals.
// MySQL also supports non-ASCII UTF-8 characters
ch.is_alphabetic() ch.is_alphabetic()
|| ch == '_' || ch == '_'
|| ch == '$' || ch == '$'
|| ch == '@' || ch == '@'
|| ('\u{0080}'..='\u{ffff}').contains(&ch) || ('\u{0080}'..='\u{ffff}').contains(&ch)
|| !ch.is_ascii()
} }
fn is_identifier_part(&self, ch: char) -> bool { fn is_identifier_part(&self, ch: char) -> bool {

View file

@ -65,10 +65,9 @@ impl Dialect for PostgreSqlDialect {
} }
fn is_identifier_start(&self, ch: char) -> bool { fn is_identifier_start(&self, ch: char) -> bool {
// See https://www.postgresql.org/docs/11/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS ch.is_alphabetic() || ch == '_' ||
// We don't yet support identifiers beginning with "letters with // PostgreSQL implements Unicode characters in identifiers.
// diacritical marks" !ch.is_ascii()
ch.is_alphabetic() || ch == '_'
} }
fn is_identifier_part(&self, ch: char) -> bool { fn is_identifier_part(&self, ch: char) -> bool {

View file

@ -80,9 +80,9 @@ impl Dialect for RedshiftSqlDialect {
} }
fn is_identifier_start(&self, ch: char) -> bool { fn is_identifier_start(&self, ch: char) -> bool {
// Extends Postgres dialect with sharp and UTF-8 multibyte chars // UTF-8 multibyte characters are supported in identifiers via the PostgreSqlDialect.
// https://docs.aws.amazon.com/redshift/latest/dg/r_names.html // https://docs.aws.amazon.com/redshift/latest/dg/r_names.html
PostgreSqlDialect {}.is_identifier_start(ch) || ch == '#' || !ch.is_ascii() PostgreSqlDialect {}.is_identifier_start(ch) || ch == '#'
} }
fn is_identifier_part(&self, ch: char) -> bool { fn is_identifier_part(&self, ch: char) -> bool {

View file

@ -11151,9 +11151,7 @@ fn parse_non_latin_identifiers() {
let supported_dialects = TestedDialects::new(vec![ let supported_dialects = TestedDialects::new(vec![
Box::new(GenericDialect {}), Box::new(GenericDialect {}),
Box::new(DuckDbDialect {}), Box::new(DuckDbDialect {}),
Box::new(PostgreSqlDialect {}),
Box::new(MsSqlDialect {}), Box::new(MsSqlDialect {}),
Box::new(MySqlDialect {}),
]); ]);
assert!(supported_dialects assert!(supported_dialects
.parse_sql_statements("SELECT 💝 FROM table1") .parse_sql_statements("SELECT 💝 FROM table1")
@ -16147,3 +16145,14 @@ fn test_identifier_unicode_support() {
]); ]);
let _ = dialects.verified_stmt(sql); let _ = dialects.verified_stmt(sql);
} }
/// Checks that an identifier may *begin* with a non-ASCII character (here an
/// emoji) under the MySQL, Redshift and PostgreSQL dialects.
///
/// The statement is passed through `verified_stmt` for every listed dialect;
/// presumably that helper fails the test when the SQL does not parse or does
/// not round-trip — confirm against `TestedDialects`.
#[test]
fn test_identifier_unicode_start() {
    // Dialects expected to accept a Unicode character at identifier start.
    let unicode_start_dialects = TestedDialects::new(vec![
        Box::new(MySqlDialect {}),
        Box::new(RedshiftSqlDialect {}),
        Box::new(PostgreSqlDialect {}),
    ]);
    // The emoji is used both as an identifier prefix and as a whole alias.
    let query = "SELECT 💝phone AS 💝 FROM customers";
    let _ = unicode_start_dialects.verified_stmt(query);
}