Add identifier start Unicode support for Postgres, MySQL and Redshift (#1944)

This commit is contained in:
etgarperets 2025-07-15 10:26:11 +03:00 committed by GitHub
parent c5e6ba5e7d
commit ecd5d88638
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 18 additions and 8 deletions

View file

@ -43,11 +43,13 @@ impl Dialect for MySqlDialect {
// See https://dev.mysql.com/doc/refman/8.0/en/identifiers.html.
// Identifiers which begin with a digit are recognized while tokenizing numbers,
// so they can be distinguished from exponent numeric literals.
// MySQL also permits non-ASCII UTF-8 characters in identifiers.
ch.is_alphabetic()
|| ch == '_'
|| ch == '$'
|| ch == '@'
|| ('\u{0080}'..='\u{ffff}').contains(&ch)
|| !ch.is_ascii()
}
fn is_identifier_part(&self, ch: char) -> bool {

View file

@ -65,10 +65,9 @@ impl Dialect for PostgreSqlDialect {
}
fn is_identifier_start(&self, ch: char) -> bool {
// See https://www.postgresql.org/docs/11/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS
// We don't yet support identifiers beginning with "letters with
// diacritical marks"
ch.is_alphabetic() || ch == '_'
ch.is_alphabetic() || ch == '_' ||
// PostgreSQL allows non-ASCII (Unicode) characters in identifiers.
!ch.is_ascii()
}
fn is_identifier_part(&self, ch: char) -> bool {

View file

@ -80,9 +80,9 @@ impl Dialect for RedshiftSqlDialect {
}
fn is_identifier_start(&self, ch: char) -> bool {
// Extends Postgres dialect with sharp and UTF-8 multibyte chars
// UTF-8 multibyte characters are supported in identifiers via the PostgreSqlDialect.
// https://docs.aws.amazon.com/redshift/latest/dg/r_names.html
PostgreSqlDialect {}.is_identifier_start(ch) || ch == '#' || !ch.is_ascii()
PostgreSqlDialect {}.is_identifier_start(ch) || ch == '#'
}
fn is_identifier_part(&self, ch: char) -> bool {

View file

@ -11151,9 +11151,7 @@ fn parse_non_latin_identifiers() {
let supported_dialects = TestedDialects::new(vec![
Box::new(GenericDialect {}),
Box::new(DuckDbDialect {}),
Box::new(PostgreSqlDialect {}),
Box::new(MsSqlDialect {}),
Box::new(MySqlDialect {}),
]);
assert!(supported_dialects
.parse_sql_statements("SELECT 💝 FROM table1")
@ -16147,3 +16145,14 @@ fn test_identifier_unicode_support() {
]);
let _ = dialects.verified_stmt(sql);
}
// Checks that an identifier may begin with a non-ASCII character (an emoji
// here), both as a column name and as an alias. The statement is passed to
// `verified_stmt` for the MySQL, Redshift and PostgreSQL dialects.
#[test]
fn test_identifier_unicode_start() {
    let dialects_under_test = TestedDialects::new(vec![
        Box::new(MySqlDialect {}),
        Box::new(RedshiftSqlDialect {}),
        Box::new(PostgreSqlDialect {}),
    ]);
    let query = "SELECT 💝phone AS 💝 FROM customers";
    // The returned value is intentionally discarded, as in the original test.
    let _ = dialects_under_test.verified_stmt(query);
}