From c5e6ba5e7d746161a59d6fd5e09846262a2fc01e Mon Sep 17 00:00:00 2001
From: etgarperets
Date: Mon, 14 Jul 2025 11:24:13 +0300
Subject: [PATCH] Add identifier unicode support in Mysql, Postgres and Redshift (#1933)

---
 src/dialect/mysql.rs      |  4 +++-
 src/dialect/postgresql.rs |  4 +++-
 src/dialect/redshift.rs   |  4 ++--
 tests/sqlparser_common.rs | 11 +++++++++++
 4 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs
index f69e4243..b50c8df5 100644
--- a/src/dialect/mysql.rs
+++ b/src/dialect/mysql.rs
@@ -51,7 +51,9 @@ impl Dialect for MySqlDialect {
     }
 
     fn is_identifier_part(&self, ch: char) -> bool {
-        self.is_identifier_start(ch) || ch.is_ascii_digit()
+        self.is_identifier_start(ch) || ch.is_ascii_digit() ||
+        // MySQL implements Unicode characters in identifiers.
+        !ch.is_ascii()
     }
 
     fn is_delimited_identifier_start(&self, ch: char) -> bool {
diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs
index b2d4014c..c1f02557 100644
--- a/src/dialect/postgresql.rs
+++ b/src/dialect/postgresql.rs
@@ -72,7 +72,9 @@ impl Dialect for PostgreSqlDialect {
     }
 
     fn is_identifier_part(&self, ch: char) -> bool {
-        ch.is_alphabetic() || ch.is_ascii_digit() || ch == '$' || ch == '_'
+        ch.is_alphabetic() || ch.is_ascii_digit() || ch == '$' || ch == '_' ||
+        // PostgreSQL implements Unicode characters in identifiers.
+        !ch.is_ascii()
     }
 
     fn supports_unicode_string_literal(&self) -> bool {
diff --git a/src/dialect/redshift.rs b/src/dialect/redshift.rs
index 8ffed98a..c910e4c7 100644
--- a/src/dialect/redshift.rs
+++ b/src/dialect/redshift.rs
@@ -86,9 +86,9 @@ impl Dialect for RedshiftSqlDialect {
     }
 
     fn is_identifier_part(&self, ch: char) -> bool {
-        // Extends Postgres dialect with sharp and UTF-8 multibyte chars
+        // UTF-8 multibyte characters are supported in identifiers via the PostgreSqlDialect.
         // https://docs.aws.amazon.com/redshift/latest/dg/r_names.html
-        PostgreSqlDialect {}.is_identifier_part(ch) || ch == '#' || !ch.is_ascii()
+        PostgreSqlDialect {}.is_identifier_part(ch) || ch == '#'
     }
 
     /// redshift has `CONVERT(type, value)` instead of `CONVERT(value, type)`
diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs
index 2d2008c8..ba72399f 100644
--- a/tests/sqlparser_common.rs
+++ b/tests/sqlparser_common.rs
@@ -16136,3 +16136,14 @@ SELECT * FROM tbl2
     assert_eq!(stmts.len(), 2);
     assert!(stmts.iter().all(|s| matches!(s, Statement::Query { .. })));
 }
+
+#[test]
+fn test_identifier_unicode_support() {
+    let sql = r#"SELECT phoneǤЖשचᎯ⻩☯♜🦄⚛🀄ᚠ⌛🌀 AS tbl FROM customers"#;
+    let dialects = TestedDialects::new(vec![
+        Box::new(MySqlDialect {}),
+        Box::new(RedshiftSqlDialect {}),
+        Box::new(PostgreSqlDialect {}),
+    ]);
+    let _ = dialects.verified_stmt(sql);
+}