Add Unicode identifier support in MySQL, PostgreSQL and Redshift (#1933)

2025-08-31 11:17:23 +00:00 · 2025-07-14 11:24:13 +03:00 · 2025-07-14 11:24:13 +03:00 · c5e6ba5e7d
commit c5e6ba5e7d
parent 9b9ffe450c
4 changed files with 19 additions and 4 deletions
--- a/src/dialect/mysql.rs
+++ b/src/dialect/mysql.rs
@ -51,7 +51,9 @@ impl Dialect for MySqlDialect {
    }
    fn is_identifier_part(&self, ch: char) -> bool {
-        self.is_identifier_start(ch) || ch.is_ascii_digit()
+        self.is_identifier_start(ch) || ch.is_ascii_digit() ||
        // MySQL allows Unicode (non-ASCII) characters in identifiers.
        !ch.is_ascii()
    }
    fn is_delimited_identifier_start(&self, ch: char) -> bool {
--- a/src/dialect/postgresql.rs
+++ b/src/dialect/postgresql.rs
@ -72,7 +72,9 @@ impl Dialect for PostgreSqlDialect {
    }
    fn is_identifier_part(&self, ch: char) -> bool {
-        ch.is_alphabetic() || ch.is_ascii_digit() || ch == '$' || ch == '_'
+        ch.is_alphabetic() || ch.is_ascii_digit() || ch == '$' || ch == '_'  ||
        // PostgreSQL allows Unicode (non-ASCII) characters in identifiers.
        !ch.is_ascii()
    }
    fn supports_unicode_string_literal(&self) -> bool {
--- a/src/dialect/redshift.rs
+++ b/src/dialect/redshift.rs
@ -86,9 +86,9 @@ impl Dialect for RedshiftSqlDialect {
    }
    fn is_identifier_part(&self, ch: char) -> bool {
-        // Extends Postgres dialect with sharp and UTF-8 multibyte chars
+        // UTF-8 multibyte characters are supported in identifiers via the PostgreSqlDialect.
        // https://docs.aws.amazon.com/redshift/latest/dg/r_names.html
-        PostgreSqlDialect {}.is_identifier_part(ch) || ch == '#' || !ch.is_ascii()
+        PostgreSqlDialect {}.is_identifier_part(ch) || ch == '#'
    }
    /// redshift has `CONVERT(type, value)` instead of `CONVERT(value, type)`
--- a/tests/sqlparser_common.rs
+++ b/tests/sqlparser_common.rs
@ -16136,3 +16136,14 @@ SELECT * FROM tbl2
    assert_eq!(stmts.len(), 2);
    assert!(stmts.iter().all(|s| matches!(s, Statement::Query { .. })));
 }
 #[test]
 fn test_identifier_unicode_support() {
    // The selected column name is an unquoted identifier that starts with ASCII
    // letters and continues with non-ASCII letters, symbols and emoji. Each
    // dialect listed below is expected to accept it through `verified_stmt`.
    let unicode_ident_query = r#"SELECT phoneǤЖשचᎯ⻩☯♜🦄⚛🀄ᚠ⌛🌀 AS tbl FROM customers"#;
    let _ = TestedDialects::new(vec![
        Box::new(MySqlDialect {}),
        Box::new(RedshiftSqlDialect {}),
        Box::new(PostgreSqlDialect {}),
    ])
    .verified_stmt(unicode_ident_query);
 }