diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs index b50c8df5..f7b5f574 100644 --- a/src/dialect/mysql.rs +++ b/src/dialect/mysql.rs @@ -43,11 +43,13 @@ impl Dialect for MySqlDialect { // See https://dev.mysql.com/doc/refman/8.0/en/identifiers.html. // Identifiers which begin with a digit are recognized while tokenizing numbers, // so they can be distinguished from exponent numeric literals. + // MySQL also supports non-ASCII UTF-8 characters ch.is_alphabetic() || ch == '_' || ch == '$' || ch == '@' || ('\u{0080}'..='\u{ffff}').contains(&ch) + || !ch.is_ascii() } fn is_identifier_part(&self, ch: char) -> bool { diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index c1f02557..9cea252c 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -65,10 +65,9 @@ impl Dialect for PostgreSqlDialect { } fn is_identifier_start(&self, ch: char) -> bool { - // See https://www.postgresql.org/docs/11/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS - // We don't yet support identifiers beginning with "letters with - // diacritical marks" - ch.is_alphabetic() || ch == '_' + ch.is_alphabetic() || ch == '_' || + // PostgreSQL supports Unicode characters in identifiers. + !ch.is_ascii() } fn is_identifier_part(&self, ch: char) -> bool { diff --git a/src/dialect/redshift.rs b/src/dialect/redshift.rs index c910e4c7..68e025d1 100644 --- a/src/dialect/redshift.rs +++ b/src/dialect/redshift.rs @@ -80,9 +80,9 @@ impl Dialect for RedshiftSqlDialect { } fn is_identifier_start(&self, ch: char) -> bool { - // Extends Postgres dialect with sharp and UTF-8 multibyte chars + // UTF-8 multibyte characters are supported in identifiers via the PostgreSqlDialect. 
// https://docs.aws.amazon.com/redshift/latest/dg/r_names.html - PostgreSqlDialect {}.is_identifier_start(ch) || ch == '#' || !ch.is_ascii() + PostgreSqlDialect {}.is_identifier_start(ch) || ch == '#' } fn is_identifier_part(&self, ch: char) -> bool { diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index ba72399f..e95c7e7b 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -11151,9 +11151,7 @@ fn parse_non_latin_identifiers() { let supported_dialects = TestedDialects::new(vec![ Box::new(GenericDialect {}), Box::new(DuckDbDialect {}), - Box::new(PostgreSqlDialect {}), Box::new(MsSqlDialect {}), - Box::new(MySqlDialect {}), ]); assert!(supported_dialects .parse_sql_statements("SELECT 💝 FROM table1") @@ -16147,3 +16145,14 @@ fn test_identifier_unicode_support() { ]); let _ = dialects.verified_stmt(sql); } + +#[test] +fn test_identifier_unicode_start() { + let sql = r#"SELECT 💝phone AS 💝 FROM customers"#; + let dialects = TestedDialects::new(vec![ + Box::new(MySqlDialect {}), + Box::new(RedshiftSqlDialect {}), + Box::new(PostgreSqlDialect {}), + ]); + let _ = dialects.verified_stmt(sql); +}