From f90ba8bdb76d7d62c5c3a60a13f2d9a81321c12d Mon Sep 17 00:00:00 2001 From: Yoav Cohen Date: Mon, 30 Jun 2025 20:47:20 +0200 Subject: [PATCH] Redshift support for multibyte UTF-8 identifiers --- src/dialect/redshift.rs | 10 ++++++---- tests/sqlparser_common.rs | 9 ++++++++- tests/sqlparser_redshift.rs | 5 +++++ 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/src/dialect/redshift.rs b/src/dialect/redshift.rs index feccca5d..5d41ebcb 100644 --- a/src/dialect/redshift.rs +++ b/src/dialect/redshift.rs @@ -80,13 +80,15 @@ impl Dialect for RedshiftSqlDialect { } fn is_identifier_start(&self, ch: char) -> bool { - // Extends Postgres dialect with sharp - PostgreSqlDialect {}.is_identifier_start(ch) || ch == '#' + // Extends Postgres dialect with sharp and UTF-8 multibyte chars + // https://docs.aws.amazon.com/redshift/latest/dg/r_names.html + PostgreSqlDialect {}.is_identifier_start(ch) || ch == '#' || ch as u32 > 0x7F } fn is_identifier_part(&self, ch: char) -> bool { - // Extends Postgres dialect with sharp - PostgreSqlDialect {}.is_identifier_part(ch) || ch == '#' + // Extends Postgres dialect with sharp and UTF-8 multibyte chars + // https://docs.aws.amazon.com/redshift/latest/dg/r_names.html + PostgreSqlDialect {}.is_identifier_part(ch) || ch == '#' || ch as u32 > 0x7F } /// redshift has `CONVERT(type, value)` instead of `CONVERT(value, type)` diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index f6f51459..36d45ada 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -11068,10 +11068,17 @@ fn parse_non_latin_identifiers() { Box::new(RedshiftSqlDialect {}), Box::new(MySqlDialect {}), ]); - supported_dialects.verified_stmt("SELECT a.説明 FROM test.public.inter01 AS a"); supported_dialects.verified_stmt("SELECT a.説明 FROM inter01 AS a, inter01_transactions AS b WHERE a.説明 = b.取引 GROUP BY a.説明"); supported_dialects.verified_stmt("SELECT 説明, hühnervögel, garçon, Москва, 東京 FROM inter01"); + + let supported_dialects = TestedDialects::new(vec![ + Box::new(GenericDialect {}), + Box::new(DuckDbDialect {}), + Box::new(PostgreSqlDialect {}), + Box::new(MsSqlDialect {}), + Box::new(MySqlDialect {}), + ]); assert!(supported_dialects .parse_sql_statements("SELECT 💝 FROM table1") .is_err()); diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index be2b6722..d539adf6 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -402,3 +402,8 @@ fn parse_extract_single_quotes() { fn parse_string_literal_backslash_escape() { redshift().one_statement_parses_to(r#"SELECT 'l\'auto'"#, "SELECT 'l''auto'"); } + +#[test] +fn parse_utf8_multibyte_idents() { + redshift().verified_stmt("SELECT 🚀.city AS 🎸 FROM customers AS 🚀"); +}