Redshift support for multibyte UTF-8 identifiers

This commit is contained in:
Yoav Cohen 2025-06-30 20:47:20 +02:00
parent 3bc94234df
commit f90ba8bdb7
3 changed files with 19 additions and 5 deletions

View file

@ -80,13 +80,15 @@ impl Dialect for RedshiftSqlDialect {
}
/// Reports whether `ch` may begin an identifier in the Redshift dialect.
///
/// NOTE(review): this span comes from a rendered diff hunk, so the body appears
/// to show both the expression being replaced and its replacement. The later
/// expression accepts whatever `PostgreSqlDialect` accepts as an identifier
/// start, plus `#`, plus any `char` whose code point is above 0x7F.
fn is_identifier_start(&self, ch: char) -> bool {
// Extends Postgres dialect with sharp
PostgreSqlDialect {}.is_identifier_start(ch) || ch == '#'
// Extends Postgres dialect with sharp and UTF-8 multibyte chars
// https://docs.aws.amazon.com/redshift/latest/dg/r_names.html
// `ch as u32 > 0x7F` holds for every non-ASCII `char`, i.e. exactly the
// characters that need more than one byte in UTF-8 (same as `!ch.is_ascii()`).
PostgreSqlDialect {}.is_identifier_start(ch) || ch == '#' || ch as u32 > 0x7F
}
/// Reports whether `ch` may appear inside an identifier in the Redshift dialect.
///
/// NOTE(review): this span comes from a rendered diff hunk, so the body appears
/// to show both the expression being replaced and its replacement. The later
/// expression accepts whatever `PostgreSqlDialect` accepts as an identifier
/// part, plus `#`, plus any `char` whose code point is above 0x7F.
fn is_identifier_part(&self, ch: char) -> bool {
// Extends Postgres dialect with sharp
PostgreSqlDialect {}.is_identifier_part(ch) || ch == '#'
// Extends Postgres dialect with sharp and UTF-8 multibyte chars
// https://docs.aws.amazon.com/redshift/latest/dg/r_names.html
// `ch as u32 > 0x7F` holds for every non-ASCII `char`, i.e. exactly the
// characters that need more than one byte in UTF-8 (same as `!ch.is_ascii()`).
PostgreSqlDialect {}.is_identifier_part(ch) || ch == '#' || ch as u32 > 0x7F
}
/// redshift has `CONVERT(type, value)` instead of `CONVERT(value, type)`

View file

@ -11068,10 +11068,17 @@ fn parse_non_latin_identifiers() {
Box::new(RedshiftSqlDialect {}),
Box::new(MySqlDialect {}),
]);
supported_dialects.verified_stmt("SELECT a.説明 FROM test.public.inter01 AS a");
supported_dialects.verified_stmt("SELECT a.説明 FROM inter01 AS a, inter01_transactions AS b WHERE a.説明 = b.取引 GROUP BY a.説明");
supported_dialects.verified_stmt("SELECT 説明, hühnervögel, garçon, Москва, 東京 FROM inter01");
let supported_dialects = TestedDialects::new(vec![
Box::new(GenericDialect {}),
Box::new(DuckDbDialect {}),
Box::new(PostgreSqlDialect {}),
Box::new(MsSqlDialect {}),
Box::new(MySqlDialect {}),
]);
assert!(supported_dialects
.parse_sql_statements("SELECT 💝 FROM table1")
.is_err());

View file

@ -402,3 +402,8 @@ fn parse_extract_single_quotes() {
fn parse_string_literal_backslash_escape() {
    // The input escapes the inner quote with a backslash; the expected
    // canonical text passed alongside it uses a doubled single quote instead.
    let input = r#"SELECT 'l\'auto'"#;
    let canonical = "SELECT 'l''auto'";
    redshift().one_statement_parses_to(input, canonical);
}
#[test]
fn parse_utf8_multibyte_idents() {
    // Emoji are multibyte in UTF-8; this statement uses them as a table alias
    // (and its qualifier) and as a column alias.
    let sql = "SELECT 🚀.city AS 🎸 FROM customers AS 🚀";
    redshift().verified_stmt(sql);
}