mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-08-30 18:57:21 +00:00
Add identifier start Unicode support for Postgres, MySQL and Redshift (#1944)
This commit is contained in:
parent
c5e6ba5e7d
commit
ecd5d88638
4 changed files with 18 additions and 8 deletions
|
@ -43,11 +43,13 @@ impl Dialect for MySqlDialect {
|
|||
// See https://dev.mysql.com/doc/refman/8.0/en/identifiers.html.
|
||||
// Identifiers which begin with a digit are recognized while tokenizing numbers,
|
||||
// so they can be distinguished from exponent numeric literals.
|
||||
// MySQL also supports non-ASCII UTF-8 characters
|
||||
ch.is_alphabetic()
|
||||
|| ch == '_'
|
||||
|| ch == '$'
|
||||
|| ch == '@'
|
||||
|| ('\u{0080}'..='\u{ffff}').contains(&ch)
|
||||
|| !ch.is_ascii()
|
||||
}
|
||||
|
||||
fn is_identifier_part(&self, ch: char) -> bool {
|
||||
|
|
|
@ -65,10 +65,9 @@ impl Dialect for PostgreSqlDialect {
|
|||
}
|
||||
|
||||
fn is_identifier_start(&self, ch: char) -> bool {
|
||||
// See https://www.postgresql.org/docs/11/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS
|
||||
// We don't yet support identifiers beginning with "letters with
|
||||
// diacritical marks"
|
||||
ch.is_alphabetic() || ch == '_'
|
||||
ch.is_alphabetic() || ch == '_' ||
|
||||
// PostgreSQL implements Unicode characters in identifiers.
|
||||
!ch.is_ascii()
|
||||
}
|
||||
|
||||
fn is_identifier_part(&self, ch: char) -> bool {
|
||||
|
|
|
@ -80,9 +80,9 @@ impl Dialect for RedshiftSqlDialect {
|
|||
}
|
||||
|
||||
fn is_identifier_start(&self, ch: char) -> bool {
|
||||
// Extends Postgres dialect with sharp and UTF-8 multibyte chars
|
||||
// UTF-8 multibyte characters are supported in identifiers via the PostgreSqlDialect.
|
||||
// https://docs.aws.amazon.com/redshift/latest/dg/r_names.html
|
||||
PostgreSqlDialect {}.is_identifier_start(ch) || ch == '#' || !ch.is_ascii()
|
||||
PostgreSqlDialect {}.is_identifier_start(ch) || ch == '#'
|
||||
}
|
||||
|
||||
fn is_identifier_part(&self, ch: char) -> bool {
|
||||
|
|
|
@ -11151,9 +11151,7 @@ fn parse_non_latin_identifiers() {
|
|||
let supported_dialects = TestedDialects::new(vec![
|
||||
Box::new(GenericDialect {}),
|
||||
Box::new(DuckDbDialect {}),
|
||||
Box::new(PostgreSqlDialect {}),
|
||||
Box::new(MsSqlDialect {}),
|
||||
Box::new(MySqlDialect {}),
|
||||
]);
|
||||
assert!(supported_dialects
|
||||
.parse_sql_statements("SELECT 💝 FROM table1")
|
||||
|
@ -16147,3 +16145,14 @@ fn test_identifier_unicode_support() {
|
|||
]);
|
||||
let _ = dialects.verified_stmt(sql);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_identifier_unicode_start() {
|
||||
let sql = r#"SELECT 💝phone AS 💝 FROM customers"#;
|
||||
let dialects = TestedDialects::new(vec![
|
||||
Box::new(MySqlDialect {}),
|
||||
Box::new(RedshiftSqlDialect {}),
|
||||
Box::new(PostgreSqlDialect {}),
|
||||
]);
|
||||
let _ = dialects.verified_stmt(sql);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue