mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-10-17 17:27:23 +00:00
Add identifier start Unicode support for Postgres, MySQL and Redshift (#1944)
This commit is contained in:
parent
c5e6ba5e7d
commit
ecd5d88638
4 changed files with 18 additions and 8 deletions
|
@ -43,11 +43,13 @@ impl Dialect for MySqlDialect {
|
||||||
// See https://dev.mysql.com/doc/refman/8.0/en/identifiers.html.
|
// See https://dev.mysql.com/doc/refman/8.0/en/identifiers.html.
|
||||||
// Identifiers which begin with a digit are recognized while tokenizing numbers,
|
// Identifiers which begin with a digit are recognized while tokenizing numbers,
|
||||||
// so they can be distinguished from exponent numeric literals.
|
// so they can be distinguished from exponent numeric literals.
|
||||||
|
// MySQL also supports non-ASCII UTF-8 characters
|
||||||
ch.is_alphabetic()
|
ch.is_alphabetic()
|
||||||
|| ch == '_'
|
|| ch == '_'
|
||||||
|| ch == '$'
|
|| ch == '$'
|
||||||
|| ch == '@'
|
|| ch == '@'
|
||||||
|| ('\u{0080}'..='\u{ffff}').contains(&ch)
|
|| ('\u{0080}'..='\u{ffff}').contains(&ch)
|
||||||
|
|| !ch.is_ascii()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_identifier_part(&self, ch: char) -> bool {
|
fn is_identifier_part(&self, ch: char) -> bool {
|
||||||
|
|
|
@ -65,10 +65,9 @@ impl Dialect for PostgreSqlDialect {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_identifier_start(&self, ch: char) -> bool {
|
fn is_identifier_start(&self, ch: char) -> bool {
|
||||||
// See https://www.postgresql.org/docs/11/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS
|
ch.is_alphabetic() || ch == '_' ||
|
||||||
// We don't yet support identifiers beginning with "letters with
|
// PostgreSQL implements Unicode characters in identifiers.
|
||||||
// diacritical marks"
|
!ch.is_ascii()
|
||||||
ch.is_alphabetic() || ch == '_'
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_identifier_part(&self, ch: char) -> bool {
|
fn is_identifier_part(&self, ch: char) -> bool {
|
||||||
|
|
|
@ -80,9 +80,9 @@ impl Dialect for RedshiftSqlDialect {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_identifier_start(&self, ch: char) -> bool {
|
fn is_identifier_start(&self, ch: char) -> bool {
|
||||||
// Extends Postgres dialect with sharp and UTF-8 multibyte chars
|
// UTF-8 multibyte characters are supported in identifiers via the PostgreSqlDialect.
|
||||||
// https://docs.aws.amazon.com/redshift/latest/dg/r_names.html
|
// https://docs.aws.amazon.com/redshift/latest/dg/r_names.html
|
||||||
PostgreSqlDialect {}.is_identifier_start(ch) || ch == '#' || !ch.is_ascii()
|
PostgreSqlDialect {}.is_identifier_start(ch) || ch == '#'
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_identifier_part(&self, ch: char) -> bool {
|
fn is_identifier_part(&self, ch: char) -> bool {
|
||||||
|
|
|
@ -11151,9 +11151,7 @@ fn parse_non_latin_identifiers() {
|
||||||
let supported_dialects = TestedDialects::new(vec![
|
let supported_dialects = TestedDialects::new(vec![
|
||||||
Box::new(GenericDialect {}),
|
Box::new(GenericDialect {}),
|
||||||
Box::new(DuckDbDialect {}),
|
Box::new(DuckDbDialect {}),
|
||||||
Box::new(PostgreSqlDialect {}),
|
|
||||||
Box::new(MsSqlDialect {}),
|
Box::new(MsSqlDialect {}),
|
||||||
Box::new(MySqlDialect {}),
|
|
||||||
]);
|
]);
|
||||||
assert!(supported_dialects
|
assert!(supported_dialects
|
||||||
.parse_sql_statements("SELECT 💝 FROM table1")
|
.parse_sql_statements("SELECT 💝 FROM table1")
|
||||||
|
@ -16147,3 +16145,14 @@ fn test_identifier_unicode_support() {
|
||||||
]);
|
]);
|
||||||
let _ = dialects.verified_stmt(sql);
|
let _ = dialects.verified_stmt(sql);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_identifier_unicode_start() {
|
||||||
|
let sql = r#"SELECT 💝phone AS 💝 FROM customers"#;
|
||||||
|
let dialects = TestedDialects::new(vec![
|
||||||
|
Box::new(MySqlDialect {}),
|
||||||
|
Box::new(RedshiftSqlDialect {}),
|
||||||
|
Box::new(PostgreSqlDialect {}),
|
||||||
|
]);
|
||||||
|
let _ = dialects.verified_stmt(sql);
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue