Initial Databricks SQL dialect (#1220)

This commit is contained in:
Joey Hain 2024-04-26 11:01:09 -07:00 committed by GitHub
parent 0adf4c675c
commit 2490034948
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 135 additions and 18 deletions

32
src/dialect/databricks.rs Normal file
View file

@ -0,0 +1,32 @@
use crate::dialect::Dialect;
/// SQL [`Dialect`] implementation for [Databricks SQL](https://www.databricks.com/).
///
/// Language manual: <https://docs.databricks.com/en/sql/language-manual/index.html>.
#[derive(Debug, Default)]
pub struct DatabricksDialect;

impl Dialect for DatabricksDialect {
    // Identifier rules follow
    // https://docs.databricks.com/en/sql/language-manual/sql-ref-identifiers.html

    /// Returns `true` only for the backtick, the character that opens a
    /// delimited identifier in this dialect.
    fn is_delimited_identifier_start(&self, ch: char) -> bool {
        ch == '`'
    }

    /// Returns `true` when `ch` is an ASCII letter or an underscore.
    fn is_identifier_start(&self, ch: char) -> bool {
        ch == '_' || ch.is_ascii_alphabetic()
    }

    /// Returns `true` when `ch` is an ASCII letter, an ASCII digit or an underscore.
    fn is_identifier_part(&self, ch: char) -> bool {
        ch == '_' || ch.is_ascii_alphanumeric()
    }

    /// Always `true` for this dialect.
    fn supports_filter_during_aggregation(&self) -> bool {
        true
    }

    /// Always `true` for this dialect.
    ///
    /// See <https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-groupby.html>.
    fn supports_group_by_expr(&self) -> bool {
        true
    }
}

View file

@ -13,6 +13,7 @@
mod ansi;
mod bigquery;
mod clickhouse;
mod databricks;
mod duckdb;
mod generic;
mod hive;
@ -32,6 +33,7 @@ use core::str::Chars;
pub use self::ansi::AnsiDialect;
pub use self::bigquery::BigQueryDialect;
pub use self::clickhouse::ClickHouseDialect;
pub use self::databricks::DatabricksDialect;
pub use self::duckdb::DuckDbDialect;
pub use self::generic::GenericDialect;
pub use self::hive::HiveDialect;

View file

@ -207,6 +207,7 @@ pub fn all_dialects() -> TestedDialects {
Box::new(BigQueryDialect {}) as Box<dyn Dialect>,
Box::new(SQLiteDialect {}) as Box<dyn Dialect>,
Box::new(DuckDbDialect {}) as Box<dyn Dialect>,
Box::new(DatabricksDialect {}) as Box<dyn Dialect>,
];
TestedDialects {
dialects: all_dialects,

View file

@ -8937,6 +8937,76 @@ fn parse_map_access_expr() {
}
}
/// Parses two `ARRAY_AGG(...) FILTER (WHERE ...)` projections (one bare, one
/// aliased) with the dialects selected by the
/// `supports_filter_during_aggregation` predicate and checks the resulting
/// projection list.
#[test]
fn test_selective_aggregation() {
    // Builders for the sub-expressions shared by both projected aggregates.
    let name_column = || Expr::Identifier(Ident::new("name"));
    let array_agg_of_name = || {
        Expr::ArrayAgg(ArrayAgg {
            distinct: false,
            expr: Box::new(name_column()),
            order_by: None,
            limit: None,
            within_group: false,
        })
    };

    let sql = concat!(
        "SELECT ",
        "ARRAY_AGG(name) FILTER (WHERE name IS NOT NULL), ",
        "ARRAY_AGG(name) FILTER (WHERE name LIKE 'a%') AS agg2 ",
        "FROM region"
    );

    let expected = vec![
        // First item: unaliased aggregate filtered by `IS NOT NULL`.
        SelectItem::UnnamedExpr(Expr::AggregateExpressionWithFilter {
            expr: Box::new(array_agg_of_name()),
            filter: Box::new(Expr::IsNotNull(Box::new(name_column()))),
        }),
        // Second item: aggregate filtered by `LIKE 'a%'`, aliased as `agg2`.
        SelectItem::ExprWithAlias {
            expr: Expr::AggregateExpressionWithFilter {
                expr: Box::new(array_agg_of_name()),
                filter: Box::new(Expr::Like {
                    negated: false,
                    expr: Box::new(name_column()),
                    pattern: Box::new(Expr::Value(Value::SingleQuotedString("a%".to_owned()))),
                    escape_char: None,
                }),
            },
            alias: Ident::new("agg2"),
        },
    ];

    let dialects = all_dialects_where(|d| d.supports_filter_during_aggregation());
    let select = dialects.verified_only_select(sql);
    assert_eq!(select.projection, expected);
}
/// Parses a `GROUP BY GROUPING SETS (...)` query with the dialects selected
/// by the `supports_group_by_expr` predicate and checks the `group_by` field
/// of the resulting select.
#[test]
fn test_group_by_grouping_sets() {
    // Shorthand for an unquoted column reference.
    let column = |name: &str| Expr::Identifier(Ident::new(name));

    let sql = concat!(
        "SELECT city, car_model, sum(quantity) AS sum ",
        "FROM dealer ",
        "GROUP BY GROUPING SETS ((city, car_model), (city), (car_model), ()) ",
        "ORDER BY city",
    );

    // One inner vector per grouping set; the trailing `()` set is empty.
    let grouping_sets = vec![
        vec![column("city"), column("car_model")],
        vec![column("city")],
        vec![column("car_model")],
        Vec::new(),
    ];
    let expected = GroupByExpr::Expressions(vec![Expr::GroupingSets(grouping_sets)]);

    let select = all_dialects_where(|d| d.supports_group_by_expr()).verified_only_select(sql);
    assert_eq!(select.group_by, expected);
}
#[test]
fn test_match_recognize() {
use MatchRecognizePattern::*;

View file

@ -0,0 +1,30 @@
use sqlparser::ast::*;
use sqlparser::dialect::DatabricksDialect;
use test_utils::*;
#[macro_use]
mod test_utils;
fn databricks() -> TestedDialects {
TestedDialects {
dialects: vec![Box::new(DatabricksDialect {})],
options: None,
}
}
/// Checks how the Databricks dialect treats backtick-quoted and
/// double-quoted tokens in a projection.
#[test]
fn test_databricks_identifiers() {
    // A backtick-quoted token is parsed as a delimited identifier.
    let backticked = databricks().verified_only_select("SELECT `Ä`");
    let expected_identifier =
        SelectItem::UnnamedExpr(Expr::Identifier(Ident::with_quote('`', "Ä")));
    assert_eq!(backticked.projection[0], expected_identifier);

    // A double-quoted token is parsed as a string literal, not as a delimited identifier.
    let double_quoted = databricks().verified_only_select(r#"SELECT "Ä""#);
    let expected_literal =
        SelectItem::UnnamedExpr(Expr::Value(Value::DoubleQuotedString("Ä".to_owned())));
    assert_eq!(double_quoted.projection[0], expected_literal);
}

View file

@ -336,24 +336,6 @@ fn parse_create_function() {
);
}
/// Runs a query with two unaliased `FILTER (WHERE ...)` aggregates through
/// `hive().verified_stmt` and prints the returned statement.
#[test]
fn filtering_during_aggregation() {
    let sql = concat!(
        "SELECT ",
        "ARRAY_AGG(name) FILTER (WHERE name IS NOT NULL), ",
        "ARRAY_AGG(name) FILTER (WHERE name LIKE 'a%') ",
        "FROM region"
    );
    let statement = hive().verified_stmt(sql);
    println!("{}", statement);
}
/// Runs a query with two aliased `FILTER (WHERE ...)` aggregates through
/// `hive().verified_stmt` and prints the returned statement.
#[test]
fn filtering_during_aggregation_aliased() {
    let sql = concat!(
        "SELECT ",
        "ARRAY_AGG(name) FILTER (WHERE name IS NOT NULL) AS agg1, ",
        "ARRAY_AGG(name) FILTER (WHERE name LIKE 'a%') AS agg2 ",
        "FROM region"
    );
    let statement = hive().verified_stmt(sql);
    println!("{}", statement);
}
#[test]
fn filter_as_alias() {
let sql = "SELECT name filter FROM region";