mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-07-07 17:04:59 +00:00
Initial Databricks SQL dialect (#1220)
This commit is contained in:
parent
0adf4c675c
commit
2490034948
6 changed files with 135 additions and 18 deletions
32
src/dialect/databricks.rs
Normal file
32
src/dialect/databricks.rs
Normal file
|
@ -0,0 +1,32 @@
|
|||
use crate::dialect::Dialect;
|
||||
|
||||
/// SQL dialect marker type for [Databricks SQL](https://www.databricks.com/).
///
/// This is a field-less unit struct; its dialect-specific rules are supplied
/// through its [`Dialect`] implementation.
///
/// Language manual: <https://docs.databricks.com/en/sql/language-manual/index.html>.
#[derive(Debug, Default)]
pub struct DatabricksDialect;
|
||||
|
||||
impl Dialect for DatabricksDialect {
    // Identifier rules follow
    // https://docs.databricks.com/en/sql/language-manual/sql-ref-identifiers.html

    /// Returns `true` only for the backtick, the sole character this dialect
    /// accepts as the opening of a delimited identifier.
    fn is_delimited_identifier_start(&self, ch: char) -> bool {
        ch == '`'
    }

    /// Returns `true` when `ch` may begin an unquoted identifier: an ASCII
    /// letter or an underscore.
    fn is_identifier_start(&self, ch: char) -> bool {
        ch == '_' || ch.is_ascii_alphabetic()
    }

    /// Returns `true` when `ch` may appear after the first character of an
    /// unquoted identifier: an ASCII letter, an ASCII digit, or an underscore.
    fn is_identifier_part(&self, ch: char) -> bool {
        ch == '_' || ch.is_ascii_alphanumeric()
    }

    /// Always `true`: this dialect opts in to the filter-during-aggregation
    /// capability flag.
    fn supports_filter_during_aggregation(&self) -> bool {
        true
    }

    /// Always `true`: this dialect opts in to the group-by-expression
    /// capability flag.
    ///
    /// See <https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-groupby.html>.
    fn supports_group_by_expr(&self) -> bool {
        true
    }
}
|
|
@ -13,6 +13,7 @@
|
|||
mod ansi;
|
||||
mod bigquery;
|
||||
mod clickhouse;
|
||||
mod databricks;
|
||||
mod duckdb;
|
||||
mod generic;
|
||||
mod hive;
|
||||
|
@ -32,6 +33,7 @@ use core::str::Chars;
|
|||
pub use self::ansi::AnsiDialect;
|
||||
pub use self::bigquery::BigQueryDialect;
|
||||
pub use self::clickhouse::ClickHouseDialect;
|
||||
pub use self::databricks::DatabricksDialect;
|
||||
pub use self::duckdb::DuckDbDialect;
|
||||
pub use self::generic::GenericDialect;
|
||||
pub use self::hive::HiveDialect;
|
||||
|
|
|
@ -207,6 +207,7 @@ pub fn all_dialects() -> TestedDialects {
|
|||
Box::new(BigQueryDialect {}) as Box<dyn Dialect>,
|
||||
Box::new(SQLiteDialect {}) as Box<dyn Dialect>,
|
||||
Box::new(DuckDbDialect {}) as Box<dyn Dialect>,
|
||||
Box::new(DatabricksDialect {}) as Box<dyn Dialect>,
|
||||
];
|
||||
TestedDialects {
|
||||
dialects: all_dialects,
|
||||
|
|
|
@ -8937,6 +8937,76 @@ fn parse_map_access_expr() {
|
|||
}
|
||||
}
|
||||
|
||||
/// Checks the projection parsed from aggregate calls that carry a
/// `FILTER (WHERE ...)` clause, across every dialect for which
/// `supports_filter_during_aggregation()` returns `true`.
#[test]
fn test_selective_aggregation() {
    let sql = concat!(
        "SELECT ",
        "ARRAY_AGG(name) FILTER (WHERE name IS NOT NULL), ",
        "ARRAY_AGG(name) FILTER (WHERE name LIKE 'a%') AS agg2 ",
        "FROM region"
    );

    // Builds a fresh boxed `name` identifier; it appears as the aggregate
    // argument and inside both filter predicates.
    let name = || Box::new(Expr::Identifier(Ident::new("name")));
    // Builds a fresh boxed `ARRAY_AGG(name)` node, common to both select items.
    let array_agg = || {
        Box::new(Expr::ArrayAgg(ArrayAgg {
            distinct: false,
            expr: name(),
            order_by: None,
            limit: None,
            within_group: false,
        }))
    };

    let expected = vec![
        // First item: unaliased aggregate filtered by `name IS NOT NULL`.
        SelectItem::UnnamedExpr(Expr::AggregateExpressionWithFilter {
            expr: array_agg(),
            filter: Box::new(Expr::IsNotNull(name())),
        }),
        // Second item: aggregate filtered by `name LIKE 'a%'`, aliased `agg2`.
        SelectItem::ExprWithAlias {
            expr: Expr::AggregateExpressionWithFilter {
                expr: array_agg(),
                filter: Box::new(Expr::Like {
                    negated: false,
                    expr: name(),
                    pattern: Box::new(Expr::Value(Value::SingleQuotedString("a%".to_owned()))),
                    escape_char: None,
                }),
            },
            alias: Ident::new("agg2"),
        },
    ];

    let select = all_dialects_where(|d| d.supports_filter_during_aggregation())
        .verified_only_select(sql);
    assert_eq!(select.projection, expected);
}
|
||||
|
||||
/// Checks the `group_by` parsed from a `GROUP BY GROUPING SETS (...)` query,
/// across every dialect for which `supports_group_by_expr()` returns `true`.
#[test]
fn test_group_by_grouping_sets() {
    let sql = concat!(
        "SELECT city, car_model, sum(quantity) AS sum ",
        "FROM dealer ",
        "GROUP BY GROUPING SETS ((city, car_model), (city), (car_model), ()) ",
        "ORDER BY city",
    );

    // Shorthand for an unquoted identifier expression.
    let ident = |name: &str| Expr::Identifier(Ident::new(name));

    // One inner vector per grouping set, in source order; the trailing empty
    // vector corresponds to the `()` set.
    let expected = GroupByExpr::Expressions(vec![Expr::GroupingSets(vec![
        vec![ident("city"), ident("car_model")],
        vec![ident("city")],
        vec![ident("car_model")],
        vec![],
    ])]);

    let select = all_dialects_where(|d| d.supports_group_by_expr()).verified_only_select(sql);
    assert_eq!(select.group_by, expected);
}
|
||||
#[test]
|
||||
fn test_match_recognize() {
|
||||
use MatchRecognizePattern::*;
|
||||
|
|
30
tests/sqlparser_databricks.rs
Normal file
30
tests/sqlparser_databricks.rs
Normal file
|
@ -0,0 +1,30 @@
|
|||
use sqlparser::ast::*;
|
||||
use sqlparser::dialect::DatabricksDialect;
|
||||
use test_utils::*;
|
||||
|
||||
#[macro_use]
|
||||
mod test_utils;
|
||||
|
||||
fn databricks() -> TestedDialects {
|
||||
TestedDialects {
|
||||
dialects: vec![Box::new(DatabricksDialect {})],
|
||||
options: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks how the Databricks dialect parses backtick-quoted versus
/// double-quoted text in a select list.
#[test]
fn test_databricks_identifiers() {
    // Backticks delimit identifiers in Databricks.
    let backticked = databricks().verified_only_select("SELECT `Ä`");
    let expected_identifier =
        SelectItem::UnnamedExpr(Expr::Identifier(Ident::with_quote('`', "Ä")));
    assert_eq!(backticked.projection[0], expected_identifier);

    // Double-quoted text yields a string literal, not a delimited identifier.
    let double_quoted = databricks().verified_only_select(r#"SELECT "Ä""#);
    let expected_string =
        SelectItem::UnnamedExpr(Expr::Value(Value::DoubleQuotedString("Ä".to_owned())));
    assert_eq!(double_quoted.projection[0], expected_string);
}
|
|
@ -336,24 +336,6 @@ fn parse_create_function() {
|
|||
);
|
||||
}
|
||||
|
||||
/// Passes a query with two unaliased `FILTER (WHERE ...)` aggregates through
/// `hive().verified_stmt` and prints the returned statement.
#[test]
fn filtering_during_aggregation() {
    let sql = concat!(
        "SELECT ",
        "ARRAY_AGG(name) FILTER (WHERE name IS NOT NULL), ",
        "ARRAY_AGG(name) FILTER (WHERE name LIKE 'a%') ",
        "FROM region"
    );
    let statement = hive().verified_stmt(sql);
    println!("{}", statement);
}
|
||||
|
||||
/// Passes a query with two aliased `FILTER (WHERE ...)` aggregates through
/// `hive().verified_stmt` and prints the returned statement.
#[test]
fn filtering_during_aggregation_aliased() {
    let sql = concat!(
        "SELECT ",
        "ARRAY_AGG(name) FILTER (WHERE name IS NOT NULL) AS agg1, ",
        "ARRAY_AGG(name) FILTER (WHERE name LIKE 'a%') AS agg2 ",
        "FROM region"
    );
    let statement = hive().verified_stmt(sql);
    println!("{}", statement);
}
|
||||
|
||||
#[test]
|
||||
fn filter_as_alias() {
|
||||
let sql = "SELECT name filter FROM region";
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue