mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-07-07 17:04:59 +00:00
Add supports for Hive's SELECT ... GROUP BY .. GROUPING SETS
syntax (#1653)
Co-authored-by: Ifeanyi Ubah <ify1992@yahoo.com>
This commit is contained in:
parent
a5bbb5e8ac
commit
1c0e5d3554
8 changed files with 132 additions and 60 deletions
|
@ -2547,13 +2547,18 @@ impl fmt::Display for SelectInto {
|
|||
/// e.g. GROUP BY year WITH ROLLUP WITH TOTALS
|
||||
///
|
||||
/// [ClickHouse]: <https://clickhouse.com/docs/en/sql-reference/statements/select/group-by#rollup-modifier>
|
||||
#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
|
||||
pub enum GroupByWithModifier {
|
||||
Rollup,
|
||||
Cube,
|
||||
Totals,
|
||||
/// Hive supports GROUP BY GROUPING SETS syntax.
|
||||
/// e.g. GROUP BY year , month GROUPING SETS((year,month),(year),(month))
|
||||
///
|
||||
/// [Hive]: <https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=30151323#EnhancedAggregation,Cube,GroupingandRollup-GROUPINGSETSclause>
|
||||
GroupingSets(Expr),
|
||||
}
|
||||
|
||||
impl fmt::Display for GroupByWithModifier {
|
||||
|
@ -2562,6 +2567,9 @@ impl fmt::Display for GroupByWithModifier {
|
|||
GroupByWithModifier::Rollup => write!(f, "WITH ROLLUP"),
|
||||
GroupByWithModifier::Cube => write!(f, "WITH CUBE"),
|
||||
GroupByWithModifier::Totals => write!(f, "WITH TOTALS"),
|
||||
GroupByWithModifier::GroupingSets(expr) => {
|
||||
write!(f, "{expr}")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -79,4 +79,14 @@ impl Dialect for ClickHouseDialect {
|
|||
fn supports_from_first_select(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
// See <https://clickhouse.com/docs/en/sql-reference/aggregate-functions/grouping_function#grouping-sets>
|
||||
fn supports_group_by_expr(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
/// See <https://clickhouse.com/docs/en/sql-reference/statements/select/group-by#rollup-modifier>
|
||||
fn supports_group_by_with_modifier(&self) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
|
|
@ -48,6 +48,10 @@ impl Dialect for GenericDialect {
|
|||
true
|
||||
}
|
||||
|
||||
fn supports_group_by_with_modifier(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn supports_connect_by(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
|
|
@ -52,18 +52,23 @@ impl Dialect for HiveDialect {
|
|||
true
|
||||
}
|
||||
|
||||
/// See Hive <https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=27362061#Tutorial-BuiltInOperators>
|
||||
/// See <https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=27362061#Tutorial-BuiltInOperators>
|
||||
fn supports_bang_not_operator(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
/// See Hive <https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=27362036#LanguageManualDML-Loadingfilesintotables>
|
||||
/// See <https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=27362036#LanguageManualDML-Loadingfilesintotables>
|
||||
fn supports_load_data(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
/// See Hive <https://cwiki.apache.org/confluence/display/hive/languagemanual+sampling>
|
||||
/// See <https://cwiki.apache.org/confluence/display/hive/languagemanual+sampling>
|
||||
fn supports_table_sample_before_alias(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
/// See <https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=30151323#EnhancedAggregation,Cube,GroupingandRollup-CubesandRollupsr>
|
||||
fn supports_group_by_with_modifier(&self) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
|
|
@ -245,6 +245,12 @@ pub trait Dialect: Debug + Any {
|
|||
false
|
||||
}
|
||||
|
||||
/// Returns true if the dialects supports `GROUP BY` modifiers prefixed by a `WITH` keyword.
|
||||
/// Example: `GROUP BY value WITH ROLLUP`.
|
||||
fn supports_group_by_with_modifier(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
/// Returns true if the dialect supports CONNECT BY.
|
||||
fn supports_connect_by(&self) -> bool {
|
||||
false
|
||||
|
|
|
@ -9148,7 +9148,7 @@ impl<'a> Parser<'a> {
|
|||
};
|
||||
|
||||
let mut modifiers = vec![];
|
||||
if dialect_of!(self is ClickHouseDialect | GenericDialect) {
|
||||
if self.dialect.supports_group_by_with_modifier() {
|
||||
loop {
|
||||
if !self.parse_keyword(Keyword::WITH) {
|
||||
break;
|
||||
|
@ -9171,6 +9171,14 @@ impl<'a> Parser<'a> {
|
|||
});
|
||||
}
|
||||
}
|
||||
if self.parse_keywords(&[Keyword::GROUPING, Keyword::SETS]) {
|
||||
self.expect_token(&Token::LParen)?;
|
||||
let result = self.parse_comma_separated(|p| p.parse_tuple(true, true))?;
|
||||
self.expect_token(&Token::RParen)?;
|
||||
modifiers.push(GroupByWithModifier::GroupingSets(Expr::GroupingSets(
|
||||
result,
|
||||
)));
|
||||
};
|
||||
let group_by = match expressions {
|
||||
None => GroupByExpr::All(modifiers),
|
||||
Some(exprs) => GroupByExpr::Expressions(exprs, modifiers),
|
||||
|
|
|
@ -1069,61 +1069,6 @@ fn parse_create_materialized_view() {
|
|||
clickhouse_and_generic().verified_stmt(sql);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_group_by_with_modifier() {
|
||||
let clauses = ["x", "a, b", "ALL"];
|
||||
let modifiers = [
|
||||
"WITH ROLLUP",
|
||||
"WITH CUBE",
|
||||
"WITH TOTALS",
|
||||
"WITH ROLLUP WITH CUBE",
|
||||
];
|
||||
let expected_modifiers = [
|
||||
vec![GroupByWithModifier::Rollup],
|
||||
vec![GroupByWithModifier::Cube],
|
||||
vec![GroupByWithModifier::Totals],
|
||||
vec![GroupByWithModifier::Rollup, GroupByWithModifier::Cube],
|
||||
];
|
||||
for clause in &clauses {
|
||||
for (modifier, expected_modifier) in modifiers.iter().zip(expected_modifiers.iter()) {
|
||||
let sql = format!("SELECT * FROM t GROUP BY {clause} {modifier}");
|
||||
match clickhouse_and_generic().verified_stmt(&sql) {
|
||||
Statement::Query(query) => {
|
||||
let group_by = &query.body.as_select().unwrap().group_by;
|
||||
if clause == &"ALL" {
|
||||
assert_eq!(group_by, &GroupByExpr::All(expected_modifier.to_vec()));
|
||||
} else {
|
||||
assert_eq!(
|
||||
group_by,
|
||||
&GroupByExpr::Expressions(
|
||||
clause
|
||||
.split(", ")
|
||||
.map(|c| Identifier(Ident::new(c)))
|
||||
.collect(),
|
||||
expected_modifier.to_vec()
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// invalid cases
|
||||
let invalid_cases = [
|
||||
"SELECT * FROM t GROUP BY x WITH",
|
||||
"SELECT * FROM t GROUP BY x WITH ROLLUP CUBE",
|
||||
"SELECT * FROM t GROUP BY x WITH WITH ROLLUP",
|
||||
"SELECT * FROM t GROUP BY WITH ROLLUP",
|
||||
];
|
||||
for sql in invalid_cases {
|
||||
clickhouse_and_generic()
|
||||
.parse_sql_statements(sql)
|
||||
.expect_err("Expected: one of ROLLUP or CUBE or TOTALS, found: WITH");
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_select_order_by_with_fill_interpolate() {
|
||||
let sql = "SELECT id, fname, lname FROM customer WHERE id < 5 \
|
||||
|
|
|
@ -2447,6 +2447,92 @@ fn parse_select_group_by_all() {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_group_by_with_modifier() {
|
||||
let clauses = ["x", "a, b", "ALL"];
|
||||
let modifiers = [
|
||||
"WITH ROLLUP",
|
||||
"WITH CUBE",
|
||||
"WITH TOTALS",
|
||||
"WITH ROLLUP WITH CUBE",
|
||||
];
|
||||
let expected_modifiers = [
|
||||
vec![GroupByWithModifier::Rollup],
|
||||
vec![GroupByWithModifier::Cube],
|
||||
vec![GroupByWithModifier::Totals],
|
||||
vec![GroupByWithModifier::Rollup, GroupByWithModifier::Cube],
|
||||
];
|
||||
let dialects = all_dialects_where(|d| d.supports_group_by_with_modifier());
|
||||
|
||||
for clause in &clauses {
|
||||
for (modifier, expected_modifier) in modifiers.iter().zip(expected_modifiers.iter()) {
|
||||
let sql = format!("SELECT * FROM t GROUP BY {clause} {modifier}");
|
||||
match dialects.verified_stmt(&sql) {
|
||||
Statement::Query(query) => {
|
||||
let group_by = &query.body.as_select().unwrap().group_by;
|
||||
if clause == &"ALL" {
|
||||
assert_eq!(group_by, &GroupByExpr::All(expected_modifier.to_vec()));
|
||||
} else {
|
||||
assert_eq!(
|
||||
group_by,
|
||||
&GroupByExpr::Expressions(
|
||||
clause
|
||||
.split(", ")
|
||||
.map(|c| Identifier(Ident::new(c)))
|
||||
.collect(),
|
||||
expected_modifier.to_vec()
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// invalid cases
|
||||
let invalid_cases = [
|
||||
"SELECT * FROM t GROUP BY x WITH",
|
||||
"SELECT * FROM t GROUP BY x WITH ROLLUP CUBE",
|
||||
"SELECT * FROM t GROUP BY x WITH WITH ROLLUP",
|
||||
"SELECT * FROM t GROUP BY WITH ROLLUP",
|
||||
];
|
||||
for sql in invalid_cases {
|
||||
dialects
|
||||
.parse_sql_statements(sql)
|
||||
.expect_err("Expected: one of ROLLUP or CUBE or TOTALS, found: WITH");
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_group_by_special_grouping_sets() {
|
||||
let sql = "SELECT a, b, SUM(c) FROM tab1 GROUP BY a, b GROUPING SETS ((a, b), (a), (b), ())";
|
||||
match all_dialects().verified_stmt(sql) {
|
||||
Statement::Query(query) => {
|
||||
let group_by = &query.body.as_select().unwrap().group_by;
|
||||
assert_eq!(
|
||||
group_by,
|
||||
&GroupByExpr::Expressions(
|
||||
vec![
|
||||
Expr::Identifier(Ident::new("a")),
|
||||
Expr::Identifier(Ident::new("b"))
|
||||
],
|
||||
vec![GroupByWithModifier::GroupingSets(Expr::GroupingSets(vec![
|
||||
vec![
|
||||
Expr::Identifier(Ident::new("a")),
|
||||
Expr::Identifier(Ident::new("b"))
|
||||
],
|
||||
vec![Expr::Identifier(Ident::new("a")),],
|
||||
vec![Expr::Identifier(Ident::new("b"))],
|
||||
vec![]
|
||||
]))]
|
||||
)
|
||||
);
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_select_having() {
|
||||
let sql = "SELECT foo FROM bar GROUP BY foo HAVING COUNT(*) > 1";
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue