Support GROUP BY WITH MODIFIER for ClickHouse (#1323)

Co-authored-by: Ifeanyi Ubah <ify1992@yahoo.com>
This commit is contained in:
hulk 2024-06-30 19:33:43 +08:00 committed by GitHub
parent 0b1a413e64
commit 44d7a20f64
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 215 additions and 76 deletions

View file

@ -43,8 +43,8 @@ pub use self::operator::{BinaryOperator, UnaryOperator};
pub use self::query::{
AfterMatchSkip, ConnectBy, Cte, CteAsMaterialized, Distinct, EmptyMatchesMode,
ExceptSelectItem, ExcludeSelectItem, ExprWithAlias, Fetch, ForClause, ForJson, ForXml,
GroupByExpr, IdentWithAlias, IlikeSelectItem, Join, JoinConstraint, JoinOperator,
JsonTableColumn, JsonTableColumnErrorHandling, LateralView, LockClause, LockType,
GroupByExpr, GroupByWithModifier, IdentWithAlias, IlikeSelectItem, Join, JoinConstraint,
JoinOperator, JsonTableColumn, JsonTableColumnErrorHandling, LateralView, LockClause, LockType,
MatchRecognizePattern, MatchRecognizeSymbol, Measure, NamedWindowDefinition, NamedWindowExpr,
NonBlock, Offset, OffsetRows, OrderByExpr, PivotValueSource, Query, RenameSelectItem,
RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select,

View file

@ -299,10 +299,10 @@ impl fmt::Display for Select {
write!(f, " WHERE {selection}")?;
}
match &self.group_by {
GroupByExpr::All => write!(f, " GROUP BY ALL")?,
GroupByExpr::Expressions(exprs) => {
GroupByExpr::All(_) => write!(f, " {}", self.group_by)?,
GroupByExpr::Expressions(exprs, _) => {
if !exprs.is_empty() {
write!(f, " GROUP BY {}", display_comma_separated(exprs))?;
write!(f, " {}", self.group_by)?
}
}
}
@ -1866,27 +1866,65 @@ impl fmt::Display for SelectInto {
}
}
/// A modifier that ClickHouse accepts after a `GROUP BY` clause, written as
/// `WITH <modifier>`.
///
/// Modifiers can be chained, e.g. `GROUP BY year WITH ROLLUP WITH TOTALS`.
///
/// See the [ClickHouse] documentation for details.
///
/// [ClickHouse]: <https://clickhouse.com/docs/en/sql-reference/statements/select/group-by#rollup-modifier>
#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum GroupByWithModifier {
    /// `WITH ROLLUP`
    Rollup,
    /// `WITH CUBE`
    Cube,
    /// `WITH TOTALS`
    Totals,
}

impl fmt::Display for GroupByWithModifier {
    /// Writes the modifier in its SQL spelling, including the leading `WITH`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let sql = match self {
            Self::Rollup => "WITH ROLLUP",
            Self::Cube => "WITH CUBE",
            Self::Totals => "WITH TOTALS",
        };
        // Written verbatim: width/fill flags on the formatter are not applied.
        f.write_str(sql)
    }
}
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum GroupByExpr {
/// ALL syntax of [Snowflake], and [DuckDB]
/// ALL syntax of [Snowflake], [DuckDB] and [ClickHouse].
///
/// [Snowflake]: <https://docs.snowflake.com/en/sql-reference/constructs/group-by#label-group-by-all-columns>
/// [DuckDB]: <https://duckdb.org/docs/sql/query_syntax/groupby.html>
All,
/// [ClickHouse]: <https://clickhouse.com/docs/en/sql-reference/statements/select/group-by#group-by-all>
///
/// ClickHouse also supports [WITH modifiers] after GROUP BY ALL and expressions.
///
/// [WITH modifiers]: <https://clickhouse.com/docs/en/sql-reference/statements/select/group-by#rollup-modifier>
All(Vec<GroupByWithModifier>),
/// Expressions
Expressions(Vec<Expr>),
Expressions(Vec<Expr>, Vec<GroupByWithModifier>),
}
impl fmt::Display for GroupByExpr {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
GroupByExpr::All => write!(f, "GROUP BY ALL"),
GroupByExpr::Expressions(col_names) => {
GroupByExpr::All(modifiers) => {
write!(f, "GROUP BY ALL")?;
if !modifiers.is_empty() {
write!(f, " {}", display_separated(modifiers, " "))?;
}
Ok(())
}
GroupByExpr::Expressions(col_names, modifiers) => {
let col_names = display_comma_separated(col_names);
write!(f, "GROUP BY ({col_names})")
write!(f, "GROUP BY {col_names}")?;
if !modifiers.is_empty() {
write!(f, " {}", display_separated(modifiers, " "))?;
}
Ok(())
}
}
}

View file

@ -721,6 +721,7 @@ define_keywords!(
TINYINT,
TO,
TOP,
TOTALS,
TRAILING,
TRANSACTION,
TRANSIENT,

View file

@ -8319,13 +8319,42 @@ impl<'a> Parser<'a> {
};
let group_by = if self.parse_keywords(&[Keyword::GROUP, Keyword::BY]) {
if self.parse_keyword(Keyword::ALL) {
GroupByExpr::All
let expressions = if self.parse_keyword(Keyword::ALL) {
None
} else {
GroupByExpr::Expressions(self.parse_comma_separated(Parser::parse_group_by_expr)?)
Some(self.parse_comma_separated(Parser::parse_group_by_expr)?)
};
let mut modifiers = vec![];
if dialect_of!(self is ClickHouseDialect | GenericDialect) {
loop {
if !self.parse_keyword(Keyword::WITH) {
break;
}
let keyword = self.expect_one_of_keywords(&[
Keyword::ROLLUP,
Keyword::CUBE,
Keyword::TOTALS,
])?;
modifiers.push(match keyword {
Keyword::ROLLUP => GroupByWithModifier::Rollup,
Keyword::CUBE => GroupByWithModifier::Cube,
Keyword::TOTALS => GroupByWithModifier::Totals,
_ => {
return parser_err!(
"BUG: expected to match GroupBy modifier keyword",
self.peek_token().location
)
}
});
}
}
match expressions {
None => GroupByExpr::All(modifiers),
Some(exprs) => GroupByExpr::Expressions(exprs, modifiers),
}
} else {
GroupByExpr::Expressions(vec![])
GroupByExpr::Expressions(vec![], vec![])
};
let cluster_by = if self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) {

View file

@ -88,7 +88,7 @@ fn parse_map_access_expr() {
right: Box::new(Expr::Value(Value::SingleQuotedString("foo".to_string()))),
}),
}),
group_by: GroupByExpr::Expressions(vec![]),
group_by: GroupByExpr::Expressions(vec![], vec![]),
cluster_by: vec![],
distribute_by: vec![],
sort_by: vec![],
@ -626,6 +626,61 @@ fn parse_create_materialized_view() {
clickhouse_and_generic().verified_stmt(sql);
}
/// Checks that `GROUP BY ... WITH <modifier>` round-trips for the ClickHouse and
/// generic dialects and yields the expected `GroupByExpr`, and that malformed
/// modifier syntax is rejected.
#[test]
fn parse_group_by_with_modifier() {
    let clauses = ["x", "a, b", "ALL"];
    // Each SQL suffix paired with the modifiers it must produce in the AST.
    let modifier_cases = [
        ("WITH ROLLUP", vec![GroupByWithModifier::Rollup]),
        ("WITH CUBE", vec![GroupByWithModifier::Cube]),
        ("WITH TOTALS", vec![GroupByWithModifier::Totals]),
        (
            "WITH ROLLUP WITH CUBE",
            vec![GroupByWithModifier::Rollup, GroupByWithModifier::Cube],
        ),
    ];
    for clause in clauses {
        for (modifier, expected_modifiers) in &modifier_cases {
            let sql = format!("SELECT * FROM t GROUP BY {clause} {modifier}");
            match clickhouse_and_generic().verified_stmt(&sql) {
                Statement::Query(query) => {
                    let group_by = &query.body.as_select().unwrap().group_by;
                    let expected = if clause == "ALL" {
                        GroupByExpr::All(expected_modifiers.clone())
                    } else {
                        // Non-ALL clauses are plain comma-separated identifiers.
                        GroupByExpr::Expressions(
                            clause
                                .split(", ")
                                .map(|c| Identifier(Ident::new(c)))
                                .collect(),
                            expected_modifiers.clone(),
                        )
                    };
                    assert_eq!(group_by, &expected);
                }
                _ => unreachable!(),
            }
        }
    }

    // These inputs are malformed in different ways, so only the fact that
    // parsing fails is asserted; the offending SQL is reported so a regression
    // can be located immediately.
    let invalid_cases = [
        "SELECT * FROM t GROUP BY x WITH",
        "SELECT * FROM t GROUP BY x WITH ROLLUP CUBE",
        "SELECT * FROM t GROUP BY x WITH WITH ROLLUP",
        "SELECT * FROM t GROUP BY WITH ROLLUP",
    ];
    for sql in invalid_cases {
        let result = clickhouse_and_generic().parse_sql_statements(sql);
        assert!(
            result.is_err(),
            "expected a parse error for invalid SQL: {sql}"
        );
    }
}
fn clickhouse() -> TestedDialects {
TestedDialects {
dialects: vec![Box::new(ClickHouseDialect {})],

View file

@ -392,9 +392,10 @@ fn parse_update_set_from() {
}],
lateral_views: vec![],
selection: None,
group_by: GroupByExpr::Expressions(vec![Expr::Identifier(Ident::new(
"id"
))]),
group_by: GroupByExpr::Expressions(
vec![Expr::Identifier(Ident::new("id"))],
vec![]
),
cluster_by: vec![],
distribute_by: vec![],
sort_by: vec![],
@ -2119,10 +2120,13 @@ fn parse_select_group_by() {
let sql = "SELECT id, fname, lname FROM customer GROUP BY lname, fname";
let select = verified_only_select(sql);
assert_eq!(
GroupByExpr::Expressions(vec![
Expr::Identifier(Ident::new("lname")),
Expr::Identifier(Ident::new("fname")),
]),
GroupByExpr::Expressions(
vec![
Expr::Identifier(Ident::new("lname")),
Expr::Identifier(Ident::new("fname")),
],
vec![]
),
select.group_by
);
@ -2137,7 +2141,7 @@ fn parse_select_group_by() {
fn parse_select_group_by_all() {
let sql = "SELECT id, fname, lname, SUM(order) FROM customer GROUP BY ALL";
let select = verified_only_select(sql);
assert_eq!(GroupByExpr::All, select.group_by);
assert_eq!(GroupByExpr::All(vec![]), select.group_by);
one_statement_parses_to(
"SELECT id, fname, lname, SUM(order) FROM customer GROUP BY ALL",
@ -4545,7 +4549,7 @@ fn test_parse_named_window() {
}],
lateral_views: vec![],
selection: None,
group_by: GroupByExpr::Expressions(vec![]),
group_by: GroupByExpr::Expressions(vec![], vec![]),
cluster_by: vec![],
distribute_by: vec![],
sort_by: vec![],
@ -4974,7 +4978,7 @@ fn parse_interval_and_or_xor() {
}),
}),
}),
group_by: GroupByExpr::Expressions(vec![]),
group_by: GroupByExpr::Expressions(vec![], vec![]),
cluster_by: vec![],
distribute_by: vec![],
sort_by: vec![],
@ -6908,7 +6912,7 @@ fn lateral_function() {
}],
lateral_views: vec![],
selection: None,
group_by: GroupByExpr::Expressions(vec![]),
group_by: GroupByExpr::Expressions(vec![], vec![]),
cluster_by: vec![],
distribute_by: vec![],
sort_by: vec![],
@ -7627,7 +7631,7 @@ fn parse_merge() {
}],
lateral_views: vec![],
selection: None,
group_by: GroupByExpr::Expressions(vec![]),
group_by: GroupByExpr::Expressions(vec![], vec![]),
cluster_by: vec![],
distribute_by: vec![],
sort_by: vec![],
@ -9133,7 +9137,7 @@ fn parse_unload() {
}],
lateral_views: vec![],
selection: None,
group_by: GroupByExpr::Expressions(vec![]),
group_by: GroupByExpr::Expressions(vec![], vec![]),
cluster_by: vec![],
distribute_by: vec![],
sort_by: vec![],
@ -9276,7 +9280,7 @@ fn parse_connect_by() {
into: None,
lateral_views: vec![],
selection: None,
group_by: GroupByExpr::Expressions(vec![]),
group_by: GroupByExpr::Expressions(vec![], vec![]),
cluster_by: vec![],
distribute_by: vec![],
sort_by: vec![],
@ -9364,7 +9368,7 @@ fn parse_connect_by() {
op: BinaryOperator::NotEq,
right: Box::new(Expr::Value(number("42"))),
}),
group_by: GroupByExpr::Expressions(vec![]),
group_by: GroupByExpr::Expressions(vec![], vec![]),
cluster_by: vec![],
distribute_by: vec![],
sort_by: vec![],
@ -9484,15 +9488,18 @@ fn test_group_by_grouping_sets() {
all_dialects_where(|d| d.supports_group_by_expr())
.verified_only_select(sql)
.group_by,
GroupByExpr::Expressions(vec![Expr::GroupingSets(vec![
vec![
Expr::Identifier(Ident::new("city")),
Expr::Identifier(Ident::new("car_model"))
],
vec![Expr::Identifier(Ident::new("city")),],
vec![Expr::Identifier(Ident::new("car_model"))],
GroupByExpr::Expressions(
vec![Expr::GroupingSets(vec![
vec![
Expr::Identifier(Ident::new("city")),
Expr::Identifier(Ident::new("car_model"))
],
vec![Expr::Identifier(Ident::new("city")),],
vec![Expr::Identifier(Ident::new("car_model"))],
vec![]
])],
vec![]
])])
)
);
}

View file

@ -171,7 +171,7 @@ fn test_select_union_by_name() {
}],
lateral_views: vec![],
selection: None,
group_by: GroupByExpr::Expressions(vec![]),
group_by: GroupByExpr::Expressions(vec![], vec![]),
cluster_by: vec![],
distribute_by: vec![],
sort_by: vec![],
@ -209,7 +209,7 @@ fn test_select_union_by_name() {
}],
lateral_views: vec![],
selection: None,
group_by: GroupByExpr::Expressions(vec![]),
group_by: GroupByExpr::Expressions(vec![], vec![]),
cluster_by: vec![],
distribute_by: vec![],
sort_by: vec![],

View file

@ -111,7 +111,7 @@ fn parse_create_procedure() {
from: vec![],
lateral_views: vec![],
selection: None,
group_by: GroupByExpr::Expressions(vec![]),
group_by: GroupByExpr::Expressions(vec![], vec![]),
cluster_by: vec![],
distribute_by: vec![],
sort_by: vec![],
@ -528,7 +528,7 @@ fn parse_substring_in_select() {
}],
lateral_views: vec![],
selection: None,
group_by: GroupByExpr::Expressions(vec![]),
group_by: GroupByExpr::Expressions(vec![], vec![]),
cluster_by: vec![],
distribute_by: vec![],
sort_by: vec![],

View file

@ -907,7 +907,7 @@ fn parse_escaped_quote_identifiers_with_escape() {
from: vec![],
lateral_views: vec![],
selection: None,
group_by: GroupByExpr::Expressions(vec![]),
group_by: GroupByExpr::Expressions(vec![], vec![]),
cluster_by: vec![],
distribute_by: vec![],
sort_by: vec![],
@ -954,7 +954,7 @@ fn parse_escaped_quote_identifiers_with_no_escape() {
from: vec![],
lateral_views: vec![],
selection: None,
group_by: GroupByExpr::Expressions(vec![]),
group_by: GroupByExpr::Expressions(vec![], vec![]),
cluster_by: vec![],
distribute_by: vec![],
sort_by: vec![],
@ -998,7 +998,7 @@ fn parse_escaped_backticks_with_escape() {
from: vec![],
lateral_views: vec![],
selection: None,
group_by: GroupByExpr::Expressions(vec![]),
group_by: GroupByExpr::Expressions(vec![], vec![]),
cluster_by: vec![],
distribute_by: vec![],
sort_by: vec![],
@ -1042,7 +1042,7 @@ fn parse_escaped_backticks_with_no_escape() {
from: vec![],
lateral_views: vec![],
selection: None,
group_by: GroupByExpr::Expressions(vec![]),
group_by: GroupByExpr::Expressions(vec![], vec![]),
cluster_by: vec![],
distribute_by: vec![],
sort_by: vec![],
@ -1703,7 +1703,7 @@ fn parse_select_with_numeric_prefix_column_name() {
}],
lateral_views: vec![],
selection: None,
group_by: GroupByExpr::Expressions(vec![]),
group_by: GroupByExpr::Expressions(vec![], vec![]),
cluster_by: vec![],
distribute_by: vec![],
sort_by: vec![],
@ -1756,7 +1756,7 @@ fn parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column() {
}],
lateral_views: vec![],
selection: None,
group_by: GroupByExpr::Expressions(vec![]),
group_by: GroupByExpr::Expressions(vec![], vec![]),
cluster_by: vec![],
distribute_by: vec![],
sort_by: vec![],
@ -2255,7 +2255,7 @@ fn parse_substring_in_select() {
}],
lateral_views: vec![],
selection: None,
group_by: GroupByExpr::Expressions(vec![]),
group_by: GroupByExpr::Expressions(vec![], vec![]),
cluster_by: vec![],
distribute_by: vec![],
sort_by: vec![],
@ -2559,7 +2559,7 @@ fn parse_hex_string_introducer() {
from: vec![],
lateral_views: vec![],
selection: None,
group_by: GroupByExpr::Expressions(vec![]),
group_by: GroupByExpr::Expressions(vec![], vec![]),
cluster_by: vec![],
distribute_by: vec![],
sort_by: vec![],

View file

@ -1075,7 +1075,7 @@ fn parse_copy_to() {
from: vec![],
lateral_views: vec![],
selection: None,
group_by: GroupByExpr::Expressions(vec![]),
group_by: GroupByExpr::Expressions(vec![], vec![]),
having: None,
named_window: vec![],
window_before_qualify: false,
@ -2383,7 +2383,7 @@ fn parse_array_subquery_expr() {
from: vec![],
lateral_views: vec![],
selection: None,
group_by: GroupByExpr::Expressions(vec![]),
group_by: GroupByExpr::Expressions(vec![], vec![]),
cluster_by: vec![],
distribute_by: vec![],
sort_by: vec![],
@ -2402,7 +2402,7 @@ fn parse_array_subquery_expr() {
from: vec![],
lateral_views: vec![],
selection: None,
group_by: GroupByExpr::Expressions(vec![]),
group_by: GroupByExpr::Expressions(vec![], vec![]),
cluster_by: vec![],
distribute_by: vec![],
sort_by: vec![],
@ -3711,14 +3711,17 @@ fn parse_select_group_by_grouping_sets() {
"SELECT brand, size, sum(sales) FROM items_sold GROUP BY size, GROUPING SETS ((brand), (size), ())"
);
assert_eq!(
GroupByExpr::Expressions(vec![
Expr::Identifier(Ident::new("size")),
Expr::GroupingSets(vec![
vec![Expr::Identifier(Ident::new("brand"))],
vec![Expr::Identifier(Ident::new("size"))],
vec![],
]),
]),
GroupByExpr::Expressions(
vec![
Expr::Identifier(Ident::new("size")),
Expr::GroupingSets(vec![
vec![Expr::Identifier(Ident::new("brand"))],
vec![Expr::Identifier(Ident::new("size"))],
vec![],
]),
],
vec![]
),
select.group_by
);
}
@ -3729,13 +3732,16 @@ fn parse_select_group_by_rollup() {
"SELECT brand, size, sum(sales) FROM items_sold GROUP BY size, ROLLUP (brand, size)",
);
assert_eq!(
GroupByExpr::Expressions(vec![
Expr::Identifier(Ident::new("size")),
Expr::Rollup(vec![
vec![Expr::Identifier(Ident::new("brand"))],
vec![Expr::Identifier(Ident::new("size"))],
]),
]),
GroupByExpr::Expressions(
vec![
Expr::Identifier(Ident::new("size")),
Expr::Rollup(vec![
vec![Expr::Identifier(Ident::new("brand"))],
vec![Expr::Identifier(Ident::new("size"))],
]),
],
vec![]
),
select.group_by
);
}
@ -3746,13 +3752,16 @@ fn parse_select_group_by_cube() {
"SELECT brand, size, sum(sales) FROM items_sold GROUP BY size, CUBE (brand, size)",
);
assert_eq!(
GroupByExpr::Expressions(vec![
Expr::Identifier(Ident::new("size")),
Expr::Cube(vec![
vec![Expr::Identifier(Ident::new("brand"))],
vec![Expr::Identifier(Ident::new("size"))],
]),
]),
GroupByExpr::Expressions(
vec![
Expr::Identifier(Ident::new("size")),
Expr::Cube(vec![
vec![Expr::Identifier(Ident::new("brand"))],
vec![Expr::Identifier(Ident::new("size"))],
]),
],
vec![]
),
select.group_by
);
}