mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-08-31 11:17:23 +00:00
Provide LISTAGG implementation (#174)
This patch provides an initial implementation of LISTAGG[1]. Notably, this implementation deviates from ANSI SQL by allowing both WITHIN GROUP and the delimiter to be optional. We do so because Redshift SQL works this way and this approach is ultimately more flexible. Fixes #169. [1] https://modern-sql.com/feature/listagg
This commit is contained in:
parent
418b9631ce
commit
5f3c1bda01
5 changed files with 207 additions and 17 deletions
|
@ -20,6 +20,7 @@ Check https://github.com/andygrove/sqlparser-rs/commits/master for undocumented
|
|||
- Support `ON { UPDATE | DELETE } { RESTRICT | CASCADE | SET NULL | NO ACTION | SET DEFAULT }` in `FOREIGN KEY` constraints (#170) - thanks @c7hm4r!
|
||||
- Support basic forms of `CREATE SCHEMA` and `DROP SCHEMA` (#173) - thanks @alex-dukhno!
|
||||
- Support `NULLS FIRST`/`LAST` in `ORDER BY` expressions (#176) - thanks @houqp!
|
||||
- Support `LISTAGG()` (#174) - thanks @maxcountryman!
|
||||
|
||||
### Fixed
|
||||
- Report an error for unterminated string literals (#165)
|
||||
|
|
|
@ -224,6 +224,8 @@ pub enum Expr {
|
|||
/// A parenthesized subquery `(SELECT ...)`, used in expression like
|
||||
/// `SELECT (subquery) AS x` or `WHERE (subquery) = x`
|
||||
Subquery(Box<Query>),
|
||||
/// The `LISTAGG` function `SELECT LISTAGG(...) WITHIN GROUP (ORDER BY ...)`
|
||||
ListAgg(ListAgg),
|
||||
}
|
||||
|
||||
impl fmt::Display for Expr {
|
||||
|
@ -299,6 +301,7 @@ impl fmt::Display for Expr {
|
|||
}
|
||||
Expr::Exists(s) => write!(f, "EXISTS ({})", s),
|
||||
Expr::Subquery(s) => write!(f, "({})", s),
|
||||
Expr::ListAgg(listagg) => write!(f, "{}", listagg),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -850,6 +853,77 @@ impl FromStr for FileFormat {
|
|||
}
|
||||
}
|
||||
|
||||
/// A `LISTAGG` invocation `LISTAGG( [ DISTINCT ] <expr>[, <separator> ] [ON OVERFLOW <on_overflow>] ) )
|
||||
/// [ WITHIN GROUP (ORDER BY <within_group1>[, ...] ) ]`
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct ListAgg {
|
||||
pub distinct: bool,
|
||||
pub expr: Box<Expr>,
|
||||
pub separator: Option<Box<Expr>>,
|
||||
pub on_overflow: Option<ListAggOnOverflow>,
|
||||
pub within_group: Vec<OrderByExpr>,
|
||||
}
|
||||
|
||||
impl fmt::Display for ListAgg {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"LISTAGG({}{}",
|
||||
if self.distinct { "DISTINCT " } else { "" },
|
||||
self.expr
|
||||
)?;
|
||||
if let Some(separator) = &self.separator {
|
||||
write!(f, ", {}", separator)?;
|
||||
}
|
||||
if let Some(on_overflow) = &self.on_overflow {
|
||||
write!(f, "{}", on_overflow)?;
|
||||
}
|
||||
write!(f, ")")?;
|
||||
if !self.within_group.is_empty() {
|
||||
write!(
|
||||
f,
|
||||
" WITHIN GROUP (ORDER BY {})",
|
||||
display_comma_separated(&self.within_group)
|
||||
)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// The `ON OVERFLOW` clause of a LISTAGG invocation
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub enum ListAggOnOverflow {
|
||||
/// `ON OVERFLOW ERROR`
|
||||
Error,
|
||||
|
||||
/// `ON OVERFLOW TRUNCATE [ <filler> ] WITH[OUT] COUNT`
|
||||
Truncate {
|
||||
filler: Option<Box<Expr>>,
|
||||
with_count: bool,
|
||||
},
|
||||
}
|
||||
|
||||
impl fmt::Display for ListAggOnOverflow {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, " ON OVERFLOW")?;
|
||||
match self {
|
||||
ListAggOnOverflow::Error => write!(f, " ERROR"),
|
||||
ListAggOnOverflow::Truncate { filler, with_count } => {
|
||||
write!(f, " TRUNCATE")?;
|
||||
if let Some(filler) = filler {
|
||||
write!(f, " {}", filler)?;
|
||||
}
|
||||
if *with_count {
|
||||
write!(f, " WITH")?;
|
||||
} else {
|
||||
write!(f, " WITHOUT")?;
|
||||
}
|
||||
write!(f, " COUNT")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub enum ObjectType {
|
||||
Table,
|
||||
|
|
|
@ -161,6 +161,7 @@ define_keywords!(
|
|||
END_FRAME,
|
||||
END_PARTITION,
|
||||
EQUALS,
|
||||
ERROR,
|
||||
ESCAPE,
|
||||
EVERY,
|
||||
EXCEPT,
|
||||
|
@ -230,6 +231,7 @@ define_keywords!(
|
|||
LIKE,
|
||||
LIKE_REGEX,
|
||||
LIMIT,
|
||||
LISTAGG,
|
||||
LN,
|
||||
LOCAL,
|
||||
LOCALTIME,
|
||||
|
@ -279,6 +281,7 @@ define_keywords!(
|
|||
OUT,
|
||||
OUTER,
|
||||
OVER,
|
||||
OVERFLOW,
|
||||
OVERLAPS,
|
||||
OVERLAY,
|
||||
PARAMETER,
|
||||
|
|
|
@ -191,6 +191,7 @@ impl Parser {
|
|||
"EXISTS" => self.parse_exists_expr(),
|
||||
"EXTRACT" => self.parse_extract_expr(),
|
||||
"INTERVAL" => self.parse_literal_interval(),
|
||||
"LISTAGG" => self.parse_listagg_expr(),
|
||||
"NOT" => Ok(Expr::UnaryOp {
|
||||
op: UnaryOperator::Not,
|
||||
expr: Box::new(self.parse_subexpr(Self::UNARY_NOT_PREC)?),
|
||||
|
@ -272,14 +273,7 @@ impl Parser {
|
|||
|
||||
pub fn parse_function(&mut self, name: ObjectName) -> Result<Expr, ParserError> {
|
||||
self.expect_token(&Token::LParen)?;
|
||||
let all = self.parse_keyword("ALL");
|
||||
let distinct = self.parse_keyword("DISTINCT");
|
||||
if all && distinct {
|
||||
return parser_err!(format!(
|
||||
"Cannot specify both ALL and DISTINCT in function: {}",
|
||||
name.to_string(),
|
||||
));
|
||||
}
|
||||
let distinct = self.parse_all_or_distinct()?;
|
||||
let args = self.parse_optional_args()?;
|
||||
let over = if self.parse_keyword("OVER") {
|
||||
// TBD: support window names (`OVER mywin`) in place of inline specification
|
||||
|
@ -423,6 +417,66 @@ impl Parser {
|
|||
})
|
||||
}
|
||||
|
||||
/// Parse a SQL LISTAGG expression, e.g. `LISTAGG(...) WITHIN GROUP (ORDER BY ...)`.
|
||||
pub fn parse_listagg_expr(&mut self) -> Result<Expr, ParserError> {
|
||||
self.expect_token(&Token::LParen)?;
|
||||
let distinct = self.parse_all_or_distinct()?;
|
||||
let expr = Box::new(self.parse_expr()?);
|
||||
// While ANSI SQL would would require the separator, Redshift makes this optional. Here we
|
||||
// choose to make the separator optional as this provides the more general implementation.
|
||||
let separator = if self.consume_token(&Token::Comma) {
|
||||
Some(Box::new(self.parse_expr()?))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let on_overflow = if self.parse_keywords(vec!["ON", "OVERFLOW"]) {
|
||||
if self.parse_keyword("ERROR") {
|
||||
Some(ListAggOnOverflow::Error)
|
||||
} else {
|
||||
self.expect_keyword("TRUNCATE")?;
|
||||
let filler = match self.peek_token() {
|
||||
Some(Token::Word(kw)) if kw.keyword == "WITH" || kw.keyword == "WITHOUT" => {
|
||||
None
|
||||
}
|
||||
Some(Token::SingleQuotedString(_))
|
||||
| Some(Token::NationalStringLiteral(_))
|
||||
| Some(Token::HexStringLiteral(_)) => Some(Box::new(self.parse_expr()?)),
|
||||
_ => self.expected(
|
||||
"either filler, WITH, or WITHOUT in LISTAGG",
|
||||
self.peek_token(),
|
||||
)?,
|
||||
};
|
||||
let with_count = self.parse_keyword("WITH");
|
||||
if !with_count && !self.parse_keyword("WITHOUT") {
|
||||
self.expected("either WITH or WITHOUT in LISTAGG", self.peek_token())?;
|
||||
}
|
||||
self.expect_keyword("COUNT")?;
|
||||
Some(ListAggOnOverflow::Truncate { filler, with_count })
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
self.expect_token(&Token::RParen)?;
|
||||
// Once again ANSI SQL requires WITHIN GROUP, but Redshift does not. Again we choose the
|
||||
// more general implementation.
|
||||
let within_group = if self.parse_keywords(vec!["WITHIN", "GROUP"]) {
|
||||
self.expect_token(&Token::LParen)?;
|
||||
self.expect_keywords(&["ORDER", "BY"])?;
|
||||
let order_by_expr = self.parse_comma_separated(Parser::parse_order_by_expr)?;
|
||||
self.expect_token(&Token::RParen)?;
|
||||
order_by_expr
|
||||
} else {
|
||||
vec![]
|
||||
};
|
||||
Ok(Expr::ListAgg(ListAgg {
|
||||
distinct,
|
||||
expr,
|
||||
separator,
|
||||
on_overflow,
|
||||
within_group,
|
||||
}))
|
||||
}
|
||||
|
||||
// This function parses date/time fields for both the EXTRACT function-like
|
||||
// operator and interval qualifiers. EXTRACT supports a wider set of
|
||||
// date/time fields than interval qualifiers, so this function may need to
|
||||
|
@ -851,6 +905,18 @@ impl Parser {
|
|||
Ok(values)
|
||||
}
|
||||
|
||||
/// Parse either `ALL` or `DISTINCT`. Returns `true` if `DISTINCT` is parsed and results in a
|
||||
/// `ParserError` if both `ALL` and `DISTINCT` are fround.
|
||||
pub fn parse_all_or_distinct(&mut self) -> Result<bool, ParserError> {
|
||||
let all = self.parse_keyword("ALL");
|
||||
let distinct = self.parse_keyword("DISTINCT");
|
||||
if all && distinct {
|
||||
return parser_err!("Cannot specify both ALL and DISTINCT".to_string());
|
||||
} else {
|
||||
Ok(distinct)
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse a SQL CREATE statement
|
||||
pub fn parse_create(&mut self) -> Result<Statement, ParserError> {
|
||||
if self.parse_keyword("TABLE") {
|
||||
|
@ -1635,11 +1701,7 @@ impl Parser {
|
|||
/// Parse a restricted `SELECT` statement (no CTEs / `UNION` / `ORDER BY`),
|
||||
/// assuming the initial `SELECT` was already consumed
|
||||
pub fn parse_select(&mut self) -> Result<Select, ParserError> {
|
||||
let all = self.parse_keyword("ALL");
|
||||
let distinct = self.parse_keyword("DISTINCT");
|
||||
if all && distinct {
|
||||
return parser_err!("Cannot specify both ALL and DISTINCT in SELECT");
|
||||
}
|
||||
let distinct = self.parse_all_or_distinct()?;
|
||||
|
||||
let top = if self.parse_keyword("TOP") {
|
||||
Some(self.parse_top()?)
|
||||
|
|
|
@ -244,7 +244,7 @@ fn parse_select_all() {
|
|||
fn parse_select_all_distinct() {
|
||||
let result = parse_sql_statements("SELECT ALL DISTINCT name FROM customer");
|
||||
assert_eq!(
|
||||
ParserError::ParserError("Cannot specify both ALL and DISTINCT in SELECT".to_string()),
|
||||
ParserError::ParserError("Cannot specify both ALL and DISTINCT".to_string()),
|
||||
result.unwrap_err(),
|
||||
);
|
||||
}
|
||||
|
@ -357,9 +357,7 @@ fn parse_select_count_distinct() {
|
|||
let sql = "SELECT COUNT(ALL DISTINCT + x) FROM customer";
|
||||
let res = parse_sql_statements(sql);
|
||||
assert_eq!(
|
||||
ParserError::ParserError(
|
||||
"Cannot specify both ALL and DISTINCT in function: COUNT".to_string()
|
||||
),
|
||||
ParserError::ParserError("Cannot specify both ALL and DISTINCT".to_string()),
|
||||
res.unwrap_err()
|
||||
);
|
||||
}
|
||||
|
@ -914,6 +912,58 @@ fn parse_extract() {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
fn parse_listagg() {
    // Full form: DISTINCT, separator, ON OVERFLOW TRUNCATE with filler, WITHIN GROUP.
    let sql = "SELECT LISTAGG(DISTINCT dateid, ', ' ON OVERFLOW TRUNCATE '%' WITHOUT COUNT) \
               WITHIN GROUP (ORDER BY id, username)";
    let select = verified_only_select(sql);

    // Reduced forms must round-trip through parse + display unchanged.
    let round_trips = [
        "SELECT LISTAGG(sellerid) WITHIN GROUP (ORDER BY dateid)",
        "SELECT LISTAGG(dateid)",
        "SELECT LISTAGG(DISTINCT dateid)",
        "SELECT LISTAGG(dateid ON OVERFLOW ERROR)",
        "SELECT LISTAGG(dateid ON OVERFLOW TRUNCATE N'...' WITH COUNT)",
        "SELECT LISTAGG(dateid ON OVERFLOW TRUNCATE X'deadbeef' WITH COUNT)",
    ];
    for stmt in round_trips.iter() {
        verified_stmt(*stmt);
    }

    // Builds a plain `ORDER BY <column>` item with no direction or NULLS ordering.
    let order_by = |column: &str| OrderByExpr {
        expr: Expr::Identifier(Ident {
            value: column.to_string(),
            quote_style: None,
        }),
        asc: None,
        nulls_first: None,
    };

    let expected = Expr::ListAgg(ListAgg {
        distinct: true,
        expr: Box::new(Expr::Identifier(Ident::new("dateid"))),
        separator: Some(Box::new(Expr::Value(Value::SingleQuotedString(
            ", ".to_string(),
        )))),
        on_overflow: Some(ListAggOnOverflow::Truncate {
            filler: Some(Box::new(Expr::Value(Value::SingleQuotedString(
                "%".to_string(),
            )))),
            with_count: false,
        }),
        within_group: vec![order_by("id"), order_by("username")],
    });
    assert_eq!(&expected, expr_from_projection(only(&select.projection)));
}
|
||||
|
||||
#[test]
|
||||
fn parse_create_table() {
|
||||
let sql = "CREATE TABLE uk_cities (\
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue