mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-10-09 21:42:05 +00:00
ISSUE-1088: Fix array_agg wildcard behavior (#1093)
Co-authored-by: Andrew Repp <arepp@cloudflare.com>
This commit is contained in:
parent
398a81029e
commit
70764a17e9
4 changed files with 111 additions and 36 deletions
|
@ -380,7 +380,10 @@ pub enum Expr {
|
||||||
right: Box<Expr>,
|
right: Box<Expr>,
|
||||||
},
|
},
|
||||||
/// CompositeAccess (postgres) eg: SELECT (information_schema._pg_expandarray(array['i','i'])).n
|
/// CompositeAccess (postgres) eg: SELECT (information_schema._pg_expandarray(array['i','i'])).n
|
||||||
CompositeAccess { expr: Box<Expr>, key: Ident },
|
CompositeAccess {
|
||||||
|
expr: Box<Expr>,
|
||||||
|
key: Ident,
|
||||||
|
},
|
||||||
/// `IS FALSE` operator
|
/// `IS FALSE` operator
|
||||||
IsFalse(Box<Expr>),
|
IsFalse(Box<Expr>),
|
||||||
/// `IS NOT FALSE` operator
|
/// `IS NOT FALSE` operator
|
||||||
|
@ -474,7 +477,10 @@ pub enum Expr {
|
||||||
right: Box<Expr>,
|
right: Box<Expr>,
|
||||||
},
|
},
|
||||||
/// Unary operation e.g. `NOT foo`
|
/// Unary operation e.g. `NOT foo`
|
||||||
UnaryOp { op: UnaryOperator, expr: Box<Expr> },
|
UnaryOp {
|
||||||
|
op: UnaryOperator,
|
||||||
|
expr: Box<Expr>,
|
||||||
|
},
|
||||||
/// CONVERT a value to a different data type or character encoding. e.g. `CONVERT(foo USING utf8mb4)`
|
/// CONVERT a value to a different data type or character encoding. e.g. `CONVERT(foo USING utf8mb4)`
|
||||||
Convert {
|
Convert {
|
||||||
/// The expression to convert
|
/// The expression to convert
|
||||||
|
@ -545,7 +551,10 @@ pub enum Expr {
|
||||||
/// ```sql
|
/// ```sql
|
||||||
/// POSITION(<expr> in <expr>)
|
/// POSITION(<expr> in <expr>)
|
||||||
/// ```
|
/// ```
|
||||||
Position { expr: Box<Expr>, r#in: Box<Expr> },
|
Position {
|
||||||
|
expr: Box<Expr>,
|
||||||
|
r#in: Box<Expr>,
|
||||||
|
},
|
||||||
/// ```sql
|
/// ```sql
|
||||||
/// SUBSTRING(<expr> [FROM <expr>] [FOR <expr>])
|
/// SUBSTRING(<expr> [FROM <expr>] [FOR <expr>])
|
||||||
/// ```
|
/// ```
|
||||||
|
@ -589,20 +598,32 @@ pub enum Expr {
|
||||||
/// A literal value, such as string, number, date or NULL
|
/// A literal value, such as string, number, date or NULL
|
||||||
Value(Value),
|
Value(Value),
|
||||||
/// <https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html>
|
/// <https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html>
|
||||||
IntroducedString { introducer: String, value: Value },
|
IntroducedString {
|
||||||
|
introducer: String,
|
||||||
|
value: Value,
|
||||||
|
},
|
||||||
/// A constant of form `<data_type> 'value'`.
|
/// A constant of form `<data_type> 'value'`.
|
||||||
/// This can represent ANSI SQL `DATE`, `TIME`, and `TIMESTAMP` literals (such as `DATE '2020-01-01'`),
|
/// This can represent ANSI SQL `DATE`, `TIME`, and `TIMESTAMP` literals (such as `DATE '2020-01-01'`),
|
||||||
/// as well as constants of other types (a non-standard PostgreSQL extension).
|
/// as well as constants of other types (a non-standard PostgreSQL extension).
|
||||||
TypedString { data_type: DataType, value: String },
|
TypedString {
|
||||||
|
data_type: DataType,
|
||||||
|
value: String,
|
||||||
|
},
|
||||||
/// Access a map-like object by field (e.g. `column['field']` or `column[4]`
|
/// Access a map-like object by field (e.g. `column['field']` or `column[4]`
|
||||||
/// Note that depending on the dialect, struct like accesses may be
|
/// Note that depending on the dialect, struct like accesses may be
|
||||||
/// parsed as [`ArrayIndex`](Self::ArrayIndex) or [`MapAccess`](Self::MapAccess)
|
/// parsed as [`ArrayIndex`](Self::ArrayIndex) or [`MapAccess`](Self::MapAccess)
|
||||||
/// <https://clickhouse.com/docs/en/sql-reference/data-types/map/>
|
/// <https://clickhouse.com/docs/en/sql-reference/data-types/map/>
|
||||||
MapAccess { column: Box<Expr>, keys: Vec<Expr> },
|
MapAccess {
|
||||||
|
column: Box<Expr>,
|
||||||
|
keys: Vec<Expr>,
|
||||||
|
},
|
||||||
/// Scalar function call e.g. `LEFT(foo, 5)`
|
/// Scalar function call e.g. `LEFT(foo, 5)`
|
||||||
Function(Function),
|
Function(Function),
|
||||||
/// Aggregate function with filter
|
/// Aggregate function with filter
|
||||||
AggregateExpressionWithFilter { expr: Box<Expr>, filter: Box<Expr> },
|
AggregateExpressionWithFilter {
|
||||||
|
expr: Box<Expr>,
|
||||||
|
filter: Box<Expr>,
|
||||||
|
},
|
||||||
/// `CASE [<operand>] WHEN <condition> THEN <result> ... [ELSE <result>] END`
|
/// `CASE [<operand>] WHEN <condition> THEN <result> ... [ELSE <result>] END`
|
||||||
///
|
///
|
||||||
/// Note we only recognize a complete single expression as `<condition>`,
|
/// Note we only recognize a complete single expression as `<condition>`,
|
||||||
|
@ -616,7 +637,10 @@ pub enum Expr {
|
||||||
},
|
},
|
||||||
/// An exists expression `[ NOT ] EXISTS(SELECT ...)`, used in expressions like
|
/// An exists expression `[ NOT ] EXISTS(SELECT ...)`, used in expressions like
|
||||||
/// `WHERE [ NOT ] EXISTS (SELECT ...)`.
|
/// `WHERE [ NOT ] EXISTS (SELECT ...)`.
|
||||||
Exists { subquery: Box<Query>, negated: bool },
|
Exists {
|
||||||
|
subquery: Box<Query>,
|
||||||
|
negated: bool,
|
||||||
|
},
|
||||||
/// A parenthesized subquery `(SELECT ...)`, used in expression like
|
/// A parenthesized subquery `(SELECT ...)`, used in expression like
|
||||||
/// `SELECT (subquery) AS x` or `WHERE (subquery) = x`
|
/// `SELECT (subquery) AS x` or `WHERE (subquery) = x`
|
||||||
Subquery(Box<Query>),
|
Subquery(Box<Query>),
|
||||||
|
@ -653,9 +677,15 @@ pub enum Expr {
|
||||||
/// 1 AS A
|
/// 1 AS A
|
||||||
/// ```
|
/// ```
|
||||||
/// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
|
/// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
|
||||||
Named { expr: Box<Expr>, name: Ident },
|
Named {
|
||||||
|
expr: Box<Expr>,
|
||||||
|
name: Ident,
|
||||||
|
},
|
||||||
/// An array index expression e.g. `(ARRAY[1, 2])[1]` or `(current_schemas(FALSE))[1]`
|
/// An array index expression e.g. `(ARRAY[1, 2])[1]` or `(current_schemas(FALSE))[1]`
|
||||||
ArrayIndex { obj: Box<Expr>, indexes: Vec<Expr> },
|
ArrayIndex {
|
||||||
|
obj: Box<Expr>,
|
||||||
|
indexes: Vec<Expr>,
|
||||||
|
},
|
||||||
/// An array expression e.g. `ARRAY[1, 2]`
|
/// An array expression e.g. `ARRAY[1, 2]`
|
||||||
Array(Array),
|
Array(Array),
|
||||||
/// An interval expression e.g. `INTERVAL '1' YEAR`
|
/// An interval expression e.g. `INTERVAL '1' YEAR`
|
||||||
|
@ -678,6 +708,10 @@ pub enum Expr {
|
||||||
/// `<search modifier>`
|
/// `<search modifier>`
|
||||||
opt_search_modifier: Option<SearchModifier>,
|
opt_search_modifier: Option<SearchModifier>,
|
||||||
},
|
},
|
||||||
|
Wildcard,
|
||||||
|
/// Qualified wildcard, e.g. `alias.*` or `schema.table.*`.
|
||||||
|
/// (Same caveats apply to `QualifiedWildcard` as to `Wildcard`.)
|
||||||
|
QualifiedWildcard(ObjectName),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Display for CastFormat {
|
impl fmt::Display for CastFormat {
|
||||||
|
@ -704,6 +738,8 @@ impl fmt::Display for Expr {
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
Expr::Wildcard => f.write_str("*"),
|
||||||
|
Expr::QualifiedWildcard(prefix) => write!(f, "{}.*", prefix),
|
||||||
Expr::CompoundIdentifier(s) => write!(f, "{}", display_separated(s, ".")),
|
Expr::CompoundIdentifier(s) => write!(f, "{}", display_separated(s, ".")),
|
||||||
Expr::IsTrue(ast) => write!(f, "{ast} IS TRUE"),
|
Expr::IsTrue(ast) => write!(f, "{ast} IS TRUE"),
|
||||||
Expr::IsNotTrue(ast) => write!(f, "{ast} IS NOT TRUE"),
|
Expr::IsNotTrue(ast) => write!(f, "{ast} IS NOT TRUE"),
|
||||||
|
@ -4188,6 +4224,16 @@ pub enum FunctionArgExpr {
|
||||||
Wildcard,
|
Wildcard,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl From<Expr> for FunctionArgExpr {
|
||||||
|
fn from(wildcard_expr: Expr) -> Self {
|
||||||
|
match wildcard_expr {
|
||||||
|
Expr::QualifiedWildcard(prefix) => Self::QualifiedWildcard(prefix),
|
||||||
|
Expr::Wildcard => Self::Wildcard,
|
||||||
|
expr => Self::Expr(expr),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl fmt::Display for FunctionArgExpr {
|
impl fmt::Display for FunctionArgExpr {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
match self {
|
match self {
|
||||||
|
|
|
@ -161,16 +161,6 @@ pub enum WildcardExpr {
|
||||||
Wildcard,
|
Wildcard,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<WildcardExpr> for FunctionArgExpr {
|
|
||||||
fn from(wildcard_expr: WildcardExpr) -> Self {
|
|
||||||
match wildcard_expr {
|
|
||||||
WildcardExpr::Expr(expr) => Self::Expr(expr),
|
|
||||||
WildcardExpr::QualifiedWildcard(prefix) => Self::QualifiedWildcard(prefix),
|
|
||||||
WildcardExpr::Wildcard => Self::Wildcard,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<TokenizerError> for ParserError {
|
impl From<TokenizerError> for ParserError {
|
||||||
fn from(e: TokenizerError) -> Self {
|
fn from(e: TokenizerError) -> Self {
|
||||||
ParserError::TokenizerError(e.to_string())
|
ParserError::TokenizerError(e.to_string())
|
||||||
|
@ -734,7 +724,7 @@ impl<'a> Parser<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse a new expression including wildcard & qualified wildcard
|
/// Parse a new expression including wildcard & qualified wildcard
|
||||||
pub fn parse_wildcard_expr(&mut self) -> Result<WildcardExpr, ParserError> {
|
pub fn parse_wildcard_expr(&mut self) -> Result<Expr, ParserError> {
|
||||||
let index = self.index;
|
let index = self.index;
|
||||||
|
|
||||||
let next_token = self.next_token();
|
let next_token = self.next_token();
|
||||||
|
@ -756,7 +746,7 @@ impl<'a> Parser<'a> {
|
||||||
id_parts.push(Ident::with_quote('\'', s))
|
id_parts.push(Ident::with_quote('\'', s))
|
||||||
}
|
}
|
||||||
Token::Mul => {
|
Token::Mul => {
|
||||||
return Ok(WildcardExpr::QualifiedWildcard(ObjectName(id_parts)));
|
return Ok(Expr::QualifiedWildcard(ObjectName(id_parts)));
|
||||||
}
|
}
|
||||||
_ => {
|
_ => {
|
||||||
return self
|
return self
|
||||||
|
@ -767,13 +757,13 @@ impl<'a> Parser<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Token::Mul => {
|
Token::Mul => {
|
||||||
return Ok(WildcardExpr::Wildcard);
|
return Ok(Expr::Wildcard);
|
||||||
}
|
}
|
||||||
_ => (),
|
_ => (),
|
||||||
};
|
};
|
||||||
|
|
||||||
self.index = index;
|
self.index = index;
|
||||||
self.parse_expr().map(WildcardExpr::Expr)
|
self.parse_expr()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse a new expression
|
/// Parse a new expression
|
||||||
|
@ -969,10 +959,22 @@ impl<'a> Parser<'a> {
|
||||||
_ => match self.peek_token().token {
|
_ => match self.peek_token().token {
|
||||||
Token::LParen | Token::Period => {
|
Token::LParen | Token::Period => {
|
||||||
let mut id_parts: Vec<Ident> = vec![w.to_ident()];
|
let mut id_parts: Vec<Ident> = vec![w.to_ident()];
|
||||||
|
let mut ends_with_wildcard = false;
|
||||||
while self.consume_token(&Token::Period) {
|
while self.consume_token(&Token::Period) {
|
||||||
let next_token = self.next_token();
|
let next_token = self.next_token();
|
||||||
match next_token.token {
|
match next_token.token {
|
||||||
Token::Word(w) => id_parts.push(w.to_ident()),
|
Token::Word(w) => id_parts.push(w.to_ident()),
|
||||||
|
Token::Mul => {
|
||||||
|
// Postgres explicitly allows funcnm(tablenm.*) and the
|
||||||
|
// function array_agg traverses this control flow
|
||||||
|
if dialect_of!(self is PostgreSqlDialect) {
|
||||||
|
ends_with_wildcard = true;
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
return self
|
||||||
|
.expected("an identifier after '.'", next_token);
|
||||||
|
}
|
||||||
|
}
|
||||||
Token::SingleQuotedString(s) => {
|
Token::SingleQuotedString(s) => {
|
||||||
id_parts.push(Ident::with_quote('\'', s))
|
id_parts.push(Ident::with_quote('\'', s))
|
||||||
}
|
}
|
||||||
|
@ -983,7 +985,9 @@ impl<'a> Parser<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if self.consume_token(&Token::LParen) {
|
if ends_with_wildcard {
|
||||||
|
Ok(Expr::QualifiedWildcard(ObjectName(id_parts)))
|
||||||
|
} else if self.consume_token(&Token::LParen) {
|
||||||
self.prev_token();
|
self.prev_token();
|
||||||
self.parse_function(ObjectName(id_parts))
|
self.parse_function(ObjectName(id_parts))
|
||||||
} else {
|
} else {
|
||||||
|
@ -8051,9 +8055,9 @@ impl<'a> Parser<'a> {
|
||||||
let subquery = self.parse_query()?;
|
let subquery = self.parse_query()?;
|
||||||
self.expect_token(&Token::RParen)?;
|
self.expect_token(&Token::RParen)?;
|
||||||
return Ok((
|
return Ok((
|
||||||
vec![FunctionArg::Unnamed(FunctionArgExpr::from(
|
vec![FunctionArg::Unnamed(FunctionArgExpr::from(Expr::Subquery(
|
||||||
WildcardExpr::Expr(Expr::Subquery(Box::new(subquery))),
|
Box::new(subquery),
|
||||||
))],
|
)))],
|
||||||
vec![],
|
vec![],
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
@ -8072,7 +8076,14 @@ impl<'a> Parser<'a> {
|
||||||
/// Parse a comma-delimited list of projections after SELECT
|
/// Parse a comma-delimited list of projections after SELECT
|
||||||
pub fn parse_select_item(&mut self) -> Result<SelectItem, ParserError> {
|
pub fn parse_select_item(&mut self) -> Result<SelectItem, ParserError> {
|
||||||
match self.parse_wildcard_expr()? {
|
match self.parse_wildcard_expr()? {
|
||||||
WildcardExpr::Expr(expr) => {
|
Expr::QualifiedWildcard(prefix) => Ok(SelectItem::QualifiedWildcard(
|
||||||
|
prefix,
|
||||||
|
self.parse_wildcard_additional_options()?,
|
||||||
|
)),
|
||||||
|
Expr::Wildcard => Ok(SelectItem::Wildcard(
|
||||||
|
self.parse_wildcard_additional_options()?,
|
||||||
|
)),
|
||||||
|
expr => {
|
||||||
let expr: Expr = if self.dialect.supports_filter_during_aggregation()
|
let expr: Expr = if self.dialect.supports_filter_during_aggregation()
|
||||||
&& self.parse_keyword(Keyword::FILTER)
|
&& self.parse_keyword(Keyword::FILTER)
|
||||||
{
|
{
|
||||||
|
@ -8097,13 +8108,6 @@ impl<'a> Parser<'a> {
|
||||||
None => SelectItem::UnnamedExpr(expr),
|
None => SelectItem::UnnamedExpr(expr),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
WildcardExpr::QualifiedWildcard(prefix) => Ok(SelectItem::QualifiedWildcard(
|
|
||||||
prefix,
|
|
||||||
self.parse_wildcard_additional_options()?,
|
|
||||||
)),
|
|
||||||
WildcardExpr::Wildcard => Ok(SelectItem::Wildcard(
|
|
||||||
self.parse_wildcard_additional_options()?,
|
|
||||||
)),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2403,6 +2403,12 @@ fn parse_array_agg_func() {
|
||||||
] {
|
] {
|
||||||
supported_dialects.verified_stmt(sql);
|
supported_dialects.verified_stmt(sql);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// follows special-case array_agg code path. fails in everything except postgres
|
||||||
|
let wc_sql = "SELECT ARRAY_AGG(sections_tbl.*) AS sections FROM sections_tbl";
|
||||||
|
all_dialects_but_pg()
|
||||||
|
.parse_sql_statements(wc_sql)
|
||||||
|
.expect_err("should have failed");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
|
@ -3800,3 +3800,22 @@ fn test_simple_insert_with_quoted_alias() {
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_array_agg() {
|
||||||
|
// follows general function with wildcard code path
|
||||||
|
let sql = r#"SELECT GREATEST(sections_tbl.*) AS sections FROM sections_tbl"#;
|
||||||
|
pg().verified_stmt(sql);
|
||||||
|
|
||||||
|
// follows special-case array_agg code path
|
||||||
|
let sql2 = "SELECT ARRAY_AGG(sections_tbl.*) AS sections FROM sections_tbl";
|
||||||
|
pg().verified_stmt(sql2);
|
||||||
|
|
||||||
|
// handles multi-part identifier with general code path
|
||||||
|
let sql3 = "SELECT GREATEST(my_schema.sections_tbl.*) AS sections FROM sections_tbl";
|
||||||
|
pg().verified_stmt(sql3);
|
||||||
|
|
||||||
|
// handles multi-part identifier with array_agg code path
|
||||||
|
let sql4 = "SELECT ARRAY_AGG(my_schema.sections_tbl.*) AS sections FROM sections_tbl";
|
||||||
|
pg().verified_stmt(sql4);
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue