mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-08-22 15:04:04 +00:00
ISSUE-1088: Fix array_agg wildcard behavior (#1093)
Co-authored-by: Andrew Repp <arepp@cloudflare.com>
This commit is contained in:
parent
398a81029e
commit
70764a17e9
4 changed files with 111 additions and 36 deletions
|
@ -380,7 +380,10 @@ pub enum Expr {
|
|||
right: Box<Expr>,
|
||||
},
|
||||
/// CompositeAccess (postgres) eg: SELECT (information_schema._pg_expandarray(array['i','i'])).n
|
||||
CompositeAccess { expr: Box<Expr>, key: Ident },
|
||||
CompositeAccess {
|
||||
expr: Box<Expr>,
|
||||
key: Ident,
|
||||
},
|
||||
/// `IS FALSE` operator
|
||||
IsFalse(Box<Expr>),
|
||||
/// `IS NOT FALSE` operator
|
||||
|
@ -474,7 +477,10 @@ pub enum Expr {
|
|||
right: Box<Expr>,
|
||||
},
|
||||
/// Unary operation e.g. `NOT foo`
|
||||
UnaryOp { op: UnaryOperator, expr: Box<Expr> },
|
||||
UnaryOp {
|
||||
op: UnaryOperator,
|
||||
expr: Box<Expr>,
|
||||
},
|
||||
/// CONVERT a value to a different data type or character encoding. e.g. `CONVERT(foo USING utf8mb4)`
|
||||
Convert {
|
||||
/// The expression to convert
|
||||
|
@ -545,7 +551,10 @@ pub enum Expr {
|
|||
/// ```sql
|
||||
/// POSITION(<expr> in <expr>)
|
||||
/// ```
|
||||
Position { expr: Box<Expr>, r#in: Box<Expr> },
|
||||
Position {
|
||||
expr: Box<Expr>,
|
||||
r#in: Box<Expr>,
|
||||
},
|
||||
/// ```sql
|
||||
/// SUBSTRING(<expr> [FROM <expr>] [FOR <expr>])
|
||||
/// ```
|
||||
|
@ -589,20 +598,32 @@ pub enum Expr {
|
|||
/// A literal value, such as string, number, date or NULL
|
||||
Value(Value),
|
||||
/// <https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html>
|
||||
IntroducedString { introducer: String, value: Value },
|
||||
IntroducedString {
|
||||
introducer: String,
|
||||
value: Value,
|
||||
},
|
||||
/// A constant of form `<data_type> 'value'`.
|
||||
/// This can represent ANSI SQL `DATE`, `TIME`, and `TIMESTAMP` literals (such as `DATE '2020-01-01'`),
|
||||
/// as well as constants of other types (a non-standard PostgreSQL extension).
|
||||
TypedString { data_type: DataType, value: String },
|
||||
TypedString {
|
||||
data_type: DataType,
|
||||
value: String,
|
||||
},
|
||||
/// Access a map-like object by field (e.g. `column['field']` or `column[4]`
|
||||
/// Note that depending on the dialect, struct like accesses may be
|
||||
/// parsed as [`ArrayIndex`](Self::ArrayIndex) or [`MapAccess`](Self::MapAccess)
|
||||
/// <https://clickhouse.com/docs/en/sql-reference/data-types/map/>
|
||||
MapAccess { column: Box<Expr>, keys: Vec<Expr> },
|
||||
MapAccess {
|
||||
column: Box<Expr>,
|
||||
keys: Vec<Expr>,
|
||||
},
|
||||
/// Scalar function call e.g. `LEFT(foo, 5)`
|
||||
Function(Function),
|
||||
/// Aggregate function with filter
|
||||
AggregateExpressionWithFilter { expr: Box<Expr>, filter: Box<Expr> },
|
||||
AggregateExpressionWithFilter {
|
||||
expr: Box<Expr>,
|
||||
filter: Box<Expr>,
|
||||
},
|
||||
/// `CASE [<operand>] WHEN <condition> THEN <result> ... [ELSE <result>] END`
|
||||
///
|
||||
/// Note we only recognize a complete single expression as `<condition>`,
|
||||
|
@ -616,7 +637,10 @@ pub enum Expr {
|
|||
},
|
||||
/// An exists expression `[ NOT ] EXISTS(SELECT ...)`, used in expressions like
|
||||
/// `WHERE [ NOT ] EXISTS (SELECT ...)`.
|
||||
Exists { subquery: Box<Query>, negated: bool },
|
||||
Exists {
|
||||
subquery: Box<Query>,
|
||||
negated: bool,
|
||||
},
|
||||
/// A parenthesized subquery `(SELECT ...)`, used in expression like
|
||||
/// `SELECT (subquery) AS x` or `WHERE (subquery) = x`
|
||||
Subquery(Box<Query>),
|
||||
|
@ -653,9 +677,15 @@ pub enum Expr {
|
|||
/// 1 AS A
|
||||
/// ```
|
||||
/// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
|
||||
Named { expr: Box<Expr>, name: Ident },
|
||||
Named {
|
||||
expr: Box<Expr>,
|
||||
name: Ident,
|
||||
},
|
||||
/// An array index expression e.g. `(ARRAY[1, 2])[1]` or `(current_schemas(FALSE))[1]`
|
||||
ArrayIndex { obj: Box<Expr>, indexes: Vec<Expr> },
|
||||
ArrayIndex {
|
||||
obj: Box<Expr>,
|
||||
indexes: Vec<Expr>,
|
||||
},
|
||||
/// An array expression e.g. `ARRAY[1, 2]`
|
||||
Array(Array),
|
||||
/// An interval expression e.g. `INTERVAL '1' YEAR`
|
||||
|
@ -678,6 +708,10 @@ pub enum Expr {
|
|||
/// `<search modifier>`
|
||||
opt_search_modifier: Option<SearchModifier>,
|
||||
},
|
||||
Wildcard,
|
||||
/// Qualified wildcard, e.g. `alias.*` or `schema.table.*`.
|
||||
/// (Same caveats apply to `QualifiedWildcard` as to `Wildcard`.)
|
||||
QualifiedWildcard(ObjectName),
|
||||
}
|
||||
|
||||
impl fmt::Display for CastFormat {
|
||||
|
@ -704,6 +738,8 @@ impl fmt::Display for Expr {
|
|||
}
|
||||
Ok(())
|
||||
}
|
||||
Expr::Wildcard => f.write_str("*"),
|
||||
Expr::QualifiedWildcard(prefix) => write!(f, "{}.*", prefix),
|
||||
Expr::CompoundIdentifier(s) => write!(f, "{}", display_separated(s, ".")),
|
||||
Expr::IsTrue(ast) => write!(f, "{ast} IS TRUE"),
|
||||
Expr::IsNotTrue(ast) => write!(f, "{ast} IS NOT TRUE"),
|
||||
|
@ -4188,6 +4224,16 @@ pub enum FunctionArgExpr {
|
|||
Wildcard,
|
||||
}
|
||||
|
||||
impl From<Expr> for FunctionArgExpr {
|
||||
fn from(wildcard_expr: Expr) -> Self {
|
||||
match wildcard_expr {
|
||||
Expr::QualifiedWildcard(prefix) => Self::QualifiedWildcard(prefix),
|
||||
Expr::Wildcard => Self::Wildcard,
|
||||
expr => Self::Expr(expr),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for FunctionArgExpr {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
|
|
|
@ -161,16 +161,6 @@ pub enum WildcardExpr {
|
|||
Wildcard,
|
||||
}
|
||||
|
||||
impl From<WildcardExpr> for FunctionArgExpr {
|
||||
fn from(wildcard_expr: WildcardExpr) -> Self {
|
||||
match wildcard_expr {
|
||||
WildcardExpr::Expr(expr) => Self::Expr(expr),
|
||||
WildcardExpr::QualifiedWildcard(prefix) => Self::QualifiedWildcard(prefix),
|
||||
WildcardExpr::Wildcard => Self::Wildcard,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<TokenizerError> for ParserError {
|
||||
fn from(e: TokenizerError) -> Self {
|
||||
ParserError::TokenizerError(e.to_string())
|
||||
|
@ -734,7 +724,7 @@ impl<'a> Parser<'a> {
|
|||
}
|
||||
|
||||
/// Parse a new expression including wildcard & qualified wildcard
|
||||
pub fn parse_wildcard_expr(&mut self) -> Result<WildcardExpr, ParserError> {
|
||||
pub fn parse_wildcard_expr(&mut self) -> Result<Expr, ParserError> {
|
||||
let index = self.index;
|
||||
|
||||
let next_token = self.next_token();
|
||||
|
@ -756,7 +746,7 @@ impl<'a> Parser<'a> {
|
|||
id_parts.push(Ident::with_quote('\'', s))
|
||||
}
|
||||
Token::Mul => {
|
||||
return Ok(WildcardExpr::QualifiedWildcard(ObjectName(id_parts)));
|
||||
return Ok(Expr::QualifiedWildcard(ObjectName(id_parts)));
|
||||
}
|
||||
_ => {
|
||||
return self
|
||||
|
@ -767,13 +757,13 @@ impl<'a> Parser<'a> {
|
|||
}
|
||||
}
|
||||
Token::Mul => {
|
||||
return Ok(WildcardExpr::Wildcard);
|
||||
return Ok(Expr::Wildcard);
|
||||
}
|
||||
_ => (),
|
||||
};
|
||||
|
||||
self.index = index;
|
||||
self.parse_expr().map(WildcardExpr::Expr)
|
||||
self.parse_expr()
|
||||
}
|
||||
|
||||
/// Parse a new expression
|
||||
|
@ -969,10 +959,22 @@ impl<'a> Parser<'a> {
|
|||
_ => match self.peek_token().token {
|
||||
Token::LParen | Token::Period => {
|
||||
let mut id_parts: Vec<Ident> = vec![w.to_ident()];
|
||||
let mut ends_with_wildcard = false;
|
||||
while self.consume_token(&Token::Period) {
|
||||
let next_token = self.next_token();
|
||||
match next_token.token {
|
||||
Token::Word(w) => id_parts.push(w.to_ident()),
|
||||
Token::Mul => {
|
||||
// Postgres explicitly allows funcnm(tablenm.*) and the
|
||||
// function array_agg traverses this control flow
|
||||
if dialect_of!(self is PostgreSqlDialect) {
|
||||
ends_with_wildcard = true;
|
||||
break;
|
||||
} else {
|
||||
return self
|
||||
.expected("an identifier after '.'", next_token);
|
||||
}
|
||||
}
|
||||
Token::SingleQuotedString(s) => {
|
||||
id_parts.push(Ident::with_quote('\'', s))
|
||||
}
|
||||
|
@ -983,7 +985,9 @@ impl<'a> Parser<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
if self.consume_token(&Token::LParen) {
|
||||
if ends_with_wildcard {
|
||||
Ok(Expr::QualifiedWildcard(ObjectName(id_parts)))
|
||||
} else if self.consume_token(&Token::LParen) {
|
||||
self.prev_token();
|
||||
self.parse_function(ObjectName(id_parts))
|
||||
} else {
|
||||
|
@ -8051,9 +8055,9 @@ impl<'a> Parser<'a> {
|
|||
let subquery = self.parse_query()?;
|
||||
self.expect_token(&Token::RParen)?;
|
||||
return Ok((
|
||||
vec![FunctionArg::Unnamed(FunctionArgExpr::from(
|
||||
WildcardExpr::Expr(Expr::Subquery(Box::new(subquery))),
|
||||
))],
|
||||
vec![FunctionArg::Unnamed(FunctionArgExpr::from(Expr::Subquery(
|
||||
Box::new(subquery),
|
||||
)))],
|
||||
vec![],
|
||||
));
|
||||
}
|
||||
|
@ -8072,7 +8076,14 @@ impl<'a> Parser<'a> {
|
|||
/// Parse a comma-delimited list of projections after SELECT
|
||||
pub fn parse_select_item(&mut self) -> Result<SelectItem, ParserError> {
|
||||
match self.parse_wildcard_expr()? {
|
||||
WildcardExpr::Expr(expr) => {
|
||||
Expr::QualifiedWildcard(prefix) => Ok(SelectItem::QualifiedWildcard(
|
||||
prefix,
|
||||
self.parse_wildcard_additional_options()?,
|
||||
)),
|
||||
Expr::Wildcard => Ok(SelectItem::Wildcard(
|
||||
self.parse_wildcard_additional_options()?,
|
||||
)),
|
||||
expr => {
|
||||
let expr: Expr = if self.dialect.supports_filter_during_aggregation()
|
||||
&& self.parse_keyword(Keyword::FILTER)
|
||||
{
|
||||
|
@ -8097,13 +8108,6 @@ impl<'a> Parser<'a> {
|
|||
None => SelectItem::UnnamedExpr(expr),
|
||||
})
|
||||
}
|
||||
WildcardExpr::QualifiedWildcard(prefix) => Ok(SelectItem::QualifiedWildcard(
|
||||
prefix,
|
||||
self.parse_wildcard_additional_options()?,
|
||||
)),
|
||||
WildcardExpr::Wildcard => Ok(SelectItem::Wildcard(
|
||||
self.parse_wildcard_additional_options()?,
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -2403,6 +2403,12 @@ fn parse_array_agg_func() {
|
|||
] {
|
||||
supported_dialects.verified_stmt(sql);
|
||||
}
|
||||
|
||||
// follows special-case array_agg code path. fails in everything except postgres
|
||||
let wc_sql = "SELECT ARRAY_AGG(sections_tbl.*) AS sections FROM sections_tbl";
|
||||
all_dialects_but_pg()
|
||||
.parse_sql_statements(wc_sql)
|
||||
.expect_err("should have failed");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
@ -3800,3 +3800,22 @@ fn test_simple_insert_with_quoted_alias() {
|
|||
}
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_array_agg() {
|
||||
// follows general function with wildcard code path
|
||||
let sql = r#"SELECT GREATEST(sections_tbl.*) AS sections FROM sections_tbl"#;
|
||||
pg().verified_stmt(sql);
|
||||
|
||||
// follows special-case array_agg code path
|
||||
let sql2 = "SELECT ARRAY_AGG(sections_tbl.*) AS sections FROM sections_tbl";
|
||||
pg().verified_stmt(sql2);
|
||||
|
||||
// handles multi-part identifier with general code path
|
||||
let sql3 = "SELECT GREATEST(my_schema.sections_tbl.*) AS sections FROM sections_tbl";
|
||||
pg().verified_stmt(sql3);
|
||||
|
||||
// handles multi-part identifier with array_agg code path
|
||||
let sql4 = "SELECT ARRAY_AGG(my_schema.sections_tbl.*) AS sections FROM sections_tbl";
|
||||
pg().verified_stmt(sql4);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue