ISSUE-1088: Fix array_agg wildcard behavior (#1093)

Co-authored-by: Andrew Repp <arepp@cloudflare.com>
This commit is contained in:
Andrew Repp 2024-01-24 06:42:39 -06:00 committed by GitHub
parent 398a81029e
commit 70764a17e9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 111 additions and 36 deletions

View file

@ -380,7 +380,10 @@ pub enum Expr {
right: Box<Expr>,
},
/// CompositeAccess (postgres) eg: SELECT (information_schema._pg_expandarray(array['i','i'])).n
CompositeAccess { expr: Box<Expr>, key: Ident },
CompositeAccess {
expr: Box<Expr>,
key: Ident,
},
/// `IS FALSE` operator
IsFalse(Box<Expr>),
/// `IS NOT FALSE` operator
@ -474,7 +477,10 @@ pub enum Expr {
right: Box<Expr>,
},
/// Unary operation e.g. `NOT foo`
UnaryOp { op: UnaryOperator, expr: Box<Expr> },
UnaryOp {
op: UnaryOperator,
expr: Box<Expr>,
},
/// CONVERT a value to a different data type or character encoding. e.g. `CONVERT(foo USING utf8mb4)`
Convert {
/// The expression to convert
@ -545,7 +551,10 @@ pub enum Expr {
/// ```sql
/// POSITION(<expr> in <expr>)
/// ```
Position { expr: Box<Expr>, r#in: Box<Expr> },
Position {
expr: Box<Expr>,
r#in: Box<Expr>,
},
/// ```sql
/// SUBSTRING(<expr> [FROM <expr>] [FOR <expr>])
/// ```
@ -589,20 +598,32 @@ pub enum Expr {
/// A literal value, such as string, number, date or NULL
Value(Value),
/// <https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html>
IntroducedString { introducer: String, value: Value },
IntroducedString {
introducer: String,
value: Value,
},
/// A constant of form `<data_type> 'value'`.
/// This can represent ANSI SQL `DATE`, `TIME`, and `TIMESTAMP` literals (such as `DATE '2020-01-01'`),
/// as well as constants of other types (a non-standard PostgreSQL extension).
TypedString { data_type: DataType, value: String },
TypedString {
data_type: DataType,
value: String,
},
/// Access a map-like object by field (e.g. `column['field']` or `column[4]`
/// Note that depending on the dialect, struct like accesses may be
/// parsed as [`ArrayIndex`](Self::ArrayIndex) or [`MapAccess`](Self::MapAccess)
/// <https://clickhouse.com/docs/en/sql-reference/data-types/map/>
MapAccess { column: Box<Expr>, keys: Vec<Expr> },
MapAccess {
column: Box<Expr>,
keys: Vec<Expr>,
},
/// Scalar function call e.g. `LEFT(foo, 5)`
Function(Function),
/// Aggregate function with filter
AggregateExpressionWithFilter { expr: Box<Expr>, filter: Box<Expr> },
AggregateExpressionWithFilter {
expr: Box<Expr>,
filter: Box<Expr>,
},
/// `CASE [<operand>] WHEN <condition> THEN <result> ... [ELSE <result>] END`
///
/// Note we only recognize a complete single expression as `<condition>`,
@ -616,7 +637,10 @@ pub enum Expr {
},
/// An exists expression `[ NOT ] EXISTS(SELECT ...)`, used in expressions like
/// `WHERE [ NOT ] EXISTS (SELECT ...)`.
Exists { subquery: Box<Query>, negated: bool },
Exists {
subquery: Box<Query>,
negated: bool,
},
/// A parenthesized subquery `(SELECT ...)`, used in expression like
/// `SELECT (subquery) AS x` or `WHERE (subquery) = x`
Subquery(Box<Query>),
@ -653,9 +677,15 @@ pub enum Expr {
/// 1 AS A
/// ```
/// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
Named { expr: Box<Expr>, name: Ident },
Named {
expr: Box<Expr>,
name: Ident,
},
/// An array index expression e.g. `(ARRAY[1, 2])[1]` or `(current_schemas(FALSE))[1]`
ArrayIndex { obj: Box<Expr>, indexes: Vec<Expr> },
ArrayIndex {
obj: Box<Expr>,
indexes: Vec<Expr>,
},
/// An array expression e.g. `ARRAY[1, 2]`
Array(Array),
/// An interval expression e.g. `INTERVAL '1' YEAR`
@ -678,6 +708,10 @@ pub enum Expr {
/// `<search modifier>`
opt_search_modifier: Option<SearchModifier>,
},
Wildcard,
/// Qualified wildcard, e.g. `alias.*` or `schema.table.*`.
/// (Same caveats apply to `QualifiedWildcard` as to `Wildcard`.)
QualifiedWildcard(ObjectName),
}
impl fmt::Display for CastFormat {
@ -704,6 +738,8 @@ impl fmt::Display for Expr {
}
Ok(())
}
Expr::Wildcard => f.write_str("*"),
Expr::QualifiedWildcard(prefix) => write!(f, "{}.*", prefix),
Expr::CompoundIdentifier(s) => write!(f, "{}", display_separated(s, ".")),
Expr::IsTrue(ast) => write!(f, "{ast} IS TRUE"),
Expr::IsNotTrue(ast) => write!(f, "{ast} IS NOT TRUE"),
@ -4188,6 +4224,16 @@ pub enum FunctionArgExpr {
Wildcard,
}
impl From<Expr> for FunctionArgExpr {
fn from(wildcard_expr: Expr) -> Self {
match wildcard_expr {
Expr::QualifiedWildcard(prefix) => Self::QualifiedWildcard(prefix),
Expr::Wildcard => Self::Wildcard,
expr => Self::Expr(expr),
}
}
}
impl fmt::Display for FunctionArgExpr {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {

View file

@ -161,16 +161,6 @@ pub enum WildcardExpr {
Wildcard,
}
impl From<WildcardExpr> for FunctionArgExpr {
fn from(wildcard_expr: WildcardExpr) -> Self {
match wildcard_expr {
WildcardExpr::Expr(expr) => Self::Expr(expr),
WildcardExpr::QualifiedWildcard(prefix) => Self::QualifiedWildcard(prefix),
WildcardExpr::Wildcard => Self::Wildcard,
}
}
}
impl From<TokenizerError> for ParserError {
fn from(e: TokenizerError) -> Self {
ParserError::TokenizerError(e.to_string())
@ -734,7 +724,7 @@ impl<'a> Parser<'a> {
}
/// Parse a new expression including wildcard & qualified wildcard
pub fn parse_wildcard_expr(&mut self) -> Result<WildcardExpr, ParserError> {
pub fn parse_wildcard_expr(&mut self) -> Result<Expr, ParserError> {
let index = self.index;
let next_token = self.next_token();
@ -756,7 +746,7 @@ impl<'a> Parser<'a> {
id_parts.push(Ident::with_quote('\'', s))
}
Token::Mul => {
return Ok(WildcardExpr::QualifiedWildcard(ObjectName(id_parts)));
return Ok(Expr::QualifiedWildcard(ObjectName(id_parts)));
}
_ => {
return self
@ -767,13 +757,13 @@ impl<'a> Parser<'a> {
}
}
Token::Mul => {
return Ok(WildcardExpr::Wildcard);
return Ok(Expr::Wildcard);
}
_ => (),
};
self.index = index;
self.parse_expr().map(WildcardExpr::Expr)
self.parse_expr()
}
/// Parse a new expression
@ -969,10 +959,22 @@ impl<'a> Parser<'a> {
_ => match self.peek_token().token {
Token::LParen | Token::Period => {
let mut id_parts: Vec<Ident> = vec![w.to_ident()];
let mut ends_with_wildcard = false;
while self.consume_token(&Token::Period) {
let next_token = self.next_token();
match next_token.token {
Token::Word(w) => id_parts.push(w.to_ident()),
Token::Mul => {
// Postgres explicitly allows funcnm(tablenm.*) and the
// function array_agg traverses this control flow
if dialect_of!(self is PostgreSqlDialect) {
ends_with_wildcard = true;
break;
} else {
return self
.expected("an identifier after '.'", next_token);
}
}
Token::SingleQuotedString(s) => {
id_parts.push(Ident::with_quote('\'', s))
}
@ -983,7 +985,9 @@ impl<'a> Parser<'a> {
}
}
if self.consume_token(&Token::LParen) {
if ends_with_wildcard {
Ok(Expr::QualifiedWildcard(ObjectName(id_parts)))
} else if self.consume_token(&Token::LParen) {
self.prev_token();
self.parse_function(ObjectName(id_parts))
} else {
@ -8051,9 +8055,9 @@ impl<'a> Parser<'a> {
let subquery = self.parse_query()?;
self.expect_token(&Token::RParen)?;
return Ok((
vec![FunctionArg::Unnamed(FunctionArgExpr::from(
WildcardExpr::Expr(Expr::Subquery(Box::new(subquery))),
))],
vec![FunctionArg::Unnamed(FunctionArgExpr::from(Expr::Subquery(
Box::new(subquery),
)))],
vec![],
));
}
@ -8072,7 +8076,14 @@ impl<'a> Parser<'a> {
/// Parse a comma-delimited list of projections after SELECT
pub fn parse_select_item(&mut self) -> Result<SelectItem, ParserError> {
match self.parse_wildcard_expr()? {
WildcardExpr::Expr(expr) => {
Expr::QualifiedWildcard(prefix) => Ok(SelectItem::QualifiedWildcard(
prefix,
self.parse_wildcard_additional_options()?,
)),
Expr::Wildcard => Ok(SelectItem::Wildcard(
self.parse_wildcard_additional_options()?,
)),
expr => {
let expr: Expr = if self.dialect.supports_filter_during_aggregation()
&& self.parse_keyword(Keyword::FILTER)
{
@ -8097,13 +8108,6 @@ impl<'a> Parser<'a> {
None => SelectItem::UnnamedExpr(expr),
})
}
WildcardExpr::QualifiedWildcard(prefix) => Ok(SelectItem::QualifiedWildcard(
prefix,
self.parse_wildcard_additional_options()?,
)),
WildcardExpr::Wildcard => Ok(SelectItem::Wildcard(
self.parse_wildcard_additional_options()?,
)),
}
}

View file

@ -2403,6 +2403,12 @@ fn parse_array_agg_func() {
] {
supported_dialects.verified_stmt(sql);
}
// follows special-case array_agg code path. fails in everything except postgres
let wc_sql = "SELECT ARRAY_AGG(sections_tbl.*) AS sections FROM sections_tbl";
all_dialects_but_pg()
.parse_sql_statements(wc_sql)
.expect_err("should have failed");
}
#[test]

View file

@ -3800,3 +3800,22 @@ fn test_simple_insert_with_quoted_alias() {
}
)
}
#[test]
fn parse_array_agg() {
// follows general function with wildcard code path
let sql = r#"SELECT GREATEST(sections_tbl.*) AS sections FROM sections_tbl"#;
pg().verified_stmt(sql);
// follows special-case array_agg code path
let sql2 = "SELECT ARRAY_AGG(sections_tbl.*) AS sections FROM sections_tbl";
pg().verified_stmt(sql2);
// handles multi-part identifier with general code path
let sql3 = "SELECT GREATEST(my_schema.sections_tbl.*) AS sections FROM sections_tbl";
pg().verified_stmt(sql3);
// handles multi-part identifier with array_agg code path
let sql4 = "SELECT ARRAY_AGG(my_schema.sections_tbl.*) AS sections FROM sections_tbl";
pg().verified_stmt(sql4);
}