mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-09-09 23:46:20 +00:00
Support BigQuery window function null treatment (#1239)
This commit is contained in:
parent
8626051513
commit
fb20f8ccbe
6 changed files with 204 additions and 37 deletions
|
@ -13,11 +13,16 @@
|
||||||
//! SQL Abstract Syntax Tree (AST) types
|
//! SQL Abstract Syntax Tree (AST) types
|
||||||
#[cfg(not(feature = "std"))]
|
#[cfg(not(feature = "std"))]
|
||||||
use alloc::{
|
use alloc::{
|
||||||
|
borrow::Cow,
|
||||||
boxed::Box,
|
boxed::Box,
|
||||||
format,
|
format,
|
||||||
string::{String, ToString},
|
string::{String, ToString},
|
||||||
vec::Vec,
|
vec::Vec,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#[cfg(feature = "std")]
|
||||||
|
use std::borrow::Cow;
|
||||||
|
|
||||||
use core::fmt::{self, Display};
|
use core::fmt::{self, Display};
|
||||||
|
|
||||||
#[cfg(feature = "serde")]
|
#[cfg(feature = "serde")]
|
||||||
|
@ -1406,6 +1411,35 @@ impl fmt::Display for NullTreatment {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Specifies Ignore / Respect NULL within window functions.
|
||||||
|
#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||||
|
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||||
|
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
|
||||||
|
pub enum NullTreatmentType {
|
||||||
|
/// The declaration is part of the function's arguments.
|
||||||
|
///
|
||||||
|
/// ```sql
|
||||||
|
/// FIRST_VALUE(x IGNORE NULLS) OVER ()
|
||||||
|
/// ```
|
||||||
|
FunctionArg(NullTreatment),
|
||||||
|
/// The declaration occurs after the function call.
|
||||||
|
///
|
||||||
|
/// ```sql
|
||||||
|
/// FIRST_VALUE(x) IGNORE NULLS OVER ()
|
||||||
|
/// ```
|
||||||
|
AfterFunction(NullTreatment),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Display for NullTreatmentType {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
let null_treatment = match self {
|
||||||
|
NullTreatmentType::FunctionArg(n) => n,
|
||||||
|
NullTreatmentType::AfterFunction(n) => n,
|
||||||
|
};
|
||||||
|
write!(f, "{null_treatment}")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Specifies [WindowFrame]'s `start_bound` and `end_bound`
|
/// Specifies [WindowFrame]'s `start_bound` and `end_bound`
|
||||||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||||
|
@ -4787,15 +4821,18 @@ pub struct Function {
|
||||||
pub args: Vec<FunctionArg>,
|
pub args: Vec<FunctionArg>,
|
||||||
/// e.g. `x > 5` in `COUNT(x) FILTER (WHERE x > 5)`
|
/// e.g. `x > 5` in `COUNT(x) FILTER (WHERE x > 5)`
|
||||||
pub filter: Option<Box<Expr>>,
|
pub filter: Option<Box<Expr>>,
|
||||||
// Snowflake/MSSQL supports different options for null treatment in rank functions
|
/// Specifies Ignore / Respect NULL within window functions.
|
||||||
pub null_treatment: Option<NullTreatment>,
|
///
|
||||||
|
/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/navigation_functions#first_value)
|
||||||
|
/// [Snowflake](https://docs.snowflake.com/en/sql-reference/functions/first_value)
|
||||||
|
pub null_treatment: Option<NullTreatmentType>,
|
||||||
pub over: Option<WindowType>,
|
pub over: Option<WindowType>,
|
||||||
// aggregate functions may specify eg `COUNT(DISTINCT x)`
|
/// aggregate functions may specify eg `COUNT(DISTINCT x)`
|
||||||
pub distinct: bool,
|
pub distinct: bool,
|
||||||
// Some functions must be called without trailing parentheses, for example Postgres
|
/// Some functions must be called without trailing parentheses, for example Postgres
|
||||||
// do it for current_catalog, current_schema, etc. This flags is used for formatting.
|
/// do it for current_catalog, current_schema, etc. This flags is used for formatting.
|
||||||
pub special: bool,
|
pub special: bool,
|
||||||
// Required ordering for the function (if empty, there is no requirement).
|
/// Required ordering for the function (if empty, there is no requirement).
|
||||||
pub order_by: Vec<OrderByExpr>,
|
pub order_by: Vec<OrderByExpr>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4830,19 +4867,25 @@ impl fmt::Display for Function {
|
||||||
};
|
};
|
||||||
write!(
|
write!(
|
||||||
f,
|
f,
|
||||||
"{}({}{}{order_by}{})",
|
"{}({}{}{order_by}{}{})",
|
||||||
self.name,
|
self.name,
|
||||||
if self.distinct { "DISTINCT " } else { "" },
|
if self.distinct { "DISTINCT " } else { "" },
|
||||||
display_comma_separated(&self.args),
|
display_comma_separated(&self.args),
|
||||||
display_comma_separated(&self.order_by),
|
display_comma_separated(&self.order_by),
|
||||||
|
match self.null_treatment {
|
||||||
|
Some(NullTreatmentType::FunctionArg(null_treatment)) => {
|
||||||
|
Cow::from(format!(" {null_treatment}"))
|
||||||
|
}
|
||||||
|
_ => Cow::from(""),
|
||||||
|
}
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
if let Some(filter_cond) = &self.filter {
|
if let Some(filter_cond) = &self.filter {
|
||||||
write!(f, " FILTER (WHERE {filter_cond})")?;
|
write!(f, " FILTER (WHERE {filter_cond})")?;
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(o) = &self.null_treatment {
|
if let Some(NullTreatmentType::AfterFunction(null_treatment)) = &self.null_treatment {
|
||||||
write!(f, " {o}")?;
|
write!(f, " {null_treatment}")?;
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(o) = &self.over {
|
if let Some(o) = &self.over {
|
||||||
|
|
|
@ -30,6 +30,11 @@ impl Dialect for BigQueryDialect {
|
||||||
ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch.is_ascii_digit() || ch == '_'
|
ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch.is_ascii_digit() || ch == '_'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// See [doc](https://cloud.google.com/bigquery/docs/reference/standard-sql/navigation_functions#first_value)
|
||||||
|
fn supports_window_function_null_treatment_arg(&self) -> bool {
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
// See https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#escape_sequences
|
// See https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#escape_sequences
|
||||||
fn supports_string_literal_backslash_escape(&self) -> bool {
|
fn supports_string_literal_backslash_escape(&self) -> bool {
|
||||||
true
|
true
|
||||||
|
|
|
@ -51,6 +51,10 @@ impl Dialect for GenericDialect {
|
||||||
true
|
true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn supports_window_function_null_treatment_arg(&self) -> bool {
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
fn supports_dictionary_syntax(&self) -> bool {
|
fn supports_dictionary_syntax(&self) -> bool {
|
||||||
true
|
true
|
||||||
}
|
}
|
||||||
|
|
|
@ -185,6 +185,20 @@ pub trait Dialect: Debug + Any {
|
||||||
fn supports_named_fn_args_with_eq_operator(&self) -> bool {
|
fn supports_named_fn_args_with_eq_operator(&self) -> bool {
|
||||||
false
|
false
|
||||||
}
|
}
|
||||||
|
/// Returns true if the dialects supports specifying null treatment
|
||||||
|
/// as part of a window function's parameter list. As opposed
|
||||||
|
/// to after the parameter list.
|
||||||
|
/// i.e The following syntax returns true
|
||||||
|
/// ```sql
|
||||||
|
/// FIRST_VALUE(a IGNORE NULLS) OVER ()
|
||||||
|
/// ```
|
||||||
|
/// while the following syntax returns false
|
||||||
|
/// ```sql
|
||||||
|
/// FIRST_VALUE(a) IGNORE NULLS OVER ()
|
||||||
|
/// ```
|
||||||
|
fn supports_window_function_null_treatment_arg(&self) -> bool {
|
||||||
|
false
|
||||||
|
}
|
||||||
/// Returns true if the dialect supports defining structs or objects using a
|
/// Returns true if the dialect supports defining structs or objects using a
|
||||||
/// syntax like `{'x': 1, 'y': 2, 'z': 3}`.
|
/// syntax like `{'x': 1, 'y': 2, 'z': 3}`.
|
||||||
fn supports_dictionary_syntax(&self) -> bool {
|
fn supports_dictionary_syntax(&self) -> bool {
|
||||||
|
|
|
@ -208,6 +208,13 @@ impl From<bool> for MatchedTrailingBracket {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Output of the [`Parser::parse_window_function_args`] function.
|
||||||
|
struct ParseWindowFunctionArgsOutput {
|
||||||
|
args: Vec<FunctionArg>,
|
||||||
|
order_by: Vec<OrderByExpr>,
|
||||||
|
null_treatment: Option<NullTreatment>,
|
||||||
|
}
|
||||||
|
|
||||||
/// Options that control how the [`Parser`] parses SQL text
|
/// Options that control how the [`Parser`] parses SQL text
|
||||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
pub struct ParserOptions {
|
pub struct ParserOptions {
|
||||||
|
@ -1229,7 +1236,11 @@ impl<'a> Parser<'a> {
|
||||||
pub fn parse_function(&mut self, name: ObjectName) -> Result<Expr, ParserError> {
|
pub fn parse_function(&mut self, name: ObjectName) -> Result<Expr, ParserError> {
|
||||||
self.expect_token(&Token::LParen)?;
|
self.expect_token(&Token::LParen)?;
|
||||||
let distinct = self.parse_all_or_distinct()?.is_some();
|
let distinct = self.parse_all_or_distinct()?.is_some();
|
||||||
let (args, order_by) = self.parse_optional_args_with_orderby()?;
|
let ParseWindowFunctionArgsOutput {
|
||||||
|
args,
|
||||||
|
order_by,
|
||||||
|
null_treatment,
|
||||||
|
} = self.parse_window_function_args()?;
|
||||||
let filter = if self.dialect.supports_filter_during_aggregation()
|
let filter = if self.dialect.supports_filter_during_aggregation()
|
||||||
&& self.parse_keyword(Keyword::FILTER)
|
&& self.parse_keyword(Keyword::FILTER)
|
||||||
&& self.consume_token(&Token::LParen)
|
&& self.consume_token(&Token::LParen)
|
||||||
|
@ -1241,19 +1252,15 @@ impl<'a> Parser<'a> {
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
};
|
};
|
||||||
let null_treatment = match self.parse_one_of_keywords(&[Keyword::RESPECT, Keyword::IGNORE])
|
|
||||||
{
|
|
||||||
Some(keyword) => {
|
|
||||||
self.expect_keyword(Keyword::NULLS)?;
|
|
||||||
|
|
||||||
match keyword {
|
// Syntax for null treatment shows up either in the args list
|
||||||
Keyword::RESPECT => Some(NullTreatment::RespectNulls),
|
// or after the function call, but not both.
|
||||||
Keyword::IGNORE => Some(NullTreatment::IgnoreNulls),
|
let mut null_treatment = null_treatment.map(NullTreatmentType::FunctionArg);
|
||||||
_ => None,
|
if null_treatment.is_none() {
|
||||||
}
|
null_treatment = self
|
||||||
}
|
.parse_null_treatment()?
|
||||||
None => None,
|
.map(NullTreatmentType::AfterFunction);
|
||||||
};
|
}
|
||||||
let over = if self.parse_keyword(Keyword::OVER) {
|
let over = if self.parse_keyword(Keyword::OVER) {
|
||||||
if self.consume_token(&Token::LParen) {
|
if self.consume_token(&Token::LParen) {
|
||||||
let window_spec = self.parse_window_spec()?;
|
let window_spec = self.parse_window_spec()?;
|
||||||
|
@ -1276,17 +1283,37 @@ impl<'a> Parser<'a> {
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Optionally parses a null treatment clause.
|
||||||
|
fn parse_null_treatment(&mut self) -> Result<Option<NullTreatment>, ParserError> {
|
||||||
|
match self.parse_one_of_keywords(&[Keyword::RESPECT, Keyword::IGNORE]) {
|
||||||
|
Some(keyword) => {
|
||||||
|
self.expect_keyword(Keyword::NULLS)?;
|
||||||
|
|
||||||
|
Ok(match keyword {
|
||||||
|
Keyword::RESPECT => Some(NullTreatment::RespectNulls),
|
||||||
|
Keyword::IGNORE => Some(NullTreatment::IgnoreNulls),
|
||||||
|
_ => None,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
None => Ok(None),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn parse_time_functions(&mut self, name: ObjectName) -> Result<Expr, ParserError> {
|
pub fn parse_time_functions(&mut self, name: ObjectName) -> Result<Expr, ParserError> {
|
||||||
let (args, order_by, special) = if self.consume_token(&Token::LParen) {
|
let (args, order_by, null_treatment, special) = if self.consume_token(&Token::LParen) {
|
||||||
let (args, order_by) = self.parse_optional_args_with_orderby()?;
|
let ParseWindowFunctionArgsOutput {
|
||||||
(args, order_by, false)
|
args,
|
||||||
|
order_by,
|
||||||
|
null_treatment,
|
||||||
|
} = self.parse_window_function_args()?;
|
||||||
|
(args, order_by, null_treatment, false)
|
||||||
} else {
|
} else {
|
||||||
(vec![], vec![], true)
|
(vec![], vec![], None, true)
|
||||||
};
|
};
|
||||||
Ok(Expr::Function(Function {
|
Ok(Expr::Function(Function {
|
||||||
name,
|
name,
|
||||||
args,
|
args,
|
||||||
null_treatment: None,
|
null_treatment: null_treatment.map(NullTreatmentType::FunctionArg),
|
||||||
filter: None,
|
filter: None,
|
||||||
over: None,
|
over: None,
|
||||||
distinct: false,
|
distinct: false,
|
||||||
|
@ -9326,11 +9353,21 @@ impl<'a> Parser<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn parse_optional_args_with_orderby(
|
/// Parses a potentially empty list of arguments to a window function
|
||||||
&mut self,
|
/// (including the closing parenthesis).
|
||||||
) -> Result<(Vec<FunctionArg>, Vec<OrderByExpr>), ParserError> {
|
///
|
||||||
|
/// Examples:
|
||||||
|
/// ```sql
|
||||||
|
/// FIRST_VALUE(x ORDER BY 1,2,3);
|
||||||
|
/// FIRST_VALUE(x IGNORE NULL);
|
||||||
|
/// ```
|
||||||
|
fn parse_window_function_args(&mut self) -> Result<ParseWindowFunctionArgsOutput, ParserError> {
|
||||||
if self.consume_token(&Token::RParen) {
|
if self.consume_token(&Token::RParen) {
|
||||||
Ok((vec![], vec![]))
|
Ok(ParseWindowFunctionArgsOutput {
|
||||||
|
args: vec![],
|
||||||
|
order_by: vec![],
|
||||||
|
null_treatment: None,
|
||||||
|
})
|
||||||
} else {
|
} else {
|
||||||
// Snowflake permits a subquery to be passed as an argument without
|
// Snowflake permits a subquery to be passed as an argument without
|
||||||
// an enclosing set of parens if it's the only argument.
|
// an enclosing set of parens if it's the only argument.
|
||||||
|
@ -9342,22 +9379,34 @@ impl<'a> Parser<'a> {
|
||||||
self.prev_token();
|
self.prev_token();
|
||||||
let subquery = self.parse_boxed_query()?;
|
let subquery = self.parse_boxed_query()?;
|
||||||
self.expect_token(&Token::RParen)?;
|
self.expect_token(&Token::RParen)?;
|
||||||
return Ok((
|
return Ok(ParseWindowFunctionArgsOutput {
|
||||||
vec![FunctionArg::Unnamed(FunctionArgExpr::from(Expr::Subquery(
|
args: vec![FunctionArg::Unnamed(FunctionArgExpr::from(Expr::Subquery(
|
||||||
subquery,
|
subquery,
|
||||||
)))],
|
)))],
|
||||||
vec![],
|
order_by: vec![],
|
||||||
));
|
null_treatment: None,
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
let args = self.parse_comma_separated(Parser::parse_function_args)?;
|
let args = self.parse_comma_separated(Parser::parse_function_args)?;
|
||||||
let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) {
|
let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) {
|
||||||
self.parse_comma_separated(Parser::parse_order_by_expr)?
|
self.parse_comma_separated(Parser::parse_order_by_expr)?
|
||||||
} else {
|
} else {
|
||||||
vec![]
|
Default::default()
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let null_treatment = if self.dialect.supports_window_function_null_treatment_arg() {
|
||||||
|
self.parse_null_treatment()?
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
self.expect_token(&Token::RParen)?;
|
self.expect_token(&Token::RParen)?;
|
||||||
Ok((args, order_by))
|
Ok(ParseWindowFunctionArgsOutput {
|
||||||
|
args,
|
||||||
|
order_by,
|
||||||
|
null_treatment,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2644,6 +2644,58 @@ fn parse_window_rank_function() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_window_function_null_treatment_arg() {
|
||||||
|
let dialects = all_dialects_where(|d| d.supports_window_function_null_treatment_arg());
|
||||||
|
let sql = "SELECT \
|
||||||
|
FIRST_VALUE(a IGNORE NULLS) OVER (), \
|
||||||
|
FIRST_VALUE(b RESPECT NULLS) OVER () \
|
||||||
|
FROM mytable";
|
||||||
|
let Select { projection, .. } = dialects.verified_only_select(sql);
|
||||||
|
for (i, (expected_expr, expected_null_treatment)) in [
|
||||||
|
("a", NullTreatment::IgnoreNulls),
|
||||||
|
("b", NullTreatment::RespectNulls),
|
||||||
|
]
|
||||||
|
.into_iter()
|
||||||
|
.enumerate()
|
||||||
|
{
|
||||||
|
let SelectItem::UnnamedExpr(Expr::Function(actual)) = &projection[i] else {
|
||||||
|
unreachable!()
|
||||||
|
};
|
||||||
|
assert_eq!(ObjectName(vec![Ident::new("FIRST_VALUE")]), actual.name);
|
||||||
|
assert!(actual.order_by.is_empty());
|
||||||
|
assert_eq!(1, actual.args.len());
|
||||||
|
let FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(actual_expr))) =
|
||||||
|
&actual.args[0]
|
||||||
|
else {
|
||||||
|
unreachable!()
|
||||||
|
};
|
||||||
|
assert_eq!(&Ident::new(expected_expr), actual_expr);
|
||||||
|
let Some(NullTreatmentType::FunctionArg(actual_null_treatment)) = actual.null_treatment
|
||||||
|
else {
|
||||||
|
unreachable!()
|
||||||
|
};
|
||||||
|
assert_eq!(expected_null_treatment, actual_null_treatment);
|
||||||
|
}
|
||||||
|
|
||||||
|
let sql = "SELECT FIRST_VALUE(a ORDER BY b IGNORE NULLS) OVER () FROM t1";
|
||||||
|
dialects.verified_stmt(sql);
|
||||||
|
|
||||||
|
let sql = "SELECT LAG(1 IGNORE NULLS) IGNORE NULLS OVER () FROM t1";
|
||||||
|
assert_eq!(
|
||||||
|
dialects.parse_sql_statements(sql).unwrap_err(),
|
||||||
|
ParserError::ParserError("Expected end of statement, found: NULLS".to_string())
|
||||||
|
);
|
||||||
|
|
||||||
|
let sql = "SELECT LAG(1 IGNORE NULLS) IGNORE NULLS OVER () FROM t1";
|
||||||
|
assert_eq!(
|
||||||
|
all_dialects_where(|d| !d.supports_window_function_null_treatment_arg())
|
||||||
|
.parse_sql_statements(sql)
|
||||||
|
.unwrap_err(),
|
||||||
|
ParserError::ParserError("Expected ), found: IGNORE".to_string())
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_create_table() {
|
fn parse_create_table() {
|
||||||
let sql = "CREATE TABLE uk_cities (\
|
let sql = "CREATE TABLE uk_cities (\
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue