mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-08-25 16:34:04 +00:00
Support DISTINCT ON (...) (#852)
* Support "DISTINCT ON (...)"
* a test
* fix the merge
This commit is contained in:
parent
f72e2ec382
commit
3b1076c194
7 changed files with 96 additions and 31 deletions
|
@ -34,10 +34,10 @@ pub use self::ddl::{
|
||||||
};
|
};
|
||||||
pub use self::operator::{BinaryOperator, UnaryOperator};
|
pub use self::operator::{BinaryOperator, UnaryOperator};
|
||||||
pub use self::query::{
|
pub use self::query::{
|
||||||
Cte, ExceptSelectItem, ExcludeSelectItem, Fetch, IdentWithAlias, Join, JoinConstraint,
|
Cte, Distinct, ExceptSelectItem, ExcludeSelectItem, Fetch, IdentWithAlias, Join,
|
||||||
JoinOperator, LateralView, LockClause, LockType, NonBlock, Offset, OffsetRows, OrderByExpr,
|
JoinConstraint, JoinOperator, LateralView, LockClause, LockType, NonBlock, Offset, OffsetRows,
|
||||||
Query, RenameSelectItem, ReplaceSelectElement, ReplaceSelectItem, Select, SelectInto,
|
OrderByExpr, Query, RenameSelectItem, ReplaceSelectElement, ReplaceSelectItem, Select,
|
||||||
SelectItem, SetExpr, SetOperator, SetQuantifier, Table, TableAlias, TableFactor,
|
SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Table, TableAlias, TableFactor,
|
||||||
TableWithJoins, Top, Values, WildcardAdditionalOptions, With,
|
TableWithJoins, Top, Values, WildcardAdditionalOptions, With,
|
||||||
};
|
};
|
||||||
pub use self::value::{
|
pub use self::value::{
|
||||||
|
|
|
@ -193,7 +193,7 @@ impl fmt::Display for Table {
|
||||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||||
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
|
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
|
||||||
pub struct Select {
|
pub struct Select {
|
||||||
pub distinct: bool,
|
pub distinct: Option<Distinct>,
|
||||||
/// MSSQL syntax: `TOP (<N>) [ PERCENT ] [ WITH TIES ]`
|
/// MSSQL syntax: `TOP (<N>) [ PERCENT ] [ WITH TIES ]`
|
||||||
pub top: Option<Top>,
|
pub top: Option<Top>,
|
||||||
/// projection expressions
|
/// projection expressions
|
||||||
|
@ -222,7 +222,10 @@ pub struct Select {
|
||||||
|
|
||||||
impl fmt::Display for Select {
|
impl fmt::Display for Select {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
write!(f, "SELECT{}", if self.distinct { " DISTINCT" } else { "" })?;
|
write!(f, "SELECT")?;
|
||||||
|
if let Some(ref distinct) = self.distinct {
|
||||||
|
write!(f, " {distinct}")?;
|
||||||
|
}
|
||||||
if let Some(ref top) = self.top {
|
if let Some(ref top) = self.top {
|
||||||
write!(f, " {top}")?;
|
write!(f, " {top}")?;
|
||||||
}
|
}
|
||||||
|
@ -1079,6 +1082,29 @@ impl fmt::Display for NonBlock {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||||
|
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||||
|
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
|
||||||
|
pub enum Distinct {
|
||||||
|
/// DISTINCT
|
||||||
|
Distinct,
|
||||||
|
|
||||||
|
/// DISTINCT ON({column names})
|
||||||
|
On(Vec<Expr>),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for Distinct {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
match self {
|
||||||
|
Distinct::Distinct => write!(f, "DISTINCT"),
|
||||||
|
Distinct::On(col_names) => {
|
||||||
|
let col_names = display_comma_separated(col_names);
|
||||||
|
write!(f, "DISTINCT ON ({col_names})")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||||
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
|
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
|
||||||
|
@ -1105,7 +1131,7 @@ impl fmt::Display for Top {
|
||||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||||
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
|
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
|
||||||
pub struct Values {
|
pub struct Values {
|
||||||
/// Was there an explict ROWs keyword (MySQL)?
|
/// Was there an explicit ROWs keyword (MySQL)?
|
||||||
/// <https://dev.mysql.com/doc/refman/8.0/en/values.html>
|
/// <https://dev.mysql.com/doc/refman/8.0/en/values.html>
|
||||||
pub explicit_row: bool,
|
pub explicit_row: bool,
|
||||||
pub rows: Vec<Vec<Expr>>,
|
pub rows: Vec<Vec<Expr>>,
|
||||||
|
|
|
@ -879,7 +879,7 @@ impl<'a> Parser<'a> {
|
||||||
|
|
||||||
pub fn parse_function(&mut self, name: ObjectName) -> Result<Expr, ParserError> {
|
pub fn parse_function(&mut self, name: ObjectName) -> Result<Expr, ParserError> {
|
||||||
self.expect_token(&Token::LParen)?;
|
self.expect_token(&Token::LParen)?;
|
||||||
let distinct = self.parse_all_or_distinct()?;
|
let distinct = self.parse_all_or_distinct()?.is_some();
|
||||||
let args = self.parse_optional_args()?;
|
let args = self.parse_optional_args()?;
|
||||||
let over = if self.parse_keyword(Keyword::OVER) {
|
let over = if self.parse_keyword(Keyword::OVER) {
|
||||||
// TBD: support window names (`OVER mywin`) in place of inline specification
|
// TBD: support window names (`OVER mywin`) in place of inline specification
|
||||||
|
@ -1302,7 +1302,7 @@ impl<'a> Parser<'a> {
|
||||||
/// Parse a SQL LISTAGG expression, e.g. `LISTAGG(...) WITHIN GROUP (ORDER BY ...)`.
|
/// Parse a SQL LISTAGG expression, e.g. `LISTAGG(...) WITHIN GROUP (ORDER BY ...)`.
|
||||||
pub fn parse_listagg_expr(&mut self) -> Result<Expr, ParserError> {
|
pub fn parse_listagg_expr(&mut self) -> Result<Expr, ParserError> {
|
||||||
self.expect_token(&Token::LParen)?;
|
self.expect_token(&Token::LParen)?;
|
||||||
let distinct = self.parse_all_or_distinct()?;
|
let distinct = self.parse_all_or_distinct()?.is_some();
|
||||||
let expr = Box::new(self.parse_expr()?);
|
let expr = Box::new(self.parse_expr()?);
|
||||||
// While ANSI SQL would require the separator, Redshift makes this optional. Here we
|
// While ANSI SQL would require the separator, Redshift makes this optional. Here we
|
||||||
// choose to make the separator optional as this provides the more general implementation.
|
// choose to make the separator optional as this provides the more general implementation.
|
||||||
|
@ -2300,16 +2300,31 @@ impl<'a> Parser<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse either `ALL` or `DISTINCT`. Returns `true` if `DISTINCT` is parsed and results in a
|
/// Parse either `ALL`, `DISTINCT` or `DISTINCT ON (...)`. Returns `None` if `ALL` is parsed
|
||||||
/// `ParserError` if both `ALL` and `DISTINCT` are fround.
|
/// and results in a `ParserError` if both `ALL` and `DISTINCT` are found.
|
||||||
pub fn parse_all_or_distinct(&mut self) -> Result<bool, ParserError> {
|
pub fn parse_all_or_distinct(&mut self) -> Result<Option<Distinct>, ParserError> {
|
||||||
let all = self.parse_keyword(Keyword::ALL);
|
let all = self.parse_keyword(Keyword::ALL);
|
||||||
let distinct = self.parse_keyword(Keyword::DISTINCT);
|
let distinct = self.parse_keyword(Keyword::DISTINCT);
|
||||||
if all && distinct {
|
if !distinct {
|
||||||
parser_err!("Cannot specify both ALL and DISTINCT".to_string())
|
return Ok(None);
|
||||||
} else {
|
|
||||||
Ok(distinct)
|
|
||||||
}
|
}
|
||||||
|
if all {
|
||||||
|
return parser_err!("Cannot specify both ALL and DISTINCT".to_string());
|
||||||
|
}
|
||||||
|
let on = self.parse_keyword(Keyword::ON);
|
||||||
|
if !on {
|
||||||
|
return Ok(Some(Distinct::Distinct));
|
||||||
|
}
|
||||||
|
|
||||||
|
self.expect_token(&Token::LParen)?;
|
||||||
|
let col_names = if self.consume_token(&Token::RParen) {
|
||||||
|
self.prev_token();
|
||||||
|
Vec::new()
|
||||||
|
} else {
|
||||||
|
self.parse_comma_separated(Parser::parse_expr)?
|
||||||
|
};
|
||||||
|
self.expect_token(&Token::RParen)?;
|
||||||
|
Ok(Some(Distinct::On(col_names)))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse a SQL CREATE statement
|
/// Parse a SQL CREATE statement
|
||||||
|
|
|
@ -32,7 +32,7 @@ fn parse_map_access_expr() {
|
||||||
let select = clickhouse().verified_only_select(sql);
|
let select = clickhouse().verified_only_select(sql);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
Select {
|
Select {
|
||||||
distinct: false,
|
distinct: None,
|
||||||
top: None,
|
top: None,
|
||||||
projection: vec![UnnamedExpr(MapAccess {
|
projection: vec![UnnamedExpr(MapAccess {
|
||||||
column: Box::new(Identifier(Ident {
|
column: Box::new(Identifier(Ident {
|
||||||
|
|
|
@ -225,7 +225,7 @@ fn parse_update_set_from() {
|
||||||
subquery: Box::new(Query {
|
subquery: Box::new(Query {
|
||||||
with: None,
|
with: None,
|
||||||
body: Box::new(SetExpr::Select(Box::new(Select {
|
body: Box::new(SetExpr::Select(Box::new(Select {
|
||||||
distinct: false,
|
distinct: None,
|
||||||
top: None,
|
top: None,
|
||||||
projection: vec![
|
projection: vec![
|
||||||
SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("name"))),
|
SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("name"))),
|
||||||
|
@ -597,7 +597,7 @@ fn parse_top_level() {
|
||||||
fn parse_simple_select() {
|
fn parse_simple_select() {
|
||||||
let sql = "SELECT id, fname, lname FROM customer WHERE id = 1 LIMIT 5";
|
let sql = "SELECT id, fname, lname FROM customer WHERE id = 1 LIMIT 5";
|
||||||
let select = verified_only_select(sql);
|
let select = verified_only_select(sql);
|
||||||
assert!(!select.distinct);
|
assert!(select.distinct.is_none());
|
||||||
assert_eq!(3, select.projection.len());
|
assert_eq!(3, select.projection.len());
|
||||||
let select = verified_query(sql);
|
let select = verified_query(sql);
|
||||||
assert_eq!(Some(Expr::Value(number("5"))), select.limit);
|
assert_eq!(Some(Expr::Value(number("5"))), select.limit);
|
||||||
|
@ -622,7 +622,7 @@ fn parse_limit_is_not_an_alias() {
|
||||||
fn parse_select_distinct() {
|
fn parse_select_distinct() {
|
||||||
let sql = "SELECT DISTINCT name FROM customer";
|
let sql = "SELECT DISTINCT name FROM customer";
|
||||||
let select = verified_only_select(sql);
|
let select = verified_only_select(sql);
|
||||||
assert!(select.distinct);
|
assert!(select.distinct.is_some());
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
&SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("name"))),
|
&SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("name"))),
|
||||||
only(&select.projection)
|
only(&select.projection)
|
||||||
|
@ -633,7 +633,7 @@ fn parse_select_distinct() {
|
||||||
fn parse_select_distinct_two_fields() {
|
fn parse_select_distinct_two_fields() {
|
||||||
let sql = "SELECT DISTINCT name, id FROM customer";
|
let sql = "SELECT DISTINCT name, id FROM customer";
|
||||||
let select = verified_only_select(sql);
|
let select = verified_only_select(sql);
|
||||||
assert!(select.distinct);
|
assert!(select.distinct.is_some());
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
&SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("name"))),
|
&SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("name"))),
|
||||||
&select.projection[0]
|
&select.projection[0]
|
||||||
|
@ -657,6 +657,30 @@ fn parse_select_distinct_tuple() {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_select_distinct_on() {
|
||||||
|
let sql = "SELECT DISTINCT ON (album_id) name FROM track ORDER BY album_id, milliseconds";
|
||||||
|
let select = verified_only_select(sql);
|
||||||
|
assert_eq!(
|
||||||
|
&Some(Distinct::On(vec![Expr::Identifier(Ident::new("album_id"))])),
|
||||||
|
&select.distinct
|
||||||
|
);
|
||||||
|
|
||||||
|
let sql = "SELECT DISTINCT ON () name FROM track ORDER BY milliseconds";
|
||||||
|
let select = verified_only_select(sql);
|
||||||
|
assert_eq!(&Some(Distinct::On(vec![])), &select.distinct);
|
||||||
|
|
||||||
|
let sql = "SELECT DISTINCT ON (album_id, milliseconds) name FROM track";
|
||||||
|
let select = verified_only_select(sql);
|
||||||
|
assert_eq!(
|
||||||
|
&Some(Distinct::On(vec![
|
||||||
|
Expr::Identifier(Ident::new("album_id")),
|
||||||
|
Expr::Identifier(Ident::new("milliseconds")),
|
||||||
|
])),
|
||||||
|
&select.distinct
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_select_distinct_missing_paren() {
|
fn parse_select_distinct_missing_paren() {
|
||||||
let result = parse_sql_statements("SELECT DISTINCT (name, id FROM customer");
|
let result = parse_sql_statements("SELECT DISTINCT (name, id FROM customer");
|
||||||
|
@ -3517,7 +3541,7 @@ fn parse_interval_and_or_xor() {
|
||||||
let expected_ast = vec![Statement::Query(Box::new(Query {
|
let expected_ast = vec![Statement::Query(Box::new(Query {
|
||||||
with: None,
|
with: None,
|
||||||
body: Box::new(SetExpr::Select(Box::new(Select {
|
body: Box::new(SetExpr::Select(Box::new(Select {
|
||||||
distinct: false,
|
distinct: None,
|
||||||
top: None,
|
top: None,
|
||||||
projection: vec![UnnamedExpr(Expr::Identifier(Ident {
|
projection: vec![UnnamedExpr(Expr::Identifier(Ident {
|
||||||
value: "col".to_string(),
|
value: "col".to_string(),
|
||||||
|
@ -5834,7 +5858,7 @@ fn parse_merge() {
|
||||||
subquery: Box::new(Query {
|
subquery: Box::new(Query {
|
||||||
with: None,
|
with: None,
|
||||||
body: Box::new(SetExpr::Select(Box::new(Select {
|
body: Box::new(SetExpr::Select(Box::new(Select {
|
||||||
distinct: false,
|
distinct: None,
|
||||||
top: None,
|
top: None,
|
||||||
projection: vec![SelectItem::Wildcard(
|
projection: vec![SelectItem::Wildcard(
|
||||||
WildcardAdditionalOptions::default()
|
WildcardAdditionalOptions::default()
|
||||||
|
|
|
@ -445,7 +445,7 @@ fn parse_quote_identifiers_2() {
|
||||||
Statement::Query(Box::new(Query {
|
Statement::Query(Box::new(Query {
|
||||||
with: None,
|
with: None,
|
||||||
body: Box::new(SetExpr::Select(Box::new(Select {
|
body: Box::new(SetExpr::Select(Box::new(Select {
|
||||||
distinct: false,
|
distinct: None,
|
||||||
top: None,
|
top: None,
|
||||||
projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident {
|
projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident {
|
||||||
value: "quoted ` identifier".into(),
|
value: "quoted ` identifier".into(),
|
||||||
|
@ -479,7 +479,7 @@ fn parse_quote_identifiers_3() {
|
||||||
Statement::Query(Box::new(Query {
|
Statement::Query(Box::new(Query {
|
||||||
with: None,
|
with: None,
|
||||||
body: Box::new(SetExpr::Select(Box::new(Select {
|
body: Box::new(SetExpr::Select(Box::new(Select {
|
||||||
distinct: false,
|
distinct: None,
|
||||||
top: None,
|
top: None,
|
||||||
projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident {
|
projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident {
|
||||||
value: "`quoted identifier`".into(),
|
value: "`quoted identifier`".into(),
|
||||||
|
@ -857,7 +857,7 @@ fn parse_select_with_numeric_prefix_column_name() {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
q.body,
|
q.body,
|
||||||
Box::new(SetExpr::Select(Box::new(Select {
|
Box::new(SetExpr::Select(Box::new(Select {
|
||||||
distinct: false,
|
distinct: None,
|
||||||
top: None,
|
top: None,
|
||||||
projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident::new(
|
projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident::new(
|
||||||
"123col_$@123abc"
|
"123col_$@123abc"
|
||||||
|
@ -896,7 +896,7 @@ fn parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column() {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
q.body,
|
q.body,
|
||||||
Box::new(SetExpr::Select(Box::new(Select {
|
Box::new(SetExpr::Select(Box::new(Select {
|
||||||
distinct: false,
|
distinct: None,
|
||||||
top: None,
|
top: None,
|
||||||
projection: vec![
|
projection: vec![
|
||||||
SelectItem::UnnamedExpr(Expr::Value(Value::Number(
|
SelectItem::UnnamedExpr(Expr::Value(Value::Number(
|
||||||
|
@ -1075,7 +1075,7 @@ fn parse_substring_in_select() {
|
||||||
Box::new(Query {
|
Box::new(Query {
|
||||||
with: None,
|
with: None,
|
||||||
body: Box::new(SetExpr::Select(Box::new(Select {
|
body: Box::new(SetExpr::Select(Box::new(Select {
|
||||||
distinct: true,
|
distinct: Some(Distinct::Distinct),
|
||||||
top: None,
|
top: None,
|
||||||
projection: vec![SelectItem::UnnamedExpr(Expr::Substring {
|
projection: vec![SelectItem::UnnamedExpr(Expr::Substring {
|
||||||
expr: Box::new(Expr::Identifier(Ident {
|
expr: Box::new(Expr::Identifier(Ident {
|
||||||
|
@ -1372,7 +1372,7 @@ fn parse_hex_string_introducer() {
|
||||||
Statement::Query(Box::new(Query {
|
Statement::Query(Box::new(Query {
|
||||||
with: None,
|
with: None,
|
||||||
body: Box::new(SetExpr::Select(Box::new(Select {
|
body: Box::new(SetExpr::Select(Box::new(Select {
|
||||||
distinct: false,
|
distinct: None,
|
||||||
top: None,
|
top: None,
|
||||||
projection: vec![SelectItem::UnnamedExpr(Expr::IntroducedString {
|
projection: vec![SelectItem::UnnamedExpr(Expr::IntroducedString {
|
||||||
introducer: "_latin1".to_string(),
|
introducer: "_latin1".to_string(),
|
||||||
|
|
|
@ -1694,7 +1694,7 @@ fn parse_array_subquery_expr() {
|
||||||
op: SetOperator::Union,
|
op: SetOperator::Union,
|
||||||
set_quantifier: SetQuantifier::None,
|
set_quantifier: SetQuantifier::None,
|
||||||
left: Box::new(SetExpr::Select(Box::new(Select {
|
left: Box::new(SetExpr::Select(Box::new(Select {
|
||||||
distinct: false,
|
distinct: None,
|
||||||
top: None,
|
top: None,
|
||||||
projection: vec![SelectItem::UnnamedExpr(Expr::Value(Value::Number(
|
projection: vec![SelectItem::UnnamedExpr(Expr::Value(Value::Number(
|
||||||
#[cfg(not(feature = "bigdecimal"))]
|
#[cfg(not(feature = "bigdecimal"))]
|
||||||
|
@ -1715,7 +1715,7 @@ fn parse_array_subquery_expr() {
|
||||||
qualify: None,
|
qualify: None,
|
||||||
}))),
|
}))),
|
||||||
right: Box::new(SetExpr::Select(Box::new(Select {
|
right: Box::new(SetExpr::Select(Box::new(Select {
|
||||||
distinct: false,
|
distinct: None,
|
||||||
top: None,
|
top: None,
|
||||||
projection: vec![SelectItem::UnnamedExpr(Expr::Value(Value::Number(
|
projection: vec![SelectItem::UnnamedExpr(Expr::Value(Value::Number(
|
||||||
#[cfg(not(feature = "bigdecimal"))]
|
#[cfg(not(feature = "bigdecimal"))]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue