Support DISTINCT ON (...) (#852)

* Support "DISTINCT ON (...)"

* a test

* fix the merge
This commit is contained in:
Aljaž Mur Eržen 2023-04-27 21:34:54 +02:00 committed by GitHub
parent f72e2ec382
commit 3b1076c194
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 96 additions and 31 deletions

View file

@ -34,10 +34,10 @@ pub use self::ddl::{
}; };
pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::operator::{BinaryOperator, UnaryOperator};
pub use self::query::{ pub use self::query::{
Cte, ExceptSelectItem, ExcludeSelectItem, Fetch, IdentWithAlias, Join, JoinConstraint, Cte, Distinct, ExceptSelectItem, ExcludeSelectItem, Fetch, IdentWithAlias, Join,
JoinOperator, LateralView, LockClause, LockType, NonBlock, Offset, OffsetRows, OrderByExpr, JoinConstraint, JoinOperator, LateralView, LockClause, LockType, NonBlock, Offset, OffsetRows,
Query, RenameSelectItem, ReplaceSelectElement, ReplaceSelectItem, Select, SelectInto, OrderByExpr, Query, RenameSelectItem, ReplaceSelectElement, ReplaceSelectItem, Select,
SelectItem, SetExpr, SetOperator, SetQuantifier, Table, TableAlias, TableFactor, SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Table, TableAlias, TableFactor,
TableWithJoins, Top, Values, WildcardAdditionalOptions, With, TableWithJoins, Top, Values, WildcardAdditionalOptions, With,
}; };
pub use self::value::{ pub use self::value::{

View file

@ -193,7 +193,7 @@ impl fmt::Display for Table {
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct Select { pub struct Select {
pub distinct: bool, pub distinct: Option<Distinct>,
/// MSSQL syntax: `TOP (<N>) [ PERCENT ] [ WITH TIES ]` /// MSSQL syntax: `TOP (<N>) [ PERCENT ] [ WITH TIES ]`
pub top: Option<Top>, pub top: Option<Top>,
/// projection expressions /// projection expressions
@ -222,7 +222,10 @@ pub struct Select {
impl fmt::Display for Select { impl fmt::Display for Select {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "SELECT{}", if self.distinct { " DISTINCT" } else { "" })?; write!(f, "SELECT")?;
if let Some(ref distinct) = self.distinct {
write!(f, " {distinct}")?;
}
if let Some(ref top) = self.top { if let Some(ref top) = self.top {
write!(f, " {top}")?; write!(f, " {top}")?;
} }
@ -1079,6 +1082,29 @@ impl fmt::Display for NonBlock {
} }
} }
/// The `DISTINCT` modifier of a `SELECT`: either a plain `DISTINCT` or
/// `DISTINCT ON (<expr>, ...)`.
///
/// Stored as `Option<Distinct>` in `Select::distinct`; `None` means no
/// `DISTINCT` keyword was present.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum Distinct {
/// Plain `DISTINCT`; displayed as `DISTINCT`.
Distinct,
/// `DISTINCT ON (<expr>, ...)`; holds the parenthesized expressions, which
/// are arbitrary expressions rather than only column names. The list may be
/// empty (`DISTINCT ON ()`).
On(Vec<Expr>),
}
/// Renders the modifier as SQL text: `DISTINCT` for the plain form, or
/// `DISTINCT ON (a, b, ...)` with the expressions joined by commas.
impl fmt::Display for Distinct {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Self::Distinct => f.write_str("DISTINCT"),
            Self::On(exprs) => write!(f, "DISTINCT ON ({})", display_comma_separated(exprs)),
        }
    }
}
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
@ -1105,7 +1131,7 @@ impl fmt::Display for Top {
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct Values { pub struct Values {
/// Was there an explict ROWs keyword (MySQL)? /// Was there an explicit ROWs keyword (MySQL)?
/// <https://dev.mysql.com/doc/refman/8.0/en/values.html> /// <https://dev.mysql.com/doc/refman/8.0/en/values.html>
pub explicit_row: bool, pub explicit_row: bool,
pub rows: Vec<Vec<Expr>>, pub rows: Vec<Vec<Expr>>,

View file

@ -879,7 +879,7 @@ impl<'a> Parser<'a> {
pub fn parse_function(&mut self, name: ObjectName) -> Result<Expr, ParserError> { pub fn parse_function(&mut self, name: ObjectName) -> Result<Expr, ParserError> {
self.expect_token(&Token::LParen)?; self.expect_token(&Token::LParen)?;
let distinct = self.parse_all_or_distinct()?; let distinct = self.parse_all_or_distinct()?.is_some();
let args = self.parse_optional_args()?; let args = self.parse_optional_args()?;
let over = if self.parse_keyword(Keyword::OVER) { let over = if self.parse_keyword(Keyword::OVER) {
// TBD: support window names (`OVER mywin`) in place of inline specification // TBD: support window names (`OVER mywin`) in place of inline specification
@ -1302,7 +1302,7 @@ impl<'a> Parser<'a> {
/// Parse a SQL LISTAGG expression, e.g. `LISTAGG(...) WITHIN GROUP (ORDER BY ...)`. /// Parse a SQL LISTAGG expression, e.g. `LISTAGG(...) WITHIN GROUP (ORDER BY ...)`.
pub fn parse_listagg_expr(&mut self) -> Result<Expr, ParserError> { pub fn parse_listagg_expr(&mut self) -> Result<Expr, ParserError> {
self.expect_token(&Token::LParen)?; self.expect_token(&Token::LParen)?;
let distinct = self.parse_all_or_distinct()?; let distinct = self.parse_all_or_distinct()?.is_some();
let expr = Box::new(self.parse_expr()?); let expr = Box::new(self.parse_expr()?);
// While ANSI SQL would require the separator, Redshift makes this optional. Here we // While ANSI SQL would require the separator, Redshift makes this optional. Here we
// choose to make the separator optional as this provides the more general implementation. // choose to make the separator optional as this provides the more general implementation.
@ -2300,16 +2300,31 @@ impl<'a> Parser<'a> {
} }
} }
/// Parse either `ALL` or `DISTINCT`. Returns `true` if `DISTINCT` is parsed and results in a /// Parse either `ALL`, `DISTINCT` or `DISTINCT ON (...)`. Returns `None` if `ALL` is parsed
/// `ParserError` if both `ALL` and `DISTINCT` are fround. /// and results in a `ParserError` if both `ALL` and `DISTINCT` are found.
pub fn parse_all_or_distinct(&mut self) -> Result<bool, ParserError> { pub fn parse_all_or_distinct(&mut self) -> Result<Option<Distinct>, ParserError> {
let all = self.parse_keyword(Keyword::ALL); let all = self.parse_keyword(Keyword::ALL);
let distinct = self.parse_keyword(Keyword::DISTINCT); let distinct = self.parse_keyword(Keyword::DISTINCT);
if all && distinct { if !distinct {
parser_err!("Cannot specify both ALL and DISTINCT".to_string()) return Ok(None);
} else {
Ok(distinct)
} }
if all {
return parser_err!("Cannot specify both ALL and DISTINCT".to_string());
}
let on = self.parse_keyword(Keyword::ON);
if !on {
return Ok(Some(Distinct::Distinct));
}
self.expect_token(&Token::LParen)?;
let col_names = if self.consume_token(&Token::RParen) {
self.prev_token();
Vec::new()
} else {
self.parse_comma_separated(Parser::parse_expr)?
};
self.expect_token(&Token::RParen)?;
Ok(Some(Distinct::On(col_names)))
} }
/// Parse a SQL CREATE statement /// Parse a SQL CREATE statement

View file

@ -32,7 +32,7 @@ fn parse_map_access_expr() {
let select = clickhouse().verified_only_select(sql); let select = clickhouse().verified_only_select(sql);
assert_eq!( assert_eq!(
Select { Select {
distinct: false, distinct: None,
top: None, top: None,
projection: vec![UnnamedExpr(MapAccess { projection: vec![UnnamedExpr(MapAccess {
column: Box::new(Identifier(Ident { column: Box::new(Identifier(Ident {

View file

@ -225,7 +225,7 @@ fn parse_update_set_from() {
subquery: Box::new(Query { subquery: Box::new(Query {
with: None, with: None,
body: Box::new(SetExpr::Select(Box::new(Select { body: Box::new(SetExpr::Select(Box::new(Select {
distinct: false, distinct: None,
top: None, top: None,
projection: vec![ projection: vec![
SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("name"))), SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("name"))),
@ -597,7 +597,7 @@ fn parse_top_level() {
fn parse_simple_select() { fn parse_simple_select() {
let sql = "SELECT id, fname, lname FROM customer WHERE id = 1 LIMIT 5"; let sql = "SELECT id, fname, lname FROM customer WHERE id = 1 LIMIT 5";
let select = verified_only_select(sql); let select = verified_only_select(sql);
assert!(!select.distinct); assert!(select.distinct.is_none());
assert_eq!(3, select.projection.len()); assert_eq!(3, select.projection.len());
let select = verified_query(sql); let select = verified_query(sql);
assert_eq!(Some(Expr::Value(number("5"))), select.limit); assert_eq!(Some(Expr::Value(number("5"))), select.limit);
@ -622,7 +622,7 @@ fn parse_limit_is_not_an_alias() {
fn parse_select_distinct() { fn parse_select_distinct() {
let sql = "SELECT DISTINCT name FROM customer"; let sql = "SELECT DISTINCT name FROM customer";
let select = verified_only_select(sql); let select = verified_only_select(sql);
assert!(select.distinct); assert!(select.distinct.is_some());
assert_eq!( assert_eq!(
&SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("name"))), &SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("name"))),
only(&select.projection) only(&select.projection)
@ -633,7 +633,7 @@ fn parse_select_distinct() {
fn parse_select_distinct_two_fields() { fn parse_select_distinct_two_fields() {
let sql = "SELECT DISTINCT name, id FROM customer"; let sql = "SELECT DISTINCT name, id FROM customer";
let select = verified_only_select(sql); let select = verified_only_select(sql);
assert!(select.distinct); assert!(select.distinct.is_some());
assert_eq!( assert_eq!(
&SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("name"))), &SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("name"))),
&select.projection[0] &select.projection[0]
@ -657,6 +657,30 @@ fn parse_select_distinct_tuple() {
); );
} }
#[test]
fn parse_select_distinct_on() {
    // Each case pairs a query with the expressions expected inside `DISTINCT ON (...)`.
    let ident = |name: &str| Expr::Identifier(Ident::new(name));
    let cases = [
        (
            "SELECT DISTINCT ON (album_id) name FROM track ORDER BY album_id, milliseconds",
            vec![ident("album_id")],
        ),
        // An empty expression list must parse to an empty `On` payload.
        (
            "SELECT DISTINCT ON () name FROM track ORDER BY milliseconds",
            vec![],
        ),
        (
            "SELECT DISTINCT ON (album_id, milliseconds) name FROM track",
            vec![ident("album_id"), ident("milliseconds")],
        ),
    ];

    for (sql, on_exprs) in cases {
        let select = verified_only_select(sql);
        assert_eq!(Some(Distinct::On(on_exprs)), select.distinct);
    }
}
#[test] #[test]
fn parse_select_distinct_missing_paren() { fn parse_select_distinct_missing_paren() {
let result = parse_sql_statements("SELECT DISTINCT (name, id FROM customer"); let result = parse_sql_statements("SELECT DISTINCT (name, id FROM customer");
@ -3517,7 +3541,7 @@ fn parse_interval_and_or_xor() {
let expected_ast = vec![Statement::Query(Box::new(Query { let expected_ast = vec![Statement::Query(Box::new(Query {
with: None, with: None,
body: Box::new(SetExpr::Select(Box::new(Select { body: Box::new(SetExpr::Select(Box::new(Select {
distinct: false, distinct: None,
top: None, top: None,
projection: vec![UnnamedExpr(Expr::Identifier(Ident { projection: vec![UnnamedExpr(Expr::Identifier(Ident {
value: "col".to_string(), value: "col".to_string(),
@ -5834,7 +5858,7 @@ fn parse_merge() {
subquery: Box::new(Query { subquery: Box::new(Query {
with: None, with: None,
body: Box::new(SetExpr::Select(Box::new(Select { body: Box::new(SetExpr::Select(Box::new(Select {
distinct: false, distinct: None,
top: None, top: None,
projection: vec![SelectItem::Wildcard( projection: vec![SelectItem::Wildcard(
WildcardAdditionalOptions::default() WildcardAdditionalOptions::default()

View file

@ -445,7 +445,7 @@ fn parse_quote_identifiers_2() {
Statement::Query(Box::new(Query { Statement::Query(Box::new(Query {
with: None, with: None,
body: Box::new(SetExpr::Select(Box::new(Select { body: Box::new(SetExpr::Select(Box::new(Select {
distinct: false, distinct: None,
top: None, top: None,
projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident { projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident {
value: "quoted ` identifier".into(), value: "quoted ` identifier".into(),
@ -479,7 +479,7 @@ fn parse_quote_identifiers_3() {
Statement::Query(Box::new(Query { Statement::Query(Box::new(Query {
with: None, with: None,
body: Box::new(SetExpr::Select(Box::new(Select { body: Box::new(SetExpr::Select(Box::new(Select {
distinct: false, distinct: None,
top: None, top: None,
projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident { projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident {
value: "`quoted identifier`".into(), value: "`quoted identifier`".into(),
@ -857,7 +857,7 @@ fn parse_select_with_numeric_prefix_column_name() {
assert_eq!( assert_eq!(
q.body, q.body,
Box::new(SetExpr::Select(Box::new(Select { Box::new(SetExpr::Select(Box::new(Select {
distinct: false, distinct: None,
top: None, top: None,
projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident::new( projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident::new(
"123col_$@123abc" "123col_$@123abc"
@ -896,7 +896,7 @@ fn parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column() {
assert_eq!( assert_eq!(
q.body, q.body,
Box::new(SetExpr::Select(Box::new(Select { Box::new(SetExpr::Select(Box::new(Select {
distinct: false, distinct: None,
top: None, top: None,
projection: vec![ projection: vec![
SelectItem::UnnamedExpr(Expr::Value(Value::Number( SelectItem::UnnamedExpr(Expr::Value(Value::Number(
@ -1075,7 +1075,7 @@ fn parse_substring_in_select() {
Box::new(Query { Box::new(Query {
with: None, with: None,
body: Box::new(SetExpr::Select(Box::new(Select { body: Box::new(SetExpr::Select(Box::new(Select {
distinct: true, distinct: Some(Distinct::Distinct),
top: None, top: None,
projection: vec![SelectItem::UnnamedExpr(Expr::Substring { projection: vec![SelectItem::UnnamedExpr(Expr::Substring {
expr: Box::new(Expr::Identifier(Ident { expr: Box::new(Expr::Identifier(Ident {
@ -1372,7 +1372,7 @@ fn parse_hex_string_introducer() {
Statement::Query(Box::new(Query { Statement::Query(Box::new(Query {
with: None, with: None,
body: Box::new(SetExpr::Select(Box::new(Select { body: Box::new(SetExpr::Select(Box::new(Select {
distinct: false, distinct: None,
top: None, top: None,
projection: vec![SelectItem::UnnamedExpr(Expr::IntroducedString { projection: vec![SelectItem::UnnamedExpr(Expr::IntroducedString {
introducer: "_latin1".to_string(), introducer: "_latin1".to_string(),

View file

@ -1694,7 +1694,7 @@ fn parse_array_subquery_expr() {
op: SetOperator::Union, op: SetOperator::Union,
set_quantifier: SetQuantifier::None, set_quantifier: SetQuantifier::None,
left: Box::new(SetExpr::Select(Box::new(Select { left: Box::new(SetExpr::Select(Box::new(Select {
distinct: false, distinct: None,
top: None, top: None,
projection: vec![SelectItem::UnnamedExpr(Expr::Value(Value::Number( projection: vec![SelectItem::UnnamedExpr(Expr::Value(Value::Number(
#[cfg(not(feature = "bigdecimal"))] #[cfg(not(feature = "bigdecimal"))]
@ -1715,7 +1715,7 @@ fn parse_array_subquery_expr() {
qualify: None, qualify: None,
}))), }))),
right: Box::new(SetExpr::Select(Box::new(Select { right: Box::new(SetExpr::Select(Box::new(Select {
distinct: false, distinct: None,
top: None, top: None,
projection: vec![SelectItem::UnnamedExpr(Expr::Value(Value::Number( projection: vec![SelectItem::UnnamedExpr(Expr::Value(Value::Number(
#[cfg(not(feature = "bigdecimal"))] #[cfg(not(feature = "bigdecimal"))]