Support DISTINCT ON (...) (#852)

* Support "DISTINCT ON (...)"

* a test

* fix the merge
This commit is contained in:
Aljaž Mur Eržen 2023-04-27 21:34:54 +02:00 committed by GitHub
parent f72e2ec382
commit 3b1076c194
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 96 additions and 31 deletions

View file

@ -34,10 +34,10 @@ pub use self::ddl::{
};
pub use self::operator::{BinaryOperator, UnaryOperator};
pub use self::query::{
Cte, ExceptSelectItem, ExcludeSelectItem, Fetch, IdentWithAlias, Join, JoinConstraint,
JoinOperator, LateralView, LockClause, LockType, NonBlock, Offset, OffsetRows, OrderByExpr,
Query, RenameSelectItem, ReplaceSelectElement, ReplaceSelectItem, Select, SelectInto,
SelectItem, SetExpr, SetOperator, SetQuantifier, Table, TableAlias, TableFactor,
Cte, Distinct, ExceptSelectItem, ExcludeSelectItem, Fetch, IdentWithAlias, Join,
JoinConstraint, JoinOperator, LateralView, LockClause, LockType, NonBlock, Offset, OffsetRows,
OrderByExpr, Query, RenameSelectItem, ReplaceSelectElement, ReplaceSelectItem, Select,
SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Table, TableAlias, TableFactor,
TableWithJoins, Top, Values, WildcardAdditionalOptions, With,
};
pub use self::value::{

View file

@ -193,7 +193,7 @@ impl fmt::Display for Table {
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct Select {
pub distinct: bool,
pub distinct: Option<Distinct>,
/// MSSQL syntax: `TOP (<N>) [ PERCENT ] [ WITH TIES ]`
pub top: Option<Top>,
/// projection expressions
@ -222,7 +222,10 @@ pub struct Select {
impl fmt::Display for Select {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "SELECT{}", if self.distinct { " DISTINCT" } else { "" })?;
write!(f, "SELECT")?;
if let Some(ref distinct) = self.distinct {
write!(f, " {distinct}")?;
}
if let Some(ref top) = self.top {
write!(f, " {top}")?;
}
@ -1079,6 +1082,29 @@ impl fmt::Display for NonBlock {
}
}
/// The `DISTINCT` quantifier of a `SELECT`, displayed right after the `SELECT` keyword.
///
/// Either a plain `DISTINCT` or a `DISTINCT ON (...)` carrying a list of expressions.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum Distinct {
/// Plain `DISTINCT`, with no `ON` clause.
Distinct,
/// `DISTINCT ON (<expr>, ...)`. Holds the parenthesized expressions in source order;
/// the list may be empty, which corresponds to `DISTINCT ON ()`.
On(Vec<Expr>),
}
impl fmt::Display for Distinct {
    /// Renders the quantifier as SQL text: `DISTINCT` for the plain form, or
    /// `DISTINCT ON (a, b, ...)` with the expressions joined by `, `.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Self::Distinct => f.write_str("DISTINCT"),
            Self::On(exprs) => write!(f, "DISTINCT ON ({})", display_comma_separated(exprs)),
        }
    }
}
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
@ -1105,7 +1131,7 @@ impl fmt::Display for Top {
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct Values {
/// Was there an explict ROWs keyword (MySQL)?
/// Was there an explicit ROWs keyword (MySQL)?
/// <https://dev.mysql.com/doc/refman/8.0/en/values.html>
pub explicit_row: bool,
pub rows: Vec<Vec<Expr>>,

View file

@ -879,7 +879,7 @@ impl<'a> Parser<'a> {
pub fn parse_function(&mut self, name: ObjectName) -> Result<Expr, ParserError> {
self.expect_token(&Token::LParen)?;
let distinct = self.parse_all_or_distinct()?;
let distinct = self.parse_all_or_distinct()?.is_some();
let args = self.parse_optional_args()?;
let over = if self.parse_keyword(Keyword::OVER) {
// TBD: support window names (`OVER mywin`) in place of inline specification
@ -1302,7 +1302,7 @@ impl<'a> Parser<'a> {
/// Parse a SQL LISTAGG expression, e.g. `LISTAGG(...) WITHIN GROUP (ORDER BY ...)`.
pub fn parse_listagg_expr(&mut self) -> Result<Expr, ParserError> {
self.expect_token(&Token::LParen)?;
let distinct = self.parse_all_or_distinct()?;
let distinct = self.parse_all_or_distinct()?.is_some();
let expr = Box::new(self.parse_expr()?);
// While ANSI SQL would require the separator, Redshift makes this optional. Here we
// choose to make the separator optional as this provides the more general implementation.
@ -2300,16 +2300,31 @@ impl<'a> Parser<'a> {
}
}
/// Parse either `ALL` or `DISTINCT`. Returns `true` if `DISTINCT` is parsed and results in a
/// `ParserError` if both `ALL` and `DISTINCT` are fround.
pub fn parse_all_or_distinct(&mut self) -> Result<bool, ParserError> {
/// Parse an optional `ALL`, `DISTINCT` or `DISTINCT ON (...)` quantifier.
///
/// Returns `Ok(None)` when `DISTINCT` is absent (a lone `ALL`, or neither keyword),
/// `Ok(Some(Distinct::Distinct))` for a bare `DISTINCT`, and `Ok(Some(Distinct::On(..)))`
/// for `DISTINCT ON (...)`, whose parenthesized expression list may be empty.
///
/// # Errors
///
/// Returns a `ParserError` if both `ALL` and `DISTINCT` are found. Errors raised while
/// matching the parentheses or parsing the expressions after `ON` are propagated.
pub fn parse_all_or_distinct(&mut self) -> Result<Option<Distinct>, ParserError> {
let all = self.parse_keyword(Keyword::ALL);
let distinct = self.parse_keyword(Keyword::DISTINCT);
if all && distinct {
parser_err!("Cannot specify both ALL and DISTINCT".to_string())
} else {
Ok(distinct)
// Without `DISTINCT` there is nothing to report, whether or not `ALL` was present.
if !distinct {
return Ok(None);
}
if all {
return parser_err!("Cannot specify both ALL and DISTINCT".to_string());
}
let on = self.parse_keyword(Keyword::ON);
if !on {
return Ok(Some(Distinct::Distinct));
}
self.expect_token(&Token::LParen)?;
// Accept an empty list, i.e. `DISTINCT ON ()`.
// NOTE(review): `consume_token` presumably advances past a matching `)`, so `prev_token`
// steps back to let the shared `expect_token` below consume it — confirm.
let col_names = if self.consume_token(&Token::RParen) {
self.prev_token();
Vec::new()
} else {
self.parse_comma_separated(Parser::parse_expr)?
};
self.expect_token(&Token::RParen)?;
Ok(Some(Distinct::On(col_names)))
}
/// Parse a SQL CREATE statement

View file

@ -32,7 +32,7 @@ fn parse_map_access_expr() {
let select = clickhouse().verified_only_select(sql);
assert_eq!(
Select {
distinct: false,
distinct: None,
top: None,
projection: vec![UnnamedExpr(MapAccess {
column: Box::new(Identifier(Ident {

View file

@ -225,7 +225,7 @@ fn parse_update_set_from() {
subquery: Box::new(Query {
with: None,
body: Box::new(SetExpr::Select(Box::new(Select {
distinct: false,
distinct: None,
top: None,
projection: vec![
SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("name"))),
@ -597,7 +597,7 @@ fn parse_top_level() {
fn parse_simple_select() {
let sql = "SELECT id, fname, lname FROM customer WHERE id = 1 LIMIT 5";
let select = verified_only_select(sql);
assert!(!select.distinct);
assert!(select.distinct.is_none());
assert_eq!(3, select.projection.len());
let select = verified_query(sql);
assert_eq!(Some(Expr::Value(number("5"))), select.limit);
@ -622,7 +622,7 @@ fn parse_limit_is_not_an_alias() {
fn parse_select_distinct() {
let sql = "SELECT DISTINCT name FROM customer";
let select = verified_only_select(sql);
assert!(select.distinct);
assert!(select.distinct.is_some());
assert_eq!(
&SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("name"))),
only(&select.projection)
@ -633,7 +633,7 @@ fn parse_select_distinct() {
fn parse_select_distinct_two_fields() {
let sql = "SELECT DISTINCT name, id FROM customer";
let select = verified_only_select(sql);
assert!(select.distinct);
assert!(select.distinct.is_some());
assert_eq!(
&SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("name"))),
&select.projection[0]
@ -657,6 +657,30 @@ fn parse_select_distinct_tuple() {
);
}
/// `DISTINCT ON (...)` should parse into `Distinct::On` with the listed expressions,
/// for one expression, an empty list, and several expressions.
#[test]
fn parse_select_distinct_on() {
    let ident = |name: &str| Expr::Identifier(Ident::new(name));

    // Each case pairs a query with the expressions expected inside `DISTINCT ON (...)`.
    let cases = [
        (
            "SELECT DISTINCT ON (album_id) name FROM track ORDER BY album_id, milliseconds",
            vec![ident("album_id")],
        ),
        (
            "SELECT DISTINCT ON () name FROM track ORDER BY milliseconds",
            vec![],
        ),
        (
            "SELECT DISTINCT ON (album_id, milliseconds) name FROM track",
            vec![ident("album_id"), ident("milliseconds")],
        ),
    ];

    for (sql, expected_exprs) in cases {
        let select = verified_only_select(sql);
        assert_eq!(Some(Distinct::On(expected_exprs)), select.distinct);
    }
}
#[test]
fn parse_select_distinct_missing_paren() {
let result = parse_sql_statements("SELECT DISTINCT (name, id FROM customer");
@ -3517,7 +3541,7 @@ fn parse_interval_and_or_xor() {
let expected_ast = vec![Statement::Query(Box::new(Query {
with: None,
body: Box::new(SetExpr::Select(Box::new(Select {
distinct: false,
distinct: None,
top: None,
projection: vec![UnnamedExpr(Expr::Identifier(Ident {
value: "col".to_string(),
@ -5834,7 +5858,7 @@ fn parse_merge() {
subquery: Box::new(Query {
with: None,
body: Box::new(SetExpr::Select(Box::new(Select {
distinct: false,
distinct: None,
top: None,
projection: vec![SelectItem::Wildcard(
WildcardAdditionalOptions::default()

View file

@ -445,7 +445,7 @@ fn parse_quote_identifiers_2() {
Statement::Query(Box::new(Query {
with: None,
body: Box::new(SetExpr::Select(Box::new(Select {
distinct: false,
distinct: None,
top: None,
projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident {
value: "quoted ` identifier".into(),
@ -479,7 +479,7 @@ fn parse_quote_identifiers_3() {
Statement::Query(Box::new(Query {
with: None,
body: Box::new(SetExpr::Select(Box::new(Select {
distinct: false,
distinct: None,
top: None,
projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident {
value: "`quoted identifier`".into(),
@ -857,7 +857,7 @@ fn parse_select_with_numeric_prefix_column_name() {
assert_eq!(
q.body,
Box::new(SetExpr::Select(Box::new(Select {
distinct: false,
distinct: None,
top: None,
projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident::new(
"123col_$@123abc"
@ -896,7 +896,7 @@ fn parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column() {
assert_eq!(
q.body,
Box::new(SetExpr::Select(Box::new(Select {
distinct: false,
distinct: None,
top: None,
projection: vec![
SelectItem::UnnamedExpr(Expr::Value(Value::Number(
@ -1075,7 +1075,7 @@ fn parse_substring_in_select() {
Box::new(Query {
with: None,
body: Box::new(SetExpr::Select(Box::new(Select {
distinct: true,
distinct: Some(Distinct::Distinct),
top: None,
projection: vec![SelectItem::UnnamedExpr(Expr::Substring {
expr: Box::new(Expr::Identifier(Ident {
@ -1372,7 +1372,7 @@ fn parse_hex_string_introducer() {
Statement::Query(Box::new(Query {
with: None,
body: Box::new(SetExpr::Select(Box::new(Select {
distinct: false,
distinct: None,
top: None,
projection: vec![SelectItem::UnnamedExpr(Expr::IntroducedString {
introducer: "_latin1".to_string(),

View file

@ -1694,7 +1694,7 @@ fn parse_array_subquery_expr() {
op: SetOperator::Union,
set_quantifier: SetQuantifier::None,
left: Box::new(SetExpr::Select(Box::new(Select {
distinct: false,
distinct: None,
top: None,
projection: vec![SelectItem::UnnamedExpr(Expr::Value(Value::Number(
#[cfg(not(feature = "bigdecimal"))]
@ -1715,7 +1715,7 @@ fn parse_array_subquery_expr() {
qualify: None,
}))),
right: Box::new(SetExpr::Select(Box::new(Select {
distinct: false,
distinct: None,
top: None,
projection: vec![SelectItem::UnnamedExpr(Expr::Value(Value::Number(
#[cfg(not(feature = "bigdecimal"))]