Support for CONNECT BY (#1138)

This commit is contained in:
Joey Hain 2024-04-27 03:52:21 -07:00 committed by GitHub
parent deaa6d8151
commit 0b5722afbf
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 319 additions and 4 deletions

View file

@ -40,9 +40,9 @@ pub use self::ddl::{
pub use self::dml::{Delete, Insert};
pub use self::operator::{BinaryOperator, UnaryOperator};
pub use self::query::{
AfterMatchSkip, Cte, CteAsMaterialized, Distinct, EmptyMatchesMode, ExceptSelectItem,
ExcludeSelectItem, Fetch, ForClause, ForJson, ForXml, GroupByExpr, IdentWithAlias,
IlikeSelectItem, Join, JoinConstraint, JoinOperator, JsonTableColumn,
AfterMatchSkip, ConnectBy, Cte, CteAsMaterialized, Distinct, EmptyMatchesMode,
ExceptSelectItem, ExcludeSelectItem, Fetch, ForClause, ForJson, ForXml, GroupByExpr,
IdentWithAlias, IlikeSelectItem, Join, JoinConstraint, JoinOperator, JsonTableColumn,
JsonTableColumnErrorHandling, LateralView, LockClause, LockType, MatchRecognizePattern,
MatchRecognizeSymbol, Measure, NamedWindowDefinition, NonBlock, Offset, OffsetRows,
OrderByExpr, Query, RenameSelectItem, RepetitionQuantifier, ReplaceSelectElement,
@ -798,6 +798,8 @@ pub enum Expr {
///
/// See <https://docs.snowflake.com/en/sql-reference/constructs/where#joins-in-the-where-clause>.
OuterJoin(Box<Expr>),
/// A reference to the prior level in a CONNECT BY clause.
Prior(Box<Expr>),
}
impl fmt::Display for CastFormat {
@ -1255,6 +1257,7 @@ impl fmt::Display for Expr {
Expr::OuterJoin(expr) => {
write!(f, "{expr} (+)")
}
Expr::Prior(expr) => write!(f, "PRIOR {expr}"),
}
}
}

View file

@ -247,6 +247,8 @@ pub struct Select {
pub qualify: Option<Expr>,
/// BigQuery syntax: `SELECT AS VALUE | SELECT AS STRUCT`
pub value_table_mode: Option<ValueTableMode>,
/// START WITH .. CONNECT BY
pub connect_by: Option<ConnectBy>,
}
impl fmt::Display for Select {
@ -314,6 +316,9 @@ impl fmt::Display for Select {
if let Some(ref qualify) = self.qualify {
write!(f, " QUALIFY {qualify}")?;
}
if let Some(ref connect_by) = self.connect_by {
write!(f, " {connect_by}")?;
}
Ok(())
}
}
@ -731,6 +736,30 @@ impl fmt::Display for TableWithJoins {
}
}
/// Joins a table to itself to process hierarchical data in the table.
///
/// Holds both halves of a `START WITH .. CONNECT BY` clause. Regardless of
/// the order the two halves were written in, the clause is displayed as
/// `START WITH <condition> CONNECT BY <relationships>`.
///
/// See <https://docs.snowflake.com/en/sql-reference/constructs/connect-by>.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct ConnectBy {
/// The expression that follows `START WITH`.
pub condition: Expr,
/// The comma-separated expressions that follow `CONNECT BY`; each may use
/// `PRIOR` to refer to the prior level.
pub relationships: Vec<Expr>,
}
/// Renders the clause in its canonical order: `START WITH` first, then
/// `CONNECT BY` with the relationships separated by commas.
impl fmt::Display for ConnectBy {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let condition = &self.condition;
        let relationships = display_comma_separated(&self.relationships);
        write!(f, "START WITH {condition} CONNECT BY {relationships}")
    }
}
/// A table name or a parenthesized subquery with an optional alias
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]

View file

@ -39,6 +39,10 @@ impl Dialect for GenericDialect {
true
}
/// The generic dialect accepts `START WITH .. CONNECT BY` clauses.
fn supports_connect_by(&self) -> bool {
true
}
/// The generic dialect accepts the MATCH_RECOGNIZE operation.
fn supports_match_recognize(&self) -> bool {
true
}

View file

@ -154,6 +154,10 @@ pub trait Dialect: Debug + Any {
fn supports_group_by_expr(&self) -> bool {
false
}
/// Returns true if the dialect supports CONNECT BY.
///
/// Defaults to `false`. When a dialect overrides this to return `true`, the
/// parser looks for a `START WITH` / `CONNECT BY` clause while parsing a
/// `SELECT`.
fn supports_connect_by(&self) -> bool {
false
}
/// Returns true if the dialect supports the MATCH_RECOGNIZE operation.
fn supports_match_recognize(&self) -> bool {
false

View file

@ -40,4 +40,8 @@ impl Dialect for MsSqlDialect {
fn convert_type_before_value(&self) -> bool {
true
}
/// Enables parsing of `START WITH .. CONNECT BY` clauses for this dialect.
// NOTE(review): SQL Server does not appear to document a CONNECT BY clause
// (hierarchical queries are usually written as recursive CTEs) — confirm
// that enabling this for MsSqlDialect is intentional.
fn supports_connect_by(&self) -> bool {
true
}
}

View file

@ -59,4 +59,8 @@ impl Dialect for RedshiftSqlDialect {
fn convert_type_before_value(&self) -> bool {
true
}
/// Enables parsing of `START WITH .. CONNECT BY` clauses for this dialect.
fn supports_connect_by(&self) -> bool {
true
}
}

View file

@ -55,6 +55,10 @@ impl Dialect for SnowflakeDialect {
true
}
/// Enables parsing of `START WITH .. CONNECT BY` clauses for this dialect.
fn supports_connect_by(&self) -> bool {
true
}
/// Enables parsing of the MATCH_RECOGNIZE operation for this dialect.
fn supports_match_recognize(&self) -> bool {
true
}

View file

@ -819,6 +819,9 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[
Keyword::FOR,
// for MYSQL PARTITION SELECTION
Keyword::PARTITION,
// for Snowflake START WITH .. CONNECT BY
Keyword::START,
Keyword::CONNECT,
// Reserved for snowflake MATCH_RECOGNIZE
Keyword::MATCH_RECOGNIZE,
];

View file

@ -256,10 +256,22 @@ impl ParserOptions {
}
}
/// The context the parser is currently in.
///
/// Consulted where the same token has to be interpreted differently
/// depending on the clause being parsed.
#[derive(Copy, Clone)]
enum ParserState {
/// The default state of the parser.
Normal,
/// The state when parsing a CONNECT BY expression. This allows parsing
/// PRIOR expressions while still allowing prior as an identifier name
/// in other contexts.
ConnectBy,
}
pub struct Parser<'a> {
tokens: Vec<TokenWithLocation>,
/// The index of the first unprocessed token in `self.tokens`
index: usize,
/// The current state of the parser.
state: ParserState,
/// The current dialect to use
dialect: &'a dyn Dialect,
/// Additional options that allow you to mix & match behavior
@ -290,6 +302,7 @@ impl<'a> Parser<'a> {
Self {
tokens: vec![],
index: 0,
state: ParserState::Normal,
dialect,
recursion_counter: RecursionCounter::new(DEFAULT_REMAINING_DEPTH),
options: ParserOptions::default(),
@ -1040,6 +1053,10 @@ impl<'a> Parser<'a> {
self.prev_token();
self.parse_bigquery_struct_literal()
}
Keyword::PRIOR if matches!(self.state, ParserState::ConnectBy) => {
let expr = self.parse_subexpr(Self::PLUS_MINUS_PREC)?;
Ok(Expr::Prior(Box::new(expr)))
}
// Here `w` is a word, check if it's a part of a multi-part
// identifier, a function call, or a simple identifier:
_ => match self.peek_token().token {
@ -7695,6 +7712,17 @@ impl<'a> Parser<'a> {
None
};
let connect_by = if self.dialect.supports_connect_by()
&& self
.parse_one_of_keywords(&[Keyword::START, Keyword::CONNECT])
.is_some()
{
self.prev_token();
Some(self.parse_connect_by()?)
} else {
None
};
Ok(Select {
distinct,
top,
@ -7711,6 +7739,44 @@ impl<'a> Parser<'a> {
named_window: named_windows,
qualify,
value_table_mode,
connect_by,
})
}
/// Runs `f` with the parser's [`ParserState`] temporarily set to `state`.
///
/// The state that was active on entry is restored before this method
/// returns, both when `f` succeeds and when it returns an error, so calls
/// may be nested.
///
/// `f` is invoked exactly once, which is why any `FnOnce` closure is
/// accepted.
fn with_state<T, F>(&mut self, state: ParserState, f: F) -> Result<T, ParserError>
where
    F: FnOnce(&mut Parser) -> Result<T, ParserError>,
{
    // Swap the new state in and remember the one it replaced.
    let previous_state = core::mem::replace(&mut self.state, state);
    let result = f(self);
    // Restore before propagating `result`: `?` is deliberately not applied
    // above, so an error from `f` cannot leave the temporary state behind.
    self.state = previous_state;
    result
}
/// Parses a `START WITH .. CONNECT BY ..` clause.
///
/// The two parts may appear in either order, but both are required. The
/// `CONNECT BY` expressions are parsed with the parser in
/// `ParserState::ConnectBy`, so `PRIOR` is recognized there; the
/// `START WITH` condition is parsed in whatever state was already active.
///
/// Returns an error if either part is missing or any expression fails to
/// parse.
pub fn parse_connect_by(&mut self) -> Result<ConnectBy, ParserError> {
    // `CONNECT BY .. START WITH ..`
    if self.parse_keywords(&[Keyword::CONNECT, Keyword::BY]) {
        let relationships = self.with_state(ParserState::ConnectBy, |p| {
            p.parse_comma_separated(Parser::parse_expr)
        })?;
        self.expect_keywords(&[Keyword::START, Keyword::WITH])?;
        let condition = self.parse_expr()?;
        return Ok(ConnectBy {
            condition,
            relationships,
        });
    }

    // `START WITH .. CONNECT BY ..`
    self.expect_keywords(&[Keyword::START, Keyword::WITH])?;
    let condition = self.parse_expr()?;
    self.expect_keywords(&[Keyword::CONNECT, Keyword::BY])?;
    let relationships = self.with_state(ParserState::ConnectBy, |p| {
        p.parse_comma_separated(Parser::parse_expr)
    })?;
    Ok(ConnectBy {
        condition,
        relationships,
    })
}

View file

@ -157,6 +157,16 @@ impl TestedDialects {
}
}
/// Ensures that `query` parses as a single [Query], and that
/// re-serializing the parse result matches the given `canonical`
/// sql string.
///
/// Panics with "Expected Query" if the parsed statement is not a query.
pub fn verified_query_with_canonical(&self, query: &str, canonical: &str) -> Query {
    let statement = self.one_statement_parses_to(query, canonical);
    if let Statement::Query(parsed) = statement {
        *parsed
    } else {
        panic!("Expected Query")
    }
}
/// Ensures that `sql` parses as a single [Select], and that
/// re-serializing the parse result produces the same `sql`
/// string (is not modified after a serialization round-trip).

View file

@ -116,6 +116,7 @@ fn parse_map_access_expr() {
named_window: vec![],
qualify: None,
value_table_mode: None,
connect_by: None,
},
select
);

View file

@ -402,6 +402,7 @@ fn parse_update_set_from() {
named_window: vec![],
qualify: None,
value_table_mode: None,
connect_by: None,
}))),
order_by: vec![],
limit: None,
@ -4469,6 +4470,7 @@ fn test_parse_named_window() {
],
qualify: None,
value_table_mode: None,
connect_by: None,
};
assert_eq!(actual_select_only, expected);
}
@ -4825,6 +4827,7 @@ fn parse_interval_and_or_xor() {
named_window: vec![],
qualify: None,
value_table_mode: None,
connect_by: None,
}))),
order_by: vec![],
limit: None,
@ -6778,6 +6781,7 @@ fn lateral_function() {
named_window: vec![],
qualify: None,
value_table_mode: None,
connect_by: None,
};
assert_eq!(actual_select_only, expected);
}
@ -7422,6 +7426,7 @@ fn parse_merge() {
named_window: vec![],
qualify: None,
value_table_mode: None,
connect_by: None,
}))),
order_by: vec![],
limit: None,
@ -8817,6 +8822,7 @@ fn parse_unload() {
named_window: vec![],
qualify: None,
value_table_mode: None,
connect_by: None,
}))),
with: None,
limit: None,
@ -8937,6 +8943,167 @@ fn parse_map_access_expr() {
}
}
#[test]
fn parse_connect_by() {
    // Every dialect that opts in to CONNECT BY must behave the same way.
    let dialects = all_dialects_where(|d| d.supports_connect_by());
    let ident = |name: &str| Expr::Identifier(Ident::new(name));

    // AST shared by the first two queries (no WHERE clause).
    let expect_query = Select {
        distinct: None,
        top: None,
        projection: vec![
            SelectItem::UnnamedExpr(ident("employee_id")),
            SelectItem::UnnamedExpr(ident("manager_id")),
            SelectItem::UnnamedExpr(ident("title")),
        ],
        from: vec![TableWithJoins {
            relation: TableFactor::Table {
                name: ObjectName(vec![Ident::new("employees")]),
                alias: None,
                args: None,
                with_hints: vec![],
                version: None,
                partitions: vec![],
            },
            joins: vec![],
        }],
        into: None,
        lateral_views: vec![],
        selection: None,
        group_by: GroupByExpr::Expressions(vec![]),
        cluster_by: vec![],
        distribute_by: vec![],
        sort_by: vec![],
        having: None,
        named_window: vec![],
        qualify: None,
        value_table_mode: None,
        connect_by: Some(ConnectBy {
            condition: Expr::BinaryOp {
                left: Box::new(ident("title")),
                op: BinaryOperator::Eq,
                right: Box::new(Expr::Value(Value::SingleQuotedString(
                    "president".to_owned(),
                ))),
            },
            relationships: vec![Expr::BinaryOp {
                left: Box::new(ident("manager_id")),
                op: BinaryOperator::Eq,
                right: Box::new(Expr::Prior(Box::new(ident("employee_id")))),
            }],
        }),
    };

    let connect_by_1 = concat!(
        "SELECT employee_id, manager_id, title FROM employees ",
        "START WITH title = 'president' ",
        "CONNECT BY manager_id = PRIOR employee_id ",
        "ORDER BY employee_id"
    );
    assert_eq!(dialects.verified_only_select(connect_by_1), expect_query);

    // CONNECT BY can come before START WITH
    let connect_by_2 = concat!(
        "SELECT employee_id, manager_id, title FROM employees ",
        "CONNECT BY manager_id = PRIOR employee_id ",
        "START WITH title = 'president' ",
        "ORDER BY employee_id"
    );
    assert_eq!(
        dialects.verified_only_select_with_canonical(connect_by_2, connect_by_1),
        expect_query
    );

    // WHERE must come before CONNECT BY
    let connect_by_3 = concat!(
        "SELECT employee_id, manager_id, title FROM employees ",
        "WHERE employee_id <> 42 ",
        "START WITH title = 'president' ",
        "CONNECT BY manager_id = PRIOR employee_id ",
        "ORDER BY employee_id"
    );
    // Same AST as above, differing only in the WHERE selection.
    let expect_query_with_where = Select {
        selection: Some(Expr::BinaryOp {
            left: Box::new(ident("employee_id")),
            op: BinaryOperator::NotEq,
            right: Box::new(Expr::Value(number("42"))),
        }),
        ..expect_query
    };
    assert_eq!(
        dialects.verified_only_select(connect_by_3),
        expect_query_with_where
    );

    // A WHERE placed after CONNECT BY is rejected
    let connect_by_4 = concat!(
        "SELECT employee_id, manager_id, title FROM employees ",
        "START WITH title = 'president' ",
        "CONNECT BY manager_id = PRIOR employee_id ",
        "WHERE employee_id <> 42 ",
        "ORDER BY employee_id"
    );
    dialects
        .parse_sql_statements(connect_by_4)
        .expect_err("should have failed");

    // PRIOR expressions are only valid within a CONNECT BY, and the token
    // `prior` is valid as an identifier anywhere else.
    assert_eq!(
        all_dialects()
            .verified_only_select("SELECT prior FROM some_table")
            .projection,
        vec![SelectItem::UnnamedExpr(ident("prior"))]
    );
}
#[test]
fn test_selective_aggregation() {
let sql = concat!(
@ -9007,6 +9174,7 @@ fn test_group_by_grouping_sets() {
])])
);
}
#[test]
fn test_match_recognize() {
use MatchRecognizePattern::*;

View file

@ -179,6 +179,7 @@ fn test_select_union_by_name() {
named_window: vec![],
qualify: None,
value_table_mode: None,
connect_by: None,
}))),
right: Box::<SetExpr>::new(SetExpr::Select(Box::new(Select {
distinct: None,
@ -215,6 +216,7 @@ fn test_select_union_by_name() {
named_window: vec![],
qualify: None,
value_table_mode: None,
connect_by: None,
}))),
});
assert_eq!(ast.body, expected);

View file

@ -114,6 +114,7 @@ fn parse_create_procedure() {
named_window: vec![],
qualify: None,
value_table_mode: None,
connect_by: None,
})))
}))],
params: Some(vec![
@ -490,6 +491,7 @@ fn parse_substring_in_select() {
named_window: vec![],
qualify: None,
value_table_mode: None,
connect_by: None,
}))),
order_by: vec![],
limit: None,

View file

@ -909,6 +909,7 @@ fn parse_escaped_quote_identifiers_with_escape() {
named_window: vec![],
qualify: None,
value_table_mode: None,
connect_by: None,
}))),
order_by: vec![],
limit: None,
@ -954,6 +955,7 @@ fn parse_escaped_quote_identifiers_with_no_escape() {
named_window: vec![],
qualify: None,
value_table_mode: None,
connect_by: None,
}))),
order_by: vec![],
limit: None,
@ -996,6 +998,7 @@ fn parse_escaped_backticks_with_escape() {
named_window: vec![],
qualify: None,
value_table_mode: None,
connect_by: None,
}))),
order_by: vec![],
limit: None,
@ -1038,6 +1041,7 @@ fn parse_escaped_backticks_with_no_escape() {
named_window: vec![],
qualify: None,
value_table_mode: None,
connect_by: None,
}))),
order_by: vec![],
limit: None,
@ -1742,6 +1746,7 @@ fn parse_select_with_numeric_prefix_column_name() {
named_window: vec![],
qualify: None,
value_table_mode: None,
connect_by: None,
})))
);
}
@ -1793,6 +1798,7 @@ fn parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column() {
named_window: vec![],
qualify: None,
value_table_mode: None,
connect_by: None,
})))
);
}
@ -2287,6 +2293,7 @@ fn parse_substring_in_select() {
named_window: vec![],
qualify: None,
value_table_mode: None,
connect_by: None,
}))),
order_by: vec![],
limit: None,
@ -2598,7 +2605,8 @@ fn parse_hex_string_introducer() {
named_window: vec![],
qualify: None,
value_table_mode: None,
into: None
into: None,
connect_by: None,
}))),
order_by: vec![],
limit: None,

View file

@ -1083,6 +1083,7 @@ fn parse_copy_to() {
sort_by: vec![],
qualify: None,
value_table_mode: None,
connect_by: None,
}))),
order_by: vec![],
limit: None,
@ -2170,6 +2171,7 @@ fn parse_array_subquery_expr() {
named_window: vec![],
qualify: None,
value_table_mode: None,
connect_by: None,
}))),
right: Box::new(SetExpr::Select(Box::new(Select {
distinct: None,
@ -2187,6 +2189,7 @@ fn parse_array_subquery_expr() {
named_window: vec![],
qualify: None,
value_table_mode: None,
connect_by: None,
}))),
}),
order_by: vec![],