Support Snowflake MATCH_RECOGNIZE syntax (#1222)

This commit is contained in:
Joey Hain 2024-04-22 13:17:50 -07:00 committed by GitHub
parent bf89b7d808
commit 39980e8976
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 847 additions and 9 deletions

View file

@ -40,13 +40,15 @@ pub use self::ddl::{
pub use self::dml::{Delete, Insert};
pub use self::operator::{BinaryOperator, UnaryOperator};
pub use self::query::{
Cte, CteAsMaterialized, Distinct, ExceptSelectItem, ExcludeSelectItem, Fetch, ForClause,
ForJson, ForXml, GroupByExpr, IdentWithAlias, IlikeSelectItem, Join, JoinConstraint,
JoinOperator, JsonTableColumn, JsonTableColumnErrorHandling, LateralView, LockClause, LockType,
NamedWindowDefinition, NonBlock, Offset, OffsetRows, OrderByExpr, Query, RenameSelectItem,
ReplaceSelectElement, ReplaceSelectItem, Select, SelectInto, SelectItem, SetExpr, SetOperator,
SetQuantifier, Table, TableAlias, TableFactor, TableVersion, TableWithJoins, Top, TopQuantity,
ValueTableMode, Values, WildcardAdditionalOptions, With,
AfterMatchSkip, Cte, CteAsMaterialized, Distinct, EmptyMatchesMode, ExceptSelectItem,
ExcludeSelectItem, Fetch, ForClause, ForJson, ForXml, GroupByExpr, IdentWithAlias,
IlikeSelectItem, Join, JoinConstraint, JoinOperator, JsonTableColumn,
JsonTableColumnErrorHandling, LateralView, LockClause, LockType, MatchRecognizePattern,
MatchRecognizeSymbol, Measure, NamedWindowDefinition, NonBlock, Offset, OffsetRows,
OrderByExpr, Query, RenameSelectItem, RepetitionQuantifier, ReplaceSelectElement,
ReplaceSelectItem, RowsPerMatch, Select, SelectInto, SelectItem, SetExpr, SetOperator,
SetQuantifier, SymbolDefinition, Table, TableAlias, TableFactor, TableVersion, TableWithJoins,
Top, TopQuantity, ValueTableMode, Values, WildcardAdditionalOptions, With,
};
pub use self::value::{
escape_quoted_string, DateTimeField, DollarQuotedString, TrimWhereField, Value,

View file

@ -852,6 +852,238 @@ pub enum TableFactor {
columns: Vec<Ident>,
alias: Option<TableAlias>,
},
/// A `MATCH_RECOGNIZE` operation on a table.
///
/// See <https://docs.snowflake.com/en/sql-reference/constructs/match_recognize>.
MatchRecognize {
table: Box<TableFactor>,
/// `PARTITION BY <expr> [, ... ]`
partition_by: Vec<Expr>,
/// `ORDER BY <expr> [, ... ]`
order_by: Vec<OrderByExpr>,
/// `MEASURES <expr> [AS] <alias> [, ... ]`
measures: Vec<Measure>,
/// `ONE ROW PER MATCH | ALL ROWS PER MATCH [ <option> ]`
rows_per_match: Option<RowsPerMatch>,
/// `AFTER MATCH SKIP <option>`
after_match_skip: Option<AfterMatchSkip>,
/// `PATTERN ( <pattern> )`
pattern: MatchRecognizePattern,
/// `DEFINE <symbol> AS <expr> [, ... ]`
symbols: Vec<SymbolDefinition>,
alias: Option<TableAlias>,
},
}
/// One entry of the `MEASURES` subclause of a `MATCH_RECOGNIZE` operation,
/// rendered as `<expr> AS <alias>`.
///
/// See <https://docs.snowflake.com/en/sql-reference/constructs/match_recognize#measures-specifying-additional-output-columns>.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct Measure {
    /// The measured expression.
    pub expr: Expr,
    /// The alias the expression is exposed under.
    pub alias: Ident,
}

impl fmt::Display for Measure {
    /// Writes the measure as `<expr> AS <alias>`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { expr, alias } = self;
        write!(f, "{expr} AS {alias}")
    }
}
/// Selects which rows a `MATCH_RECOGNIZE` operation returns for each match.
///
/// See <https://docs.snowflake.com/en/sql-reference/constructs/match_recognize#row-s-per-match-specifying-the-rows-to-return>.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum RowsPerMatch {
    /// `ONE ROW PER MATCH`
    OneRow,
    /// `ALL ROWS PER MATCH [ <mode> ]`, where the optional mode controls
    /// how empty matches / unmatched rows are reported.
    AllRows(Option<EmptyMatchesMode>),
}

impl fmt::Display for RowsPerMatch {
    /// Writes the option in its SQL spelling; the empty-matches mode, when
    /// present, follows `ALL ROWS PER MATCH` after a single space.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::OneRow => f.write_str("ONE ROW PER MATCH"),
            Self::AllRows(None) => f.write_str("ALL ROWS PER MATCH"),
            Self::AllRows(Some(mode)) => write!(f, "ALL ROWS PER MATCH {mode}"),
        }
    }
}
/// The after match skip option in a `MATCH_RECOGNIZE` operation.
///
/// See <https://docs.snowflake.com/en/sql-reference/constructs/match_recognize#after-match-skip-specifying-where-to-continue-after-a-match>.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum AfterMatchSkip {
    /// `PAST LAST ROW`
    PastLastRow,
    /// `TO NEXT ROW`
    ToNextRow,
    /// `TO FIRST <symbol>`
    ToFirst(Ident),
    /// `TO LAST <symbol>`
    ToLast(Ident),
}

impl fmt::Display for AfterMatchSkip {
    /// Writes `AFTER MATCH SKIP ` followed by the selected option.
    ///
    /// The prefix already ends with a space, so no option text may start with
    /// one; otherwise the output contains a double space and no longer matches
    /// the canonical form of the SQL it was parsed from.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "AFTER MATCH SKIP ")?;
        match self {
            AfterMatchSkip::PastLastRow => write!(f, "PAST LAST ROW"),
            AfterMatchSkip::ToNextRow => write!(f, "TO NEXT ROW"),
            AfterMatchSkip::ToFirst(symbol) => write!(f, "TO FIRST {symbol}"),
            AfterMatchSkip::ToLast(symbol) => write!(f, "TO LAST {symbol}"),
        }
    }
}
/// The optional mode that may follow `ALL ROWS PER MATCH` in a
/// `MATCH_RECOGNIZE` operation.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum EmptyMatchesMode {
    /// `SHOW EMPTY MATCHES`
    Show,
    /// `OMIT EMPTY MATCHES`
    Omit,
    /// `WITH UNMATCHED ROWS`
    WithUnmatched,
}

impl fmt::Display for EmptyMatchesMode {
    /// Writes the mode in its SQL spelling.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let keywords = match self {
            Self::Show => "SHOW EMPTY MATCHES",
            Self::Omit => "OMIT EMPTY MATCHES",
            Self::WithUnmatched => "WITH UNMATCHED ROWS",
        };
        write!(f, "{keywords}")
    }
}
/// One entry of the `DEFINE` subclause of a `MATCH_RECOGNIZE` operation,
/// rendered as `<symbol> AS <expr>`.
///
/// See <https://docs.snowflake.com/en/sql-reference/constructs/match_recognize#define-defining-symbols>.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct SymbolDefinition {
    /// The name of the symbol being defined.
    pub symbol: Ident,
    /// The expression the symbol stands for.
    pub definition: Expr,
}

impl fmt::Display for SymbolDefinition {
    /// Writes the definition as `<symbol> AS <expr>`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { symbol, definition } = self;
        write!(f, "{symbol} AS {definition}")
    }
}
/// A symbol that can appear inside a `MATCH_RECOGNIZE` pattern.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum MatchRecognizeSymbol {
    /// A named symbol, e.g. `S1`.
    Named(Ident),
    /// A virtual symbol representing the start of the partition (`^`).
    Start,
    /// A virtual symbol representing the end of the partition (`$`).
    End,
}

impl fmt::Display for MatchRecognizeSymbol {
    /// Writes a named symbol as its identifier and the virtual symbols as
    /// `^` and `$`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Start => f.write_str("^"),
            Self::End => f.write_str("$"),
            Self::Named(name) => write!(f, "{name}"),
        }
    }
}
/// The row pattern given in the `PATTERN ( ... )` subclause of a
/// `MATCH_RECOGNIZE` operation.
///
/// See <https://docs.snowflake.com/en/sql-reference/constructs/match_recognize#pattern-specifying-the-pattern-to-match>.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum MatchRecognizePattern {
    /// A named symbol such as `S1` or a virtual symbol such as `^`.
    Symbol(MatchRecognizeSymbol),
    /// `{- symbol -}`
    Exclude(MatchRecognizeSymbol),
    /// `PERMUTE(symbol_1, ..., symbol_n)`
    Permute(Vec<MatchRecognizeSymbol>),
    /// `pattern_1 pattern_2 ... pattern_n`
    Concat(Vec<MatchRecognizePattern>),
    /// `( pattern )`
    Group(Box<MatchRecognizePattern>),
    /// `pattern_1 | pattern_2 | ... | pattern_n`
    Alternation(Vec<MatchRecognizePattern>),
    /// A pattern followed by a quantifier, e.g. `pattern*`.
    Repetition(Box<MatchRecognizePattern>, RepetitionQuantifier),
}

impl fmt::Display for MatchRecognizePattern {
    /// Writes the pattern in the syntax shown on each variant: concatenated
    /// parts are separated by a space, alternatives by ` | `, and a group is
    /// written with a space inside each parenthesis.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Symbol(symbol) => write!(f, "{symbol}"),
            Self::Exclude(symbol) => write!(f, "{{- {symbol} -}}"),
            Self::Permute(symbols) => {
                let list = display_comma_separated(symbols);
                write!(f, "PERMUTE({list})")
            }
            Self::Concat(parts) => {
                let sequence = display_separated(parts, " ");
                write!(f, "{sequence}")
            }
            Self::Group(inner) => write!(f, "( {inner} )"),
            Self::Alternation(branches) => {
                let alternatives = display_separated(branches, " | ");
                write!(f, "{alternatives}")
            }
            Self::Repetition(inner, quantifier) => write!(f, "{inner}{quantifier}"),
        }
    }
}
/// The quantifier attached to a pattern in a `MATCH_RECOGNIZE` operation,
/// giving the minimum and maximum number of times the pattern may occur.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum RepetitionQuantifier {
    /// `*`
    ZeroOrMore,
    /// `+`
    OneOrMore,
    /// `?`
    AtMostOne,
    /// `{n}`
    Exactly(u32),
    /// `{n,}`
    AtLeast(u32),
    /// `{,n}`
    AtMost(u32),
    /// `{n,m}`
    Range(u32, u32),
}

impl fmt::Display for RepetitionQuantifier {
    /// Writes the quantifier in the syntax shown on each variant.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match *self {
            Self::ZeroOrMore => f.write_str("*"),
            Self::OneOrMore => f.write_str("+"),
            Self::AtMostOne => f.write_str("?"),
            // `{{` and `}}` are escaped braces; the inner `{..}` is the bound.
            Self::Exactly(count) => write!(f, "{{{count}}}"),
            Self::AtLeast(min) => write!(f, "{{{min},}}"),
            Self::AtMost(max) => write!(f, "{{,{max}}}"),
            Self::Range(min, max) => write!(f, "{{{min},{max}}}"),
        }
    }
}
impl fmt::Display for TableFactor {
@ -1005,6 +1237,40 @@ impl fmt::Display for TableFactor {
}
Ok(())
}
TableFactor::MatchRecognize {
table,
partition_by,
order_by,
measures,
rows_per_match,
after_match_skip,
pattern,
symbols,
alias,
} => {
write!(f, "{table} MATCH_RECOGNIZE(")?;
if !partition_by.is_empty() {
write!(f, "PARTITION BY {} ", display_comma_separated(partition_by))?;
}
if !order_by.is_empty() {
write!(f, "ORDER BY {} ", display_comma_separated(order_by))?;
}
if !measures.is_empty() {
write!(f, "MEASURES {} ", display_comma_separated(measures))?;
}
if let Some(rows_per_match) = rows_per_match {
write!(f, "{rows_per_match} ")?;
}
if let Some(after_match_skip) = after_match_skip {
write!(f, "{after_match_skip} ")?;
}
write!(f, "PATTERN ({pattern}) ")?;
write!(f, "DEFINE {})", display_comma_separated(symbols))?;
if alias.is_some() {
write!(f, " AS {}", alias.as_ref().unwrap())?;
}
Ok(())
}
}
}
}

View file

@ -39,6 +39,10 @@ impl Dialect for GenericDialect {
true
}
// The generic dialect opts in to the `MATCH_RECOGNIZE` operation.
fn supports_match_recognize(&self) -> bool {
true
}
fn supports_start_transaction_modifier(&self) -> bool {
true
}

View file

@ -152,6 +152,10 @@ pub trait Dialect: Debug + Any {
fn supports_group_by_expr(&self) -> bool {
false
}
/// Returns true if the dialect supports the `MATCH_RECOGNIZE` operation.
///
/// The default is `false`; a dialect opts in by overriding this method.
fn supports_match_recognize(&self) -> bool {
false
}
/// Returns true if the dialect supports `(NOT) IN ()` expressions
fn supports_in_empty_list(&self) -> bool {
false

View file

@ -55,6 +55,10 @@ impl Dialect for SnowflakeDialect {
true
}
// Snowflake opts in to the `MATCH_RECOGNIZE` operation.
// See <https://docs.snowflake.com/en/sql-reference/constructs/match_recognize>.
fn supports_match_recognize(&self) -> bool {
true
}
fn parse_statement(&self, parser: &mut Parser) -> Option<Result<Statement, ParserError>> {
if parser.parse_keyword(Keyword::CREATE) {
// possibly CREATE STAGE

View file

@ -214,6 +214,7 @@ define_keywords!(
DEFAULT,
DEFERRABLE,
DEFERRED,
DEFINE,
DEFINED,
DELAYED,
DELETE,
@ -418,9 +419,12 @@ define_keywords!(
MAP,
MATCH,
MATCHED,
MATCHES,
MATCH_RECOGNIZE,
MATERIALIZED,
MAX,
MAXVALUE,
MEASURES,
MEDIUMINT,
MEMBER,
MERGE,
@ -482,7 +486,9 @@ define_keywords!(
OF,
OFFSET,
OLD,
OMIT,
ON,
ONE,
ONLY,
OPEN,
OPERATOR,
@ -509,8 +515,10 @@ define_keywords!(
PARTITIONED,
PARTITIONS,
PASSWORD,
PAST,
PATH,
PATTERN,
PER,
PERCENT,
PERCENTILE_CONT,
PERCENTILE_DISC,
@ -712,6 +720,7 @@ define_keywords!(
UNLOAD,
UNLOCK,
UNLOGGED,
UNMATCHED,
UNNEST,
UNPIVOT,
UNSAFE,
@ -808,6 +817,8 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[
Keyword::FOR,
// for MYSQL PARTITION SELECTION
Keyword::PARTITION,
// Reserved for snowflake MATCH_RECOGNIZE
Keyword::MATCH_RECOGNIZE,
];
/// Can't be used as a column alias, so that `SELECT <expr> alias`

View file

@ -8228,6 +8228,7 @@ impl<'a> Parser<'a> {
| TableFactor::TableFunction { alias, .. }
| TableFactor::Pivot { alias, .. }
| TableFactor::Unpivot { alias, .. }
| TableFactor::MatchRecognize { alias, .. }
| TableFactor::NestedJoin { alias, .. } => {
// but not `FROM (mytable AS alias1) AS alias2`.
if let Some(inner_alias) = alias {
@ -8351,10 +8352,246 @@ impl<'a> Parser<'a> {
}
}
if self.dialect.supports_match_recognize()
&& self.parse_keyword(Keyword::MATCH_RECOGNIZE)
{
table = self.parse_match_recognize(table)?;
}
Ok(table)
}
}
/// Parses the parenthesized body of a `MATCH_RECOGNIZE` operation applied to
/// `table`, followed by an optional table alias.
///
/// Parsing starts at the opening `(`. The subclauses are read in this fixed
/// order: optional `PARTITION BY`, optional `ORDER BY`, optional `MEASURES`,
/// optional rows-per-match option, optional `AFTER MATCH SKIP` option, then
/// the mandatory `PATTERN ( ... )` and `DEFINE ...` subclauses and the
/// closing `)`.
fn parse_match_recognize(&mut self, table: TableFactor) -> Result<TableFactor, ParserError> {
    self.expect_token(&Token::LParen)?;

    // `PARTITION BY <expr> [, ... ]`
    let partition_by = if self.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) {
        self.parse_comma_separated(Parser::parse_expr)?
    } else {
        vec![]
    };

    // `ORDER BY <expr> [, ... ]`
    let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) {
        self.parse_comma_separated(Parser::parse_order_by_expr)?
    } else {
        vec![]
    };

    // `MEASURES <expr> [AS] <alias> [, ... ]`
    let measures = if self.parse_keyword(Keyword::MEASURES) {
        self.parse_comma_separated(|parser| {
            let expr = parser.parse_expr()?;
            // `AS` is optional, so whether it was present is irrelevant.
            let _ = parser.parse_keyword(Keyword::AS);
            let alias = parser.parse_identifier(false)?;
            Ok(Measure { expr, alias })
        })?
    } else {
        vec![]
    };

    // `ONE ROW PER MATCH | ALL ROWS PER MATCH [ <mode> ]`
    let one_row = [Keyword::ONE, Keyword::ROW, Keyword::PER, Keyword::MATCH];
    let all_rows = [Keyword::ALL, Keyword::ROWS, Keyword::PER, Keyword::MATCH];
    let rows_per_match = if self.parse_keywords(&one_row) {
        Some(RowsPerMatch::OneRow)
    } else if self.parse_keywords(&all_rows) {
        let mode = if self.parse_keywords(&[Keyword::SHOW, Keyword::EMPTY, Keyword::MATCHES]) {
            Some(EmptyMatchesMode::Show)
        } else if self.parse_keywords(&[Keyword::OMIT, Keyword::EMPTY, Keyword::MATCHES]) {
            Some(EmptyMatchesMode::Omit)
        } else if self.parse_keywords(&[Keyword::WITH, Keyword::UNMATCHED, Keyword::ROWS]) {
            Some(EmptyMatchesMode::WithUnmatched)
        } else {
            None
        };
        Some(RowsPerMatch::AllRows(mode))
    } else {
        None
    };

    // `AFTER MATCH SKIP <option>`; once the prefix is seen an option is required.
    let after_match_skip =
        if self.parse_keywords(&[Keyword::AFTER, Keyword::MATCH, Keyword::SKIP]) {
            let skip = if self.parse_keywords(&[Keyword::PAST, Keyword::LAST, Keyword::ROW]) {
                AfterMatchSkip::PastLastRow
            } else if self.parse_keywords(&[Keyword::TO, Keyword::NEXT, Keyword::ROW]) {
                AfterMatchSkip::ToNextRow
            } else if self.parse_keywords(&[Keyword::TO, Keyword::FIRST]) {
                AfterMatchSkip::ToFirst(self.parse_identifier(false)?)
            } else if self.parse_keywords(&[Keyword::TO, Keyword::LAST]) {
                AfterMatchSkip::ToLast(self.parse_identifier(false)?)
            } else {
                let found = self.next_token();
                return self.expected("after match skip option", found);
            };
            Some(skip)
        } else {
            None
        };

    // `PATTERN ( <pattern> )`
    self.expect_keyword(Keyword::PATTERN)?;
    self.expect_token(&Token::LParen)?;
    let pattern = self.parse_pattern()?;
    self.expect_token(&Token::RParen)?;

    // `DEFINE <symbol> AS <expr> [, ... ]`
    self.expect_keyword(Keyword::DEFINE)?;
    let symbols = self.parse_comma_separated(|parser| {
        let symbol = parser.parse_identifier(false)?;
        parser.expect_keyword(Keyword::AS)?;
        let definition = parser.parse_expr()?;
        Ok(SymbolDefinition { symbol, definition })
    })?;

    self.expect_token(&Token::RParen)?;

    let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;

    let table = Box::new(table);
    Ok(TableFactor::MatchRecognize {
        table,
        partition_by,
        order_by,
        measures,
        rows_per_match,
        after_match_skip,
        pattern,
        symbols,
        alias,
    })
}
/// Parses one primary element of a `MATCH_RECOGNIZE` pattern, without any
/// trailing quantifier: `^`, `$`, `{- symbol -}`, `PERMUTE(...)`, a
/// parenthesized sub-pattern, or a named symbol.
fn parse_base_pattern(&mut self) -> Result<MatchRecognizePattern, ParserError> {
    let next = self.next_token();
    match next.token {
        Token::Caret => Ok(MatchRecognizePattern::Symbol(MatchRecognizeSymbol::Start)),
        // The end-of-partition symbol `$` arrives as a placeholder token.
        Token::Placeholder(text) if text == "$" => {
            Ok(MatchRecognizePattern::Symbol(MatchRecognizeSymbol::End))
        }
        // `{- symbol -}`
        Token::LBrace => {
            self.expect_token(&Token::Minus)?;
            let excluded = self.parse_identifier(false)?;
            self.expect_token(&Token::Minus)?;
            self.expect_token(&Token::RBrace)?;
            Ok(MatchRecognizePattern::Exclude(MatchRecognizeSymbol::Named(
                excluded,
            )))
        }
        // Only an unquoted, exactly-spelled `PERMUTE` introduces a permutation.
        Token::Word(word) if word.quote_style.is_none() && word.value == "PERMUTE" => {
            self.expect_token(&Token::LParen)?;
            let members = self.parse_comma_separated(|parser| {
                let name = parser.parse_identifier(false)?;
                Ok(MatchRecognizeSymbol::Named(name))
            })?;
            self.expect_token(&Token::RParen)?;
            Ok(MatchRecognizePattern::Permute(members))
        }
        // `( pattern )`
        Token::LParen => {
            let inner = self.parse_pattern()?;
            self.expect_token(&Token::RParen)?;
            Ok(MatchRecognizePattern::Group(Box::new(inner)))
        }
        // Anything else must be a named symbol: un-read the token and let
        // the identifier parser accept or reject it.
        _ => {
            self.prev_token();
            let name = self.parse_identifier(false)?;
            Ok(MatchRecognizePattern::Symbol(MatchRecognizeSymbol::Named(
                name,
            )))
        }
    }
}
fn parse_repetition_pattern(&mut self) -> Result<MatchRecognizePattern, ParserError> {
let mut pattern = self.parse_base_pattern()?;
loop {
let token = self.next_token();
let quantifier = match token.token {
Token::Mul => RepetitionQuantifier::ZeroOrMore,
Token::Plus => RepetitionQuantifier::OneOrMore,
Token::Placeholder(s) if s == "?" => RepetitionQuantifier::AtMostOne,
Token::LBrace => {
// quantifier is a range like {n} or {n,} or {,m} or {n,m}
let token = self.next_token();
match token.token {
Token::Comma => {
let next_token = self.next_token();
let Token::Number(n, _) = next_token.token else {
return self.expected("literal number", next_token);
};
self.expect_token(&Token::RBrace)?;
RepetitionQuantifier::AtMost(n.parse().expect("literal int"))
}
Token::Number(n, _) if self.consume_token(&Token::Comma) => {
let next_token = self.next_token();
match next_token.token {
Token::Number(m, _) => {
self.expect_token(&Token::RBrace)?;
RepetitionQuantifier::Range(
n.parse().expect("literal int"),
m.parse().expect("literal int"),
)
}
Token::RBrace => {
RepetitionQuantifier::AtLeast(n.parse().expect("literal int"))
}
_ => {
return self.expected("} or upper bound", next_token);
}
}
}
Token::Number(n, _) => {
self.expect_token(&Token::RBrace)?;
RepetitionQuantifier::Exactly(n.parse().expect("literal int"))
}
_ => return self.expected("quantifier range", token),
}
}
_ => {
self.prev_token();
break;
}
};
pattern = MatchRecognizePattern::Repetition(Box::new(pattern), quantifier);
}
Ok(pattern)
}
/// Parses a run of adjacent (possibly quantified) patterns, stopping in
/// front of a `)` or `|`.
///
/// A single pattern is returned as-is; two or more are wrapped in
/// `MatchRecognizePattern::Concat`.
fn parse_concat_pattern(&mut self) -> Result<MatchRecognizePattern, ParserError> {
    let head = self.parse_repetition_pattern()?;
    if matches!(self.peek_token().token, Token::RParen | Token::Pipe) {
        return Ok(head);
    }

    // At least one more pattern follows, so this is a real concatenation.
    let mut sequence = vec![head];
    loop {
        sequence.push(self.parse_repetition_pattern()?);
        if matches!(self.peek_token().token, Token::RParen | Token::Pipe) {
            break;
        }
    }
    Ok(MatchRecognizePattern::Concat(sequence))
}
/// Parses a complete `MATCH_RECOGNIZE` pattern: one concatenation,
/// optionally followed by `|` and further alternatives.
///
/// Alternatives are collected into one flat
/// `MatchRecognizePattern::Alternation` rather than a nested chain.
fn parse_pattern(&mut self) -> Result<MatchRecognizePattern, ParserError> {
    let first = self.parse_concat_pattern()?;
    if !self.consume_token(&Token::Pipe) {
        return Ok(first);
    }

    let alternatives = match self.parse_pattern()? {
        // The right-hand side is itself an alternation: prepend to it so the
        // result stays flat.
        MatchRecognizePattern::Alternation(mut rest) => {
            rest.insert(0, first);
            rest
        }
        other => vec![first, other],
    };
    Ok(MatchRecognizePattern::Alternation(alternatives))
}
/// Parse a given table version specifier.
///
/// For now it only supports timestamp versioning for BigQuery and MSSQL dialects.

View file

@ -307,3 +307,20 @@ pub fn join(relation: TableFactor) -> Join {
join_operator: JoinOperator::Inner(JoinConstraint::Natural),
}
}
/// Builds an `Expr::Function` calling `function` with `args` passed as
/// unnamed positional arguments; every other field of the call is left at
/// its empty value (`None`, `false`, or an empty list).
pub fn call(function: &str, args: impl IntoIterator<Item = Expr>) -> Expr {
    let name = ObjectName(vec![Ident::new(function)]);
    let args = args
        .into_iter()
        .map(|arg| FunctionArg::Unnamed(FunctionArgExpr::Expr(arg)))
        .collect();
    Expr::Function(Function {
        name,
        args,
        filter: None,
        null_treatment: None,
        over: None,
        distinct: false,
        special: false,
        order_by: vec![],
    })
}

View file

@ -33,8 +33,8 @@ use sqlparser::keywords::ALL_KEYWORDS;
use sqlparser::parser::{Parser, ParserError, ParserOptions};
use sqlparser::tokenizer::Tokenizer;
use test_utils::{
all_dialects, all_dialects_where, alter_table_op, assert_eq_vec, expr_from_projection, join,
number, only, table, table_alias, TestedDialects,
all_dialects, all_dialects_where, alter_table_op, assert_eq_vec, call, expr_from_projection,
join, number, only, table, table_alias, TestedDialects,
};
#[macro_use]
@ -8887,6 +8887,299 @@ fn parse_map_access_expr() {
}
}
// Checks `MATCH_RECOGNIZE` parsing in two steps: one query whose resulting
// table factor is compared field by field against a hand-built AST, then a
// few additional example queries that only go through `verified_query`.
// Both steps run on every dialect that reports `supports_match_recognize()`.
#[test]
fn test_match_recognize() {
use MatchRecognizePattern::*;
use MatchRecognizeSymbol::*;
use RepetitionQuantifier::*;
// The table the `MATCH_RECOGNIZE` operation is applied to in `check`.
let table = TableFactor::Table {
name: ObjectName(vec![Ident::new("my_table")]),
alias: None,
args: None,
with_hints: vec![],
version: None,
partitions: vec![],
};
// Wraps `options` in `SELECT * FROM my_table MATCH_RECOGNIZE(...)` and
// compares the first FROM relation with `expect`.
fn check(options: &str, expect: TableFactor) {
let select = all_dialects_where(|d| d.supports_match_recognize()).verified_only_select(
&format!("SELECT * FROM my_table MATCH_RECOGNIZE({options})"),
);
assert_eq!(&select.from[0].relation, &expect);
}
// A query using every subclause, checked against the full expected AST.
check(
concat!(
"PARTITION BY company ",
"ORDER BY price_date ",
"MEASURES ",
"MATCH_NUMBER() AS match_number, ",
"FIRST(price_date) AS start_date, ",
"LAST(price_date) AS end_date ",
"ONE ROW PER MATCH ",
"AFTER MATCH SKIP TO LAST row_with_price_increase ",
"PATTERN (row_before_decrease row_with_price_decrease+ row_with_price_increase+) ",
"DEFINE ",
"row_with_price_decrease AS price < LAG(price), ",
"row_with_price_increase AS price > LAG(price)"
),
TableFactor::MatchRecognize {
table: Box::new(table),
partition_by: vec![Expr::Identifier(Ident::new("company"))],
order_by: vec![OrderByExpr {
expr: Expr::Identifier(Ident::new("price_date")),
asc: None,
nulls_first: None,
}],
measures: vec![
Measure {
expr: call("MATCH_NUMBER", []),
alias: Ident::new("match_number"),
},
Measure {
expr: call("FIRST", [Expr::Identifier(Ident::new("price_date"))]),
alias: Ident::new("start_date"),
},
Measure {
expr: call("LAST", [Expr::Identifier(Ident::new("price_date"))]),
alias: Ident::new("end_date"),
},
],
rows_per_match: Some(RowsPerMatch::OneRow),
after_match_skip: Some(AfterMatchSkip::ToLast(Ident::new(
"row_with_price_increase",
))),
pattern: Concat(vec![
Symbol(Named(Ident::new("row_before_decrease"))),
Repetition(
Box::new(Symbol(Named(Ident::new("row_with_price_decrease")))),
OneOrMore,
),
Repetition(
Box::new(Symbol(Named(Ident::new("row_with_price_increase")))),
OneOrMore,
),
]),
symbols: vec![
SymbolDefinition {
symbol: Ident::new("row_with_price_decrease"),
definition: Expr::BinaryOp {
left: Box::new(Expr::Identifier(Ident::new("price"))),
op: BinaryOperator::Lt,
right: Box::new(call("LAG", [Expr::Identifier(Ident::new("price"))])),
},
},
SymbolDefinition {
symbol: Ident::new("row_with_price_increase"),
definition: Expr::BinaryOp {
left: Box::new(Expr::Identifier(Ident::new("price"))),
op: BinaryOperator::Gt,
right: Box::new(call("LAG", [Expr::Identifier(Ident::new("price"))])),
},
},
],
alias: None,
},
);
// Further example queries; these are only passed to `verified_query`,
// their AST is not inspected here.
#[rustfmt::skip]
let examples = [
concat!(
"SELECT * ",
"FROM login_attempts ",
"MATCH_RECOGNIZE(",
"PARTITION BY user_id ",
"ORDER BY timestamp ",
"PATTERN (failed_attempt{3,}) ",
"DEFINE ",
"failed_attempt AS status = 'failure'",
")",
),
concat!(
"SELECT * ",
"FROM stock_transactions ",
"MATCH_RECOGNIZE(",
"PARTITION BY symbol ",
"ORDER BY timestamp ",
"MEASURES ",
"FIRST(price) AS start_price, ",
"LAST(price) AS end_price, ",
"MATCH_NUMBER() AS match_num ",
"ALL ROWS PER MATCH ",
"PATTERN (STRT UP+) ",
"DEFINE ",
"UP AS price > PREV(price)",
")",
),
concat!(
"SELECT * ",
"FROM event_log ",
"MATCH_RECOGNIZE(",
"MEASURES ",
"FIRST(event_type) AS start_event, ",
"LAST(event_type) AS end_event, ",
"COUNT(*) AS error_count ",
"ALL ROWS PER MATCH ",
"PATTERN (STRT ERROR+ END) ",
"DEFINE ",
"STRT AS event_type = 'START', ",
"ERROR AS event_type = 'ERROR', ",
"END AS event_type = 'END'",
")",
)
];
for sql in examples {
all_dialects_where(|d| d.supports_match_recognize()).verified_query(sql);
}
}
// Checks the `PATTERN (...)` grammar of `MATCH_RECOGNIZE` in isolation: each
// case embeds a pattern in a minimal query and compares the parsed
// `MatchRecognizePattern` with the expected tree.
#[test]
fn test_match_recognize_patterns() {
use MatchRecognizePattern::*;
use MatchRecognizeSymbol::*;
use RepetitionQuantifier::*;
// Parses `pattern` inside a minimal `MATCH_RECOGNIZE` clause (with a dummy
// `DEFINE`) and asserts that the resulting pattern equals `expect`.
fn check(pattern: &str, expect: MatchRecognizePattern) {
let select =
all_dialects_where(|d| d.supports_match_recognize()).verified_only_select(&format!(
"SELECT * FROM my_table MATCH_RECOGNIZE(PATTERN ({pattern}) DEFINE DUMMY AS true)"
));
let TableFactor::MatchRecognize {
pattern: actual, ..
} = &select.from[0].relation
else {
panic!("expected match_recognize table factor");
};
assert_eq!(actual, &expect);
}
// just a symbol
check("FOO", Symbol(Named(Ident::new("FOO"))));
// a symbol between the start (`^`) and end (`$`) virtual symbols
check(
"^ FOO $",
Concat(vec![
Symbol(Start),
Symbol(Named(Ident::new("FOO"))),
Symbol(End),
]),
);
// exclusion
check("{- FOO -}", Exclude(Named(Ident::new("FOO"))));
// permutation
check(
"PERMUTE(A, B, C)",
Permute(vec![
Named(Ident::new("A")),
Named(Ident::new("B")),
Named(Ident::new("C")),
]),
);
// various identifiers
check(
"FOO | \"BAR\" | baz42",
Alternation(vec![
Symbol(Named(Ident::new("FOO"))),
Symbol(Named(Ident::with_quote('"', "BAR"))),
Symbol(Named(Ident::new("baz42"))),
]),
);
// concatenated basic quantifiers
check(
"S1* S2+ S3?",
Concat(vec![
Repetition(Box::new(Symbol(Named(Ident::new("S1")))), ZeroOrMore),
Repetition(Box::new(Symbol(Named(Ident::new("S2")))), OneOrMore),
Repetition(Box::new(Symbol(Named(Ident::new("S3")))), AtMostOne),
]),
);
// double repetition
check(
"S2*?",
Repetition(
Box::new(Repetition(
Box::new(Symbol(Named(Ident::new("S2")))),
ZeroOrMore,
)),
AtMostOne,
),
);
// range quantifiers in an alternation
check(
"S1{1} | S2{2,3} | S3{4,} | S4{,5}",
Alternation(vec![
Repetition(Box::new(Symbol(Named(Ident::new("S1")))), Exactly(1)),
Repetition(Box::new(Symbol(Named(Ident::new("S2")))), Range(2, 3)),
Repetition(Box::new(Symbol(Named(Ident::new("S3")))), AtLeast(4)),
Repetition(Box::new(Symbol(Named(Ident::new("S4")))), AtMost(5)),
]),
);
// grouping case 1
check(
"S1 ( S2 )",
Concat(vec![
Symbol(Named(Ident::new("S1"))),
Group(Box::new(Symbol(Named(Ident::new("S2"))))),
]),
);
// grouping case 2
check(
"( {- S3 -} S4 )+",
Repetition(
Box::new(Group(Box::new(Concat(vec![
Exclude(Named(Ident::new("S3"))),
Symbol(Named(Ident::new("S4"))),
])))),
OneOrMore,
),
);
// the grand finale (example taken from snowflake docs)
check(
"^ S1 S2*? ( {- S3 -} S4 )+ | PERMUTE(S1, S2){1,2} $",
Alternation(vec![
Concat(vec![
Symbol(Start),
Symbol(Named(Ident::new("S1"))),
Repetition(
Box::new(Repetition(
Box::new(Symbol(Named(Ident::new("S2")))),
ZeroOrMore,
)),
AtMostOne,
),
Repetition(
Box::new(Group(Box::new(Concat(vec![
Exclude(Named(Ident::new("S3"))),
Symbol(Named(Ident::new("S4"))),
])))),
OneOrMore,
),
]),
Concat(vec![
Repetition(
Box::new(Permute(vec![
Named(Ident::new("S1")),
Named(Ident::new("S2")),
])),
Range(1, 2),
),
Symbol(End),
]),
]),
);
}
#[test]
fn test_select_wildcard_with_replace() {
let sql = r#"SELECT * REPLACE (lower(city) AS city) FROM addresses"#;