mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-08-30 18:57:21 +00:00
Support Snowflake MATCH_RECOGNIZE
syntax (#1222)
This commit is contained in:
parent
bf89b7d808
commit
39980e8976
9 changed files with 847 additions and 9 deletions
|
@ -40,13 +40,15 @@ pub use self::ddl::{
|
|||
pub use self::dml::{Delete, Insert};
|
||||
pub use self::operator::{BinaryOperator, UnaryOperator};
|
||||
pub use self::query::{
|
||||
Cte, CteAsMaterialized, Distinct, ExceptSelectItem, ExcludeSelectItem, Fetch, ForClause,
|
||||
ForJson, ForXml, GroupByExpr, IdentWithAlias, IlikeSelectItem, Join, JoinConstraint,
|
||||
JoinOperator, JsonTableColumn, JsonTableColumnErrorHandling, LateralView, LockClause, LockType,
|
||||
NamedWindowDefinition, NonBlock, Offset, OffsetRows, OrderByExpr, Query, RenameSelectItem,
|
||||
ReplaceSelectElement, ReplaceSelectItem, Select, SelectInto, SelectItem, SetExpr, SetOperator,
|
||||
SetQuantifier, Table, TableAlias, TableFactor, TableVersion, TableWithJoins, Top, TopQuantity,
|
||||
ValueTableMode, Values, WildcardAdditionalOptions, With,
|
||||
AfterMatchSkip, Cte, CteAsMaterialized, Distinct, EmptyMatchesMode, ExceptSelectItem,
|
||||
ExcludeSelectItem, Fetch, ForClause, ForJson, ForXml, GroupByExpr, IdentWithAlias,
|
||||
IlikeSelectItem, Join, JoinConstraint, JoinOperator, JsonTableColumn,
|
||||
JsonTableColumnErrorHandling, LateralView, LockClause, LockType, MatchRecognizePattern,
|
||||
MatchRecognizeSymbol, Measure, NamedWindowDefinition, NonBlock, Offset, OffsetRows,
|
||||
OrderByExpr, Query, RenameSelectItem, RepetitionQuantifier, ReplaceSelectElement,
|
||||
ReplaceSelectItem, RowsPerMatch, Select, SelectInto, SelectItem, SetExpr, SetOperator,
|
||||
SetQuantifier, SymbolDefinition, Table, TableAlias, TableFactor, TableVersion, TableWithJoins,
|
||||
Top, TopQuantity, ValueTableMode, Values, WildcardAdditionalOptions, With,
|
||||
};
|
||||
pub use self::value::{
|
||||
escape_quoted_string, DateTimeField, DollarQuotedString, TrimWhereField, Value,
|
||||
|
|
266
src/ast/query.rs
266
src/ast/query.rs
|
@ -852,6 +852,238 @@ pub enum TableFactor {
|
|||
columns: Vec<Ident>,
|
||||
alias: Option<TableAlias>,
|
||||
},
|
||||
/// A `MATCH_RECOGNIZE` operation on a table.
|
||||
///
|
||||
/// See <https://docs.snowflake.com/en/sql-reference/constructs/match_recognize>.
|
||||
MatchRecognize {
|
||||
table: Box<TableFactor>,
|
||||
/// `PARTITION BY <expr> [, ... ]`
|
||||
partition_by: Vec<Expr>,
|
||||
/// `ORDER BY <expr> [, ... ]`
|
||||
order_by: Vec<OrderByExpr>,
|
||||
/// `MEASURES <expr> [AS] <alias> [, ... ]`
|
||||
measures: Vec<Measure>,
|
||||
/// `ONE ROW PER MATCH | ALL ROWS PER MATCH [ <option> ]`
|
||||
rows_per_match: Option<RowsPerMatch>,
|
||||
/// `AFTER MATCH SKIP <option>`
|
||||
after_match_skip: Option<AfterMatchSkip>,
|
||||
/// `PATTERN ( <pattern> )`
|
||||
pattern: MatchRecognizePattern,
|
||||
/// `DEFINE <symbol> AS <expr> [, ... ]`
|
||||
symbols: Vec<SymbolDefinition>,
|
||||
alias: Option<TableAlias>,
|
||||
},
|
||||
}
|
||||
|
||||
/// An item in the `MEASURES` subclause of a `MATCH_RECOGNIZE` operation.
|
||||
///
|
||||
/// See <https://docs.snowflake.com/en/sql-reference/constructs/match_recognize#measures-specifying-additional-output-columns>.
|
||||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
|
||||
pub struct Measure {
|
||||
pub expr: Expr,
|
||||
pub alias: Ident,
|
||||
}
|
||||
|
||||
impl fmt::Display for Measure {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "{} AS {}", self.expr, self.alias)
|
||||
}
|
||||
}
|
||||
|
||||
/// The rows per match option in a `MATCH_RECOGNIZE` operation.
|
||||
///
|
||||
/// See <https://docs.snowflake.com/en/sql-reference/constructs/match_recognize#row-s-per-match-specifying-the-rows-to-return>.
|
||||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
|
||||
pub enum RowsPerMatch {
|
||||
/// `ONE ROW PER MATCH`
|
||||
OneRow,
|
||||
/// `ALL ROWS PER MATCH <mode>`
|
||||
AllRows(Option<EmptyMatchesMode>),
|
||||
}
|
||||
|
||||
impl fmt::Display for RowsPerMatch {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
RowsPerMatch::OneRow => write!(f, "ONE ROW PER MATCH"),
|
||||
RowsPerMatch::AllRows(mode) => {
|
||||
write!(f, "ALL ROWS PER MATCH")?;
|
||||
if let Some(mode) = mode {
|
||||
write!(f, " {}", mode)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The after match skip option in a `MATCH_RECOGNIZE` operation.
|
||||
///
|
||||
/// See <https://docs.snowflake.com/en/sql-reference/constructs/match_recognize#after-match-skip-specifying-where-to-continue-after-a-match>.
|
||||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
|
||||
pub enum AfterMatchSkip {
|
||||
/// `PAST LAST ROW`
|
||||
PastLastRow,
|
||||
/// `TO NEXT ROW`
|
||||
ToNextRow,
|
||||
/// `TO FIRST <symbol>`
|
||||
ToFirst(Ident),
|
||||
/// `TO LAST <symbol>`
|
||||
ToLast(Ident),
|
||||
}
|
||||
|
||||
impl fmt::Display for AfterMatchSkip {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "AFTER MATCH SKIP ")?;
|
||||
match self {
|
||||
AfterMatchSkip::PastLastRow => write!(f, "PAST LAST ROW"),
|
||||
AfterMatchSkip::ToNextRow => write!(f, " TO NEXT ROW"),
|
||||
AfterMatchSkip::ToFirst(symbol) => write!(f, "TO FIRST {symbol}"),
|
||||
AfterMatchSkip::ToLast(symbol) => write!(f, "TO LAST {symbol}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The optional mode that may follow `ALL ROWS PER MATCH` in a
/// `MATCH_RECOGNIZE` operation.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum EmptyMatchesMode {
    /// `SHOW EMPTY MATCHES`
    Show,
    /// `OMIT EMPTY MATCHES`
    Omit,
    /// `WITH UNMATCHED ROWS`
    WithUnmatched,
}

impl fmt::Display for EmptyMatchesMode {
    /// Writes the SQL keywords for the mode.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let keywords = match self {
            Self::Show => "SHOW EMPTY MATCHES",
            Self::Omit => "OMIT EMPTY MATCHES",
            Self::WithUnmatched => "WITH UNMATCHED ROWS",
        };
        f.write_str(keywords)
    }
}
|
||||
|
||||
/// A symbol defined in a `MATCH_RECOGNIZE` operation.
|
||||
///
|
||||
/// See <https://docs.snowflake.com/en/sql-reference/constructs/match_recognize#define-defining-symbols>.
|
||||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
|
||||
pub struct SymbolDefinition {
|
||||
pub symbol: Ident,
|
||||
pub definition: Expr,
|
||||
}
|
||||
|
||||
impl fmt::Display for SymbolDefinition {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "{} AS {}", self.symbol, self.definition)
|
||||
}
|
||||
}
|
||||
|
||||
/// A symbol in a `MATCH_RECOGNIZE` pattern.
|
||||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
|
||||
pub enum MatchRecognizeSymbol {
|
||||
/// A named symbol, e.g. `S1`.
|
||||
Named(Ident),
|
||||
/// A virtual symbol representing the start of the of partition (`^`).
|
||||
Start,
|
||||
/// A virtual symbol representing the end of the partition (`$`).
|
||||
End,
|
||||
}
|
||||
|
||||
impl fmt::Display for MatchRecognizeSymbol {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
MatchRecognizeSymbol::Named(symbol) => write!(f, "{symbol}"),
|
||||
MatchRecognizeSymbol::Start => write!(f, "^"),
|
||||
MatchRecognizeSymbol::End => write!(f, "$"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The pattern in a `MATCH_RECOGNIZE` operation.
|
||||
///
|
||||
/// See <https://docs.snowflake.com/en/sql-reference/constructs/match_recognize#pattern-specifying-the-pattern-to-match>.
|
||||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
|
||||
pub enum MatchRecognizePattern {
|
||||
/// A named symbol such as `S1` or a virtual symbol such as `^`.
|
||||
Symbol(MatchRecognizeSymbol),
|
||||
/// {- symbol -}
|
||||
Exclude(MatchRecognizeSymbol),
|
||||
/// PERMUTE(symbol_1, ..., symbol_n)
|
||||
Permute(Vec<MatchRecognizeSymbol>),
|
||||
/// pattern_1 pattern_2 ... pattern_n
|
||||
Concat(Vec<MatchRecognizePattern>),
|
||||
/// ( pattern )
|
||||
Group(Box<MatchRecognizePattern>),
|
||||
/// pattern_1 | pattern_2 | ... | pattern_n
|
||||
Alternation(Vec<MatchRecognizePattern>),
|
||||
/// e.g. pattern*
|
||||
Repetition(Box<MatchRecognizePattern>, RepetitionQuantifier),
|
||||
}
|
||||
|
||||
impl fmt::Display for MatchRecognizePattern {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
use MatchRecognizePattern::*;
|
||||
match self {
|
||||
Symbol(symbol) => write!(f, "{}", symbol),
|
||||
Exclude(symbol) => write!(f, "{{- {symbol} -}}"),
|
||||
Permute(symbols) => write!(f, "PERMUTE({})", display_comma_separated(symbols)),
|
||||
Concat(patterns) => write!(f, "{}", display_separated(patterns, " ")),
|
||||
Group(pattern) => write!(f, "( {pattern} )"),
|
||||
Alternation(patterns) => write!(f, "{}", display_separated(patterns, " | ")),
|
||||
Repetition(pattern, op) => write!(f, "{pattern}{op}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The quantifier attached to a pattern in a `MATCH_RECOGNIZE` operation,
/// bounding how many times that pattern may occur.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum RepetitionQuantifier {
    /// `*`
    ZeroOrMore,
    /// `+`
    OneOrMore,
    /// `?`
    AtMostOne,
    /// `{n}`
    Exactly(u32),
    /// `{n,}`
    AtLeast(u32),
    /// `{,n}`
    AtMost(u32),
    /// `{n,m}`
    Range(u32, u32),
}

impl fmt::Display for RepetitionQuantifier {
    /// Writes the quantifier in pattern syntax, without surrounding whitespace.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match *self {
            Self::ZeroOrMore => f.write_str("*"),
            Self::OneOrMore => f.write_str("+"),
            Self::AtMostOne => f.write_str("?"),
            // `{{` and `}}` are escaped braces; the innermost pair is the placeholder.
            Self::Exactly(count) => write!(f, "{{{count}}}"),
            Self::AtLeast(min) => write!(f, "{{{min},}}"),
            Self::AtMost(max) => write!(f, "{{,{max}}}"),
            Self::Range(min, max) => write!(f, "{{{min},{max}}}"),
        }
    }
}
|
||||
|
||||
impl fmt::Display for TableFactor {
|
||||
|
@ -1005,6 +1237,40 @@ impl fmt::Display for TableFactor {
|
|||
}
|
||||
Ok(())
|
||||
}
|
||||
TableFactor::MatchRecognize {
    table,
    partition_by,
    order_by,
    measures,
    rows_per_match,
    after_match_skip,
    pattern,
    symbols,
    alias,
} => {
    // Subclauses are written in the same order the parser reads them.
    // Each optional subclause carries its own trailing space so that
    // omitted ones leave no gap.
    write!(f, "{table} MATCH_RECOGNIZE(")?;
    if !partition_by.is_empty() {
        write!(f, "PARTITION BY {} ", display_comma_separated(partition_by))?;
    }
    if !order_by.is_empty() {
        write!(f, "ORDER BY {} ", display_comma_separated(order_by))?;
    }
    if !measures.is_empty() {
        write!(f, "MEASURES {} ", display_comma_separated(measures))?;
    }
    if let Some(rows_per_match) = rows_per_match {
        write!(f, "{rows_per_match} ")?;
    }
    if let Some(after_match_skip) = after_match_skip {
        write!(f, "{after_match_skip} ")?;
    }
    write!(f, "PATTERN ({pattern}) ")?;
    write!(f, "DEFINE {})", display_comma_separated(symbols))?;
    // Bind the alias directly rather than testing `is_some()` and unwrapping.
    if let Some(alias) = alias {
        write!(f, " AS {alias}")?;
    }
    Ok(())
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -39,6 +39,10 @@ impl Dialect for GenericDialect {
|
|||
true
|
||||
}
|
||||
|
||||
/// The generic dialect reports support for the `MATCH_RECOGNIZE` operation.
fn supports_match_recognize(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
/// Always reports `true` for the generic dialect.
// NOTE(review): the meaning of this flag is defined on the `Dialect` trait,
// which is not visible here — confirm against the trait documentation.
fn supports_start_transaction_modifier(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
|
|
@ -152,6 +152,10 @@ pub trait Dialect: Debug + Any {
|
|||
fn supports_group_by_expr(&self) -> bool {
|
||||
false
|
||||
}
|
||||
/// Returns true if the dialect supports the `MATCH_RECOGNIZE` operation.
///
/// The default implementation returns `false`; a dialect opts in by
/// overriding this method.
|
||||
fn supports_match_recognize(&self) -> bool {
|
||||
false
|
||||
}
|
||||
/// Returns true if the dialect supports `(NOT) IN ()` expressions
|
||||
fn supports_in_empty_list(&self) -> bool {
|
||||
false
|
||||
|
|
|
@ -55,6 +55,10 @@ impl Dialect for SnowflakeDialect {
|
|||
true
|
||||
}
|
||||
|
||||
/// The Snowflake dialect reports support for the `MATCH_RECOGNIZE` operation.
fn supports_match_recognize(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn parse_statement(&self, parser: &mut Parser) -> Option<Result<Statement, ParserError>> {
|
||||
if parser.parse_keyword(Keyword::CREATE) {
|
||||
// possibly CREATE STAGE
|
||||
|
|
|
@ -214,6 +214,7 @@ define_keywords!(
|
|||
DEFAULT,
|
||||
DEFERRABLE,
|
||||
DEFERRED,
|
||||
DEFINE,
|
||||
DEFINED,
|
||||
DELAYED,
|
||||
DELETE,
|
||||
|
@ -418,9 +419,12 @@ define_keywords!(
|
|||
MAP,
|
||||
MATCH,
|
||||
MATCHED,
|
||||
MATCHES,
|
||||
MATCH_RECOGNIZE,
|
||||
MATERIALIZED,
|
||||
MAX,
|
||||
MAXVALUE,
|
||||
MEASURES,
|
||||
MEDIUMINT,
|
||||
MEMBER,
|
||||
MERGE,
|
||||
|
@ -482,7 +486,9 @@ define_keywords!(
|
|||
OF,
|
||||
OFFSET,
|
||||
OLD,
|
||||
OMIT,
|
||||
ON,
|
||||
ONE,
|
||||
ONLY,
|
||||
OPEN,
|
||||
OPERATOR,
|
||||
|
@ -509,8 +515,10 @@ define_keywords!(
|
|||
PARTITIONED,
|
||||
PARTITIONS,
|
||||
PASSWORD,
|
||||
PAST,
|
||||
PATH,
|
||||
PATTERN,
|
||||
PER,
|
||||
PERCENT,
|
||||
PERCENTILE_CONT,
|
||||
PERCENTILE_DISC,
|
||||
|
@ -712,6 +720,7 @@ define_keywords!(
|
|||
UNLOAD,
|
||||
UNLOCK,
|
||||
UNLOGGED,
|
||||
UNMATCHED,
|
||||
UNNEST,
|
||||
UNPIVOT,
|
||||
UNSAFE,
|
||||
|
@ -808,6 +817,8 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[
|
|||
Keyword::FOR,
|
||||
// for MYSQL PARTITION SELECTION
|
||||
Keyword::PARTITION,
|
||||
// Reserved for snowflake MATCH_RECOGNIZE
|
||||
Keyword::MATCH_RECOGNIZE,
|
||||
];
|
||||
|
||||
/// Can't be used as a column alias, so that `SELECT <expr> alias`
|
||||
|
|
|
@ -8228,6 +8228,7 @@ impl<'a> Parser<'a> {
|
|||
| TableFactor::TableFunction { alias, .. }
|
||||
| TableFactor::Pivot { alias, .. }
|
||||
| TableFactor::Unpivot { alias, .. }
|
||||
| TableFactor::MatchRecognize { alias, .. }
|
||||
| TableFactor::NestedJoin { alias, .. } => {
|
||||
// but not `FROM (mytable AS alias1) AS alias2`.
|
||||
if let Some(inner_alias) = alias {
|
||||
|
@ -8351,10 +8352,246 @@ impl<'a> Parser<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
if self.dialect.supports_match_recognize()
|
||||
&& self.parse_keyword(Keyword::MATCH_RECOGNIZE)
|
||||
{
|
||||
table = self.parse_match_recognize(table)?;
|
||||
}
|
||||
|
||||
Ok(table)
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_match_recognize(&mut self, table: TableFactor) -> Result<TableFactor, ParserError> {
|
||||
self.expect_token(&Token::LParen)?;
|
||||
|
||||
let partition_by = if self.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) {
|
||||
self.parse_comma_separated(Parser::parse_expr)?
|
||||
} else {
|
||||
vec![]
|
||||
};
|
||||
|
||||
let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) {
|
||||
self.parse_comma_separated(Parser::parse_order_by_expr)?
|
||||
} else {
|
||||
vec![]
|
||||
};
|
||||
|
||||
let measures = if self.parse_keyword(Keyword::MEASURES) {
|
||||
self.parse_comma_separated(|p| {
|
||||
let expr = p.parse_expr()?;
|
||||
let _ = p.parse_keyword(Keyword::AS);
|
||||
let alias = p.parse_identifier(false)?;
|
||||
Ok(Measure { expr, alias })
|
||||
})?
|
||||
} else {
|
||||
vec![]
|
||||
};
|
||||
|
||||
let rows_per_match =
|
||||
if self.parse_keywords(&[Keyword::ONE, Keyword::ROW, Keyword::PER, Keyword::MATCH]) {
|
||||
Some(RowsPerMatch::OneRow)
|
||||
} else if self.parse_keywords(&[
|
||||
Keyword::ALL,
|
||||
Keyword::ROWS,
|
||||
Keyword::PER,
|
||||
Keyword::MATCH,
|
||||
]) {
|
||||
Some(RowsPerMatch::AllRows(
|
||||
if self.parse_keywords(&[Keyword::SHOW, Keyword::EMPTY, Keyword::MATCHES]) {
|
||||
Some(EmptyMatchesMode::Show)
|
||||
} else if self.parse_keywords(&[
|
||||
Keyword::OMIT,
|
||||
Keyword::EMPTY,
|
||||
Keyword::MATCHES,
|
||||
]) {
|
||||
Some(EmptyMatchesMode::Omit)
|
||||
} else if self.parse_keywords(&[
|
||||
Keyword::WITH,
|
||||
Keyword::UNMATCHED,
|
||||
Keyword::ROWS,
|
||||
]) {
|
||||
Some(EmptyMatchesMode::WithUnmatched)
|
||||
} else {
|
||||
None
|
||||
},
|
||||
))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let after_match_skip =
|
||||
if self.parse_keywords(&[Keyword::AFTER, Keyword::MATCH, Keyword::SKIP]) {
|
||||
if self.parse_keywords(&[Keyword::PAST, Keyword::LAST, Keyword::ROW]) {
|
||||
Some(AfterMatchSkip::PastLastRow)
|
||||
} else if self.parse_keywords(&[Keyword::TO, Keyword::NEXT, Keyword::ROW]) {
|
||||
Some(AfterMatchSkip::ToNextRow)
|
||||
} else if self.parse_keywords(&[Keyword::TO, Keyword::FIRST]) {
|
||||
Some(AfterMatchSkip::ToFirst(self.parse_identifier(false)?))
|
||||
} else if self.parse_keywords(&[Keyword::TO, Keyword::LAST]) {
|
||||
Some(AfterMatchSkip::ToLast(self.parse_identifier(false)?))
|
||||
} else {
|
||||
let found = self.next_token();
|
||||
return self.expected("after match skip option", found);
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
self.expect_keyword(Keyword::PATTERN)?;
|
||||
self.expect_token(&Token::LParen)?;
|
||||
let pattern = self.parse_pattern()?;
|
||||
self.expect_token(&Token::RParen)?;
|
||||
|
||||
self.expect_keyword(Keyword::DEFINE)?;
|
||||
|
||||
let symbols = self.parse_comma_separated(|p| {
|
||||
let symbol = p.parse_identifier(false)?;
|
||||
p.expect_keyword(Keyword::AS)?;
|
||||
let definition = p.parse_expr()?;
|
||||
Ok(SymbolDefinition { symbol, definition })
|
||||
})?;
|
||||
|
||||
self.expect_token(&Token::RParen)?;
|
||||
|
||||
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
|
||||
|
||||
Ok(TableFactor::MatchRecognize {
|
||||
table: Box::new(table),
|
||||
partition_by,
|
||||
order_by,
|
||||
measures,
|
||||
rows_per_match,
|
||||
after_match_skip,
|
||||
pattern,
|
||||
symbols,
|
||||
alias,
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_base_pattern(&mut self) -> Result<MatchRecognizePattern, ParserError> {
|
||||
match self.next_token().token {
|
||||
Token::Caret => Ok(MatchRecognizePattern::Symbol(MatchRecognizeSymbol::Start)),
|
||||
Token::Placeholder(s) if s == "$" => {
|
||||
Ok(MatchRecognizePattern::Symbol(MatchRecognizeSymbol::End))
|
||||
}
|
||||
Token::LBrace => {
|
||||
self.expect_token(&Token::Minus)?;
|
||||
let symbol = self
|
||||
.parse_identifier(false)
|
||||
.map(MatchRecognizeSymbol::Named)?;
|
||||
self.expect_token(&Token::Minus)?;
|
||||
self.expect_token(&Token::RBrace)?;
|
||||
Ok(MatchRecognizePattern::Exclude(symbol))
|
||||
}
|
||||
Token::Word(Word {
|
||||
value,
|
||||
quote_style: None,
|
||||
..
|
||||
}) if value == "PERMUTE" => {
|
||||
self.expect_token(&Token::LParen)?;
|
||||
let symbols = self.parse_comma_separated(|p| {
|
||||
p.parse_identifier(false).map(MatchRecognizeSymbol::Named)
|
||||
})?;
|
||||
self.expect_token(&Token::RParen)?;
|
||||
Ok(MatchRecognizePattern::Permute(symbols))
|
||||
}
|
||||
Token::LParen => {
|
||||
let pattern = self.parse_pattern()?;
|
||||
self.expect_token(&Token::RParen)?;
|
||||
Ok(MatchRecognizePattern::Group(Box::new(pattern)))
|
||||
}
|
||||
_ => {
|
||||
self.prev_token();
|
||||
self.parse_identifier(false)
|
||||
.map(MatchRecognizeSymbol::Named)
|
||||
.map(MatchRecognizePattern::Symbol)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_repetition_pattern(&mut self) -> Result<MatchRecognizePattern, ParserError> {
|
||||
let mut pattern = self.parse_base_pattern()?;
|
||||
loop {
|
||||
let token = self.next_token();
|
||||
let quantifier = match token.token {
|
||||
Token::Mul => RepetitionQuantifier::ZeroOrMore,
|
||||
Token::Plus => RepetitionQuantifier::OneOrMore,
|
||||
Token::Placeholder(s) if s == "?" => RepetitionQuantifier::AtMostOne,
|
||||
Token::LBrace => {
|
||||
// quantifier is a range like {n} or {n,} or {,m} or {n,m}
|
||||
let token = self.next_token();
|
||||
match token.token {
|
||||
Token::Comma => {
|
||||
let next_token = self.next_token();
|
||||
let Token::Number(n, _) = next_token.token else {
|
||||
return self.expected("literal number", next_token);
|
||||
};
|
||||
self.expect_token(&Token::RBrace)?;
|
||||
RepetitionQuantifier::AtMost(n.parse().expect("literal int"))
|
||||
}
|
||||
Token::Number(n, _) if self.consume_token(&Token::Comma) => {
|
||||
let next_token = self.next_token();
|
||||
match next_token.token {
|
||||
Token::Number(m, _) => {
|
||||
self.expect_token(&Token::RBrace)?;
|
||||
RepetitionQuantifier::Range(
|
||||
n.parse().expect("literal int"),
|
||||
m.parse().expect("literal int"),
|
||||
)
|
||||
}
|
||||
Token::RBrace => {
|
||||
RepetitionQuantifier::AtLeast(n.parse().expect("literal int"))
|
||||
}
|
||||
_ => {
|
||||
return self.expected("} or upper bound", next_token);
|
||||
}
|
||||
}
|
||||
}
|
||||
Token::Number(n, _) => {
|
||||
self.expect_token(&Token::RBrace)?;
|
||||
RepetitionQuantifier::Exactly(n.parse().expect("literal int"))
|
||||
}
|
||||
_ => return self.expected("quantifier range", token),
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
self.prev_token();
|
||||
break;
|
||||
}
|
||||
};
|
||||
pattern = MatchRecognizePattern::Repetition(Box::new(pattern), quantifier);
|
||||
}
|
||||
Ok(pattern)
|
||||
}
|
||||
|
||||
fn parse_concat_pattern(&mut self) -> Result<MatchRecognizePattern, ParserError> {
|
||||
let mut patterns = vec![self.parse_repetition_pattern()?];
|
||||
while !matches!(self.peek_token().token, Token::RParen | Token::Pipe) {
|
||||
patterns.push(self.parse_repetition_pattern()?);
|
||||
}
|
||||
match <[MatchRecognizePattern; 1]>::try_from(patterns) {
|
||||
Ok([pattern]) => Ok(pattern),
|
||||
Err(patterns) => Ok(MatchRecognizePattern::Concat(patterns)),
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_pattern(&mut self) -> Result<MatchRecognizePattern, ParserError> {
|
||||
let pattern = self.parse_concat_pattern()?;
|
||||
if self.consume_token(&Token::Pipe) {
|
||||
match self.parse_pattern()? {
|
||||
// flatten nested alternations
|
||||
MatchRecognizePattern::Alternation(mut patterns) => {
|
||||
patterns.insert(0, pattern);
|
||||
Ok(MatchRecognizePattern::Alternation(patterns))
|
||||
}
|
||||
next => Ok(MatchRecognizePattern::Alternation(vec![pattern, next])),
|
||||
}
|
||||
} else {
|
||||
Ok(pattern)
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse a given table version specifier.
|
||||
///
|
||||
/// For now it only supports timestamp versioning for BigQuery and MSSQL dialects.
|
||||
|
|
|
@ -307,3 +307,20 @@ pub fn join(relation: TableFactor) -> Join {
|
|||
join_operator: JoinOperator::Inner(JoinConstraint::Natural),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn call(function: &str, args: impl IntoIterator<Item = Expr>) -> Expr {
|
||||
Expr::Function(Function {
|
||||
name: ObjectName(vec![Ident::new(function)]),
|
||||
args: args
|
||||
.into_iter()
|
||||
.map(FunctionArgExpr::Expr)
|
||||
.map(FunctionArg::Unnamed)
|
||||
.collect(),
|
||||
filter: None,
|
||||
null_treatment: None,
|
||||
over: None,
|
||||
distinct: false,
|
||||
special: false,
|
||||
order_by: vec![],
|
||||
})
|
||||
}
|
||||
|
|
|
@ -33,8 +33,8 @@ use sqlparser::keywords::ALL_KEYWORDS;
|
|||
use sqlparser::parser::{Parser, ParserError, ParserOptions};
|
||||
use sqlparser::tokenizer::Tokenizer;
|
||||
use test_utils::{
|
||||
all_dialects, all_dialects_where, alter_table_op, assert_eq_vec, expr_from_projection, join,
|
||||
number, only, table, table_alias, TestedDialects,
|
||||
all_dialects, all_dialects_where, alter_table_op, assert_eq_vec, call, expr_from_projection,
|
||||
join, number, only, table, table_alias, TestedDialects,
|
||||
};
|
||||
|
||||
#[macro_use]
|
||||
|
@ -8887,6 +8887,299 @@ fn parse_map_access_expr() {
|
|||
}
|
||||
}
|
||||
|
||||
/// Checks the AST produced for a complete `MATCH_RECOGNIZE` clause and
/// round-trips a few further example queries through every dialect that
/// supports the clause.
#[test]
fn test_match_recognize() {
    use MatchRecognizePattern::*;
    use MatchRecognizeSymbol::*;
    use RepetitionQuantifier::*;

    // Small builders that keep the expected AST readable.
    let column = |name: &str| Expr::Identifier(Ident::new(name));
    let named = |name: &str| Symbol(Named(Ident::new(name)));
    let price_compared_to_lag = |op: BinaryOperator| Expr::BinaryOp {
        left: Box::new(column("price")),
        op,
        right: Box::new(call("LAG", [column("price")])),
    };

    let options = concat!(
        "PARTITION BY company ",
        "ORDER BY price_date ",
        "MEASURES ",
        "MATCH_NUMBER() AS match_number, ",
        "FIRST(price_date) AS start_date, ",
        "LAST(price_date) AS end_date ",
        "ONE ROW PER MATCH ",
        "AFTER MATCH SKIP TO LAST row_with_price_increase ",
        "PATTERN (row_before_decrease row_with_price_decrease+ row_with_price_increase+) ",
        "DEFINE ",
        "row_with_price_decrease AS price < LAG(price), ",
        "row_with_price_increase AS price > LAG(price)"
    );

    let expected = TableFactor::MatchRecognize {
        table: Box::new(TableFactor::Table {
            name: ObjectName(vec![Ident::new("my_table")]),
            alias: None,
            args: None,
            with_hints: vec![],
            version: None,
            partitions: vec![],
        }),
        partition_by: vec![column("company")],
        order_by: vec![OrderByExpr {
            expr: column("price_date"),
            asc: None,
            nulls_first: None,
        }],
        measures: vec![
            Measure {
                expr: call("MATCH_NUMBER", []),
                alias: Ident::new("match_number"),
            },
            Measure {
                expr: call("FIRST", [column("price_date")]),
                alias: Ident::new("start_date"),
            },
            Measure {
                expr: call("LAST", [column("price_date")]),
                alias: Ident::new("end_date"),
            },
        ],
        rows_per_match: Some(RowsPerMatch::OneRow),
        after_match_skip: Some(AfterMatchSkip::ToLast(Ident::new(
            "row_with_price_increase",
        ))),
        pattern: Concat(vec![
            named("row_before_decrease"),
            Repetition(Box::new(named("row_with_price_decrease")), OneOrMore),
            Repetition(Box::new(named("row_with_price_increase")), OneOrMore),
        ]),
        symbols: vec![
            SymbolDefinition {
                symbol: Ident::new("row_with_price_decrease"),
                definition: price_compared_to_lag(BinaryOperator::Lt),
            },
            SymbolDefinition {
                symbol: Ident::new("row_with_price_increase"),
                definition: price_compared_to_lag(BinaryOperator::Gt),
            },
        ],
        alias: None,
    };

    let select = all_dialects_where(|d| d.supports_match_recognize()).verified_only_select(
        &format!("SELECT * FROM my_table MATCH_RECOGNIZE({options})"),
    );
    assert_eq!(&select.from[0].relation, &expected);

    // Further queries that only need to survive a parse / display round trip.
    #[rustfmt::skip]
    let examples = [
        concat!(
            "SELECT * ",
            "FROM login_attempts ",
            "MATCH_RECOGNIZE(",
                "PARTITION BY user_id ",
                "ORDER BY timestamp ",
                "PATTERN (failed_attempt{3,}) ",
                "DEFINE ",
                    "failed_attempt AS status = 'failure'",
            ")",
        ),
        concat!(
            "SELECT * ",
            "FROM stock_transactions ",
            "MATCH_RECOGNIZE(",
                "PARTITION BY symbol ",
                "ORDER BY timestamp ",
                "MEASURES ",
                    "FIRST(price) AS start_price, ",
                    "LAST(price) AS end_price, ",
                    "MATCH_NUMBER() AS match_num ",
                "ALL ROWS PER MATCH ",
                "PATTERN (STRT UP+) ",
                "DEFINE ",
                    "UP AS price > PREV(price)",
            ")",
        ),
        concat!(
            "SELECT * ",
            "FROM event_log ",
            "MATCH_RECOGNIZE(",
                "MEASURES ",
                    "FIRST(event_type) AS start_event, ",
                    "LAST(event_type) AS end_event, ",
                    "COUNT(*) AS error_count ",
                "ALL ROWS PER MATCH ",
                "PATTERN (STRT ERROR+ END) ",
                "DEFINE ",
                    "STRT AS event_type = 'START', ",
                    "ERROR AS event_type = 'ERROR', ",
                    "END AS event_type = 'END'",
            ")",
        ),
    ];

    for sql in examples {
        all_dialects_where(|d| d.supports_match_recognize()).verified_query(sql);
    }
}
|
||||
|
||||
/// Checks the pattern tree produced for each kind of `MATCH_RECOGNIZE`
/// pattern syntax, on every dialect that supports the clause.
#[test]
fn test_match_recognize_patterns() {
    use MatchRecognizePattern::*;
    use MatchRecognizeSymbol::*;
    use RepetitionQuantifier::*;

    /// Parses `pattern` inside a minimal `MATCH_RECOGNIZE` clause and
    /// compares the resulting pattern tree with `expect`.
    fn check(pattern: &str, expect: MatchRecognizePattern) {
        let sql = format!(
            "SELECT * FROM my_table MATCH_RECOGNIZE(PATTERN ({pattern}) DEFINE DUMMY AS true)"
        );
        let select =
            all_dialects_where(|d| d.supports_match_recognize()).verified_only_select(&sql);
        match &select.from[0].relation {
            TableFactor::MatchRecognize {
                pattern: actual, ..
            } => assert_eq!(actual, &expect),
            _ => panic!("expected match_recognize table factor"),
        }
    }

    // Small builders that keep the expected trees readable.
    let named = |name: &str| Named(Ident::new(name));
    let sym = |name: &str| Symbol(Named(Ident::new(name)));
    let repeat =
        |inner: MatchRecognizePattern, quantifier| Repetition(Box::new(inner), quantifier);
    let group = |inner: MatchRecognizePattern| Group(Box::new(inner));

    // just a symbol
    check("FOO", sym("FOO"));

    // a symbol between the partition start and end anchors
    check(
        "^ FOO $",
        Concat(vec![Symbol(Start), sym("FOO"), Symbol(End)]),
    );

    // exclusion
    check("{- FOO -}", Exclude(named("FOO")));

    // permutation
    check(
        "PERMUTE(A, B, C)",
        Permute(vec![named("A"), named("B"), named("C")]),
    );

    // various identifiers
    check(
        "FOO | \"BAR\" | baz42",
        Alternation(vec![
            sym("FOO"),
            Symbol(Named(Ident::with_quote('"', "BAR"))),
            sym("baz42"),
        ]),
    );

    // concatenated basic quantifiers
    check(
        "S1* S2+ S3?",
        Concat(vec![
            repeat(sym("S1"), ZeroOrMore),
            repeat(sym("S2"), OneOrMore),
            repeat(sym("S3"), AtMostOne),
        ]),
    );

    // double repetition
    check("S2*?", repeat(repeat(sym("S2"), ZeroOrMore), AtMostOne));

    // range quantifiers in an alternation
    check(
        "S1{1} | S2{2,3} | S3{4,} | S4{,5}",
        Alternation(vec![
            repeat(sym("S1"), Exactly(1)),
            repeat(sym("S2"), Range(2, 3)),
            repeat(sym("S3"), AtLeast(4)),
            repeat(sym("S4"), AtMost(5)),
        ]),
    );

    // grouping case 1
    check("S1 ( S2 )", Concat(vec![sym("S1"), group(sym("S2"))]));

    // grouping case 2
    check(
        "( {- S3 -} S4 )+",
        repeat(
            group(Concat(vec![Exclude(named("S3")), sym("S4")])),
            OneOrMore,
        ),
    );

    // the grand finale (example taken from snowflake docs)
    check(
        "^ S1 S2*? ( {- S3 -} S4 )+ | PERMUTE(S1, S2){1,2} $",
        Alternation(vec![
            Concat(vec![
                Symbol(Start),
                sym("S1"),
                repeat(repeat(sym("S2"), ZeroOrMore), AtMostOne),
                repeat(
                    group(Concat(vec![Exclude(named("S3")), sym("S4")])),
                    OneOrMore,
                ),
            ]),
            Concat(vec![
                repeat(Permute(vec![named("S1"), named("S2")]), Range(1, 2)),
                Symbol(End),
            ]),
        ]),
    );
}
|
||||
|
||||
#[test]
|
||||
fn test_select_wildcard_with_replace() {
|
||||
let sql = r#"SELECT * REPLACE (lower(city) AS city) FROM addresses"#;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue