Support Snowflake MATCH_RECOGNIZE syntax (#1222)

This commit is contained in:
Joey Hain 2024-04-22 13:17:50 -07:00 committed by GitHub
parent bf89b7d808
commit 39980e8976
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 847 additions and 9 deletions

View file

@ -8228,6 +8228,7 @@ impl<'a> Parser<'a> {
| TableFactor::TableFunction { alias, .. }
| TableFactor::Pivot { alias, .. }
| TableFactor::Unpivot { alias, .. }
| TableFactor::MatchRecognize { alias, .. }
| TableFactor::NestedJoin { alias, .. } => {
// but not `FROM (mytable AS alias1) AS alias2`.
if let Some(inner_alias) = alias {
@ -8351,10 +8352,246 @@ impl<'a> Parser<'a> {
}
}
if self.dialect.supports_match_recognize()
&& self.parse_keyword(Keyword::MATCH_RECOGNIZE)
{
table = self.parse_match_recognize(table)?;
}
Ok(table)
}
}
fn parse_match_recognize(&mut self, table: TableFactor) -> Result<TableFactor, ParserError> {
self.expect_token(&Token::LParen)?;
let partition_by = if self.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) {
self.parse_comma_separated(Parser::parse_expr)?
} else {
vec![]
};
let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) {
self.parse_comma_separated(Parser::parse_order_by_expr)?
} else {
vec![]
};
let measures = if self.parse_keyword(Keyword::MEASURES) {
self.parse_comma_separated(|p| {
let expr = p.parse_expr()?;
let _ = p.parse_keyword(Keyword::AS);
let alias = p.parse_identifier(false)?;
Ok(Measure { expr, alias })
})?
} else {
vec![]
};
let rows_per_match =
if self.parse_keywords(&[Keyword::ONE, Keyword::ROW, Keyword::PER, Keyword::MATCH]) {
Some(RowsPerMatch::OneRow)
} else if self.parse_keywords(&[
Keyword::ALL,
Keyword::ROWS,
Keyword::PER,
Keyword::MATCH,
]) {
Some(RowsPerMatch::AllRows(
if self.parse_keywords(&[Keyword::SHOW, Keyword::EMPTY, Keyword::MATCHES]) {
Some(EmptyMatchesMode::Show)
} else if self.parse_keywords(&[
Keyword::OMIT,
Keyword::EMPTY,
Keyword::MATCHES,
]) {
Some(EmptyMatchesMode::Omit)
} else if self.parse_keywords(&[
Keyword::WITH,
Keyword::UNMATCHED,
Keyword::ROWS,
]) {
Some(EmptyMatchesMode::WithUnmatched)
} else {
None
},
))
} else {
None
};
let after_match_skip =
if self.parse_keywords(&[Keyword::AFTER, Keyword::MATCH, Keyword::SKIP]) {
if self.parse_keywords(&[Keyword::PAST, Keyword::LAST, Keyword::ROW]) {
Some(AfterMatchSkip::PastLastRow)
} else if self.parse_keywords(&[Keyword::TO, Keyword::NEXT, Keyword::ROW]) {
Some(AfterMatchSkip::ToNextRow)
} else if self.parse_keywords(&[Keyword::TO, Keyword::FIRST]) {
Some(AfterMatchSkip::ToFirst(self.parse_identifier(false)?))
} else if self.parse_keywords(&[Keyword::TO, Keyword::LAST]) {
Some(AfterMatchSkip::ToLast(self.parse_identifier(false)?))
} else {
let found = self.next_token();
return self.expected("after match skip option", found);
}
} else {
None
};
self.expect_keyword(Keyword::PATTERN)?;
self.expect_token(&Token::LParen)?;
let pattern = self.parse_pattern()?;
self.expect_token(&Token::RParen)?;
self.expect_keyword(Keyword::DEFINE)?;
let symbols = self.parse_comma_separated(|p| {
let symbol = p.parse_identifier(false)?;
p.expect_keyword(Keyword::AS)?;
let definition = p.parse_expr()?;
Ok(SymbolDefinition { symbol, definition })
})?;
self.expect_token(&Token::RParen)?;
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
Ok(TableFactor::MatchRecognize {
table: Box::new(table),
partition_by,
order_by,
measures,
rows_per_match,
after_match_skip,
pattern,
symbols,
alias,
})
}
/// Parses one primary element of a `MATCH_RECOGNIZE` row pattern.
///
/// Recognized forms, decided by the next token:
///
/// * `^` — start anchor
/// * `$` — end anchor (matched here as a `Placeholder` token with text `$`)
/// * `{- symbol -}` — exclusion
/// * `PERMUTE(a, b, ...)` — permutation; only the unquoted, upper-case word
///   `PERMUTE` is treated as the operator
/// * `( pattern )` — grouping
/// * anything else is handed back to the token stream and parsed as a named
///   symbol
///
/// # Errors
///
/// Returns a `ParserError` when the tokens required by the selected form are
/// missing or when an identifier cannot be parsed.
fn parse_base_pattern(&mut self) -> Result<MatchRecognizePattern, ParserError> {
    let first = self.next_token().token;
    let pattern = match first {
        Token::Caret => MatchRecognizePattern::Symbol(MatchRecognizeSymbol::Start),
        Token::Placeholder(text) if text == "$" => {
            MatchRecognizePattern::Symbol(MatchRecognizeSymbol::End)
        }
        Token::LBrace => {
            self.expect_token(&Token::Minus)?;
            let excluded = MatchRecognizeSymbol::Named(self.parse_identifier(false)?);
            self.expect_token(&Token::Minus)?;
            self.expect_token(&Token::RBrace)?;
            MatchRecognizePattern::Exclude(excluded)
        }
        Token::Word(Word {
            value,
            quote_style: None,
            ..
        }) if value == "PERMUTE" => {
            self.expect_token(&Token::LParen)?;
            let members = self.parse_comma_separated(|parser| {
                let name = parser.parse_identifier(false)?;
                Ok(MatchRecognizeSymbol::Named(name))
            })?;
            self.expect_token(&Token::RParen)?;
            MatchRecognizePattern::Permute(members)
        }
        Token::LParen => {
            let inner = self.parse_pattern()?;
            self.expect_token(&Token::RParen)?;
            MatchRecognizePattern::Group(Box::new(inner))
        }
        _ => {
            // Not a special form: un-read the token and treat it as a symbol name.
            self.prev_token();
            let name = self.parse_identifier(false)?;
            MatchRecognizePattern::Symbol(MatchRecognizeSymbol::Named(name))
        }
    };
    Ok(pattern)
}
/// Parses a base pattern followed by zero or more repetition quantifiers.
///
/// Supported quantifiers are `*`, `+`, `?` (matched as a `Placeholder` token
/// with text `?`) and the brace forms `{n}`, `{n,}`, `{,m}` and `{n,m}`.
/// Quantifiers may be stacked; each one wraps the pattern built so far in a
/// [`MatchRecognizePattern::Repetition`]. The first token that is not a
/// quantifier is pushed back and ends the loop.
///
/// # Errors
///
/// Returns a `ParserError` when a brace quantifier is malformed, including
/// when a bound is a numeric token that is not a valid integer for the
/// quantifier (for example `1.5` or a value that is out of range). Such
/// input is reported as an error rather than causing a panic.
fn parse_repetition_pattern(&mut self) -> Result<MatchRecognizePattern, ParserError> {
    let mut pattern = self.parse_base_pattern()?;
    loop {
        let token = self.next_token();
        let quantifier = match token.token {
            Token::Mul => RepetitionQuantifier::ZeroOrMore,
            Token::Plus => RepetitionQuantifier::OneOrMore,
            Token::Placeholder(s) if s == "?" => RepetitionQuantifier::AtMostOne,
            Token::LBrace => {
                // quantifier is a range like {n} or {n,} or {,m} or {n,m}
                let token = self.next_token();
                match &token.token {
                    Token::Comma => {
                        let bound = self.next_token();
                        let Token::Number(text, _) = &bound.token else {
                            return self.expected("literal number", bound);
                        };
                        // Numeric tokens may hold non-integer text; reject it
                        // with a parse error instead of panicking.
                        let Ok(max) = text.parse() else {
                            return self.expected("literal int", bound);
                        };
                        self.expect_token(&Token::RBrace)?;
                        RepetitionQuantifier::AtMost(max)
                    }
                    Token::Number(text, _) => {
                        let Ok(min) = text.parse() else {
                            return self.expected("literal int", token);
                        };
                        if self.consume_token(&Token::Comma) {
                            let bound = self.next_token();
                            match &bound.token {
                                Token::Number(text, _) => {
                                    let Ok(max) = text.parse() else {
                                        return self.expected("literal int", bound);
                                    };
                                    self.expect_token(&Token::RBrace)?;
                                    RepetitionQuantifier::Range(min, max)
                                }
                                Token::RBrace => RepetitionQuantifier::AtLeast(min),
                                _ => {
                                    return self.expected("} or upper bound", bound);
                                }
                            }
                        } else {
                            self.expect_token(&Token::RBrace)?;
                            RepetitionQuantifier::Exactly(min)
                        }
                    }
                    _ => return self.expected("quantifier range", token),
                }
            }
            _ => {
                // Not a quantifier: give the token back and stop.
                self.prev_token();
                break;
            }
        };
        pattern = MatchRecognizePattern::Repetition(Box::new(pattern), quantifier);
    }
    Ok(pattern)
}
/// Parses a sequence of adjacent repetition patterns.
///
/// Elements are read until the next token is `)` or `|`. A sequence with a
/// single element is returned as that element itself; two or more elements are
/// wrapped in [`MatchRecognizePattern::Concat`].
fn parse_concat_pattern(&mut self) -> Result<MatchRecognizePattern, ParserError> {
    let first = self.parse_repetition_pattern()?;
    if matches!(self.peek_token().token, Token::RParen | Token::Pipe) {
        // Lone element: no `Concat` wrapper.
        return Ok(first);
    }

    let mut sequence = vec![first];
    loop {
        sequence.push(self.parse_repetition_pattern()?);
        if matches!(self.peek_token().token, Token::RParen | Token::Pipe) {
            break;
        }
    }
    Ok(MatchRecognizePattern::Concat(sequence))
}
/// Parses a full row pattern: one or more concatenations separated by `|`.
///
/// A pattern without any `|` is returned unchanged. Otherwise all
/// alternatives are collected, in source order, into a single flat
/// [`MatchRecognizePattern::Alternation`].
///
/// The alternatives are gathered iteratively, so a long chain of `|` neither
/// deepens the call stack nor repeatedly shifts the vector's contents.
///
/// # Errors
///
/// Propagates any `ParserError` raised while parsing an alternative.
fn parse_pattern(&mut self) -> Result<MatchRecognizePattern, ParserError> {
    let first = self.parse_concat_pattern()?;
    if !self.consume_token(&Token::Pipe) {
        return Ok(first);
    }

    let mut alternatives = vec![first];
    loop {
        alternatives.push(self.parse_concat_pattern()?);
        if !self.consume_token(&Token::Pipe) {
            break;
        }
    }
    Ok(MatchRecognizePattern::Alternation(alternatives))
}
/// Parse a given table version specifier.
///
/// For now it only supports timestamp versioning for BigQuery and MSSQL dialects.