From aab12add36bfb4dfd60c2ba38682b503cc248199 Mon Sep 17 00:00:00 2001 From: Ifeanyi Ubah Date: Mon, 24 Feb 2025 08:34:36 +0100 Subject: [PATCH] BigQuery: Add support for `BEGIN` (#1718) --- src/ast/mod.rs | 43 ++++++++++++++++++++++++++++ src/dialect/bigquery.rs | 57 +++++++++++++++++++++++++++++++++++-- src/parser/mod.rs | 27 ++++++++++++++++++ tests/sqlparser_bigquery.rs | 46 ++++++++++++++++++++++++++++++ tests/sqlparser_common.rs | 33 ++++++++++++--------- tests/sqlparser_sqlite.rs | 17 ----------- 6 files changed, 190 insertions(+), 33 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 649b1f79..aad122fb 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -3072,6 +3072,28 @@ pub enum Statement { begin: bool, transaction: Option, modifier: Option, + /// List of statements belonging to the `BEGIN` block. + /// Example: + /// ```sql + /// BEGIN + /// SELECT 1; + /// SELECT 2; + /// END; + /// ``` + statements: Vec, + /// Statements of an exception clause. + /// Example: + /// ```sql + /// BEGIN + /// SELECT 1; + /// EXCEPTION WHEN ERROR THEN + /// SELECT 2; + /// SELECT 3; + /// END; + /// + exception_statements: Option>, + /// TRUE if the statement has an `END` keyword. + has_end_keyword: bool, }, /// ```sql /// SET TRANSACTION ... @@ -4815,6 +4837,9 @@ impl fmt::Display for Statement { begin: syntax_begin, transaction, modifier, + statements, + exception_statements, + has_end_keyword, } => { if *syntax_begin { if let Some(modifier) = *modifier { @@ -4831,6 +4856,24 @@ impl fmt::Display for Statement { if !modes.is_empty() { write!(f, " {}", display_comma_separated(modes))?; } + if !statements.is_empty() { + write!(f, " {}", display_separated(statements, "; "))?; + // We manually insert semicolon for the last statement, + // since display_separated doesn't handle that case. + write!(f, ";")?; + } + if let Some(exception_statements) = exception_statements { + write!(f, " EXCEPTION WHEN ERROR THEN")?; + if !exception_statements.is_empty() { + write!(f, " {}", display_separated(exception_statements, "; "))?; + // We manually insert semicolon for the last statement, + // since display_separated doesn't handle that case. + write!(f, ";")?; + } + } + if *has_end_keyword { + write!(f, " END")?; + } Ok(()) } Statement::SetTransaction { diff --git a/src/dialect/bigquery.rs b/src/dialect/bigquery.rs index b8e7e4cf..49fb24f1 100644 --- a/src/dialect/bigquery.rs +++ b/src/dialect/bigquery.rs @@ -15,9 +15,10 @@ // specific language governing permissions and limitations // under the License. +use crate::ast::Statement; use crate::dialect::Dialect; use crate::keywords::Keyword; -use crate::parser::Parser; +use crate::parser::{Parser, ParserError}; /// These keywords are disallowed as column identifiers. Such that /// `SELECT 5 AS FROM T` is rejected by BigQuery. @@ -44,7 +45,11 @@ const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[ pub struct BigQueryDialect; impl Dialect for BigQueryDialect { - // See https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#identifiers + fn parse_statement(&self, parser: &mut Parser) -> Option> { + self.maybe_parse_statement(parser) + } + + /// See fn is_delimited_identifier_start(&self, ch: char) -> bool { ch == '`' } @@ -60,6 +65,9 @@ impl Dialect for BigQueryDialect { fn is_identifier_start(&self, ch: char) -> bool { ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch == '_' + // BigQuery supports `@@foo.bar` variable syntax in its procedural language. + // https://cloud.google.com/bigquery/docs/reference/standard-sql/procedural-language#beginexceptionend + || ch == '@' } fn is_identifier_part(&self, ch: char) -> bool { @@ -129,3 +137,48 @@ impl Dialect for BigQueryDialect { !RESERVED_FOR_COLUMN_ALIAS.contains(kw) } } + +impl BigQueryDialect { + fn maybe_parse_statement(&self, parser: &mut Parser) -> Option> { + if parser.peek_keyword(Keyword::BEGIN) { + return Some(self.parse_begin(parser)); + } + None + } + + /// Parse a `BEGIN` statement. + /// + fn parse_begin(&self, parser: &mut Parser) -> Result { + parser.expect_keyword(Keyword::BEGIN)?; + + let statements = parser.parse_statement_list(&[Keyword::EXCEPTION, Keyword::END])?; + + let has_exception_when_clause = parser.parse_keywords(&[ + Keyword::EXCEPTION, + Keyword::WHEN, + Keyword::ERROR, + Keyword::THEN, + ]); + let exception_statements = if has_exception_when_clause { + if !parser.peek_keyword(Keyword::END) { + Some(parser.parse_statement_list(&[Keyword::END])?) + } else { + Some(Default::default()) + } + } else { + None + }; + + parser.expect_keyword(Keyword::END)?; + + Ok(Statement::StartTransaction { + begin: true, + statements, + exception_statements, + has_end_keyword: true, + transaction: None, + modifier: None, + modes: Default::default(), + }) + } +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index e40f4d58..c08c7049 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4273,6 +4273,27 @@ impl<'a> Parser<'a> { self.parse_comma_separated(f) } + /// Parses 0 or more statements, each followed by a semicolon. + /// If the next token is any of `terminal_keywords` then no more + /// statements will be parsed. + pub(crate) fn parse_statement_list( + &mut self, + terminal_keywords: &[Keyword], + ) -> Result, ParserError> { + let mut values = vec![]; + loop { + if let Token::Word(w) = &self.peek_nth_token_ref(0).token { + if w.quote_style.is_none() && terminal_keywords.contains(&w.keyword) { + break; + } + } + + values.push(self.parse_statement()?); + self.expect_token(&Token::SemiColon)?; + } + Ok(values) + } + /// Default implementation of a predicate that returns true if /// the specified keyword is reserved for column alias. /// See [Dialect::is_column_alias] @@ -13783,6 +13804,9 @@ impl<'a> Parser<'a> { begin: false, transaction: Some(BeginTransactionKind::Transaction), modifier: None, + statements: vec![], + exception_statements: None, + has_end_keyword: false, }) } @@ -13812,6 +13836,9 @@ impl<'a> Parser<'a> { begin: true, transaction, modifier, + statements: vec![], + exception_statements: None, + has_end_keyword: false, }) } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 52aa3b3b..55e35422 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -236,6 +236,52 @@ fn parse_big_query_non_reserved_column_alias() { bigquery().verified_stmt(sql); } +#[test] +fn parse_at_at_identifier() { + bigquery().verified_stmt("SELECT @@error.stack_trace, @@error.message"); +} + +#[test] +fn parse_begin() { + let sql = r#"BEGIN SELECT 1; EXCEPTION WHEN ERROR THEN SELECT 2; END"#; + let Statement::StartTransaction { + statements, + exception_statements, + has_end_keyword, + .. + } = bigquery().verified_stmt(sql) + else { + unreachable!(); + }; + assert_eq!(1, statements.len()); + assert_eq!(1, exception_statements.unwrap().len()); + assert!(has_end_keyword); + + bigquery().verified_stmt( + "BEGIN SELECT 1; SELECT 2; EXCEPTION WHEN ERROR THEN SELECT 2; SELECT 4; END", + ); + bigquery() + .verified_stmt("BEGIN SELECT 1; EXCEPTION WHEN ERROR THEN SELECT @@error.stack_trace; END"); + bigquery().verified_stmt("BEGIN EXCEPTION WHEN ERROR THEN SELECT 2; END"); + bigquery().verified_stmt("BEGIN SELECT 1; SELECT 2; EXCEPTION WHEN ERROR THEN END"); + bigquery().verified_stmt("BEGIN EXCEPTION WHEN ERROR THEN END"); + bigquery().verified_stmt("BEGIN SELECT 1; SELECT 2; END"); + bigquery().verified_stmt("BEGIN END"); + + assert_eq!( + bigquery() + .parse_sql_statements("BEGIN SELECT 1; SELECT 2 END") + .unwrap_err(), + ParserError::ParserError("Expected: ;, found: END".to_string()) + ); + assert_eq!( + bigquery() + .parse_sql_statements("BEGIN SELECT 1; EXCEPTION WHEN ERROR THEN SELECT 2 END") + .unwrap_err(), + ParserError::ParserError("Expected: ;, found: END".to_string()) + ); +} + #[test] fn parse_delete_statement() { let sql = "DELETE \"table\" WHERE 1"; diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 578c42de..0072baf7 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -8343,7 +8343,12 @@ fn lateral_function() { #[test] fn parse_start_transaction() { - match verified_stmt("START TRANSACTION READ ONLY, READ WRITE, ISOLATION LEVEL SERIALIZABLE") { + let dialects = all_dialects_except(|d| + // BigQuery does not support this syntax + d.is::()); + match dialects + .verified_stmt("START TRANSACTION READ ONLY, READ WRITE, ISOLATION LEVEL SERIALIZABLE") + { Statement::StartTransaction { modes, .. } => assert_eq!( modes, vec![ @@ -8357,7 +8362,7 @@ fn parse_start_transaction() { // For historical reasons, PostgreSQL allows the commas between the modes to // be omitted. - match one_statement_parses_to( + match dialects.one_statement_parses_to( "START TRANSACTION READ ONLY READ WRITE ISOLATION LEVEL SERIALIZABLE", "START TRANSACTION READ ONLY, READ WRITE, ISOLATION LEVEL SERIALIZABLE", ) { @@ -8372,40 +8377,40 @@ fn parse_start_transaction() { _ => unreachable!(), } - verified_stmt("START TRANSACTION"); - verified_stmt("BEGIN"); - verified_stmt("BEGIN WORK"); - verified_stmt("BEGIN TRANSACTION"); + dialects.verified_stmt("START TRANSACTION"); + dialects.verified_stmt("BEGIN"); + dialects.verified_stmt("BEGIN WORK"); + dialects.verified_stmt("BEGIN TRANSACTION"); - verified_stmt("START TRANSACTION ISOLATION LEVEL READ UNCOMMITTED"); - verified_stmt("START TRANSACTION ISOLATION LEVEL READ COMMITTED"); - verified_stmt("START TRANSACTION ISOLATION LEVEL REPEATABLE READ"); - verified_stmt("START TRANSACTION ISOLATION LEVEL SERIALIZABLE"); + dialects.verified_stmt("START TRANSACTION ISOLATION LEVEL READ UNCOMMITTED"); + dialects.verified_stmt("START TRANSACTION ISOLATION LEVEL READ COMMITTED"); + dialects.verified_stmt("START TRANSACTION ISOLATION LEVEL REPEATABLE READ"); + dialects.verified_stmt("START TRANSACTION ISOLATION LEVEL SERIALIZABLE"); // Regression test for https://github.com/sqlparser-rs/sqlparser-rs/pull/139, // in which START TRANSACTION would fail to parse if followed by a statement // terminator. assert_eq!( - parse_sql_statements("START TRANSACTION; SELECT 1"), + dialects.parse_sql_statements("START TRANSACTION; SELECT 1"), Ok(vec![ verified_stmt("START TRANSACTION"), verified_stmt("SELECT 1"), ]) ); - let res = parse_sql_statements("START TRANSACTION ISOLATION LEVEL BAD"); + let res = dialects.parse_sql_statements("START TRANSACTION ISOLATION LEVEL BAD"); assert_eq!( ParserError::ParserError("Expected: isolation level, found: BAD".to_string()), res.unwrap_err() ); - let res = parse_sql_statements("START TRANSACTION BAD"); + let res = dialects.parse_sql_statements("START TRANSACTION BAD"); assert_eq!( ParserError::ParserError("Expected: end of statement, found: BAD".to_string()), res.unwrap_err() ); - let res = parse_sql_statements("START TRANSACTION READ ONLY,"); + let res = dialects.parse_sql_statements("START TRANSACTION READ ONLY,"); assert_eq!( ParserError::ParserError("Expected: transaction mode, found: EOF".to_string()), res.unwrap_err() diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index c1774330..17dcfed8 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -518,23 +518,6 @@ fn parse_start_transaction_with_modifier() { sqlite_and_generic().verified_stmt("BEGIN DEFERRED"); sqlite_and_generic().verified_stmt("BEGIN IMMEDIATE"); sqlite_and_generic().verified_stmt("BEGIN EXCLUSIVE"); - - let unsupported_dialects = all_dialects_except(|d| d.supports_start_transaction_modifier()); - let res = unsupported_dialects.parse_sql_statements("BEGIN DEFERRED"); - assert_eq!( - ParserError::ParserError("Expected: end of statement, found: DEFERRED".to_string()), - res.unwrap_err(), - ); - let res = unsupported_dialects.parse_sql_statements("BEGIN IMMEDIATE"); - assert_eq!( - ParserError::ParserError("Expected: end of statement, found: IMMEDIATE".to_string()), - res.unwrap_err(), - ); - let res = unsupported_dialects.parse_sql_statements("BEGIN EXCLUSIVE"); - assert_eq!( - ParserError::ParserError("Expected: end of statement, found: EXCLUSIVE".to_string()), - res.unwrap_err(), - ); } #[test]