BigQuery: Add support for BEGIN (#1718)

This commit is contained in:
Ifeanyi Ubah 2025-02-24 08:34:36 +01:00 committed by GitHub
parent 72312ba82a
commit aab12add36
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 190 additions and 33 deletions

View file

@ -3072,6 +3072,28 @@ pub enum Statement {
begin: bool,
transaction: Option<BeginTransactionKind>,
modifier: Option<TransactionModifier>,
/// List of statements belonging to the `BEGIN` block.
/// Example:
/// ```sql
/// BEGIN
/// SELECT 1;
/// SELECT 2;
/// END;
/// ```
statements: Vec<Statement>,
/// Statements of an exception clause.
/// Example:
/// ```sql
/// BEGIN
/// SELECT 1;
/// EXCEPTION WHEN ERROR THEN
/// SELECT 2;
/// SELECT 3;
/// END;
/// ```
/// <https://cloud.google.com/bigquery/docs/reference/standard-sql/procedural-language#beginexceptionend>
exception_statements: Option<Vec<Statement>>,
/// TRUE if the statement has an `END` keyword.
has_end_keyword: bool,
},
/// ```sql
/// SET TRANSACTION ...
@ -4815,6 +4837,9 @@ impl fmt::Display for Statement {
begin: syntax_begin,
transaction,
modifier,
statements,
exception_statements,
has_end_keyword,
} => {
if *syntax_begin {
if let Some(modifier) = *modifier {
@ -4831,6 +4856,24 @@ impl fmt::Display for Statement {
if !modes.is_empty() {
write!(f, " {}", display_comma_separated(modes))?;
}
if !statements.is_empty() {
write!(f, " {}", display_separated(statements, "; "))?;
// We manually insert semicolon for the last statement,
// since display_separated doesn't handle that case.
write!(f, ";")?;
}
if let Some(exception_statements) = exception_statements {
write!(f, " EXCEPTION WHEN ERROR THEN")?;
if !exception_statements.is_empty() {
write!(f, " {}", display_separated(exception_statements, "; "))?;
// We manually insert semicolon for the last statement,
// since display_separated doesn't handle that case.
write!(f, ";")?;
}
}
if *has_end_keyword {
write!(f, " END")?;
}
Ok(())
}
Statement::SetTransaction {

View file

@ -15,9 +15,10 @@
// specific language governing permissions and limitations
// under the License.
use crate::ast::Statement;
use crate::dialect::Dialect;
use crate::keywords::Keyword;
use crate::parser::Parser;
use crate::parser::{Parser, ParserError};
/// These keywords are disallowed as column identifiers. Such that
/// `SELECT 5 AS <col> FROM T` is rejected by BigQuery.
@ -44,7 +45,11 @@ const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[
pub struct BigQueryDialect;
impl Dialect for BigQueryDialect {
// See https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#identifiers
/// Dialect hook for statement parsing: hands the parser to
/// `maybe_parse_statement` and returns that helper's result unchanged.
fn parse_statement(&self, parser: &mut Parser) -> Option<Result<Statement, ParserError>> {
self.maybe_parse_statement(parser)
}
/// A backtick opens a delimited (quoted) identifier.
/// See <https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#identifiers>
fn is_delimited_identifier_start(&self, ch: char) -> bool {
    matches!(ch, '`')
}
@ -60,6 +65,9 @@ impl Dialect for BigQueryDialect {
fn is_identifier_start(&self, ch: char) -> bool {
    // Besides ASCII letters and `_`, `@` may begin an identifier because
    // BigQuery supports `@@foo.bar` variable syntax in its procedural language.
    // https://cloud.google.com/bigquery/docs/reference/standard-sql/procedural-language#beginexceptionend
    ch.is_ascii_alphabetic() || matches!(ch, '_' | '@')
}
fn is_identifier_part(&self, ch: char) -> bool {
@ -129,3 +137,48 @@ impl Dialect for BigQueryDialect {
!RESERVED_FOR_COLUMN_ALIAS.contains(kw)
}
}
impl BigQueryDialect {
fn maybe_parse_statement(&self, parser: &mut Parser) -> Option<Result<Statement, ParserError>> {
if parser.peek_keyword(Keyword::BEGIN) {
return Some(self.parse_begin(parser));
}
None
}
/// Parse a `BEGIN` statement.
/// <https://cloud.google.com/bigquery/docs/reference/standard-sql/procedural-language#beginexceptionend>
fn parse_begin(&self, parser: &mut Parser) -> Result<Statement, ParserError> {
parser.expect_keyword(Keyword::BEGIN)?;
let statements = parser.parse_statement_list(&[Keyword::EXCEPTION, Keyword::END])?;
let has_exception_when_clause = parser.parse_keywords(&[
Keyword::EXCEPTION,
Keyword::WHEN,
Keyword::ERROR,
Keyword::THEN,
]);
let exception_statements = if has_exception_when_clause {
if !parser.peek_keyword(Keyword::END) {
Some(parser.parse_statement_list(&[Keyword::END])?)
} else {
Some(Default::default())
}
} else {
None
};
parser.expect_keyword(Keyword::END)?;
Ok(Statement::StartTransaction {
begin: true,
statements,
exception_statements,
has_end_keyword: true,
transaction: None,
modifier: None,
modes: Default::default(),
})
}
}

View file

@ -4273,6 +4273,27 @@ impl<'a> Parser<'a> {
self.parse_comma_separated(f)
}
/// Parses zero or more statements, each of which must be followed by a
/// semicolon.
///
/// Parsing stops, without consuming anything, as soon as the next token
/// is an unquoted word whose keyword is one of `terminal_keywords`.
pub(crate) fn parse_statement_list(
    &mut self,
    terminal_keywords: &[Keyword],
) -> Result<Vec<Statement>, ParserError> {
    let mut statements = Vec::new();
    loop {
        // Quoted words never act as terminators.
        let at_terminal = matches!(
            &self.peek_nth_token_ref(0).token,
            Token::Word(word)
                if word.quote_style.is_none() && terminal_keywords.contains(&word.keyword)
        );
        if at_terminal {
            return Ok(statements);
        }

        let statement = self.parse_statement()?;
        self.expect_token(&Token::SemiColon)?;
        statements.push(statement);
    }
}
/// Default implementation of a predicate that returns true if
/// the specified keyword is reserved for column alias.
/// See [Dialect::is_column_alias]
@ -13783,6 +13804,9 @@ impl<'a> Parser<'a> {
begin: false,
transaction: Some(BeginTransactionKind::Transaction),
modifier: None,
statements: vec![],
exception_statements: None,
has_end_keyword: false,
})
}
@ -13812,6 +13836,9 @@ impl<'a> Parser<'a> {
begin: true,
transaction,
modifier,
statements: vec![],
exception_statements: None,
has_end_keyword: false,
})
}

View file

@ -236,6 +236,52 @@ fn parse_big_query_non_reserved_column_alias() {
bigquery().verified_stmt(sql);
}
#[test]
fn parse_at_at_identifier() {
    // `@@name` system variables must round-trip unchanged.
    let sql = "SELECT @@error.stack_trace, @@error.message";
    bigquery().verified_stmt(sql);
}
#[test]
fn parse_begin() {
    // Full form: one statement in the body and one in the exception handler.
    match bigquery().verified_stmt("BEGIN SELECT 1; EXCEPTION WHEN ERROR THEN SELECT 2; END") {
        Statement::StartTransaction {
            statements,
            exception_statements,
            has_end_keyword,
            ..
        } => {
            assert_eq!(1, statements.len());
            assert_eq!(1, exception_statements.unwrap().len());
            assert!(has_end_keyword);
        }
        _ => unreachable!(),
    }

    // Every combination of empty / non-empty body and handler must round-trip.
    for sql in [
        "BEGIN SELECT 1; SELECT 2; EXCEPTION WHEN ERROR THEN SELECT 2; SELECT 4; END",
        "BEGIN SELECT 1; EXCEPTION WHEN ERROR THEN SELECT @@error.stack_trace; END",
        "BEGIN EXCEPTION WHEN ERROR THEN SELECT 2; END",
        "BEGIN SELECT 1; SELECT 2; EXCEPTION WHEN ERROR THEN END",
        "BEGIN EXCEPTION WHEN ERROR THEN END",
        "BEGIN SELECT 1; SELECT 2; END",
        "BEGIN END",
    ] {
        bigquery().verified_stmt(sql);
    }

    // A missing semicolon before `END` is rejected in both the body and the handler.
    for sql in [
        "BEGIN SELECT 1; SELECT 2 END",
        "BEGIN SELECT 1; EXCEPTION WHEN ERROR THEN SELECT 2 END",
    ] {
        assert_eq!(
            bigquery().parse_sql_statements(sql).unwrap_err(),
            ParserError::ParserError("Expected: ;, found: END".to_string())
        );
    }
}
#[test]
fn parse_delete_statement() {
let sql = "DELETE \"table\" WHERE 1";

View file

@ -8343,7 +8343,12 @@ fn lateral_function() {
#[test]
fn parse_start_transaction() {
match verified_stmt("START TRANSACTION READ ONLY, READ WRITE, ISOLATION LEVEL SERIALIZABLE") {
let dialects = all_dialects_except(|d|
// BigQuery does not support this syntax
d.is::<BigQueryDialect>());
match dialects
.verified_stmt("START TRANSACTION READ ONLY, READ WRITE, ISOLATION LEVEL SERIALIZABLE")
{
Statement::StartTransaction { modes, .. } => assert_eq!(
modes,
vec![
@ -8357,7 +8362,7 @@ fn parse_start_transaction() {
// For historical reasons, PostgreSQL allows the commas between the modes to
// be omitted.
match one_statement_parses_to(
match dialects.one_statement_parses_to(
"START TRANSACTION READ ONLY READ WRITE ISOLATION LEVEL SERIALIZABLE",
"START TRANSACTION READ ONLY, READ WRITE, ISOLATION LEVEL SERIALIZABLE",
) {
@ -8372,40 +8377,40 @@ fn parse_start_transaction() {
_ => unreachable!(),
}
verified_stmt("START TRANSACTION");
verified_stmt("BEGIN");
verified_stmt("BEGIN WORK");
verified_stmt("BEGIN TRANSACTION");
dialects.verified_stmt("START TRANSACTION");
dialects.verified_stmt("BEGIN");
dialects.verified_stmt("BEGIN WORK");
dialects.verified_stmt("BEGIN TRANSACTION");
verified_stmt("START TRANSACTION ISOLATION LEVEL READ UNCOMMITTED");
verified_stmt("START TRANSACTION ISOLATION LEVEL READ COMMITTED");
verified_stmt("START TRANSACTION ISOLATION LEVEL REPEATABLE READ");
verified_stmt("START TRANSACTION ISOLATION LEVEL SERIALIZABLE");
dialects.verified_stmt("START TRANSACTION ISOLATION LEVEL READ UNCOMMITTED");
dialects.verified_stmt("START TRANSACTION ISOLATION LEVEL READ COMMITTED");
dialects.verified_stmt("START TRANSACTION ISOLATION LEVEL REPEATABLE READ");
dialects.verified_stmt("START TRANSACTION ISOLATION LEVEL SERIALIZABLE");
// Regression test for https://github.com/sqlparser-rs/sqlparser-rs/pull/139,
// in which START TRANSACTION would fail to parse if followed by a statement
// terminator.
assert_eq!(
parse_sql_statements("START TRANSACTION; SELECT 1"),
dialects.parse_sql_statements("START TRANSACTION; SELECT 1"),
Ok(vec![
verified_stmt("START TRANSACTION"),
verified_stmt("SELECT 1"),
])
);
let res = parse_sql_statements("START TRANSACTION ISOLATION LEVEL BAD");
let res = dialects.parse_sql_statements("START TRANSACTION ISOLATION LEVEL BAD");
assert_eq!(
ParserError::ParserError("Expected: isolation level, found: BAD".to_string()),
res.unwrap_err()
);
let res = parse_sql_statements("START TRANSACTION BAD");
let res = dialects.parse_sql_statements("START TRANSACTION BAD");
assert_eq!(
ParserError::ParserError("Expected: end of statement, found: BAD".to_string()),
res.unwrap_err()
);
let res = parse_sql_statements("START TRANSACTION READ ONLY,");
let res = dialects.parse_sql_statements("START TRANSACTION READ ONLY,");
assert_eq!(
ParserError::ParserError("Expected: transaction mode, found: EOF".to_string()),
res.unwrap_err()

View file

@ -518,23 +518,6 @@ fn parse_start_transaction_with_modifier() {
sqlite_and_generic().verified_stmt("BEGIN DEFERRED");
sqlite_and_generic().verified_stmt("BEGIN IMMEDIATE");
sqlite_and_generic().verified_stmt("BEGIN EXCLUSIVE");
let unsupported_dialects = all_dialects_except(|d| d.supports_start_transaction_modifier());
let res = unsupported_dialects.parse_sql_statements("BEGIN DEFERRED");
assert_eq!(
ParserError::ParserError("Expected: end of statement, found: DEFERRED".to_string()),
res.unwrap_err(),
);
let res = unsupported_dialects.parse_sql_statements("BEGIN IMMEDIATE");
assert_eq!(
ParserError::ParserError("Expected: end of statement, found: IMMEDIATE".to_string()),
res.unwrap_err(),
);
let res = unsupported_dialects.parse_sql_statements("BEGIN EXCLUSIVE");
assert_eq!(
ParserError::ParserError("Expected: end of statement, found: EXCLUSIVE".to_string()),
res.unwrap_err(),
);
}
#[test]