BigQuery: Add support for BEGIN (#1718)

This commit is contained in:
Ifeanyi Ubah 2025-02-24 08:34:36 +01:00 committed by GitHub
parent 72312ba82a
commit aab12add36
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 190 additions and 33 deletions

View file

@ -3072,6 +3072,28 @@ pub enum Statement {
begin: bool, begin: bool,
transaction: Option<BeginTransactionKind>, transaction: Option<BeginTransactionKind>,
modifier: Option<TransactionModifier>, modifier: Option<TransactionModifier>,
/// List of statements belonging to the `BEGIN` block.
/// Example:
/// ```sql
/// BEGIN
/// SELECT 1;
/// SELECT 2;
/// END;
/// ```
statements: Vec<Statement>,
/// Statements of an exception clause.
/// Example:
/// ```sql
/// BEGIN
/// SELECT 1;
/// EXCEPTION WHEN ERROR THEN
/// SELECT 2;
/// SELECT 3;
/// END;
/// ```
/// <https://cloud.google.com/bigquery/docs/reference/standard-sql/procedural-language#beginexceptionend>
exception_statements: Option<Vec<Statement>>,
/// TRUE if the statement has an `END` keyword.
has_end_keyword: bool,
}, },
/// ```sql /// ```sql
/// SET TRANSACTION ... /// SET TRANSACTION ...
@ -4815,6 +4837,9 @@ impl fmt::Display for Statement {
begin: syntax_begin, begin: syntax_begin,
transaction, transaction,
modifier, modifier,
statements,
exception_statements,
has_end_keyword,
} => { } => {
if *syntax_begin { if *syntax_begin {
if let Some(modifier) = *modifier { if let Some(modifier) = *modifier {
@ -4831,6 +4856,24 @@ impl fmt::Display for Statement {
if !modes.is_empty() { if !modes.is_empty() {
write!(f, " {}", display_comma_separated(modes))?; write!(f, " {}", display_comma_separated(modes))?;
} }
if !statements.is_empty() {
write!(f, " {}", display_separated(statements, "; "))?;
// We manually insert semicolon for the last statement,
// since display_separated doesn't handle that case.
write!(f, ";")?;
}
if let Some(exception_statements) = exception_statements {
write!(f, " EXCEPTION WHEN ERROR THEN")?;
if !exception_statements.is_empty() {
write!(f, " {}", display_separated(exception_statements, "; "))?;
// We manually insert semicolon for the last statement,
// since display_separated doesn't handle that case.
write!(f, ";")?;
}
}
if *has_end_keyword {
write!(f, " END")?;
}
Ok(()) Ok(())
} }
Statement::SetTransaction { Statement::SetTransaction {

View file

@ -15,9 +15,10 @@
// specific language governing permissions and limitations // specific language governing permissions and limitations
// under the License. // under the License.
use crate::ast::Statement;
use crate::dialect::Dialect; use crate::dialect::Dialect;
use crate::keywords::Keyword; use crate::keywords::Keyword;
use crate::parser::Parser; use crate::parser::{Parser, ParserError};
/// These keywords are disallowed as column identifiers. Such that /// These keywords are disallowed as column identifiers. Such that
/// `SELECT 5 AS <col> FROM T` is rejected by BigQuery. /// `SELECT 5 AS <col> FROM T` is rejected by BigQuery.
@ -44,7 +45,11 @@ const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[
pub struct BigQueryDialect; pub struct BigQueryDialect;
impl Dialect for BigQueryDialect { impl Dialect for BigQueryDialect {
// See https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#identifiers fn parse_statement(&self, parser: &mut Parser) -> Option<Result<Statement, ParserError>> {
self.maybe_parse_statement(parser)
}
/// See <https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#identifiers>
fn is_delimited_identifier_start(&self, ch: char) -> bool { fn is_delimited_identifier_start(&self, ch: char) -> bool {
ch == '`' ch == '`'
} }
@ -60,6 +65,9 @@ impl Dialect for BigQueryDialect {
fn is_identifier_start(&self, ch: char) -> bool { fn is_identifier_start(&self, ch: char) -> bool {
ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch == '_' ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch == '_'
// BigQuery supports `@@foo.bar` variable syntax in its procedural language.
// https://cloud.google.com/bigquery/docs/reference/standard-sql/procedural-language#beginexceptionend
|| ch == '@'
} }
fn is_identifier_part(&self, ch: char) -> bool { fn is_identifier_part(&self, ch: char) -> bool {
@ -129,3 +137,48 @@ impl Dialect for BigQueryDialect {
!RESERVED_FOR_COLUMN_ALIAS.contains(kw) !RESERVED_FOR_COLUMN_ALIAS.contains(kw)
} }
} }
impl BigQueryDialect {
    /// Dialect-specific statement hook.
    ///
    /// Yields `Some(..)` with the outcome of [`Self::parse_begin`] when the next
    /// keyword is `BEGIN`; yields `None` for every other input.
    ///
    /// NOTE(review): every `BEGIN`-led input is routed to `parse_begin`, which
    /// insists on a closing `END`. A bare `BEGIN` or `BEGIN TRANSACTION` therefore
    /// never takes the generic transaction path for this dialect — confirm that
    /// this is intended.
    fn maybe_parse_statement(&self, parser: &mut Parser) -> Option<Result<Statement, ParserError>> {
        parser
            .peek_keyword(Keyword::BEGIN)
            .then(|| self.parse_begin(parser))
    }

    /// Parse a `BEGIN ... [EXCEPTION WHEN ERROR THEN ...] END` block.
    ///
    /// The result is a [`Statement::StartTransaction`] with `begin` and
    /// `has_end_keyword` set to `true`, no transaction kind, no modifier and
    /// default `modes`. `exception_statements` is `Some` (possibly empty) only
    /// when the `EXCEPTION WHEN ERROR THEN` keywords were present.
    ///
    /// Errors from the underlying parser calls are propagated unchanged.
    ///
    /// <https://cloud.google.com/bigquery/docs/reference/standard-sql/procedural-language#beginexceptionend>
    fn parse_begin(&self, parser: &mut Parser) -> Result<Statement, ParserError> {
        parser.expect_keyword(Keyword::BEGIN)?;

        // Main body: stops in front of either `EXCEPTION` or `END`.
        let statements = parser.parse_statement_list(&[Keyword::EXCEPTION, Keyword::END])?;

        let exception_statements = if parser.parse_keywords(&[
            Keyword::EXCEPTION,
            Keyword::WHEN,
            Keyword::ERROR,
            Keyword::THEN,
        ]) {
            // The handler may be empty, i.e. `... THEN END`.
            let handler = if parser.peek_keyword(Keyword::END) {
                Default::default()
            } else {
                parser.parse_statement_list(&[Keyword::END])?
            };
            Some(handler)
        } else {
            None
        };

        parser.expect_keyword(Keyword::END)?;

        Ok(Statement::StartTransaction {
            modes: Default::default(),
            begin: true,
            transaction: None,
            modifier: None,
            statements,
            exception_statements,
            has_end_keyword: true,
        })
    }
}

View file

@ -4273,6 +4273,27 @@ impl<'a> Parser<'a> {
self.parse_comma_separated(f) self.parse_comma_separated(f)
} }
/// Parses a (possibly empty) sequence of statements, each of which must be
/// terminated by a semicolon.
///
/// Parsing stops, without consuming anything, as soon as the upcoming token
/// is an unquoted word whose keyword is one of `terminal_keywords`.
/// Any error from parsing a statement or from a missing `;` is returned as-is.
pub(crate) fn parse_statement_list(
    &mut self,
    terminal_keywords: &[Keyword],
) -> Result<Vec<Statement>, ParserError> {
    let mut statements = Vec::new();
    // Quoted words are never treated as terminators, only bare keywords.
    while !matches!(
        &self.peek_nth_token_ref(0).token,
        Token::Word(word)
            if word.quote_style.is_none() && terminal_keywords.contains(&word.keyword)
    ) {
        statements.push(self.parse_statement()?);
        self.expect_token(&Token::SemiColon)?;
    }
    Ok(statements)
}
/// Default implementation of a predicate that returns true if /// Default implementation of a predicate that returns true if
/// the specified keyword is reserved for column alias. /// the specified keyword is reserved for column alias.
/// See [Dialect::is_column_alias] /// See [Dialect::is_column_alias]
@ -13783,6 +13804,9 @@ impl<'a> Parser<'a> {
begin: false, begin: false,
transaction: Some(BeginTransactionKind::Transaction), transaction: Some(BeginTransactionKind::Transaction),
modifier: None, modifier: None,
statements: vec![],
exception_statements: None,
has_end_keyword: false,
}) })
} }
@ -13812,6 +13836,9 @@ impl<'a> Parser<'a> {
begin: true, begin: true,
transaction, transaction,
modifier, modifier,
statements: vec![],
exception_statements: None,
has_end_keyword: false,
}) })
} }

View file

@ -236,6 +236,52 @@ fn parse_big_query_non_reserved_column_alias() {
bigquery().verified_stmt(sql); bigquery().verified_stmt(sql);
} }
// Exercises `@@`-prefixed names (e.g. `@@error.message`) in a BigQuery
// SELECT list through `verified_stmt`.
#[test]
fn parse_at_at_identifier() {
    let sql = "SELECT @@error.stack_trace, @@error.message";
    bigquery().verified_stmt(sql);
}
// Covers the BigQuery `BEGIN ... [EXCEPTION WHEN ERROR THEN ...] END` block:
// the shape of the parsed AST, a set of inputs checked with `verified_stmt`,
// and the error reported when a statement lacks its trailing semicolon.
#[test]
fn parse_begin() {
    // AST shape for a block with one body statement and one handler statement.
    let sql = r#"BEGIN SELECT 1; EXCEPTION WHEN ERROR THEN SELECT 2; END"#;
    match bigquery().verified_stmt(sql) {
        Statement::StartTransaction {
            statements,
            exception_statements,
            has_end_keyword,
            ..
        } => {
            assert_eq!(1, statements.len());
            assert_eq!(1, exception_statements.unwrap().len());
            assert!(has_end_keyword);
        }
        _ => unreachable!(),
    }

    // Body and handler may each hold zero, one or several statements.
    for accepted in [
        "BEGIN SELECT 1; SELECT 2; EXCEPTION WHEN ERROR THEN SELECT 2; SELECT 4; END",
        "BEGIN SELECT 1; EXCEPTION WHEN ERROR THEN SELECT @@error.stack_trace; END",
        "BEGIN EXCEPTION WHEN ERROR THEN SELECT 2; END",
        "BEGIN SELECT 1; SELECT 2; EXCEPTION WHEN ERROR THEN END",
        "BEGIN EXCEPTION WHEN ERROR THEN END",
        "BEGIN SELECT 1; SELECT 2; END",
        "BEGIN END",
    ] {
        bigquery().verified_stmt(accepted);
    }

    // The last statement before `END` still needs its semicolon.
    for rejected in [
        "BEGIN SELECT 1; SELECT 2 END",
        "BEGIN SELECT 1; EXCEPTION WHEN ERROR THEN SELECT 2 END",
    ] {
        assert_eq!(
            bigquery().parse_sql_statements(rejected).unwrap_err(),
            ParserError::ParserError("Expected: ;, found: END".to_string())
        );
    }
}
#[test] #[test]
fn parse_delete_statement() { fn parse_delete_statement() {
let sql = "DELETE \"table\" WHERE 1"; let sql = "DELETE \"table\" WHERE 1";

View file

@ -8343,7 +8343,12 @@ fn lateral_function() {
#[test] #[test]
fn parse_start_transaction() { fn parse_start_transaction() {
match verified_stmt("START TRANSACTION READ ONLY, READ WRITE, ISOLATION LEVEL SERIALIZABLE") { let dialects = all_dialects_except(|d|
// BigQuery does not support this syntax
d.is::<BigQueryDialect>());
match dialects
.verified_stmt("START TRANSACTION READ ONLY, READ WRITE, ISOLATION LEVEL SERIALIZABLE")
{
Statement::StartTransaction { modes, .. } => assert_eq!( Statement::StartTransaction { modes, .. } => assert_eq!(
modes, modes,
vec![ vec![
@ -8357,7 +8362,7 @@ fn parse_start_transaction() {
// For historical reasons, PostgreSQL allows the commas between the modes to // For historical reasons, PostgreSQL allows the commas between the modes to
// be omitted. // be omitted.
match one_statement_parses_to( match dialects.one_statement_parses_to(
"START TRANSACTION READ ONLY READ WRITE ISOLATION LEVEL SERIALIZABLE", "START TRANSACTION READ ONLY READ WRITE ISOLATION LEVEL SERIALIZABLE",
"START TRANSACTION READ ONLY, READ WRITE, ISOLATION LEVEL SERIALIZABLE", "START TRANSACTION READ ONLY, READ WRITE, ISOLATION LEVEL SERIALIZABLE",
) { ) {
@ -8372,40 +8377,40 @@ fn parse_start_transaction() {
_ => unreachable!(), _ => unreachable!(),
} }
verified_stmt("START TRANSACTION"); dialects.verified_stmt("START TRANSACTION");
verified_stmt("BEGIN"); dialects.verified_stmt("BEGIN");
verified_stmt("BEGIN WORK"); dialects.verified_stmt("BEGIN WORK");
verified_stmt("BEGIN TRANSACTION"); dialects.verified_stmt("BEGIN TRANSACTION");
verified_stmt("START TRANSACTION ISOLATION LEVEL READ UNCOMMITTED"); dialects.verified_stmt("START TRANSACTION ISOLATION LEVEL READ UNCOMMITTED");
verified_stmt("START TRANSACTION ISOLATION LEVEL READ COMMITTED"); dialects.verified_stmt("START TRANSACTION ISOLATION LEVEL READ COMMITTED");
verified_stmt("START TRANSACTION ISOLATION LEVEL REPEATABLE READ"); dialects.verified_stmt("START TRANSACTION ISOLATION LEVEL REPEATABLE READ");
verified_stmt("START TRANSACTION ISOLATION LEVEL SERIALIZABLE"); dialects.verified_stmt("START TRANSACTION ISOLATION LEVEL SERIALIZABLE");
// Regression test for https://github.com/sqlparser-rs/sqlparser-rs/pull/139, // Regression test for https://github.com/sqlparser-rs/sqlparser-rs/pull/139,
// in which START TRANSACTION would fail to parse if followed by a statement // in which START TRANSACTION would fail to parse if followed by a statement
// terminator. // terminator.
assert_eq!( assert_eq!(
parse_sql_statements("START TRANSACTION; SELECT 1"), dialects.parse_sql_statements("START TRANSACTION; SELECT 1"),
Ok(vec![ Ok(vec![
verified_stmt("START TRANSACTION"), verified_stmt("START TRANSACTION"),
verified_stmt("SELECT 1"), verified_stmt("SELECT 1"),
]) ])
); );
let res = parse_sql_statements("START TRANSACTION ISOLATION LEVEL BAD"); let res = dialects.parse_sql_statements("START TRANSACTION ISOLATION LEVEL BAD");
assert_eq!( assert_eq!(
ParserError::ParserError("Expected: isolation level, found: BAD".to_string()), ParserError::ParserError("Expected: isolation level, found: BAD".to_string()),
res.unwrap_err() res.unwrap_err()
); );
let res = parse_sql_statements("START TRANSACTION BAD"); let res = dialects.parse_sql_statements("START TRANSACTION BAD");
assert_eq!( assert_eq!(
ParserError::ParserError("Expected: end of statement, found: BAD".to_string()), ParserError::ParserError("Expected: end of statement, found: BAD".to_string()),
res.unwrap_err() res.unwrap_err()
); );
let res = parse_sql_statements("START TRANSACTION READ ONLY,"); let res = dialects.parse_sql_statements("START TRANSACTION READ ONLY,");
assert_eq!( assert_eq!(
ParserError::ParserError("Expected: transaction mode, found: EOF".to_string()), ParserError::ParserError("Expected: transaction mode, found: EOF".to_string()),
res.unwrap_err() res.unwrap_err()

View file

@ -518,23 +518,6 @@ fn parse_start_transaction_with_modifier() {
sqlite_and_generic().verified_stmt("BEGIN DEFERRED"); sqlite_and_generic().verified_stmt("BEGIN DEFERRED");
sqlite_and_generic().verified_stmt("BEGIN IMMEDIATE"); sqlite_and_generic().verified_stmt("BEGIN IMMEDIATE");
sqlite_and_generic().verified_stmt("BEGIN EXCLUSIVE"); sqlite_and_generic().verified_stmt("BEGIN EXCLUSIVE");
let unsupported_dialects = all_dialects_except(|d| d.supports_start_transaction_modifier());
let res = unsupported_dialects.parse_sql_statements("BEGIN DEFERRED");
assert_eq!(
ParserError::ParserError("Expected: end of statement, found: DEFERRED".to_string()),
res.unwrap_err(),
);
let res = unsupported_dialects.parse_sql_statements("BEGIN IMMEDIATE");
assert_eq!(
ParserError::ParserError("Expected: end of statement, found: IMMEDIATE".to_string()),
res.unwrap_err(),
);
let res = unsupported_dialects.parse_sql_statements("BEGIN EXCLUSIVE");
assert_eq!(
ParserError::ParserError("Expected: end of statement, found: EXCLUSIVE".to_string()),
res.unwrap_err(),
);
} }
#[test] #[test]