mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-07-07 17:04:59 +00:00
BigQuery: Fix column identifier reserved keywords list (#1678)
This commit is contained in:
parent
269967a6ac
commit
7980c866a3
5 changed files with 120 additions and 34 deletions
|
@ -16,6 +16,28 @@
|
|||
// under the License.
|
||||
|
||||
use crate::dialect::Dialect;
|
||||
use crate::keywords::Keyword;
|
||||
use crate::parser::Parser;
|
||||
|
||||
/// These keywords are disallowed as column identifiers. Such that
|
||||
/// `SELECT 5 AS <col> FROM T` is rejected by BigQuery.
|
||||
const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[
|
||||
Keyword::WITH,
|
||||
Keyword::SELECT,
|
||||
Keyword::WHERE,
|
||||
Keyword::GROUP,
|
||||
Keyword::HAVING,
|
||||
Keyword::ORDER,
|
||||
Keyword::LATERAL,
|
||||
Keyword::LIMIT,
|
||||
Keyword::FETCH,
|
||||
Keyword::UNION,
|
||||
Keyword::EXCEPT,
|
||||
Keyword::INTERSECT,
|
||||
Keyword::FROM,
|
||||
Keyword::INTO,
|
||||
Keyword::END,
|
||||
];
|
||||
|
||||
/// A [`Dialect`] for [Google Bigquery](https://cloud.google.com/bigquery/)
|
||||
#[derive(Debug, Default)]
|
||||
|
@ -92,4 +114,8 @@ impl Dialect for BigQueryDialect {
|
|||
fn supports_timestamp_versioning(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn is_column_alias(&self, kw: &Keyword, _parser: &mut Parser) -> bool {
|
||||
!RESERVED_FOR_COLUMN_ALIAS.contains(kw)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -804,7 +804,7 @@ pub trait Dialect: Debug + Any {
|
|||
keywords::RESERVED_FOR_IDENTIFIER.contains(&kw)
|
||||
}
|
||||
|
||||
// Returns reserved keywords when looking to parse a [TableFactor].
|
||||
/// Returns reserved keywords when looking to parse a `TableFactor`.
|
||||
/// See [Self::supports_from_trailing_commas]
|
||||
fn get_reserved_keywords_for_table_factor(&self) -> &[Keyword] {
|
||||
keywords::RESERVED_FOR_TABLE_FACTOR
|
||||
|
@ -844,11 +844,17 @@ pub trait Dialect: Debug + Any {
|
|||
false
|
||||
}
|
||||
|
||||
/// Returns true if the specified keyword should be parsed as a column identifier.
|
||||
/// See [keywords::RESERVED_FOR_COLUMN_ALIAS]
|
||||
fn is_column_alias(&self, kw: &Keyword, _parser: &mut Parser) -> bool {
|
||||
!keywords::RESERVED_FOR_COLUMN_ALIAS.contains(kw)
|
||||
}
|
||||
|
||||
/// Returns true if the specified keyword should be parsed as a select item alias.
|
||||
/// When explicit is true, the keyword is preceded by an `AS` word. Parser is provided
|
||||
/// to enable looking ahead if needed.
|
||||
fn is_select_item_alias(&self, explicit: bool, kw: &Keyword, _parser: &mut Parser) -> bool {
|
||||
explicit || !keywords::RESERVED_FOR_COLUMN_ALIAS.contains(kw)
|
||||
fn is_select_item_alias(&self, explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool {
|
||||
explicit || self.is_column_alias(kw, parser)
|
||||
}
|
||||
|
||||
/// Returns true if the specified keyword should be parsed as a table factor alias.
|
||||
|
|
|
@ -3951,7 +3951,7 @@ impl<'a> Parser<'a> {
|
|||
self.parse_comma_separated_with_trailing_commas(
|
||||
|p| p.parse_select_item(),
|
||||
trailing_commas,
|
||||
None,
|
||||
Self::is_reserved_for_column_alias,
|
||||
)
|
||||
}
|
||||
|
||||
|
@ -3985,30 +3985,42 @@ impl<'a> Parser<'a> {
|
|||
self.parse_comma_separated_with_trailing_commas(
|
||||
Parser::parse_table_and_joins,
|
||||
trailing_commas,
|
||||
Some(self.dialect.get_reserved_keywords_for_table_factor()),
|
||||
|kw, _parser| {
|
||||
self.dialect
|
||||
.get_reserved_keywords_for_table_factor()
|
||||
.contains(kw)
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
/// Parse the comma of a comma-separated syntax element.
|
||||
/// `R` is a predicate that should return true if the next
|
||||
/// keyword is a reserved keyword.
|
||||
/// Allows for control over trailing commas
|
||||
///
|
||||
/// Returns true if there is a next element
|
||||
fn is_parse_comma_separated_end_with_trailing_commas(
|
||||
fn is_parse_comma_separated_end_with_trailing_commas<R>(
|
||||
&mut self,
|
||||
trailing_commas: bool,
|
||||
reserved_keywords: Option<&[Keyword]>,
|
||||
) -> bool {
|
||||
let reserved_keywords = reserved_keywords.unwrap_or(keywords::RESERVED_FOR_COLUMN_ALIAS);
|
||||
is_reserved_keyword: &R,
|
||||
) -> bool
|
||||
where
|
||||
R: Fn(&Keyword, &mut Parser) -> bool,
|
||||
{
|
||||
if !self.consume_token(&Token::Comma) {
|
||||
true
|
||||
} else if trailing_commas {
|
||||
let token = self.peek_token().token;
|
||||
match token {
|
||||
Token::Word(ref kw) if reserved_keywords.contains(&kw.keyword) => true,
|
||||
let token = self.next_token().token;
|
||||
let is_end = match token {
|
||||
Token::Word(ref kw) if is_reserved_keyword(&kw.keyword, self) => true,
|
||||
Token::RParen | Token::SemiColon | Token::EOF | Token::RBracket | Token::RBrace => {
|
||||
true
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
};
|
||||
self.prev_token();
|
||||
|
||||
is_end
|
||||
} else {
|
||||
false
|
||||
}
|
||||
|
@ -4017,7 +4029,10 @@ impl<'a> Parser<'a> {
|
|||
/// Parse the comma of a comma-separated syntax element.
|
||||
/// Returns true if there is a next element
|
||||
fn is_parse_comma_separated_end(&mut self) -> bool {
|
||||
self.is_parse_comma_separated_end_with_trailing_commas(self.options.trailing_commas, None)
|
||||
self.is_parse_comma_separated_end_with_trailing_commas(
|
||||
self.options.trailing_commas,
|
||||
&Self::is_reserved_for_column_alias,
|
||||
)
|
||||
}
|
||||
|
||||
/// Parse a comma-separated list of 1+ items accepted by `F`
|
||||
|
@ -4025,26 +4040,33 @@ impl<'a> Parser<'a> {
|
|||
where
|
||||
F: FnMut(&mut Parser<'a>) -> Result<T, ParserError>,
|
||||
{
|
||||
self.parse_comma_separated_with_trailing_commas(f, self.options.trailing_commas, None)
|
||||
self.parse_comma_separated_with_trailing_commas(
|
||||
f,
|
||||
self.options.trailing_commas,
|
||||
Self::is_reserved_for_column_alias,
|
||||
)
|
||||
}
|
||||
|
||||
/// Parse a comma-separated list of 1+ items accepted by `F`
|
||||
/// Allows for control over trailing commas
|
||||
fn parse_comma_separated_with_trailing_commas<T, F>(
|
||||
/// Parse a comma-separated list of 1+ items accepted by `F`.
|
||||
/// `R` is a predicate that should return true if the next
|
||||
/// keyword is a reserved keyword.
|
||||
/// Allows for control over trailing commas.
|
||||
fn parse_comma_separated_with_trailing_commas<T, F, R>(
|
||||
&mut self,
|
||||
mut f: F,
|
||||
trailing_commas: bool,
|
||||
reserved_keywords: Option<&[Keyword]>,
|
||||
is_reserved_keyword: R,
|
||||
) -> Result<Vec<T>, ParserError>
|
||||
where
|
||||
F: FnMut(&mut Parser<'a>) -> Result<T, ParserError>,
|
||||
R: Fn(&Keyword, &mut Parser) -> bool,
|
||||
{
|
||||
let mut values = vec![];
|
||||
loop {
|
||||
values.push(f(self)?);
|
||||
if self.is_parse_comma_separated_end_with_trailing_commas(
|
||||
trailing_commas,
|
||||
reserved_keywords,
|
||||
&is_reserved_keyword,
|
||||
) {
|
||||
break;
|
||||
}
|
||||
|
@ -4118,6 +4140,13 @@ impl<'a> Parser<'a> {
|
|||
self.parse_comma_separated(f)
|
||||
}
|
||||
|
||||
/// Default implementation of a predicate that returns true if
|
||||
/// the specified keyword is reserved for column alias.
|
||||
/// See [Dialect::is_column_alias]
|
||||
fn is_reserved_for_column_alias(kw: &Keyword, parser: &mut Parser) -> bool {
|
||||
!parser.dialect.is_column_alias(kw, parser)
|
||||
}
|
||||
|
||||
/// Run a parser method `f`, reverting back to the current position if unsuccessful.
|
||||
/// Returns `None` if `f` returns an error
|
||||
pub fn maybe_parse<T, F>(&mut self, f: F) -> Result<Option<T>, ParserError>
|
||||
|
@ -9394,7 +9423,7 @@ impl<'a> Parser<'a> {
|
|||
let cols = self.parse_comma_separated_with_trailing_commas(
|
||||
Parser::parse_view_column,
|
||||
self.dialect.supports_column_definition_trailing_commas(),
|
||||
None,
|
||||
Self::is_reserved_for_column_alias,
|
||||
)?;
|
||||
self.expect_token(&Token::RParen)?;
|
||||
Ok(cols)
|
||||
|
|
|
@ -213,6 +213,15 @@ fn parse_raw_literal() {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_big_query_non_reserved_column_alias() {
|
||||
let sql = r#"SELECT OFFSET, EXPLAIN, ANALYZE, SORT, TOP, VIEW FROM T"#;
|
||||
bigquery().verified_stmt(sql);
|
||||
|
||||
let sql = r#"SELECT 1 AS OFFSET, 2 AS EXPLAIN, 3 AS ANALYZE FROM T"#;
|
||||
bigquery().verified_stmt(sql);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_delete_statement() {
|
||||
let sql = "DELETE \"table\" WHERE 1";
|
||||
|
|
|
@ -253,8 +253,13 @@ fn parse_insert_default_values() {
|
|||
|
||||
#[test]
|
||||
fn parse_insert_select_returning() {
|
||||
verified_stmt("INSERT INTO t SELECT 1 RETURNING 2");
|
||||
let stmt = verified_stmt("INSERT INTO t SELECT x RETURNING x AS y");
|
||||
// Dialects that support `RETURNING` as a column identifier do
|
||||
// not support this syntax.
|
||||
let dialects =
|
||||
all_dialects_where(|d| !d.is_column_alias(&Keyword::RETURNING, &mut Parser::new(d)));
|
||||
|
||||
dialects.verified_stmt("INSERT INTO t SELECT 1 RETURNING 2");
|
||||
let stmt = dialects.verified_stmt("INSERT INTO t SELECT x RETURNING x AS y");
|
||||
match stmt {
|
||||
Statement::Insert(Insert {
|
||||
returning: Some(ret),
|
||||
|
@ -6993,9 +6998,6 @@ fn parse_union_except_intersect_minus() {
|
|||
verified_stmt("SELECT 1 EXCEPT SELECT 2");
|
||||
verified_stmt("SELECT 1 EXCEPT ALL SELECT 2");
|
||||
verified_stmt("SELECT 1 EXCEPT DISTINCT SELECT 1");
|
||||
verified_stmt("SELECT 1 MINUS SELECT 2");
|
||||
verified_stmt("SELECT 1 MINUS ALL SELECT 2");
|
||||
verified_stmt("SELECT 1 MINUS DISTINCT SELECT 1");
|
||||
verified_stmt("SELECT 1 INTERSECT SELECT 2");
|
||||
verified_stmt("SELECT 1 INTERSECT ALL SELECT 2");
|
||||
verified_stmt("SELECT 1 INTERSECT DISTINCT SELECT 1");
|
||||
|
@ -7014,6 +7016,13 @@ fn parse_union_except_intersect_minus() {
|
|||
verified_stmt("SELECT 1 AS x, 2 AS y INTERSECT BY NAME SELECT 9 AS y, 8 AS x");
|
||||
verified_stmt("SELECT 1 AS x, 2 AS y INTERSECT ALL BY NAME SELECT 9 AS y, 8 AS x");
|
||||
verified_stmt("SELECT 1 AS x, 2 AS y INTERSECT DISTINCT BY NAME SELECT 9 AS y, 8 AS x");
|
||||
|
||||
// Dialects that support `MINUS` as column identifier
|
||||
// do not support `MINUS` as a set operator.
|
||||
let dialects = all_dialects_where(|d| !d.is_column_alias(&Keyword::MINUS, &mut Parser::new(d)));
|
||||
dialects.verified_stmt("SELECT 1 MINUS SELECT 2");
|
||||
dialects.verified_stmt("SELECT 1 MINUS ALL SELECT 2");
|
||||
dialects.verified_stmt("SELECT 1 MINUS DISTINCT SELECT 1");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -7690,19 +7699,26 @@ fn parse_invalid_subquery_without_parens() {
|
|||
|
||||
#[test]
|
||||
fn parse_offset() {
|
||||
// Dialects that support `OFFSET` as column identifiers
|
||||
// don't support this syntax.
|
||||
let dialects =
|
||||
all_dialects_where(|d| !d.is_column_alias(&Keyword::OFFSET, &mut Parser::new(d)));
|
||||
|
||||
let expect = Some(Offset {
|
||||
value: Expr::Value(number("2")),
|
||||
rows: OffsetRows::Rows,
|
||||
});
|
||||
let ast = verified_query("SELECT foo FROM bar OFFSET 2 ROWS");
|
||||
let ast = dialects.verified_query("SELECT foo FROM bar OFFSET 2 ROWS");
|
||||
assert_eq!(ast.offset, expect);
|
||||
let ast = verified_query("SELECT foo FROM bar WHERE foo = 4 OFFSET 2 ROWS");
|
||||
let ast = dialects.verified_query("SELECT foo FROM bar WHERE foo = 4 OFFSET 2 ROWS");
|
||||
assert_eq!(ast.offset, expect);
|
||||
let ast = verified_query("SELECT foo FROM bar ORDER BY baz OFFSET 2 ROWS");
|
||||
let ast = dialects.verified_query("SELECT foo FROM bar ORDER BY baz OFFSET 2 ROWS");
|
||||
assert_eq!(ast.offset, expect);
|
||||
let ast = verified_query("SELECT foo FROM bar WHERE foo = 4 ORDER BY baz OFFSET 2 ROWS");
|
||||
let ast =
|
||||
dialects.verified_query("SELECT foo FROM bar WHERE foo = 4 ORDER BY baz OFFSET 2 ROWS");
|
||||
assert_eq!(ast.offset, expect);
|
||||
let ast = verified_query("SELECT foo FROM (SELECT * FROM bar OFFSET 2 ROWS) OFFSET 2 ROWS");
|
||||
let ast =
|
||||
dialects.verified_query("SELECT foo FROM (SELECT * FROM bar OFFSET 2 ROWS) OFFSET 2 ROWS");
|
||||
assert_eq!(ast.offset, expect);
|
||||
match *ast.body {
|
||||
SetExpr::Select(s) => match only(s.from).relation {
|
||||
|
@ -7713,7 +7729,7 @@ fn parse_offset() {
|
|||
},
|
||||
_ => panic!("Test broke"),
|
||||
}
|
||||
let ast = verified_query("SELECT 'foo' OFFSET 0 ROWS");
|
||||
let ast = dialects.verified_query("SELECT 'foo' OFFSET 0 ROWS");
|
||||
assert_eq!(
|
||||
ast.offset,
|
||||
Some(Offset {
|
||||
|
@ -7721,7 +7737,7 @@ fn parse_offset() {
|
|||
rows: OffsetRows::Rows,
|
||||
})
|
||||
);
|
||||
let ast = verified_query("SELECT 'foo' OFFSET 1 ROW");
|
||||
let ast = dialects.verified_query("SELECT 'foo' OFFSET 1 ROW");
|
||||
assert_eq!(
|
||||
ast.offset,
|
||||
Some(Offset {
|
||||
|
@ -7729,7 +7745,7 @@ fn parse_offset() {
|
|||
rows: OffsetRows::Row,
|
||||
})
|
||||
);
|
||||
let ast = verified_query("SELECT 'foo' OFFSET 1");
|
||||
let ast = dialects.verified_query("SELECT 'foo' OFFSET 1");
|
||||
assert_eq!(
|
||||
ast.offset,
|
||||
Some(Offset {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue