mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-07-07 17:04:59 +00:00
Add support for Snowflake column aliases that use SQL keywords (#1632)
This commit is contained in:
parent
474150006f
commit
b4b5576dd4
4 changed files with 163 additions and 56 deletions
|
@ -820,6 +820,20 @@ pub trait Dialect: Debug + Any {
|
|||
fn supports_set_stmt_without_operator(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
/// Returns true if the specified keyword should be parsed as a select item alias.
|
||||
/// When explicit is true, the keyword is preceded by an `AS` word. Parser is provided
|
||||
/// to enable looking ahead if needed.
|
||||
fn is_select_item_alias(&self, explicit: bool, kw: &Keyword, _parser: &mut Parser) -> bool {
|
||||
explicit || !keywords::RESERVED_FOR_COLUMN_ALIAS.contains(kw)
|
||||
}
|
||||
|
||||
/// Returns true if the specified keyword should be parsed as a table factor alias.
|
||||
/// When explicit is true, the keyword is preceded by an `AS` word. Parser is provided
|
||||
/// to enable looking ahead if needed.
|
||||
fn is_table_factor_alias(&self, explicit: bool, kw: &Keyword, _parser: &mut Parser) -> bool {
|
||||
explicit || !keywords::RESERVED_FOR_TABLE_ALIAS.contains(kw)
|
||||
}
|
||||
}
|
||||
|
||||
/// This represents the operators for which precedence must be defined
|
||||
|
|
|
@ -251,6 +251,51 @@ impl Dialect for SnowflakeDialect {
|
|||
fn supports_partiql(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn is_select_item_alias(&self, explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool {
|
||||
explicit
|
||||
|| match kw {
|
||||
// The following keywords can be considered an alias as long as
|
||||
// they are not followed by other tokens that may change their meaning
|
||||
// e.g. `SELECT * EXCEPT (col1) FROM tbl`
|
||||
Keyword::EXCEPT
|
||||
// e.g. `SELECT 1 LIMIT 5`
|
||||
| Keyword::LIMIT
|
||||
// e.g. `SELECT 1 OFFSET 5 ROWS`
|
||||
| Keyword::OFFSET
|
||||
// e.g. `INSERT INTO t SELECT 1 RETURNING *`
|
||||
| Keyword::RETURNING if !matches!(parser.peek_token_ref().token, Token::Comma | Token::EOF) =>
|
||||
{
|
||||
false
|
||||
}
|
||||
|
||||
// `FETCH` can be considered an alias as long as it's not followed by `FIRST`` or `NEXT`
|
||||
// which would give it a different meanins, for example: `SELECT 1 FETCH FIRST 10 ROWS` - not an alias
|
||||
Keyword::FETCH
|
||||
if parser.peek_keyword(Keyword::FIRST) || parser.peek_keyword(Keyword::NEXT) =>
|
||||
{
|
||||
false
|
||||
}
|
||||
|
||||
// Reserved keywords by the Snowflake dialect, which seem to be less strictive
|
||||
// than what is listed in `keywords::RESERVED_FOR_COLUMN_ALIAS`. The following
|
||||
// keywords were tested with the this statement: `SELECT 1 <KW>`.
|
||||
Keyword::FROM
|
||||
| Keyword::GROUP
|
||||
| Keyword::HAVING
|
||||
| Keyword::INTERSECT
|
||||
| Keyword::INTO
|
||||
| Keyword::MINUS
|
||||
| Keyword::ORDER
|
||||
| Keyword::SELECT
|
||||
| Keyword::UNION
|
||||
| Keyword::WHERE
|
||||
| Keyword::WITH => false,
|
||||
|
||||
// Any other word is considered an alias
|
||||
_ => true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_file_staging_command(kw: Keyword, parser: &mut Parser) -> Result<Statement, ParserError> {
|
||||
|
|
|
@ -8838,38 +8838,76 @@ impl<'a> Parser<'a> {
|
|||
Ok(IdentWithAlias { ident, alias })
|
||||
}
|
||||
|
||||
/// Parse `AS identifier` (or simply `identifier` if it's not a reserved keyword)
|
||||
/// Some examples with aliases: `SELECT 1 foo`, `SELECT COUNT(*) AS cnt`,
|
||||
/// `SELECT ... FROM t1 foo, t2 bar`, `SELECT ... FROM (...) AS bar`
|
||||
/// Optionally parses an alias for a select list item
|
||||
fn maybe_parse_select_item_alias(&mut self) -> Result<Option<Ident>, ParserError> {
|
||||
fn validator(explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool {
|
||||
parser.dialect.is_select_item_alias(explicit, kw, parser)
|
||||
}
|
||||
self.parse_optional_alias_inner(None, validator)
|
||||
}
|
||||
|
||||
/// Optionally parses an alias for a table like in `... FROM generate_series(1, 10) AS t (col)`.
|
||||
/// In this case, the alias is allowed to optionally name the columns in the table, in
|
||||
/// addition to the table itself.
|
||||
pub fn maybe_parse_table_alias(&mut self) -> Result<Option<TableAlias>, ParserError> {
|
||||
fn validator(explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool {
|
||||
parser.dialect.is_table_factor_alias(explicit, kw, parser)
|
||||
}
|
||||
match self.parse_optional_alias_inner(None, validator)? {
|
||||
Some(name) => {
|
||||
let columns = self.parse_table_alias_column_defs()?;
|
||||
Ok(Some(TableAlias { name, columns }))
|
||||
}
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
/// Wrapper for parse_optional_alias_inner, left for backwards-compatibility
|
||||
/// but new flows should use the context-specific methods such as `maybe_parse_select_item_alias`
|
||||
/// and `maybe_parse_table_alias`.
|
||||
pub fn parse_optional_alias(
|
||||
&mut self,
|
||||
reserved_kwds: &[Keyword],
|
||||
) -> Result<Option<Ident>, ParserError> {
|
||||
fn validator(_explicit: bool, _kw: &Keyword, _parser: &mut Parser) -> bool {
|
||||
false
|
||||
}
|
||||
self.parse_optional_alias_inner(Some(reserved_kwds), validator)
|
||||
}
|
||||
|
||||
/// Parses an optional alias after a SQL element such as a select list item
|
||||
/// or a table name.
|
||||
///
|
||||
/// This method accepts an optional list of reserved keywords or a function
|
||||
/// to call to validate if a keyword should be parsed as an alias, to allow
|
||||
/// callers to customize the parsing logic based on their context.
|
||||
fn parse_optional_alias_inner<F>(
|
||||
&mut self,
|
||||
reserved_kwds: Option<&[Keyword]>,
|
||||
validator: F,
|
||||
) -> Result<Option<Ident>, ParserError>
|
||||
where
|
||||
F: Fn(bool, &Keyword, &mut Parser) -> bool,
|
||||
{
|
||||
let after_as = self.parse_keyword(Keyword::AS);
|
||||
|
||||
let next_token = self.next_token();
|
||||
match next_token.token {
|
||||
// Accept any identifier after `AS` (though many dialects have restrictions on
|
||||
// keywords that may appear here). If there's no `AS`: don't parse keywords,
|
||||
// which may start a construct allowed in this position, to be parsed as aliases.
|
||||
// (For example, in `FROM t1 JOIN` the `JOIN` will always be parsed as a keyword,
|
||||
// not an alias.)
|
||||
Token::Word(w) if after_as || !reserved_kwds.contains(&w.keyword) => {
|
||||
// By default, if a word is located after the `AS` keyword we consider it an alias
|
||||
// as long as it's not reserved.
|
||||
Token::Word(w)
|
||||
if after_as || reserved_kwds.is_some_and(|x| !x.contains(&w.keyword)) =>
|
||||
{
|
||||
Ok(Some(w.into_ident(next_token.span)))
|
||||
}
|
||||
// MSSQL supports single-quoted strings as aliases for columns
|
||||
// We accept them as table aliases too, although MSSQL does not.
|
||||
//
|
||||
// Note, that this conflicts with an obscure rule from the SQL
|
||||
// standard, which we don't implement:
|
||||
// https://crate.io/docs/sql-99/en/latest/chapters/07.html#character-string-literal-s
|
||||
// "[Obscure Rule] SQL allows you to break a long <character
|
||||
// string literal> up into two or more smaller <character string
|
||||
// literal>s, split by a <separator> that includes a newline
|
||||
// character. When it sees such a <literal>, your DBMS will
|
||||
// ignore the <separator> and treat the multiple strings as
|
||||
// a single <literal>."
|
||||
// This pattern allows for customizing the acceptance of words as aliases based on the caller's
|
||||
// context, such as to what SQL element this word is a potential alias of (select item alias, table name
|
||||
// alias, etc.) or dialect-specific logic that goes beyond a simple list of reserved keywords.
|
||||
Token::Word(w) if validator(after_as, &w.keyword, self) => {
|
||||
Ok(Some(w.into_ident(next_token.span)))
|
||||
}
|
||||
// For backwards-compatibility, we accept quoted strings as aliases regardless of the context.
|
||||
Token::SingleQuotedString(s) => Ok(Some(Ident::with_quote('\'', s))),
|
||||
// Support for MySql dialect double-quoted string, `AS "HOUR"` for example
|
||||
Token::DoubleQuotedString(s) => Ok(Some(Ident::with_quote('\"', s))),
|
||||
_ => {
|
||||
if after_as {
|
||||
|
@ -8881,23 +8919,6 @@ impl<'a> Parser<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Parse `AS identifier` when the AS is describing a table-valued object,
|
||||
/// like in `... FROM generate_series(1, 10) AS t (col)`. In this case
|
||||
/// the alias is allowed to optionally name the columns in the table, in
|
||||
/// addition to the table itself.
|
||||
pub fn parse_optional_table_alias(
|
||||
&mut self,
|
||||
reserved_kwds: &[Keyword],
|
||||
) -> Result<Option<TableAlias>, ParserError> {
|
||||
match self.parse_optional_alias(reserved_kwds)? {
|
||||
Some(name) => {
|
||||
let columns = self.parse_table_alias_column_defs()?;
|
||||
Ok(Some(TableAlias { name, columns }))
|
||||
}
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_optional_group_by(&mut self) -> Result<Option<GroupByExpr>, ParserError> {
|
||||
if self.parse_keywords(&[Keyword::GROUP, Keyword::BY]) {
|
||||
let expressions = if self.parse_keyword(Keyword::ALL) {
|
||||
|
@ -10899,7 +10920,7 @@ impl<'a> Parser<'a> {
|
|||
let name = self.parse_object_name(false)?;
|
||||
self.expect_token(&Token::LParen)?;
|
||||
let args = self.parse_optional_args()?;
|
||||
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
|
||||
let alias = self.maybe_parse_table_alias()?;
|
||||
Ok(TableFactor::Function {
|
||||
lateral: true,
|
||||
name,
|
||||
|
@ -10912,7 +10933,7 @@ impl<'a> Parser<'a> {
|
|||
self.expect_token(&Token::LParen)?;
|
||||
let expr = self.parse_expr()?;
|
||||
self.expect_token(&Token::RParen)?;
|
||||
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
|
||||
let alias = self.maybe_parse_table_alias()?;
|
||||
Ok(TableFactor::TableFunction { expr, alias })
|
||||
} else if self.consume_token(&Token::LParen) {
|
||||
// A left paren introduces either a derived table (i.e., a subquery)
|
||||
|
@ -10961,7 +10982,7 @@ impl<'a> Parser<'a> {
|
|||
#[allow(clippy::if_same_then_else)]
|
||||
if !table_and_joins.joins.is_empty() {
|
||||
self.expect_token(&Token::RParen)?;
|
||||
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
|
||||
let alias = self.maybe_parse_table_alias()?;
|
||||
Ok(TableFactor::NestedJoin {
|
||||
table_with_joins: Box::new(table_and_joins),
|
||||
alias,
|
||||
|
@ -10974,7 +10995,7 @@ impl<'a> Parser<'a> {
|
|||
// (B): `table_and_joins` (what we found inside the parentheses)
|
||||
// is a nested join `(foo JOIN bar)`, not followed by other joins.
|
||||
self.expect_token(&Token::RParen)?;
|
||||
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
|
||||
let alias = self.maybe_parse_table_alias()?;
|
||||
Ok(TableFactor::NestedJoin {
|
||||
table_with_joins: Box::new(table_and_joins),
|
||||
alias,
|
||||
|
@ -10988,9 +11009,7 @@ impl<'a> Parser<'a> {
|
|||
// [AS alias])`) as well.
|
||||
self.expect_token(&Token::RParen)?;
|
||||
|
||||
if let Some(outer_alias) =
|
||||
self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?
|
||||
{
|
||||
if let Some(outer_alias) = self.maybe_parse_table_alias()? {
|
||||
// Snowflake also allows specifying an alias *after* parens
|
||||
// e.g. `FROM (mytable) AS alias`
|
||||
match &mut table_and_joins.relation {
|
||||
|
@ -11043,7 +11062,7 @@ impl<'a> Parser<'a> {
|
|||
// SELECT * FROM VALUES (1, 'a'), (2, 'b') AS t (col1, col2)
|
||||
// where there are no parentheses around the VALUES clause.
|
||||
let values = SetExpr::Values(self.parse_values(false)?);
|
||||
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
|
||||
let alias = self.maybe_parse_table_alias()?;
|
||||
Ok(TableFactor::Derived {
|
||||
lateral: false,
|
||||
subquery: Box::new(Query {
|
||||
|
@ -11069,7 +11088,7 @@ impl<'a> Parser<'a> {
|
|||
self.expect_token(&Token::RParen)?;
|
||||
|
||||
let with_ordinality = self.parse_keywords(&[Keyword::WITH, Keyword::ORDINALITY]);
|
||||
let alias = match self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS) {
|
||||
let alias = match self.maybe_parse_table_alias() {
|
||||
Ok(Some(alias)) => Some(alias),
|
||||
Ok(None) => None,
|
||||
Err(e) => return Err(e),
|
||||
|
@ -11106,7 +11125,7 @@ impl<'a> Parser<'a> {
|
|||
let columns = self.parse_comma_separated(Parser::parse_json_table_column_def)?;
|
||||
self.expect_token(&Token::RParen)?;
|
||||
self.expect_token(&Token::RParen)?;
|
||||
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
|
||||
let alias = self.maybe_parse_table_alias()?;
|
||||
Ok(TableFactor::JsonTable {
|
||||
json_expr,
|
||||
json_path,
|
||||
|
@ -11151,7 +11170,7 @@ impl<'a> Parser<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
|
||||
let alias = self.maybe_parse_table_alias()?;
|
||||
|
||||
// MSSQL-specific table hints:
|
||||
let mut with_hints = vec![];
|
||||
|
@ -11329,7 +11348,7 @@ impl<'a> Parser<'a> {
|
|||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
|
||||
let alias = self.maybe_parse_table_alias()?;
|
||||
Ok(TableFactor::OpenJsonTable {
|
||||
json_expr,
|
||||
json_path,
|
||||
|
@ -11428,7 +11447,7 @@ impl<'a> Parser<'a> {
|
|||
|
||||
self.expect_token(&Token::RParen)?;
|
||||
|
||||
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
|
||||
let alias = self.maybe_parse_table_alias()?;
|
||||
|
||||
Ok(TableFactor::MatchRecognize {
|
||||
table: Box::new(table),
|
||||
|
@ -11672,7 +11691,7 @@ impl<'a> Parser<'a> {
|
|||
) -> Result<TableFactor, ParserError> {
|
||||
let subquery = self.parse_query()?;
|
||||
self.expect_token(&Token::RParen)?;
|
||||
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
|
||||
let alias = self.maybe_parse_table_alias()?;
|
||||
Ok(TableFactor::Derived {
|
||||
lateral: match lateral {
|
||||
Lateral => true,
|
||||
|
@ -11766,7 +11785,7 @@ impl<'a> Parser<'a> {
|
|||
};
|
||||
|
||||
self.expect_token(&Token::RParen)?;
|
||||
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
|
||||
let alias = self.maybe_parse_table_alias()?;
|
||||
Ok(TableFactor::Pivot {
|
||||
table: Box::new(table),
|
||||
aggregate_functions,
|
||||
|
@ -11788,7 +11807,7 @@ impl<'a> Parser<'a> {
|
|||
self.expect_keyword_is(Keyword::IN)?;
|
||||
let columns = self.parse_parenthesized_column_list(Mandatory, false)?;
|
||||
self.expect_token(&Token::RParen)?;
|
||||
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
|
||||
let alias = self.maybe_parse_table_alias()?;
|
||||
Ok(TableFactor::Unpivot {
|
||||
table: Box::new(table),
|
||||
value,
|
||||
|
@ -12614,7 +12633,7 @@ impl<'a> Parser<'a> {
|
|||
})
|
||||
}
|
||||
expr => self
|
||||
.parse_optional_alias(keywords::RESERVED_FOR_COLUMN_ALIAS)
|
||||
.maybe_parse_select_item_alias()
|
||||
.map(|alias| match alias {
|
||||
Some(alias) => SelectItem::ExprWithAlias { expr, alias },
|
||||
None => SelectItem::UnnamedExpr(expr),
|
||||
|
|
|
@ -3022,3 +3022,32 @@ fn parse_ls_and_rm() {
|
|||
|
||||
snowflake().verified_stmt(r#"LIST @"STAGE_WITH_QUOTES""#);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_sql_keywords_as_select_item_aliases() {
|
||||
// Some keywords that should be parsed as an alias
|
||||
let unreserved_kws = vec!["CLUSTER", "FETCH", "RETURNING", "LIMIT", "EXCEPT"];
|
||||
for kw in unreserved_kws {
|
||||
snowflake()
|
||||
.one_statement_parses_to(&format!("SELECT 1 {kw}"), &format!("SELECT 1 AS {kw}"));
|
||||
}
|
||||
|
||||
// Some keywords that should not be parsed as an alias
|
||||
let reserved_kws = vec![
|
||||
"FROM",
|
||||
"GROUP",
|
||||
"HAVING",
|
||||
"INTERSECT",
|
||||
"INTO",
|
||||
"ORDER",
|
||||
"SELECT",
|
||||
"UNION",
|
||||
"WHERE",
|
||||
"WITH",
|
||||
];
|
||||
for kw in reserved_kws {
|
||||
assert!(snowflake()
|
||||
.parse_sql_statements(&format!("SELECT 1 {kw}"))
|
||||
.is_err());
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue