Add support for trailing commas (#810)

* Add support for trailing commas

* Support trailing commas for brace/bracket

* Andrew's comments
This commit is contained in:
Ankur Goyal 2023-03-02 07:35:46 -08:00 committed by GitHub
parent 2285bb44ba
commit b45306819c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 65 additions and 23 deletions

View file

@ -195,12 +195,20 @@ impl std::error::Error for ParserError {}
// By default, allow expressions up to this deep before erroring
const DEFAULT_REMAINING_DEPTH: usize = 50;
/// Options that adjust [`Parser`] behavior independently of the active dialect.
///
/// The derived [`Default`] leaves every option disabled.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct ParserOptions {
    /// When `true`, a comma-separated list may end with a trailing comma,
    /// e.g. `SELECT a, b, FROM t`.
    pub trailing_commas: bool,
}
/// Parser state: the token stream being consumed, the cursor into it, and the
/// dialect and options that steer parsing.
pub struct Parser<'a> {
/// The token stream being parsed
tokens: Vec<TokenWithLocation>,
/// The index of the first unprocessed token in `self.tokens`
index: usize,
/// The current dialect to use
dialect: &'a dyn Dialect,
/// Additional options that allow you to mix & match behavior otherwise
/// constrained to certain dialects (e.g. trailing commas)
options: ParserOptions,
/// Ensures the stack does not overflow by limiting recursion depth
recursion_counter: RecursionCounter,
}
@ -227,6 +235,7 @@ impl<'a> Parser<'a> {
index: 0,
dialect,
recursion_counter: RecursionCounter::new(DEFAULT_REMAINING_DEPTH),
options: ParserOptions::default(),
}
}
@ -255,6 +264,31 @@ impl<'a> Parser<'a> {
self
}
/// Specify additional parser options
///
///
/// [`Parser`] supports additional options ([`ParserOptions`]) that allow you to
/// mix & match behavior otherwise constrained to certain dialects (e.g. trailing
/// commas).
///
/// Example:
/// ```
/// # use sqlparser::{parser::{Parser, ParserError, ParserOptions}, dialect::GenericDialect};
/// # fn main() -> Result<(), ParserError> {
/// let dialect = GenericDialect{};
/// let result = Parser::new(&dialect)
/// .with_options(ParserOptions { trailing_commas: true })
/// .try_with_sql("SELECT a, b, COUNT(*), FROM foo GROUP BY a, b,")?
/// .parse_statements();
/// assert!(matches!(result, Ok(_)));
/// # Ok(())
/// # }
/// ```
pub fn with_options(mut self, options: ParserOptions) -> Self {
self.options = options;
self
}
/// Reset this parser to parse the specified token stream
pub fn with_tokens_with_locations(mut self, tokens: Vec<TokenWithLocation>) -> Self {
self.tokens = tokens;
@ -2196,29 +2230,19 @@ impl<'a> Parser<'a> {
/// Parse a comma-separated list of 1+ SelectItem
pub fn parse_projection(&mut self) -> Result<Vec<SelectItem>, ParserError> {
// NOTE(review): the statements from `let mut values` through `Ok(values)`
// look like the pre-change loop shown alongside its replacement below;
// confirm which lines are live before relying on this span.
let mut values = vec![];
loop {
values.push(self.parse_select_item()?);
if !self.consume_token(&Token::Comma) {
break;
} else if dialect_of!(self is BigQueryDialect) {
// BigQuery allows trailing commas.
// e.g. `SELECT 1, 2, FROM t`
// https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#trailing_commas
match self.peek_token().token {
Token::Word(kw)
if keywords::RESERVED_FOR_COLUMN_ALIAS
.iter()
.any(|d| kw.keyword == *d) =>
{
break;
}
Token::RParen | Token::EOF => break,
_ => continue,
}
}
}
Ok(values)
// BigQuery allows trailing commas, but only in projection lists
// e.g. `SELECT 1, 2, FROM t`
// https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#trailing_commas
//
// This pattern could be captured better with RAII type semantics, but it's quite a bit of
// code to add for just one case, so we'll just do it manually here.
//
// Remember the caller's setting, force trailing commas on for BigQuery while the
// select items are parsed, then put the original setting back.
let old_value = self.options.trailing_commas;
self.options.trailing_commas |= dialect_of!(self is BigQueryDialect);
// No `?` here: the result is held in `ret` so the setting is restored on errors too.
let ret = self.parse_comma_separated(|p| p.parse_select_item());
self.options.trailing_commas = old_value;
ret
}
/// Parse a comma-separated list of 1+ items accepted by `F`
@ -2231,6 +2255,22 @@ impl<'a> Parser<'a> {
values.push(f(self)?);
if !self.consume_token(&Token::Comma) {
break;
} else if self.options.trailing_commas {
match self.peek_token().token {
Token::Word(kw)
if keywords::RESERVED_FOR_COLUMN_ALIAS
.iter()
.any(|d| kw.keyword == *d) =>
{
break;
}
Token::RParen
| Token::SemiColon
| Token::EOF
| Token::RBracket
| Token::RBrace => break,
_ => continue,
}
}
}
Ok(values)

View file

@ -181,6 +181,8 @@ fn parse_join_constraint_unnest_alias() {
fn parse_trailing_comma() {
for (sql, canonical) in [
("SELECT a,", "SELECT a"),
("SELECT 1,", "SELECT 1"),
("SELECT 1,2,", "SELECT 1, 2"),
("SELECT a, b,", "SELECT a, b"),
("SELECT a, b AS c,", "SELECT a, b AS c"),
("SELECT a, b AS c, FROM t", "SELECT a, b AS c FROM t"),