mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-08-31 19:27:21 +00:00
Add parse_comma_separated to simplify the parser
To use the new helper effectively, a few related changes were required:

- Each of the `parse_..._list` functions (`parse_cte_list`, `parse_order_by_expr_list`, `parse_select_list`) was replaced with a version that parses a single element of the list (e.g. `parse_cte`), with their callers now using `self.parse_comma_separated(Parser::parse_<one_element>)?`.
- `parse_with_options` now parses the `WITH` keyword itself, and a separate `parse_sql_option` function (named after the struct it produces) was added to parse a single `k=v` option.
- `parse_list_of_ids` is gone, with the '.'-separated parsing moved to `parse_object_name`.

Custom comma-separated parsing is still used in:

- `parse_transaction_modes` (where the comma separator is optional)
- `parse_columns` (which allows an optional trailing comma before the closing ')')
This commit is contained in:
parent
f11d74a64d
commit
03efcf6fa6
1 changed files with 101 additions and 145 deletions
246
src/parser.rs
246
src/parser.rs
|
@ -290,7 +290,7 @@ impl Parser {
|
||||||
vec![]
|
vec![]
|
||||||
};
|
};
|
||||||
let order_by = if self.parse_keywords(vec!["ORDER", "BY"]) {
|
let order_by = if self.parse_keywords(vec!["ORDER", "BY"]) {
|
||||||
self.parse_order_by_expr_list()?
|
self.parse_comma_separated(Parser::parse_order_by_expr)?
|
||||||
} else {
|
} else {
|
||||||
vec![]
|
vec![]
|
||||||
};
|
};
|
||||||
|
@ -829,6 +829,21 @@ impl Parser {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parse a comma-separated list of 1+ items accepted by `F`
|
||||||
|
pub fn parse_comma_separated<T, F>(&mut self, mut f: F) -> Result<Vec<T>, ParserError>
|
||||||
|
where
|
||||||
|
F: FnMut(&mut Parser) -> Result<T, ParserError>,
|
||||||
|
{
|
||||||
|
let mut values = vec![];
|
||||||
|
loop {
|
||||||
|
values.push(f(self)?);
|
||||||
|
if !self.consume_token(&Token::Comma) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(values)
|
||||||
|
}
|
||||||
|
|
||||||
/// Parse a SQL CREATE statement
|
/// Parse a SQL CREATE statement
|
||||||
pub fn parse_create(&mut self) -> Result<Statement, ParserError> {
|
pub fn parse_create(&mut self) -> Result<Statement, ParserError> {
|
||||||
if self.parse_keyword("TABLE") {
|
if self.parse_keyword("TABLE") {
|
||||||
|
@ -872,11 +887,7 @@ impl Parser {
|
||||||
// ANSI SQL and Postgres support RECURSIVE here, but we don't support it either.
|
// ANSI SQL and Postgres support RECURSIVE here, but we don't support it either.
|
||||||
let name = self.parse_object_name()?;
|
let name = self.parse_object_name()?;
|
||||||
let columns = self.parse_parenthesized_column_list(Optional)?;
|
let columns = self.parse_parenthesized_column_list(Optional)?;
|
||||||
let with_options = if self.parse_keyword("WITH") {
|
let with_options = self.parse_with_options()?;
|
||||||
self.parse_with_options()?
|
|
||||||
} else {
|
|
||||||
vec![]
|
|
||||||
};
|
|
||||||
self.expect_keyword("AS")?;
|
self.expect_keyword("AS")?;
|
||||||
let query = Box::new(self.parse_query()?);
|
let query = Box::new(self.parse_query()?);
|
||||||
// Optional `WITH [ CASCADED | LOCAL ] CHECK OPTION` is widely supported here.
|
// Optional `WITH [ CASCADED | LOCAL ] CHECK OPTION` is widely supported here.
|
||||||
|
@ -897,14 +908,10 @@ impl Parser {
|
||||||
} else {
|
} else {
|
||||||
return self.expected("TABLE or VIEW after DROP", self.peek_token());
|
return self.expected("TABLE or VIEW after DROP", self.peek_token());
|
||||||
};
|
};
|
||||||
|
// Many dialects support the non standard `IF EXISTS` clause and allow
|
||||||
|
// specifying multiple objects to delete in a single statement
|
||||||
let if_exists = self.parse_keywords(vec!["IF", "EXISTS"]);
|
let if_exists = self.parse_keywords(vec!["IF", "EXISTS"]);
|
||||||
let mut names = vec![];
|
let names = self.parse_comma_separated(Parser::parse_object_name)?;
|
||||||
loop {
|
|
||||||
names.push(self.parse_object_name()?);
|
|
||||||
if !self.consume_token(&Token::Comma) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
let cascade = self.parse_keyword("CASCADE");
|
let cascade = self.parse_keyword("CASCADE");
|
||||||
let restrict = self.parse_keyword("RESTRICT");
|
let restrict = self.parse_keyword("RESTRICT");
|
||||||
if cascade && restrict {
|
if cascade && restrict {
|
||||||
|
@ -922,12 +929,7 @@ impl Parser {
|
||||||
let table_name = self.parse_object_name()?;
|
let table_name = self.parse_object_name()?;
|
||||||
// parse optional column list (schema)
|
// parse optional column list (schema)
|
||||||
let (columns, constraints) = self.parse_columns()?;
|
let (columns, constraints) = self.parse_columns()?;
|
||||||
|
let with_options = self.parse_with_options()?;
|
||||||
let with_options = if self.parse_keyword("WITH") {
|
|
||||||
self.parse_with_options()?
|
|
||||||
} else {
|
|
||||||
vec![]
|
|
||||||
};
|
|
||||||
|
|
||||||
Ok(Statement::CreateTable {
|
Ok(Statement::CreateTable {
|
||||||
name: table_name,
|
name: table_name,
|
||||||
|
@ -1075,19 +1077,21 @@ impl Parser {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn parse_with_options(&mut self) -> Result<Vec<SqlOption>, ParserError> {
|
pub fn parse_with_options(&mut self) -> Result<Vec<SqlOption>, ParserError> {
|
||||||
self.expect_token(&Token::LParen)?;
|
if self.parse_keyword("WITH") {
|
||||||
let mut options = vec![];
|
self.expect_token(&Token::LParen)?;
|
||||||
loop {
|
let options = self.parse_comma_separated(Parser::parse_sql_option)?;
|
||||||
let name = self.parse_identifier()?;
|
self.expect_token(&Token::RParen)?;
|
||||||
self.expect_token(&Token::Eq)?;
|
Ok(options)
|
||||||
let value = self.parse_value()?;
|
} else {
|
||||||
options.push(SqlOption { name, value });
|
Ok(vec![])
|
||||||
if !self.consume_token(&Token::Comma) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
self.expect_token(&Token::RParen)?;
|
}
|
||||||
Ok(options)
|
|
||||||
|
pub fn parse_sql_option(&mut self) -> Result<SqlOption, ParserError> {
|
||||||
|
let name = self.parse_identifier()?;
|
||||||
|
self.expect_token(&Token::Eq)?;
|
||||||
|
let value = self.parse_value()?;
|
||||||
|
Ok(SqlOption { name, value })
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn parse_alter(&mut self) -> Result<Statement, ParserError> {
|
pub fn parse_alter(&mut self) -> Result<Statement, ParserError> {
|
||||||
|
@ -1333,22 +1337,17 @@ impl Parser {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse one or more identifiers with the specified separator between them
|
|
||||||
pub fn parse_list_of_ids(&mut self, separator: &Token) -> Result<Vec<Ident>, ParserError> {
|
|
||||||
let mut idents = vec![];
|
|
||||||
loop {
|
|
||||||
idents.push(self.parse_identifier()?);
|
|
||||||
if !self.consume_token(separator) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(idents)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Parse a possibly qualified, possibly quoted identifier, e.g.
|
/// Parse a possibly qualified, possibly quoted identifier, e.g.
|
||||||
/// `foo` or `myschema."table"`
|
/// `foo` or `myschema."table"`
|
||||||
pub fn parse_object_name(&mut self) -> Result<ObjectName, ParserError> {
|
pub fn parse_object_name(&mut self) -> Result<ObjectName, ParserError> {
|
||||||
Ok(ObjectName(self.parse_list_of_ids(&Token::Period)?))
|
let mut idents = vec![];
|
||||||
|
loop {
|
||||||
|
idents.push(self.parse_identifier()?);
|
||||||
|
if !self.consume_token(&Token::Period) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(ObjectName(idents))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse a simple one-word identifier (possibly quoted, possibly a keyword)
|
/// Parse a simple one-word identifier (possibly quoted, possibly a keyword)
|
||||||
|
@ -1365,7 +1364,7 @@ impl Parser {
|
||||||
optional: IsOptional,
|
optional: IsOptional,
|
||||||
) -> Result<Vec<Ident>, ParserError> {
|
) -> Result<Vec<Ident>, ParserError> {
|
||||||
if self.consume_token(&Token::LParen) {
|
if self.consume_token(&Token::LParen) {
|
||||||
let cols = self.parse_list_of_ids(&Token::Comma)?;
|
let cols = self.parse_comma_separated(Parser::parse_identifier)?;
|
||||||
self.expect_token(&Token::RParen)?;
|
self.expect_token(&Token::RParen)?;
|
||||||
Ok(cols)
|
Ok(cols)
|
||||||
} else if optional == Optional {
|
} else if optional == Optional {
|
||||||
|
@ -1424,7 +1423,7 @@ impl Parser {
|
||||||
pub fn parse_query(&mut self) -> Result<Query, ParserError> {
|
pub fn parse_query(&mut self) -> Result<Query, ParserError> {
|
||||||
let ctes = if self.parse_keyword("WITH") {
|
let ctes = if self.parse_keyword("WITH") {
|
||||||
// TODO: optional RECURSIVE
|
// TODO: optional RECURSIVE
|
||||||
self.parse_cte_list()?
|
self.parse_comma_separated(Parser::parse_cte)?
|
||||||
} else {
|
} else {
|
||||||
vec![]
|
vec![]
|
||||||
};
|
};
|
||||||
|
@ -1432,7 +1431,7 @@ impl Parser {
|
||||||
let body = self.parse_query_body(0)?;
|
let body = self.parse_query_body(0)?;
|
||||||
|
|
||||||
let order_by = if self.parse_keywords(vec!["ORDER", "BY"]) {
|
let order_by = if self.parse_keywords(vec!["ORDER", "BY"]) {
|
||||||
self.parse_order_by_expr_list()?
|
self.parse_comma_separated(Parser::parse_order_by_expr)?
|
||||||
} else {
|
} else {
|
||||||
vec![]
|
vec![]
|
||||||
};
|
};
|
||||||
|
@ -1465,27 +1464,17 @@ impl Parser {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse one or more (comma-separated) `alias AS (subquery)` CTEs,
|
/// Parse a CTE (`alias [( col1, col2, ... )] AS (subquery)`)
|
||||||
/// assuming the initial `WITH` was already consumed.
|
fn parse_cte(&mut self) -> Result<Cte, ParserError> {
|
||||||
fn parse_cte_list(&mut self) -> Result<Vec<Cte>, ParserError> {
|
let alias = TableAlias {
|
||||||
let mut cte = vec![];
|
name: self.parse_identifier()?,
|
||||||
loop {
|
columns: self.parse_parenthesized_column_list(Optional)?,
|
||||||
let alias = TableAlias {
|
};
|
||||||
name: self.parse_identifier()?,
|
self.expect_keyword("AS")?;
|
||||||
columns: self.parse_parenthesized_column_list(Optional)?,
|
self.expect_token(&Token::LParen)?;
|
||||||
};
|
let query = self.parse_query()?;
|
||||||
self.expect_keyword("AS")?;
|
self.expect_token(&Token::RParen)?;
|
||||||
self.expect_token(&Token::LParen)?;
|
Ok(Cte { alias, query })
|
||||||
cte.push(Cte {
|
|
||||||
alias,
|
|
||||||
query: self.parse_query()?,
|
|
||||||
});
|
|
||||||
self.expect_token(&Token::RParen)?;
|
|
||||||
if !self.consume_token(&Token::Comma) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(cte)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse a "query body", which is an expression with roughly the
|
/// Parse a "query body", which is an expression with roughly the
|
||||||
|
@ -1559,22 +1548,18 @@ impl Parser {
|
||||||
if all && distinct {
|
if all && distinct {
|
||||||
return parser_err!("Cannot specify both ALL and DISTINCT in SELECT");
|
return parser_err!("Cannot specify both ALL and DISTINCT in SELECT");
|
||||||
}
|
}
|
||||||
let projection = self.parse_select_list()?;
|
let projection = self.parse_comma_separated(Parser::parse_select_item)?;
|
||||||
|
|
||||||
// Note that for keywords to be properly handled here, they need to be
|
// Note that for keywords to be properly handled here, they need to be
|
||||||
// added to `RESERVED_FOR_COLUMN_ALIAS` / `RESERVED_FOR_TABLE_ALIAS`,
|
// added to `RESERVED_FOR_COLUMN_ALIAS` / `RESERVED_FOR_TABLE_ALIAS`,
|
||||||
// otherwise they may be parsed as an alias as part of the `projection`
|
// otherwise they may be parsed as an alias as part of the `projection`
|
||||||
// or `from`.
|
// or `from`.
|
||||||
|
|
||||||
let mut from = vec![];
|
let from = if self.parse_keyword("FROM") {
|
||||||
if self.parse_keyword("FROM") {
|
self.parse_comma_separated(Parser::parse_table_and_joins)?
|
||||||
loop {
|
} else {
|
||||||
from.push(self.parse_table_and_joins()?);
|
vec![]
|
||||||
if !self.consume_token(&Token::Comma) {
|
};
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let selection = if self.parse_keyword("WHERE") {
|
let selection = if self.parse_keyword("WHERE") {
|
||||||
Some(self.parse_expr()?)
|
Some(self.parse_expr()?)
|
||||||
|
@ -1812,16 +1797,7 @@ impl Parser {
|
||||||
pub fn parse_update(&mut self) -> Result<Statement, ParserError> {
|
pub fn parse_update(&mut self) -> Result<Statement, ParserError> {
|
||||||
let table_name = self.parse_object_name()?;
|
let table_name = self.parse_object_name()?;
|
||||||
self.expect_keyword("SET")?;
|
self.expect_keyword("SET")?;
|
||||||
let mut assignments = vec![];
|
let assignments = self.parse_comma_separated(Parser::parse_assignment)?;
|
||||||
loop {
|
|
||||||
let id = self.parse_identifier()?;
|
|
||||||
self.expect_token(&Token::Eq)?;
|
|
||||||
let value = self.parse_expr()?;
|
|
||||||
assignments.push(Assignment { id, value });
|
|
||||||
if !self.consume_token(&Token::Comma) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
let selection = if self.parse_keyword("WHERE") {
|
let selection = if self.parse_keyword("WHERE") {
|
||||||
Some(self.parse_expr()?)
|
Some(self.parse_expr()?)
|
||||||
} else {
|
} else {
|
||||||
|
@ -1834,16 +1810,17 @@ impl Parser {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parse a `var = expr` assignment, used in an UPDATE statement
|
||||||
|
pub fn parse_assignment(&mut self) -> Result<Assignment, ParserError> {
|
||||||
|
let id = self.parse_identifier()?;
|
||||||
|
self.expect_token(&Token::Eq)?;
|
||||||
|
let value = self.parse_expr()?;
|
||||||
|
Ok(Assignment { id, value })
|
||||||
|
}
|
||||||
|
|
||||||
/// Parse a comma-delimited list of SQL expressions
|
/// Parse a comma-delimited list of SQL expressions
|
||||||
pub fn parse_expr_list(&mut self) -> Result<Vec<Expr>, ParserError> {
|
pub fn parse_expr_list(&mut self) -> Result<Vec<Expr>, ParserError> {
|
||||||
let mut expr_list: Vec<Expr> = vec![];
|
Ok(self.parse_comma_separated(Parser::parse_expr)?)
|
||||||
loop {
|
|
||||||
expr_list.push(self.parse_expr()?);
|
|
||||||
if !self.consume_token(&Token::Comma) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(expr_list)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn parse_optional_args(&mut self) -> Result<Vec<Expr>, ParserError> {
|
pub fn parse_optional_args(&mut self) -> Result<Vec<Expr>, ParserError> {
|
||||||
|
@ -1857,52 +1834,34 @@ impl Parser {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse a comma-delimited list of projections after SELECT
|
/// Parse a single projection after SELECT: a wildcard, a qualified wildcard, or an expression with an optional alias
|
||||||
pub fn parse_select_list(&mut self) -> Result<Vec<SelectItem>, ParserError> {
|
pub fn parse_select_item(&mut self) -> Result<SelectItem, ParserError> {
|
||||||
let mut projections: Vec<SelectItem> = vec![];
|
let expr = self.parse_expr()?;
|
||||||
loop {
|
if let Expr::Wildcard = expr {
|
||||||
let expr = self.parse_expr()?;
|
Ok(SelectItem::Wildcard)
|
||||||
if let Expr::Wildcard = expr {
|
} else if let Expr::QualifiedWildcard(prefix) = expr {
|
||||||
projections.push(SelectItem::Wildcard);
|
Ok(SelectItem::QualifiedWildcard(ObjectName(prefix)))
|
||||||
} else if let Expr::QualifiedWildcard(prefix) = expr {
|
} else {
|
||||||
projections.push(SelectItem::QualifiedWildcard(ObjectName(prefix)));
|
// `expr` is a regular SQL expression and can be followed by an alias
|
||||||
|
if let Some(alias) = self.parse_optional_alias(keywords::RESERVED_FOR_COLUMN_ALIAS)? {
|
||||||
|
Ok(SelectItem::ExprWithAlias { expr, alias })
|
||||||
} else {
|
} else {
|
||||||
// `expr` is a regular SQL expression and can be followed by an alias
|
Ok(SelectItem::UnnamedExpr(expr))
|
||||||
if let Some(alias) =
|
|
||||||
self.parse_optional_alias(keywords::RESERVED_FOR_COLUMN_ALIAS)?
|
|
||||||
{
|
|
||||||
projections.push(SelectItem::ExprWithAlias { expr, alias });
|
|
||||||
} else {
|
|
||||||
projections.push(SelectItem::UnnamedExpr(expr));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if !self.consume_token(&Token::Comma) {
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(projections)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse a comma-delimited list of ORDER BY expressions
|
/// Parse an expression, optionally followed by ASC or DESC (used in ORDER BY)
|
||||||
pub fn parse_order_by_expr_list(&mut self) -> Result<Vec<OrderByExpr>, ParserError> {
|
pub fn parse_order_by_expr(&mut self) -> Result<OrderByExpr, ParserError> {
|
||||||
let mut expr_list: Vec<OrderByExpr> = vec![];
|
let expr = self.parse_expr()?;
|
||||||
loop {
|
|
||||||
let expr = self.parse_expr()?;
|
|
||||||
|
|
||||||
let asc = if self.parse_keyword("ASC") {
|
let asc = if self.parse_keyword("ASC") {
|
||||||
Some(true)
|
Some(true)
|
||||||
} else if self.parse_keyword("DESC") {
|
} else if self.parse_keyword("DESC") {
|
||||||
Some(false)
|
Some(false)
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
};
|
};
|
||||||
|
Ok(OrderByExpr { expr, asc })
|
||||||
expr_list.push(OrderByExpr { expr, asc });
|
|
||||||
if !self.consume_token(&Token::Comma) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(expr_list)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse a LIMIT clause
|
/// Parse a LIMIT clause
|
||||||
|
@ -1950,15 +1909,12 @@ impl Parser {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn parse_values(&mut self) -> Result<Values, ParserError> {
|
pub fn parse_values(&mut self) -> Result<Values, ParserError> {
|
||||||
let mut values = vec![];
|
let values = self.parse_comma_separated(|parser| {
|
||||||
loop {
|
parser.expect_token(&Token::LParen)?;
|
||||||
self.expect_token(&Token::LParen)?;
|
let e = parser.parse_expr_list()?;
|
||||||
values.push(self.parse_expr_list()?);
|
parser.expect_token(&Token::RParen)?;
|
||||||
self.expect_token(&Token::RParen)?;
|
Ok(e)
|
||||||
if !self.consume_token(&Token::Comma) {
|
})?;
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(Values(values))
|
Ok(Values(values))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue