From 03efcf6fa6ac2745ae27f6188e93863ae3ea9808 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Sun, 9 Jun 2019 05:39:07 +0300 Subject: [PATCH] Add parse_comma_separated to simplify the parser To use the new helper effectively, a few related changes were required: - Each of the parse_..._list functions (`parse_cte_list`, `parse_order_by_expr_list`, `parse_select_list`) was replaced with a version that parses a single element of the list (e.g. `parse_cte`), with their callers now using `self.parse_comma_separated(Parser::parse_<type>)?` - `parse_with_options` now parses the WITH keyword and a separate `parse_sql_option` function (named after the struct it produces) was added to parse a single k=v option. - `parse_list_of_ids` is gone, with the '.'-separated parsing moved to `parse_object_name`. Custom comma-separated parsing is still used in: - parse_transaction_modes (where the comma separator is optional) - parse_columns (allows optional trailing comma, before the closing ')') --- src/parser.rs | 246 +++++++++++++++++++++----------------------------- 1 file changed, 101 insertions(+), 145 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index b30184ca..a0338664 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -290,7 +290,7 @@ impl Parser { vec![] }; let order_by = if self.parse_keywords(vec!["ORDER", "BY"]) { - self.parse_order_by_expr_list()? + self.parse_comma_separated(Parser::parse_order_by_expr)? 
} else { vec![] }; @@ -829,6 +829,21 @@ impl Parser { } } + /// Parse a comma-separated list of 1+ items accepted by `F` + pub fn parse_comma_separated(&mut self, mut f: F) -> Result, ParserError> + where + F: FnMut(&mut Parser) -> Result, + { + let mut values = vec![]; + loop { + values.push(f(self)?); + if !self.consume_token(&Token::Comma) { + break; + } + } + Ok(values) + } + /// Parse a SQL CREATE statement pub fn parse_create(&mut self) -> Result { if self.parse_keyword("TABLE") { @@ -872,11 +887,7 @@ impl Parser { // ANSI SQL and Postgres support RECURSIVE here, but we don't support it either. let name = self.parse_object_name()?; let columns = self.parse_parenthesized_column_list(Optional)?; - let with_options = if self.parse_keyword("WITH") { - self.parse_with_options()? - } else { - vec![] - }; + let with_options = self.parse_with_options()?; self.expect_keyword("AS")?; let query = Box::new(self.parse_query()?); // Optional `WITH [ CASCADED | LOCAL ] CHECK OPTION` is widely supported here. @@ -897,14 +908,10 @@ impl Parser { } else { return self.expected("TABLE or VIEW after DROP", self.peek_token()); }; + // Many dialects support the non standard `IF EXISTS` clause and allow + // specifying multiple objects to delete in a single statement let if_exists = self.parse_keywords(vec!["IF", "EXISTS"]); - let mut names = vec![]; - loop { - names.push(self.parse_object_name()?); - if !self.consume_token(&Token::Comma) { - break; - } - } + let names = self.parse_comma_separated(Parser::parse_object_name)?; let cascade = self.parse_keyword("CASCADE"); let restrict = self.parse_keyword("RESTRICT"); if cascade && restrict { @@ -922,12 +929,7 @@ impl Parser { let table_name = self.parse_object_name()?; // parse optional column list (schema) let (columns, constraints) = self.parse_columns()?; - - let with_options = if self.parse_keyword("WITH") { - self.parse_with_options()? 
- } else { - vec![] - }; + let with_options = self.parse_with_options()?; Ok(Statement::CreateTable { name: table_name, @@ -1075,19 +1077,21 @@ impl Parser { } pub fn parse_with_options(&mut self) -> Result, ParserError> { - self.expect_token(&Token::LParen)?; - let mut options = vec![]; - loop { - let name = self.parse_identifier()?; - self.expect_token(&Token::Eq)?; - let value = self.parse_value()?; - options.push(SqlOption { name, value }); - if !self.consume_token(&Token::Comma) { - break; - } + if self.parse_keyword("WITH") { + self.expect_token(&Token::LParen)?; + let options = self.parse_comma_separated(Parser::parse_sql_option)?; + self.expect_token(&Token::RParen)?; + Ok(options) + } else { + Ok(vec![]) } - self.expect_token(&Token::RParen)?; - Ok(options) + } + + pub fn parse_sql_option(&mut self) -> Result { + let name = self.parse_identifier()?; + self.expect_token(&Token::Eq)?; + let value = self.parse_value()?; + Ok(SqlOption { name, value }) } pub fn parse_alter(&mut self) -> Result { @@ -1333,22 +1337,17 @@ impl Parser { } } - /// Parse one or more identifiers with the specified separator between them - pub fn parse_list_of_ids(&mut self, separator: &Token) -> Result, ParserError> { - let mut idents = vec![]; - loop { - idents.push(self.parse_identifier()?); - if !self.consume_token(separator) { - break; - } - } - Ok(idents) - } - /// Parse a possibly qualified, possibly quoted identifier, e.g. 
/// `foo` or `myschema."table"` pub fn parse_object_name(&mut self) -> Result { - Ok(ObjectName(self.parse_list_of_ids(&Token::Period)?)) + let mut idents = vec![]; + loop { + idents.push(self.parse_identifier()?); + if !self.consume_token(&Token::Period) { + break; + } + } + Ok(ObjectName(idents)) } /// Parse a simple one-word identifier (possibly quoted, possibly a keyword) @@ -1365,7 +1364,7 @@ impl Parser { optional: IsOptional, ) -> Result, ParserError> { if self.consume_token(&Token::LParen) { - let cols = self.parse_list_of_ids(&Token::Comma)?; + let cols = self.parse_comma_separated(Parser::parse_identifier)?; self.expect_token(&Token::RParen)?; Ok(cols) } else if optional == Optional { @@ -1424,7 +1423,7 @@ impl Parser { pub fn parse_query(&mut self) -> Result { let ctes = if self.parse_keyword("WITH") { // TODO: optional RECURSIVE - self.parse_cte_list()? + self.parse_comma_separated(Parser::parse_cte)? } else { vec![] }; @@ -1432,7 +1431,7 @@ impl Parser { let body = self.parse_query_body(0)?; let order_by = if self.parse_keywords(vec!["ORDER", "BY"]) { - self.parse_order_by_expr_list()? + self.parse_comma_separated(Parser::parse_order_by_expr)? } else { vec![] }; @@ -1465,27 +1464,17 @@ impl Parser { }) } - /// Parse one or more (comma-separated) `alias AS (subquery)` CTEs, - /// assuming the initial `WITH` was already consumed. - fn parse_cte_list(&mut self) -> Result, ParserError> { - let mut cte = vec![]; - loop { - let alias = TableAlias { - name: self.parse_identifier()?, - columns: self.parse_parenthesized_column_list(Optional)?, - }; - self.expect_keyword("AS")?; - self.expect_token(&Token::LParen)?; - cte.push(Cte { - alias, - query: self.parse_query()?, - }); - self.expect_token(&Token::RParen)?; - if !self.consume_token(&Token::Comma) { - break; - } - } - Ok(cte) + /// Parse a CTE (`alias [( col1, col2, ... 
)] AS (subquery)`) + fn parse_cte(&mut self) -> Result { + let alias = TableAlias { + name: self.parse_identifier()?, + columns: self.parse_parenthesized_column_list(Optional)?, + }; + self.expect_keyword("AS")?; + self.expect_token(&Token::LParen)?; + let query = self.parse_query()?; + self.expect_token(&Token::RParen)?; + Ok(Cte { alias, query }) } /// Parse a "query body", which is an expression with roughly the @@ -1559,22 +1548,18 @@ impl Parser { if all && distinct { return parser_err!("Cannot specify both ALL and DISTINCT in SELECT"); } - let projection = self.parse_select_list()?; + let projection = self.parse_comma_separated(Parser::parse_select_item)?; // Note that for keywords to be properly handled here, they need to be // added to `RESERVED_FOR_COLUMN_ALIAS` / `RESERVED_FOR_TABLE_ALIAS`, // otherwise they may be parsed as an alias as part of the `projection` // or `from`. - let mut from = vec![]; - if self.parse_keyword("FROM") { - loop { - from.push(self.parse_table_and_joins()?); - if !self.consume_token(&Token::Comma) { - break; - } - } - } + let from = if self.parse_keyword("FROM") { + self.parse_comma_separated(Parser::parse_table_and_joins)? + } else { + vec![] + }; let selection = if self.parse_keyword("WHERE") { Some(self.parse_expr()?) @@ -1812,16 +1797,7 @@ impl Parser { pub fn parse_update(&mut self) -> Result { let table_name = self.parse_object_name()?; self.expect_keyword("SET")?; - let mut assignments = vec![]; - loop { - let id = self.parse_identifier()?; - self.expect_token(&Token::Eq)?; - let value = self.parse_expr()?; - assignments.push(Assignment { id, value }); - if !self.consume_token(&Token::Comma) { - break; - } - } + let assignments = self.parse_comma_separated(Parser::parse_assignment)?; let selection = if self.parse_keyword("WHERE") { Some(self.parse_expr()?) 
} else { @@ -1834,16 +1810,17 @@ impl Parser { }) } + /// Parse a `var = expr` assignment, used in an UPDATE statement + pub fn parse_assignment(&mut self) -> Result { + let id = self.parse_identifier()?; + self.expect_token(&Token::Eq)?; + let value = self.parse_expr()?; + Ok(Assignment { id, value }) + } + /// Parse a comma-delimited list of SQL expressions pub fn parse_expr_list(&mut self) -> Result, ParserError> { - let mut expr_list: Vec = vec![]; - loop { - expr_list.push(self.parse_expr()?); - if !self.consume_token(&Token::Comma) { - break; - } - } - Ok(expr_list) + Ok(self.parse_comma_separated(Parser::parse_expr)?) } pub fn parse_optional_args(&mut self) -> Result, ParserError> { @@ -1857,52 +1834,34 @@ impl Parser { } /// Parse a comma-delimited list of projections after SELECT - pub fn parse_select_list(&mut self) -> Result, ParserError> { - let mut projections: Vec = vec![]; - loop { - let expr = self.parse_expr()?; - if let Expr::Wildcard = expr { - projections.push(SelectItem::Wildcard); - } else if let Expr::QualifiedWildcard(prefix) = expr { - projections.push(SelectItem::QualifiedWildcard(ObjectName(prefix))); + pub fn parse_select_item(&mut self) -> Result { + let expr = self.parse_expr()?; + if let Expr::Wildcard = expr { + Ok(SelectItem::Wildcard) + } else if let Expr::QualifiedWildcard(prefix) = expr { + Ok(SelectItem::QualifiedWildcard(ObjectName(prefix))) + } else { + // `expr` is a regular SQL expression and can be followed by an alias + if let Some(alias) = self.parse_optional_alias(keywords::RESERVED_FOR_COLUMN_ALIAS)? { + Ok(SelectItem::ExprWithAlias { expr, alias }) } else { - // `expr` is a regular SQL expression and can be followed by an alias - if let Some(alias) = - self.parse_optional_alias(keywords::RESERVED_FOR_COLUMN_ALIAS)? 
- { - projections.push(SelectItem::ExprWithAlias { expr, alias }); - } else { - projections.push(SelectItem::UnnamedExpr(expr)); - } - } - - if !self.consume_token(&Token::Comma) { - break; + Ok(SelectItem::UnnamedExpr(expr)) } } - Ok(projections) } - /// Parse a comma-delimited list of ORDER BY expressions - pub fn parse_order_by_expr_list(&mut self) -> Result, ParserError> { - let mut expr_list: Vec = vec![]; - loop { - let expr = self.parse_expr()?; + /// Parse an expression, optionally followed by ASC or DESC (used in ORDER BY) + pub fn parse_order_by_expr(&mut self) -> Result { + let expr = self.parse_expr()?; - let asc = if self.parse_keyword("ASC") { - Some(true) - } else if self.parse_keyword("DESC") { - Some(false) - } else { - None - }; - - expr_list.push(OrderByExpr { expr, asc }); - if !self.consume_token(&Token::Comma) { - break; - } - } - Ok(expr_list) + let asc = if self.parse_keyword("ASC") { + Some(true) + } else if self.parse_keyword("DESC") { + Some(false) + } else { + None + }; + Ok(OrderByExpr { expr, asc }) } /// Parse a LIMIT clause @@ -1950,15 +1909,12 @@ impl Parser { } pub fn parse_values(&mut self) -> Result { - let mut values = vec![]; - loop { - self.expect_token(&Token::LParen)?; - values.push(self.parse_expr_list()?); - self.expect_token(&Token::RParen)?; - if !self.consume_token(&Token::Comma) { - break; - } - } + let values = self.parse_comma_separated(|parser| { + parser.expect_token(&Token::LParen)?; + let e = parser.parse_expr_list()?; + parser.expect_token(&Token::RParen)?; + Ok(e) + })?; Ok(Values(values)) }