diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index b3dd1e27..d51e0cf2 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -716,8 +716,7 @@ pub const ALL_KEYWORDS: &'static [&'static str] = &[ /// can be parsed unambiguously without looking ahead. pub const RESERVED_FOR_TABLE_ALIAS: &'static [&'static str] = &[ // Reserved as both a table and a column alias: - WITH, SELECT, WHERE, GROUP, ORDER, - // TODO add these with tests: UNION, EXCEPT, INTERSECT, + WITH, SELECT, WHERE, GROUP, ORDER, UNION, EXCEPT, INTERSECT, // Reserved only as a table alias in the `FROM`/`JOIN` clauses: ON, JOIN, INNER, CROSS, FULL, LEFT, RIGHT, NATURAL, USING, ]; @@ -726,8 +725,7 @@ pub const RESERVED_FOR_TABLE_ALIAS: &'static [&'static str] = &[ /// can be parsed unambiguously without looking ahead. pub const RESERVED_FOR_COLUMN_ALIAS: &'static [&'static str] = &[ // Reserved as both a table and a column alias: - WITH, SELECT, WHERE, GROUP, ORDER, - // TODO add these with tests: UNION, EXCEPT, INTERSECT, + WITH, SELECT, WHERE, GROUP, ORDER, UNION, EXCEPT, INTERSECT, // Reserved only as a column alias in the `SELECT` clause: FROM, ]; diff --git a/src/sqlast/mod.rs b/src/sqlast/mod.rs index 85dc9f92..0981eb89 100644 --- a/src/sqlast/mod.rs +++ b/src/sqlast/mod.rs @@ -22,7 +22,7 @@ mod value; pub use self::query::{ Cte, Join, JoinConstraint, JoinOperator, SQLOrderByExpr, SQLQuery, SQLSelect, SQLSelectItem, - TableFactor, + SQLSetExpr, SQLSetOperator, TableFactor, }; pub use self::sqltype::SQLType; pub use self::table_key::{AlterOperation, Key, TableKey}; diff --git a/src/sqlast/query.rs b/src/sqlast/query.rs index 9ccf5602..69577e55 100644 --- a/src/sqlast/query.rs +++ b/src/sqlast/query.rs @@ -7,7 +7,7 @@ pub struct SQLQuery { /// WITH (common table expressions, or CTEs) pub ctes: Vec<Cte>, /// SELECT or UNION / EXCEPT / INTECEPT - pub body: SQLSelect, + pub body: SQLSetExpr, /// ORDER BY pub order_by: Option<Vec<SQLOrderByExpr>>, /// LIMIT @@ -45,6 +45,66 @@ impl ToString for
SQLQuery { } } +/// A node in a tree, representing a "query body" expression, roughly: +/// `SELECT ... [ {UNION|EXCEPT|INTERSECT} SELECT ...]` +#[derive(Debug, Clone, PartialEq)] +pub enum SQLSetExpr { + /// Restricted SELECT .. FROM .. HAVING (no ORDER BY or set operations) + Select(SQLSelect), + /// Parenthesized SELECT subquery, which may include more set operations + /// in its body and an optional ORDER BY / LIMIT. + Query(Box<SQLQuery>), + /// UNION/EXCEPT/INTERSECT of two queries + SetOperation { + op: SQLSetOperator, + all: bool, + left: Box<SQLSetExpr>, + right: Box<SQLSetExpr>, + }, + // TODO: ANSI SQL supports `TABLE` and `VALUES` here. +} + +impl ToString for SQLSetExpr { + fn to_string(&self) -> String { + match self { + SQLSetExpr::Select(s) => s.to_string(), + SQLSetExpr::Query(q) => format!("({})", q.to_string()), + SQLSetExpr::SetOperation { + left, + right, + op, + all, + } => { + let all_str = if *all { " ALL" } else { "" }; + format!( + "{} {}{} {}", + left.to_string(), + op.to_string(), + all_str, + right.to_string() + ) + } + } + } +} + +#[derive(Debug, Clone, PartialEq)] +pub enum SQLSetOperator { + Union, + Except, + Intersect, +} + +impl ToString for SQLSetOperator { + fn to_string(&self) -> String { + match self { + SQLSetOperator::Union => "UNION".to_string(), + SQLSetOperator::Except => "EXCEPT".to_string(), + SQLSetOperator::Intersect => "INTERSECT".to_string(), + } + } +} + /// A restricted variant of `SELECT` (without CTEs/`ORDER BY`), which may /// appear either as the only body item of an `SQLQuery`, or as an operand /// to a set operation like `UNION`. 
diff --git a/src/sqlparser.rs b/src/sqlparser.rs index 7fb45c21..263e6e99 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -60,6 +60,7 @@ impl Parser { let mut parser = Parser::new(tokens); let mut stmts = Vec::new(); let mut expecting_statement_delimiter = false; + debug!("Parsing sql '{}'...", sql); loop { // ignore empty statements (between successive statement delimiters) while parser.consume_token(&Token::SemiColon) { @@ -1208,8 +1209,7 @@ impl Parser { vec![] }; - self.expect_keyword("SELECT")?; - let body = self.parse_select()?; + let body = self.parse_query_body(0)?; let order_by = if self.parse_keywords(vec!["ORDER", "BY"]) { Some(self.parse_order_by_expr_list()?) @@ -1252,6 +1252,64 @@ impl Parser { return Ok(cte); } + /// Parse a "query body", which is an expression with roughly the + /// following grammar: + /// ```text + /// query_body ::= restricted_select | '(' subquery ')' | set_operation + /// restricted_select ::= 'SELECT' [expr_list] [ from ] [ where ] [ groupby_having ] + /// subquery ::= query_body [ order_by_limit ] + /// set_operation ::= query_body { 'UNION' | 'EXCEPT' | 'INTERSECT' } [ 'ALL' ] query_body + /// ``` + fn parse_query_body(&mut self, precedence: u8) -> Result<SQLSetExpr, ParserError> { + // We parse the expression using a Pratt parser, as in `parse_expr()`. + // Start by parsing a restricted SELECT or a `(subquery)`: + let mut expr = if self.parse_keyword("SELECT") { + SQLSetExpr::Select(self.parse_select()?) + } else if self.consume_token(&Token::LParen) { + // CTEs are not allowed here, but the parser currently accepts them + let subquery = self.parse_query()?; + self.expect_token(&Token::RParen)?; + SQLSetExpr::Query(Box::new(subquery)) + } else { + parser_err!("Expected SELECT or a subquery in the query body!")? 
+ }; + + loop { + // The query can be optionally followed by a set operator: + let next_token = self.peek_token(); + let op = self.parse_set_operator(&next_token); + let next_precedence = match op { + // UNION and EXCEPT have the same binding power and evaluate left-to-right + Some(SQLSetOperator::Union) | Some(SQLSetOperator::Except) => 10, + // INTERSECT has higher precedence than UNION/EXCEPT + Some(SQLSetOperator::Intersect) => 20, + // Unexpected token or EOF => stop parsing the query body + None => break, + }; + if precedence >= next_precedence { + break; + } + self.next_token(); // skip past the set operator + expr = SQLSetExpr::SetOperation { + left: Box::new(expr), + op: op.unwrap(), + all: self.parse_keyword("ALL"), + right: Box::new(self.parse_query_body(next_precedence)?), + }; + } + + Ok(expr) + } + + fn parse_set_operator(&mut self, token: &Option<Token>) -> Option<SQLSetOperator> { + match token { + Some(Token::SQLWord(w)) if w.keyword == "UNION" => Some(SQLSetOperator::Union), + Some(Token::SQLWord(w)) if w.keyword == "EXCEPT" => Some(SQLSetOperator::Except), + Some(Token::SQLWord(w)) if w.keyword == "INTERSECT" => Some(SQLSetOperator::Intersect), + _ => None, + } + } + /// Parse a restricted `SELECT` statement (no CTEs / `UNION` / `ORDER BY`), /// assuming the initial `SELECT` was already consumed pub fn parse_select(&mut self) -> Result<SQLSelect, ParserError> { diff --git a/tests/sqlparser_ansi.rs b/tests/sqlparser_ansi.rs index 871046b1..73054fb7 100644 --- a/tests/sqlparser_ansi.rs +++ b/tests/sqlparser_ansi.rs @@ -12,7 +12,7 @@ fn parse_simple_select() { assert_eq!(1, ast.len()); match ast.first().unwrap() { SQLStatement::SQLSelect(SQLQuery { - body: SQLSelect { projection, .. }, + body: SQLSetExpr::Select(SQLSelect { projection, .. }), .. 
}) => { assert_eq!(3, projection.len()); diff --git a/tests/sqlparser_generic.rs b/tests/sqlparser_generic.rs index caeeb4fb..e9be58ea 100644 --- a/tests/sqlparser_generic.rs +++ b/tests/sqlparser_generic.rs @@ -825,6 +825,24 @@ fn parse_derived_tables() { //TODO: add assertions } +#[test] +fn parse_union() { + // TODO: add assertions + verified_stmt("SELECT 1 UNION SELECT 2"); + verified_stmt("SELECT 1 UNION ALL SELECT 2"); + verified_stmt("SELECT 1 EXCEPT SELECT 2"); + verified_stmt("SELECT 1 EXCEPT ALL SELECT 2"); + verified_stmt("SELECT 1 INTERSECT SELECT 2"); + verified_stmt("SELECT 1 INTERSECT ALL SELECT 2"); + verified_stmt("SELECT 1 UNION SELECT 2 UNION SELECT 3"); + verified_stmt("SELECT 1 EXCEPT SELECT 2 UNION SELECT 3"); // Union[Except[1,2], 3] + verified_stmt("SELECT 1 INTERSECT (SELECT 2 EXCEPT SELECT 3)"); + verified_stmt("WITH cte AS (SELECT 1 AS foo) (SELECT foo FROM cte ORDER BY 1 LIMIT 1)"); + verified_stmt("SELECT 1 UNION (SELECT 2 ORDER BY 1 LIMIT 1)"); + verified_stmt("SELECT 1 UNION SELECT 2 INTERSECT SELECT 3"); // Union[1, Intersect[2,3]] + verified_stmt("SELECT foo FROM tab UNION SELECT bar FROM TAB"); +} + #[test] fn parse_multiple_statements() { fn test_with(sql1: &str, sql2_kw: &str, sql2_rest: &str) { @@ -920,7 +938,10 @@ fn expr_from_projection(item: &SQLSelectItem) -> &ASTNode { } fn verified_only_select(query: &str) -> SQLSelect { - verified_query(query).body + match verified_query(query).body { + SQLSetExpr::Select(s) => s, + _ => panic!("Expected SQLSetExpr::Select"), + } } fn verified_stmt(query: &str) -> SQLStatement {