// Copyright 2018 Grove Enterprises LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //! SQL Parser use log::debug; use super::dialect::keywords; use super::dialect::Dialect; use super::sqlast::*; use super::sqltokenizer::*; #[derive(Debug, Clone, PartialEq)] pub enum ParserError { TokenizerError(String), ParserError(String), } // Use `Parser::expected` instead, if possible macro_rules! parser_err { ($MSG:expr) => { Err(ParserError::ParserError($MSG.to_string())) }; } #[derive(PartialEq)] pub enum IsOptional { Optional, Mandatory, } use IsOptional::*; impl From for ParserError { fn from(e: TokenizerError) -> Self { ParserError::TokenizerError(format!("{:?}", e)) } } /// SQL Parser pub struct Parser { tokens: Vec, index: usize, } impl Parser { /// Parse the specified tokens pub fn new(tokens: Vec) -> Self { Parser { tokens, index: 0 } } /// Parse a SQL statement and produce an Abstract Syntax Tree (AST) pub fn parse_sql(dialect: &dyn Dialect, sql: String) -> Result, ParserError> { let mut tokenizer = Tokenizer::new(dialect, &sql); let tokens = tokenizer.tokenize()?; let mut parser = Parser::new(tokens); let mut stmts = Vec::new(); let mut expecting_statement_delimiter = false; debug!("Parsing sql '{}'...", sql); loop { // ignore empty statements (between successive statement delimiters) while parser.consume_token(&Token::SemiColon) { expecting_statement_delimiter = false; } if parser.peek_token().is_none() { break; } else if expecting_statement_delimiter { return parser.expected("end of statement", parser.peek_token()); } let statement = parser.parse_statement()?; stmts.push(statement); expecting_statement_delimiter = true; } Ok(stmts) } /// Parse a single top-level statement (such as SELECT, INSERT, CREATE, etc.), /// stopping before the statement separator, if any. pub fn parse_statement(&mut self) -> Result { match self.next_token() { Some(t) => match t { Token::SQLWord(ref w) if w.keyword != "" => match w.keyword.as_ref() { "SELECT" | "WITH" => { self.prev_token(); Ok(SQLStatement::SQLQuery(Box::new(self.parse_query()?))) } "CREATE" => Ok(self.parse_create()?), "DROP" => Ok(self.parse_drop()?), "DELETE" => Ok(self.parse_delete()?), "INSERT" => Ok(self.parse_insert()?), "ALTER" => Ok(self.parse_alter()?), "COPY" => Ok(self.parse_copy()?), _ => parser_err!(format!( "Unexpected keyword {:?} at the beginning of a statement", w.to_string() )), }, unexpected => self.expected( "a keyword at the beginning of a statement", Some(unexpected), ), }, None => self.expected("SQL statement", None), } } /// Parse a new expression pub fn parse_expr(&mut self) -> Result { self.parse_subexpr(0) } /// Parse tokens until the precedence changes pub fn parse_subexpr(&mut self, precedence: u8) -> Result { debug!("parsing expr"); let mut expr = self.parse_prefix()?; debug!("prefix: {:?}", expr); loop { let next_precedence = self.get_next_precedence()?; debug!("next precedence: {:?}", next_precedence); if precedence >= next_precedence { break; } expr = self.parse_infix(expr, next_precedence)?; } Ok(expr) } /// Parse expression for DEFAULT clause in CREATE TABLE pub fn parse_default_expr(&mut self, precedence: u8) -> Result { debug!("parsing expr"); let mut expr = self.parse_prefix()?; debug!("prefix: {:?}", expr); loop { // stop parsing on `NULL` | `NOT NULL` match self.peek_token() { Some(Token::SQLWord(ref k)) if k.keyword == "NOT" || k.keyword == "NULL" => break, _ => {} } let next_precedence = self.get_next_precedence()?; debug!("next precedence: {:?}", next_precedence); if precedence >= next_precedence { break; } expr = self.parse_infix(expr, next_precedence)?; } Ok(expr) } /// Parse an expression prefix pub fn parse_prefix(&mut self) -> Result { let tok = self .next_token() .ok_or_else(|| ParserError::ParserError("Unexpected EOF".to_string()))?; let expr = match tok { Token::SQLWord(w) => match w.keyword.as_ref() { "TRUE" | "FALSE" | "NULL" => { self.prev_token(); self.parse_sql_value() } "CASE" => self.parse_case_expression(), "CAST" => self.parse_cast_expression(), "NOT" => { let p = self.get_precedence(&Token::make_keyword("NOT"))?; Ok(ASTNode::SQLUnary { operator: SQLOperator::Not, expr: Box::new(self.parse_subexpr(p)?), }) } // Here `w` is a word, check if it's a part of a multi-part // identifier, a function call, or a simple identifier: _ => match self.peek_token() { Some(Token::LParen) | Some(Token::Period) => { let mut id_parts: Vec = vec![w.as_sql_ident()]; let mut ends_with_wildcard = false; while self.consume_token(&Token::Period) { match self.next_token() { Some(Token::SQLWord(w)) => id_parts.push(w.as_sql_ident()), Some(Token::Mult) => { ends_with_wildcard = true; break; } unexpected => { return self .expected("an identifier or a '*' after '.'", unexpected); } } } if ends_with_wildcard { Ok(ASTNode::SQLQualifiedWildcard(id_parts)) } else if self.consume_token(&Token::LParen) { self.prev_token(); self.parse_function(SQLObjectName(id_parts)) } else { Ok(ASTNode::SQLCompoundIdentifier(id_parts)) } } _ => Ok(ASTNode::SQLIdentifier(w.as_sql_ident())), }, }, // End of Token::SQLWord Token::Mult => Ok(ASTNode::SQLWildcard), tok @ Token::Minus | tok @ Token::Plus => { let p = self.get_precedence(&tok)?; let operator = if tok == Token::Plus { SQLOperator::Plus } else { SQLOperator::Minus }; Ok(ASTNode::SQLUnary { operator, expr: Box::new(self.parse_subexpr(p)?), }) } Token::Number(_) | Token::SingleQuotedString(_) | Token::NationalStringLiteral(_) => { self.prev_token(); self.parse_sql_value() } Token::LParen => { let expr = if self.parse_keyword("SELECT") || self.parse_keyword("WITH") { self.prev_token(); ASTNode::SQLSubquery(Box::new(self.parse_query()?)) } else { ASTNode::SQLNested(Box::new(self.parse_expr()?)) }; self.expect_token(&Token::RParen)?; Ok(expr) } unexpected => self.expected("an expression", Some(unexpected)), }?; if self.parse_keyword("COLLATE") { Ok(ASTNode::SQLCollate { expr: Box::new(expr), collation: self.parse_object_name()?, }) } else { Ok(expr) } } pub fn parse_function(&mut self, name: SQLObjectName) -> Result { self.expect_token(&Token::LParen)?; let args = self.parse_optional_args()?; let over = if self.parse_keyword("OVER") { // TBD: support window names (`OVER mywin`) in place of inline specification self.expect_token(&Token::LParen)?; let partition_by = if self.parse_keywords(vec!["PARTITION", "BY"]) { // a list of possibly-qualified column names self.parse_expr_list()? } else { vec![] }; let order_by = if self.parse_keywords(vec!["ORDER", "BY"]) { self.parse_order_by_expr_list()? } else { vec![] }; let window_frame = self.parse_window_frame()?; Some(SQLWindowSpec { partition_by, order_by, window_frame, }) } else { None }; Ok(ASTNode::SQLFunction { name, args, over }) } pub fn parse_window_frame(&mut self) -> Result, ParserError> { let window_frame = match self.peek_token() { Some(Token::SQLWord(w)) => { let units = w.keyword.parse::()?; self.next_token(); if self.parse_keyword("BETWEEN") { let start_bound = self.parse_window_frame_bound()?; self.expect_keyword("AND")?; let end_bound = Some(self.parse_window_frame_bound()?); Some(SQLWindowFrame { units, start_bound, end_bound, }) } else { let start_bound = self.parse_window_frame_bound()?; let end_bound = None; Some(SQLWindowFrame { units, start_bound, end_bound, }) } } Some(Token::RParen) => None, unexpected => return self.expected("'ROWS', 'RANGE', 'GROUPS', or ')'", unexpected), }; self.expect_token(&Token::RParen)?; Ok(window_frame) } /// "CURRENT ROW" | ( ( | "UNBOUNDED") ("PRECEDING" | FOLLOWING) ) pub fn parse_window_frame_bound(&mut self) -> Result { if self.parse_keywords(vec!["CURRENT", "ROW"]) { Ok(SQLWindowFrameBound::CurrentRow) } else { let rows = if self.parse_keyword("UNBOUNDED") { None } else { let rows = self.parse_literal_int()?; if rows < 0 { parser_err!(format!( "The number of rows must be non-negative, got {}", rows ))?; } Some(rows as u64) }; if self.parse_keyword("PRECEDING") { Ok(SQLWindowFrameBound::Preceding(rows)) } else if self.parse_keyword("FOLLOWING") { Ok(SQLWindowFrameBound::Following(rows)) } else { self.expected("PRECEDING or FOLLOWING", self.peek_token()) } } } pub fn parse_case_expression(&mut self) -> Result { let mut operand = None; if !self.parse_keyword("WHEN") { operand = Some(Box::new(self.parse_expr()?)); self.expect_keyword("WHEN")?; } let mut conditions = vec![]; let mut results = vec![]; loop { conditions.push(self.parse_expr()?); self.expect_keyword("THEN")?; results.push(self.parse_expr()?); if !self.parse_keyword("WHEN") { break; } } let else_result = if self.parse_keyword("ELSE") { Some(Box::new(self.parse_expr()?)) } else { None }; self.expect_keyword("END")?; Ok(ASTNode::SQLCase { operand, conditions, results, else_result, }) } /// Parse a SQL CAST function e.g. `CAST(expr AS FLOAT)` pub fn parse_cast_expression(&mut self) -> Result { self.expect_token(&Token::LParen)?; let expr = self.parse_expr()?; self.expect_keyword("AS")?; let data_type = self.parse_data_type()?; self.expect_token(&Token::RParen)?; Ok(ASTNode::SQLCast { expr: Box::new(expr), data_type, }) } /// Parse an operator following an expression pub fn parse_infix(&mut self, expr: ASTNode, precedence: u8) -> Result { debug!("parsing infix"); let tok = self.next_token().unwrap(); // safe as EOF's precedence is the lowest let regular_binary_operator = match tok { Token::Eq => Some(SQLOperator::Eq), Token::Neq => Some(SQLOperator::NotEq), Token::Gt => Some(SQLOperator::Gt), Token::GtEq => Some(SQLOperator::GtEq), Token::Lt => Some(SQLOperator::Lt), Token::LtEq => Some(SQLOperator::LtEq), Token::Plus => Some(SQLOperator::Plus), Token::Minus => Some(SQLOperator::Minus), Token::Mult => Some(SQLOperator::Multiply), Token::Mod => Some(SQLOperator::Modulus), Token::Div => Some(SQLOperator::Divide), Token::SQLWord(ref k) => match k.keyword.as_ref() { "AND" => Some(SQLOperator::And), "OR" => Some(SQLOperator::Or), "LIKE" => Some(SQLOperator::Like), "NOT" => { if self.parse_keyword("LIKE") { Some(SQLOperator::NotLike) } else { None } } _ => None, }, _ => None, }; if let Some(op) = regular_binary_operator { Ok(ASTNode::SQLBinaryExpr { left: Box::new(expr), op, right: Box::new(self.parse_subexpr(precedence)?), }) } else if let Token::SQLWord(ref k) = tok { match k.keyword.as_ref() { "IS" => { if self.parse_keyword("NULL") { Ok(ASTNode::SQLIsNull(Box::new(expr))) } else if self.parse_keywords(vec!["NOT", "NULL"]) { Ok(ASTNode::SQLIsNotNull(Box::new(expr))) } else { self.expected("NULL or NOT NULL after IS", self.peek_token()) } } "NOT" | "IN" | "BETWEEN" => { self.prev_token(); let negated = self.parse_keyword("NOT"); if self.parse_keyword("IN") { self.parse_in(expr, negated) } else if self.parse_keyword("BETWEEN") { self.parse_between(expr, negated) } else { self.expected("IN or BETWEEN after NOT", self.peek_token()) } } // Can only happen if `get_precedence` got out of sync with this function _ => panic!("No infix parser for token {:?}", tok), } } else if Token::DoubleColon == tok { self.parse_pg_cast(expr) } else { // Can only happen if `get_precedence` got out of sync with this function panic!("No infix parser for token {:?}", tok) } } /// Parses the parens following the `[ NOT ] IN` operator pub fn parse_in(&mut self, expr: ASTNode, negated: bool) -> Result { self.expect_token(&Token::LParen)?; let in_op = if self.parse_keyword("SELECT") || self.parse_keyword("WITH") { self.prev_token(); ASTNode::SQLInSubquery { expr: Box::new(expr), subquery: Box::new(self.parse_query()?), negated, } } else { ASTNode::SQLInList { expr: Box::new(expr), list: self.parse_expr_list()?, negated, } }; self.expect_token(&Token::RParen)?; Ok(in_op) } /// Parses `BETWEEN AND `, assuming the `BETWEEN` keyword was already consumed pub fn parse_between(&mut self, expr: ASTNode, negated: bool) -> Result { let low = self.parse_prefix()?; self.expect_keyword("AND")?; let high = self.parse_prefix()?; Ok(ASTNode::SQLBetween { expr: Box::new(expr), negated, low: Box::new(low), high: Box::new(high), }) } /// Parse a postgresql casting style which is in the form of `expr::datatype` pub fn parse_pg_cast(&mut self, expr: ASTNode) -> Result { Ok(ASTNode::SQLCast { expr: Box::new(expr), data_type: self.parse_data_type()?, }) } /// Get the precedence of the next token pub fn get_next_precedence(&self) -> Result { if let Some(token) = self.peek_token() { self.get_precedence(&token) } else { Ok(0) } } /// Get the precedence of a token pub fn get_precedence(&self, tok: &Token) -> Result { debug!("get_precedence() {:?}", tok); match tok { Token::SQLWord(k) if k.keyword == "OR" => Ok(5), Token::SQLWord(k) if k.keyword == "AND" => Ok(10), Token::SQLWord(k) if k.keyword == "NOT" => Ok(15), Token::SQLWord(k) if k.keyword == "IS" => Ok(17), Token::SQLWord(k) if k.keyword == "IN" => Ok(20), Token::SQLWord(k) if k.keyword == "BETWEEN" => Ok(20), Token::SQLWord(k) if k.keyword == "LIKE" => Ok(20), Token::Eq | Token::Lt | Token::LtEq | Token::Neq | Token::Gt | Token::GtEq => Ok(20), Token::Plus | Token::Minus => Ok(30), Token::Mult | Token::Div | Token::Mod => Ok(40), Token::DoubleColon => Ok(50), _ => Ok(0), } } /// Return first non-whitespace token that has not yet been processed pub fn peek_token(&self) -> Option { if let Some(n) = self.til_non_whitespace() { self.token_at(n) } else { None } } /// Get the next token skipping whitespace and increment the token index pub fn next_token(&mut self) -> Option { loop { match self.next_token_no_skip() { Some(Token::Whitespace(_)) => { continue; } token => { return token; } } } } /// get the index for non whitepsace token fn til_non_whitespace(&self) -> Option { let mut index = self.index; loop { match self.token_at(index) { Some(Token::Whitespace(_)) => { index += 1; } Some(_) => { return Some(index); } None => { return None; } } } } /// see the token at this index fn token_at(&self, n: usize) -> Option { if let Some(token) = self.tokens.get(n) { Some(token.clone()) } else { None } } pub fn next_token_no_skip(&mut self) -> Option { if self.index < self.tokens.len() { self.index += 1; Some(self.tokens[self.index - 1].clone()) } else { None } } /// Push back the last one non-whitespace token pub fn prev_token(&mut self) -> Option { // TODO: returned value is unused (available via peek_token) loop { match self.prev_token_no_skip() { Some(Token::Whitespace(_)) => { continue; } token => { return token; } } } } /// Get the previous token and decrement the token index fn prev_token_no_skip(&mut self) -> Option { if self.index > 0 { self.index -= 1; Some(self.tokens[self.index].clone()) } else { None } } /// Report unexpected token fn expected(&self, expected: &str, found: Option) -> Result { parser_err!(format!( "Expected {}, found: {}", expected, found.map_or("EOF".to_string(), |t| t.to_string()) )) } /// Look for an expected keyword and consume it if it exists #[must_use] pub fn parse_keyword(&mut self, expected: &'static str) -> bool { // Ideally, we'd accept a enum variant, not a string, but since // it's not trivial to maintain the enum without duplicating all // the keywords three times, we'll settle for a run-time check that // the string actually represents a known keyword... assert!(keywords::ALL_KEYWORDS.contains(&expected)); match self.peek_token() { Some(Token::SQLWord(ref k)) if expected.eq_ignore_ascii_case(&k.keyword) => { self.next_token(); true } _ => false, } } /// Look for an expected sequence of keywords and consume them if they exist #[must_use] pub fn parse_keywords(&mut self, keywords: Vec<&'static str>) -> bool { let index = self.index; for keyword in keywords { if !self.parse_keyword(&keyword) { //println!("parse_keywords aborting .. did not find {}", keyword); // reset index and return immediately self.index = index; return false; } } true } /// Bail out if the current token is not an expected keyword, or consume it if it is pub fn expect_keyword(&mut self, expected: &'static str) -> Result<(), ParserError> { if self.parse_keyword(expected) { Ok(()) } else { self.expected(expected, self.peek_token()) } } /// Consume the next token if it matches the expected token, otherwise return false #[must_use] pub fn consume_token(&mut self, expected: &Token) -> bool { match self.peek_token() { Some(ref t) => { if *t == *expected { self.next_token(); true } else { false } } _ => false, } } /// Bail out if the current token is not an expected keyword, or consume it if it is pub fn expect_token(&mut self, expected: &Token) -> Result<(), ParserError> { if self.consume_token(expected) { Ok(()) } else { self.expected(&expected.to_string(), self.peek_token()) } } /// Parse a SQL CREATE statement pub fn parse_create(&mut self) -> Result { if self.parse_keyword("TABLE") { self.parse_create_table() } else if self.parse_keyword("MATERIALIZED") || self.parse_keyword("VIEW") { self.prev_token(); self.parse_create_view() } else if self.parse_keyword("EXTERNAL") { self.parse_create_external_table() } else { self.expected("TABLE or VIEW after CREATE", self.peek_token()) } } pub fn parse_create_external_table(&mut self) -> Result { self.expect_keyword("TABLE")?; let table_name = self.parse_object_name()?; let columns = self.parse_columns()?; self.expect_keyword("STORED")?; self.expect_keyword("AS")?; let file_format = self.parse_identifier()?.parse::()?; self.expect_keyword("LOCATION")?; let location = self.parse_literal_string()?; Ok(SQLStatement::SQLCreateTable { name: table_name, columns, external: true, file_format: Some(file_format), location: Some(location), }) } pub fn parse_create_view(&mut self) -> Result { let materialized = self.parse_keyword("MATERIALIZED"); self.expect_keyword("VIEW")?; // Many dialects support `OR REPLACE` | `OR ALTER` right after `CREATE`, but we don't (yet). // ANSI SQL and Postgres support RECURSIVE here, but we don't support it either. let name = self.parse_object_name()?; // Parenthesized "output" columns list could be handled here. // Some dialects allow WITH here, followed by some keywords (e.g. MS SQL) // or `(k1=v1, k2=v2, ...)` (Postgres) self.expect_keyword("AS")?; let query = Box::new(self.parse_query()?); // Optional `WITH [ CASCADED | LOCAL ] CHECK OPTION` is widely supported here. Ok(SQLStatement::SQLCreateView { name, query, materialized, }) } pub fn parse_drop(&mut self) -> Result { let object_type = if self.parse_keyword("TABLE") { SQLObjectType::Table } else if self.parse_keyword("VIEW") { SQLObjectType::View } else { return parser_err!(format!( "Unexpected token after DROP: {:?}", self.peek_token() )); }; let if_exists = self.parse_keywords(vec!["IF", "EXISTS"]); let mut names = vec![self.parse_object_name()?]; loop { let token = &self.next_token(); if let Some(Token::Comma) = token { names.push(self.parse_object_name()?) } else { if token.is_some() { self.prev_token(); } break; } } let cascade = self.parse_keyword("CASCADE"); let restrict = self.parse_keyword("RESTRICT"); if cascade && restrict { return parser_err!("Cannot specify both CASCADE and RESTRICT in DROP"); } Ok(SQLStatement::SQLDrop { object_type, if_exists, names, cascade, }) } pub fn parse_create_table(&mut self) -> Result { let table_name = self.parse_object_name()?; // parse optional column list (schema) let columns = self.parse_columns()?; Ok(SQLStatement::SQLCreateTable { name: table_name, columns, external: false, file_format: None, location: None, }) } fn parse_columns(&mut self) -> Result, ParserError> { let mut columns = vec![]; if !self.consume_token(&Token::LParen) { return Ok(columns); } loop { match self.next_token() { Some(Token::SQLWord(column_name)) => { let data_type = self.parse_data_type()?; let is_primary = self.parse_keywords(vec!["PRIMARY", "KEY"]); let is_unique = self.parse_keyword("UNIQUE"); let default = if self.parse_keyword("DEFAULT") { let expr = self.parse_default_expr(0)?; Some(expr) } else { None }; let allow_null = if self.parse_keywords(vec!["NOT", "NULL"]) { false } else { let _ = self.parse_keyword("NULL"); true }; debug!("default: {:?}", default); columns.push(SQLColumnDef { name: column_name.as_sql_ident(), data_type, allow_null, is_primary, is_unique, default, }); match self.next_token() { Some(Token::Comma) => {} Some(Token::RParen) => { break; } other => { return parser_err!(format!( "Expected ',' or ')' after column definition but found {:?}", other )); } } } unexpected => { return parser_err!(format!("Expected column name, got {:?}", unexpected)); } } } Ok(columns) } pub fn parse_table_key(&mut self, constraint_name: SQLIdent) -> Result { let is_primary_key = self.parse_keywords(vec!["PRIMARY", "KEY"]); let is_unique_key = self.parse_keywords(vec!["UNIQUE", "KEY"]); let is_foreign_key = self.parse_keywords(vec!["FOREIGN", "KEY"]); let column_names = self.parse_parenthesized_column_list(Mandatory)?; let key = Key { name: constraint_name, columns: column_names, }; if is_primary_key { Ok(TableKey::PrimaryKey(key)) } else if is_unique_key { Ok(TableKey::UniqueKey(key)) } else if is_foreign_key { self.expect_keyword("REFERENCES")?; let foreign_table = self.parse_object_name()?; let referred_columns = self.parse_parenthesized_column_list(Mandatory)?; Ok(TableKey::ForeignKey { key, foreign_table, referred_columns, }) } else { parser_err!(format!( "Expecting primary key, unique key, or foreign key, found: {:?}", self.peek_token() )) } } pub fn parse_alter(&mut self) -> Result { self.expect_keyword("TABLE")?; let _ = self.parse_keyword("ONLY"); let table_name = self.parse_object_name()?; let operation = if self.parse_keyword("ADD") { if self.parse_keyword("CONSTRAINT") { let constraint_name = self.parse_identifier()?; let table_key = self.parse_table_key(constraint_name)?; AlterOperation::AddConstraint(table_key) } else { return self.expected("CONSTRAINT after ADD", self.peek_token()); } } else { return self.expected("ADD after ALTER TABLE", self.peek_token()); }; Ok(SQLStatement::SQLAlterTable { name: table_name, operation, }) } /// Parse a copy statement pub fn parse_copy(&mut self) -> Result { let table_name = self.parse_object_name()?; let columns = self.parse_parenthesized_column_list(Optional)?; self.expect_keyword("FROM")?; self.expect_keyword("STDIN")?; self.expect_token(&Token::SemiColon)?; let values = self.parse_tsv()?; Ok(SQLStatement::SQLCopy { table_name, columns, values, }) } /// Parse a tab separated values in /// COPY payload fn parse_tsv(&mut self) -> Result>, ParserError> { let values = self.parse_tab_value()?; Ok(values) } fn parse_sql_value(&mut self) -> Result { Ok(ASTNode::SQLValue(self.parse_value()?)) } fn parse_tab_value(&mut self) -> Result>, ParserError> { let mut values = vec![]; let mut content = String::from(""); while let Some(t) = self.next_token_no_skip() { match t { Token::Whitespace(Whitespace::Tab) => { values.push(Some(content.to_string())); content.clear(); } Token::Whitespace(Whitespace::Newline) => { values.push(Some(content.to_string())); content.clear(); } Token::Backslash => { if self.consume_token(&Token::Period) { return Ok(values); } if let Some(token) = self.next_token() { if let Token::SQLWord(SQLWord { value: v, .. }) = token { if v == "N" { values.push(None); } } } else { continue; } } _ => { content.push_str(&t.to_string()); } } } Ok(values) } /// Parse a literal value (numbers, strings, date/time, booleans) fn parse_value(&mut self) -> Result { match self.next_token() { Some(t) => match t { Token::SQLWord(k) => match k.keyword.as_ref() { "TRUE" => Ok(Value::Boolean(true)), "FALSE" => Ok(Value::Boolean(false)), "NULL" => Ok(Value::Null), _ => { return parser_err!(format!("No value parser for keyword {}", k.keyword)); } }, Token::Number(ref n) if n.contains('.') => match n.parse::() { Ok(n) => Ok(Value::Double(n)), Err(e) => parser_err!(format!("Could not parse '{}' as f64: {}", n, e)), }, Token::Number(ref n) => match n.parse::() { Ok(n) => Ok(Value::Long(n)), Err(e) => parser_err!(format!("Could not parse '{}' as i64: {}", n, e)), }, Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())), Token::NationalStringLiteral(ref s) => { Ok(Value::NationalStringLiteral(s.to_string())) } _ => parser_err!(format!("Unsupported value: {:?}", t)), }, None => parser_err!("Expecting a value, but found EOF"), } } /// Parse a literal integer/long pub fn parse_literal_int(&mut self) -> Result { match self.next_token() { Some(Token::Number(s)) => s.parse::().map_err(|e| { ParserError::ParserError(format!("Could not parse '{}' as i64: {}", s, e)) }), other => parser_err!(format!("Expected literal int, found {:?}", other)), } } /// Parse a literal double pub fn parse_literal_double(&mut self) -> Result { match self.next_token() { Some(Token::Number(s)) => s.parse::().map_err(|e| { ParserError::ParserError(format!("Could not parse '{}' as f64: {}", s, e)) }), other => parser_err!(format!("Expected literal number, found {:?}", other)), } } /// Parse a literal string pub fn parse_literal_string(&mut self) -> Result { match self.next_token() { Some(Token::SingleQuotedString(ref s)) => Ok(s.clone()), other => parser_err!(format!("Expected literal string, found {:?}", other)), } } /// Parse a SQL datatype (in the context of a CREATE TABLE statement for example) pub fn parse_data_type(&mut self) -> Result { match self.next_token() { Some(Token::SQLWord(k)) => match k.keyword.as_ref() { "BOOLEAN" => Ok(SQLType::Boolean), "FLOAT" => Ok(SQLType::Float(self.parse_optional_precision()?)), "REAL" => Ok(SQLType::Real), "DOUBLE" => { let _ = self.parse_keyword("PRECISION"); Ok(SQLType::Double) } "SMALLINT" => Ok(SQLType::SmallInt), "INT" | "INTEGER" => Ok(SQLType::Int), "BIGINT" => Ok(SQLType::BigInt), "VARCHAR" => Ok(SQLType::Varchar(self.parse_optional_precision()?)), "CHAR" | "CHARACTER" => { if self.parse_keyword("VARYING") { Ok(SQLType::Varchar(self.parse_optional_precision()?)) } else { Ok(SQLType::Char(self.parse_optional_precision()?)) } } "UUID" => Ok(SQLType::Uuid), "DATE" => Ok(SQLType::Date), "TIMESTAMP" => { // TBD: we throw away "with/without timezone" information if self.parse_keyword("WITH") || self.parse_keyword("WITHOUT") { self.expect_keyword("TIME")?; self.expect_keyword("ZONE")?; } Ok(SQLType::Timestamp) } "TIME" => { // TBD: we throw away "with/without timezone" information if self.parse_keyword("WITH") || self.parse_keyword("WITHOUT") { self.expect_keyword("TIME")?; self.expect_keyword("ZONE")?; } Ok(SQLType::Time) } "REGCLASS" => Ok(SQLType::Regclass), "TEXT" => { if self.consume_token(&Token::LBracket) { // Note: this is postgresql-specific self.expect_token(&Token::RBracket)?; Ok(SQLType::Array(Box::new(SQLType::Text))) } else { Ok(SQLType::Text) } } "BYTEA" => Ok(SQLType::Bytea), "NUMERIC" => { let (precision, scale) = self.parse_optional_precision_scale()?; Ok(SQLType::Decimal(precision, scale)) } _ => { self.prev_token(); let type_name = self.parse_object_name()?; Ok(SQLType::Custom(type_name)) } }, other => self.expected("a data type name", other), } } /// Parse `AS identifier` (or simply `identifier` if it's not a reserved keyword) /// Some examples with aliases: `SELECT 1 foo`, `SELECT COUNT(*) AS cnt`, /// `SELECT ... FROM t1 foo, t2 bar`, `SELECT ... FROM (...) AS bar` pub fn parse_optional_alias( &mut self, reserved_kwds: &[&str], ) -> Result, ParserError> { let after_as = self.parse_keyword("AS"); let maybe_alias = self.next_token(); match maybe_alias { // Accept any identifier after `AS` (though many dialects have restrictions on // keywords that may appear here). If there's no `AS`: don't parse keywords, // which may start a construct allowed in this position, to be parsed as aliases. // (For example, in `FROM t1 JOIN` the `JOIN` will always be parsed as a keyword, // not an alias.) Some(Token::SQLWord(ref w)) if after_as || !reserved_kwds.contains(&w.keyword.as_str()) => { Ok(Some(w.as_sql_ident())) } ref not_an_ident if after_as => parser_err!(format!( "Expected an identifier after AS, got {:?}", not_an_ident )), Some(_not_an_ident) => { self.prev_token(); Ok(None) // no alias found } None => Ok(None), } } /// Parse one or more identifiers with the specified separator between them pub fn parse_list_of_ids(&mut self, separator: &Token) -> Result, ParserError> { let mut idents = vec![]; let mut expect_identifier = true; loop { let token = &self.next_token(); match token { Some(Token::SQLWord(s)) if expect_identifier => { expect_identifier = false; idents.push(s.as_sql_ident()); } Some(token) if token == separator && !expect_identifier => { expect_identifier = true; continue; } _ => { if token.is_some() { self.prev_token(); } break; } } } if expect_identifier { self.expected("identifier", self.peek_token()) } else { Ok(idents) } } /// Parse a possibly qualified, possibly quoted identifier, e.g. /// `foo` or `myschema."table"` pub fn parse_object_name(&mut self) -> Result { Ok(SQLObjectName(self.parse_list_of_ids(&Token::Period)?)) } /// Parse a simple one-word identifier (possibly quoted, possibly a keyword) pub fn parse_identifier(&mut self) -> Result { match self.next_token() { Some(Token::SQLWord(w)) => Ok(w.as_sql_ident()), unexpected => self.expected("identifier", unexpected), } } /// Parse a parenthesized comma-separated list of unqualified, possibly quoted identifiers pub fn parse_parenthesized_column_list( &mut self, optional: IsOptional, ) -> Result, ParserError> { if self.consume_token(&Token::LParen) { let cols = self.parse_list_of_ids(&Token::Comma)?; self.expect_token(&Token::RParen)?; Ok(cols) } else if optional == Optional { Ok(vec![]) } else { self.expected("a list of columns in parentheses", self.peek_token()) } } pub fn parse_precision(&mut self) -> Result { //TODO: error handling Ok(self.parse_optional_precision()?.unwrap()) } pub fn parse_optional_precision(&mut self) -> Result, ParserError> { if self.consume_token(&Token::LParen) { let n = self.parse_literal_int()?; //TODO: check return value of reading rparen self.expect_token(&Token::RParen)?; Ok(Some(n as usize)) } else { Ok(None) } } pub fn parse_optional_precision_scale( &mut self, ) -> Result<(Option, Option), ParserError> { if self.consume_token(&Token::LParen) { let n = self.parse_literal_int()?; let scale = if self.consume_token(&Token::Comma) { Some(self.parse_literal_int()? as usize) } else { None }; self.expect_token(&Token::RParen)?; Ok((Some(n as usize), scale)) } else { Ok((None, None)) } } pub fn parse_delete(&mut self) -> Result { self.expect_keyword("FROM")?; let table_name = self.parse_object_name()?; let selection = if self.parse_keyword("WHERE") { Some(self.parse_expr()?) } else { None }; Ok(SQLStatement::SQLDelete { table_name, selection, }) } /// Parse a query expression, i.e. a `SELECT` statement optionally /// preceeded with some `WITH` CTE declarations and optionally followed /// by `ORDER BY`. Unlike some other parse_... methods, this one doesn't /// expect the initial keyword to be already consumed pub fn parse_query(&mut self) -> Result { let ctes = if self.parse_keyword("WITH") { // TODO: optional RECURSIVE self.parse_cte_list()? } else { vec![] }; let body = self.parse_query_body(0)?; let order_by = if self.parse_keywords(vec!["ORDER", "BY"]) { Some(self.parse_order_by_expr_list()?) } else { None }; let limit = if self.parse_keyword("LIMIT") { self.parse_limit()? } else { None }; Ok(SQLQuery { ctes, body, limit, order_by, }) } /// Parse one or more (comma-separated) `alias AS (subquery)` CTEs, /// assuming the initial `WITH` was already consumed. fn parse_cte_list(&mut self) -> Result, ParserError> { let mut cte = vec![]; loop { let alias = self.parse_identifier()?; let renamed_columns = self.parse_parenthesized_column_list(Optional)?; self.expect_keyword("AS")?; self.expect_token(&Token::LParen)?; cte.push(Cte { alias, query: self.parse_query()?, renamed_columns, }); self.expect_token(&Token::RParen)?; if !self.consume_token(&Token::Comma) { break; } } Ok(cte) } /// Parse a "query body", which is an expression with roughly the /// following grammar: /// ```text /// query_body ::= restricted_select | '(' subquery ')' | set_operation /// restricted_select ::= 'SELECT' [expr_list] [ from ] [ where ] [ groupby_having ] /// subquery ::= query_body [ order_by_limit ] /// set_operation ::= query_body { 'UNION' | 'EXCEPT' | 'INTERSECT' } [ 'ALL' ] query_body /// ``` fn parse_query_body(&mut self, precedence: u8) -> Result { // We parse the expression using a Pratt parser, as in `parse_expr()`. // Start by parsing a restricted SELECT or a `(subquery)`: let mut expr = if self.parse_keyword("SELECT") { SQLSetExpr::Select(Box::new(self.parse_select()?)) } else if self.consume_token(&Token::LParen) { // CTEs are not allowed here, but the parser currently accepts them let subquery = self.parse_query()?; self.expect_token(&Token::RParen)?; SQLSetExpr::Query(Box::new(subquery)) } else { return self.expected("SELECT or a subquery in the query body", self.peek_token()); }; loop { // The query can be optionally followed by a set operator: let next_token = self.peek_token(); let op = self.parse_set_operator(&next_token); let next_precedence = match op { // UNION and EXCEPT have the same binding power and evaluate left-to-right Some(SQLSetOperator::Union) | Some(SQLSetOperator::Except) => 10, // INTERSECT has higher precedence than UNION/EXCEPT Some(SQLSetOperator::Intersect) => 20, // Unexpected token or EOF => stop parsing the query body None => break, }; if precedence >= next_precedence { break; } self.next_token(); // skip past the set operator expr = SQLSetExpr::SetOperation { left: Box::new(expr), op: op.unwrap(), all: self.parse_keyword("ALL"), right: Box::new(self.parse_query_body(next_precedence)?), }; } Ok(expr) } fn parse_set_operator(&mut self, token: &Option) -> Option { match token { Some(Token::SQLWord(w)) if w.keyword == "UNION" => Some(SQLSetOperator::Union), Some(Token::SQLWord(w)) if w.keyword == "EXCEPT" => Some(SQLSetOperator::Except), Some(Token::SQLWord(w)) if w.keyword == "INTERSECT" => Some(SQLSetOperator::Intersect), _ => None, } } /// Parse a restricted `SELECT` statement (no CTEs / `UNION` / `ORDER BY`), /// assuming the initial `SELECT` was already consumed pub fn parse_select(&mut self) -> Result { let distinct = self.parse_keyword("DISTINCT"); let projection = self.parse_select_list()?; let (relation, joins) = if self.parse_keyword("FROM") { let relation = Some(self.parse_table_factor()?); let joins = self.parse_joins()?; (relation, joins) } else { (None, vec![]) }; let selection = if self.parse_keyword("WHERE") { Some(self.parse_expr()?) } else { None }; let group_by = if self.parse_keywords(vec!["GROUP", "BY"]) { Some(self.parse_expr_list()?) } else { None }; let having = if self.parse_keyword("HAVING") { Some(self.parse_expr()?) } else { None }; Ok(SQLSelect { distinct, projection, selection, relation, joins, group_by, having, }) } /// A table name or a parenthesized subquery, followed by optional `[AS] alias` pub fn parse_table_factor(&mut self) -> Result { if self.consume_token(&Token::LParen) { let subquery = Box::new(self.parse_query()?); self.expect_token(&Token::RParen)?; let alias = self.parse_optional_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; Ok(TableFactor::Derived { subquery, alias }) } else { let name = self.parse_object_name()?; // Postgres, MSSQL: table-valued functions: let args = if self.consume_token(&Token::LParen) { Some(self.parse_optional_args()?) } else { None }; let alias = self.parse_optional_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; // MSSQL-specific table hints: let mut with_hints = vec![]; if self.parse_keyword("WITH") { if self.consume_token(&Token::LParen) { with_hints = self.parse_expr_list()?; self.expect_token(&Token::RParen)?; } else { // rewind, as WITH may belong to the next statement's CTE self.prev_token(); } }; Ok(TableFactor::Table { name, alias, args, with_hints, }) } } fn parse_join_constraint(&mut self, natural: bool) -> Result { if natural { Ok(JoinConstraint::Natural) } else if self.parse_keyword("ON") { let constraint = self.parse_expr()?; Ok(JoinConstraint::On(constraint)) } else if self.parse_keyword("USING") { let columns = self.parse_parenthesized_column_list(Mandatory)?; Ok(JoinConstraint::Using(columns)) } else { self.expected("ON, or USING after JOIN", self.peek_token()) } } fn parse_joins(&mut self) -> Result, ParserError> { let mut joins = vec![]; loop { let natural = match &self.peek_token() { Some(Token::Comma) => { self.next_token(); let relation = self.parse_table_factor()?; let join = Join { relation, join_operator: JoinOperator::Implicit, }; joins.push(join); continue; } Some(Token::SQLWord(kw)) if kw.keyword == "CROSS" => { self.next_token(); self.expect_keyword("JOIN")?; let relation = self.parse_table_factor()?; let join = Join { relation, join_operator: JoinOperator::Cross, }; joins.push(join); continue; } Some(Token::SQLWord(kw)) if kw.keyword == "NATURAL" => { self.next_token(); true } Some(_) => false, None => return Ok(joins), }; let join = match &self.peek_token() { Some(Token::SQLWord(kw)) if kw.keyword == "INNER" => { self.next_token(); self.expect_keyword("JOIN")?; Join { relation: self.parse_table_factor()?, join_operator: JoinOperator::Inner(self.parse_join_constraint(natural)?), } } Some(Token::SQLWord(kw)) if kw.keyword == "JOIN" => { self.next_token(); Join { relation: self.parse_table_factor()?, join_operator: JoinOperator::Inner(self.parse_join_constraint(natural)?), } } Some(Token::SQLWord(kw)) if kw.keyword == "LEFT" => { self.next_token(); let _ = self.parse_keyword("OUTER"); self.expect_keyword("JOIN")?; Join { relation: self.parse_table_factor()?, join_operator: JoinOperator::LeftOuter( self.parse_join_constraint(natural)?, ), } } Some(Token::SQLWord(kw)) if kw.keyword == "RIGHT" => { self.next_token(); let _ = self.parse_keyword("OUTER"); self.expect_keyword("JOIN")?; Join { relation: self.parse_table_factor()?, join_operator: JoinOperator::RightOuter( self.parse_join_constraint(natural)?, ), } } Some(Token::SQLWord(kw)) if kw.keyword == "FULL" => { self.next_token(); let _ = self.parse_keyword("OUTER"); self.expect_keyword("JOIN")?; Join { relation: self.parse_table_factor()?, join_operator: JoinOperator::FullOuter( self.parse_join_constraint(natural)?, ), } } _ => break, }; joins.push(join); } Ok(joins) } /// Parse an INSERT statement pub fn parse_insert(&mut self) -> Result { self.expect_keyword("INTO")?; let table_name = self.parse_object_name()?; let columns = self.parse_parenthesized_column_list(Optional)?; self.expect_keyword("VALUES")?; self.expect_token(&Token::LParen)?; let values = self.parse_expr_list()?; self.expect_token(&Token::RParen)?; Ok(SQLStatement::SQLInsert { table_name, columns, values: vec![values], }) } /// Parse a comma-delimited list of SQL expressions pub fn parse_expr_list(&mut self) -> Result, ParserError> { let mut expr_list: Vec = vec![]; loop { expr_list.push(self.parse_expr()?); match self.peek_token() { Some(Token::Comma) => self.next_token(), _ => break, }; } Ok(expr_list) } pub fn parse_optional_args(&mut self) -> Result, ParserError> { if self.consume_token(&Token::RParen) { Ok(vec![]) } else { let args = self.parse_expr_list()?; self.expect_token(&Token::RParen)?; Ok(args) } } /// Parse a comma-delimited list of projections after SELECT pub fn parse_select_list(&mut self) -> Result, ParserError> { let mut projections: Vec = vec![]; loop { let expr = self.parse_expr()?; if let ASTNode::SQLWildcard = expr { projections.push(SQLSelectItem::Wildcard); } else if let ASTNode::SQLQualifiedWildcard(prefix) = expr { projections.push(SQLSelectItem::QualifiedWildcard(SQLObjectName(prefix))); } else { // `expr` is a regular SQL expression and can be followed by an alias if let Some(alias) = self.parse_optional_alias(keywords::RESERVED_FOR_COLUMN_ALIAS)? { projections.push(SQLSelectItem::ExpressionWithAlias { expr, alias }); } else { projections.push(SQLSelectItem::UnnamedExpression(expr)); } } match self.peek_token() { Some(Token::Comma) => self.next_token(), _ => break, }; } Ok(projections) } /// Parse a comma-delimited list of SQL ORDER BY expressions pub fn parse_order_by_expr_list(&mut self) -> Result, ParserError> { let mut expr_list: Vec = vec![]; loop { let expr = self.parse_expr()?; let asc = if self.parse_keyword("ASC") { Some(true) } else if self.parse_keyword("DESC") { Some(false) } else { None }; expr_list.push(SQLOrderByExpr { expr, asc }); if let Some(Token::Comma) = self.peek_token() { self.next_token(); } else { break; } } Ok(expr_list) } /// Parse a LIMIT clause pub fn parse_limit(&mut self) -> Result, ParserError> { if self.parse_keyword("ALL") { Ok(None) } else { self.parse_literal_int() .map(|n| Some(ASTNode::SQLValue(Value::Long(n)))) } } } impl SQLWord { pub fn as_sql_ident(&self) -> SQLIdent { self.to_string() } } #[cfg(test)] mod tests { use super::*; use crate::test_utils::all_dialects; #[test] fn test_prev_index() { let sql = "SELECT version()"; all_dialects().run_parser_method(sql, |parser| { assert_eq!(parser.prev_token(), None); assert_eq!(parser.next_token(), Some(Token::make_keyword("SELECT"))); assert_eq!(parser.next_token(), Some(Token::make_word("version", None))); assert_eq!(parser.prev_token(), Some(Token::make_word("version", None))); assert_eq!(parser.peek_token(), Some(Token::make_word("version", None))); assert_eq!(parser.prev_token(), Some(Token::make_keyword("SELECT"))); assert_eq!(parser.prev_token(), None); }); } }