From 3de2a0952cc4d1e6f78ebf3c1113d4d9cbd55729 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Sun, 13 Jan 2019 03:26:33 +0300 Subject: [PATCH 01/11] Make SQLOrderByExpr::asc tri-state i.e. ASC/DESC/unspecified - so that we don't lose information about source code. Also don't take any keyword other than ASC/DESC or Comma to mean 'ascending'. --- src/sqlast/mod.rs | 12 ++++++------ src/sqlparser.rs | 30 ++++++++---------------------- tests/sqlparser_generic.rs | 14 +++++++++----- 3 files changed, 23 insertions(+), 33 deletions(-) diff --git a/src/sqlast/mod.rs b/src/sqlast/mod.rs index 54b650a8..2bc026c5 100644 --- a/src/sqlast/mod.rs +++ b/src/sqlast/mod.rs @@ -366,21 +366,21 @@ impl ToString for SQLAssignment { #[derive(Debug, Clone, PartialEq)] pub struct SQLOrderByExpr { pub expr: Box, - pub asc: bool, + pub asc: Option, } impl SQLOrderByExpr { - pub fn new(expr: Box, asc: bool) -> Self { + pub fn new(expr: Box, asc: Option) -> Self { SQLOrderByExpr { expr, asc } } } impl ToString for SQLOrderByExpr { fn to_string(&self) -> String { - if self.asc { - format!("{} ASC", self.expr.as_ref().to_string()) - } else { - format!("{} DESC", self.expr.as_ref().to_string()) + match self.asc { + Some(true) => format!("{} ASC", self.expr.to_string()), + Some(false) => format!("{} DESC", self.expr.to_string()), + None => self.expr.to_string(), } } } diff --git a/src/sqlparser.rs b/src/sqlparser.rs index 42a39b01..dc666bf0 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -1321,33 +1321,19 @@ impl Parser { loop { let expr = self.parse_expr(0)?; - // look for optional ASC / DESC specifier - let asc = match self.peek_token() { - Some(Token::Keyword(k)) => match k.to_uppercase().as_ref() { - "ASC" => { - self.next_token(); - true - } - "DESC" => { - self.next_token(); - false - } - _ => true, - }, - Some(Token::Comma) => true, - _ => true, + let asc = if self.parse_keyword("ASC") { + Some(true) + } else if self.parse_keyword("DESC") { + Some(false) + } else { + None 
}; expr_list.push(SQLOrderByExpr::new(Box::new(expr), asc)); - if let Some(t) = self.peek_token() { - if t == Token::Comma { - self.next_token(); - } else { - break; - } + if let Some(Token::Comma) = self.peek_token() { + self.next_token(); } else { - // EOF break; } } diff --git a/tests/sqlparser_generic.rs b/tests/sqlparser_generic.rs index 5c867972..04ba8754 100644 --- a/tests/sqlparser_generic.rs +++ b/tests/sqlparser_generic.rs @@ -230,7 +230,7 @@ fn parse_not_like() { #[test] fn parse_select_order_by() { let sql = String::from( - "SELECT id, fname, lname FROM customer WHERE id < 5 ORDER BY lname ASC, fname DESC", + "SELECT id, fname, lname FROM customer WHERE id < 5 ORDER BY lname ASC, fname DESC, id", ); match verified(&sql) { ASTNode::SQLSelect { order_by, .. } => { @@ -238,11 +238,15 @@ fn parse_select_order_by() { Some(vec![ SQLOrderByExpr { expr: Box::new(ASTNode::SQLIdentifier("lname".to_string())), - asc: true, + asc: Some(true), }, SQLOrderByExpr { expr: Box::new(ASTNode::SQLIdentifier("fname".to_string())), - asc: false, + asc: Some(false), + }, + SQLOrderByExpr { + expr: Box::new(ASTNode::SQLIdentifier("id".to_string())), + asc: None, }, ]), order_by @@ -266,11 +270,11 @@ fn parse_select_order_by_limit() { Some(vec![ SQLOrderByExpr { expr: Box::new(ASTNode::SQLIdentifier("lname".to_string())), - asc: true, + asc: Some(true), }, SQLOrderByExpr { expr: Box::new(ASTNode::SQLIdentifier("fname".to_string())), - asc: false, + asc: Some(false), }, ]), order_by From d73a1e0e1d14a419fa0d9609d271544518c721f9 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Sun, 13 Jan 2019 03:05:58 +0300 Subject: [PATCH 02/11] Join match arms for Keyword and binary ops because they share implementation. 
--- src/sqlparser.rs | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/src/sqlparser.rs b/src/sqlparser.rs index dc666bf0..c3d626a6 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -276,12 +276,12 @@ impl Parser { parser_err!("Invalid tokens after NOT") } } - Token::Keyword(_) => Ok(Some(ASTNode::SQLBinaryExpr { - left: Box::new(expr), - op: self.to_sql_operator(&tok)?, - right: Box::new(self.parse_expr(precedence)?), - })), - Token::Eq + Token::DoubleColon => { + let pg_cast = self.parse_pg_cast(expr)?; + Ok(Some(pg_cast)) + } + Token::Keyword(_) + | Token::Eq | Token::Neq | Token::Gt | Token::GtEq @@ -296,10 +296,6 @@ impl Parser { op: self.to_sql_operator(&tok)?, right: Box::new(self.parse_expr(precedence)?), })), - Token::DoubleColon => { - let pg_cast = self.parse_pg_cast(expr)?; - Ok(Some(pg_cast)) - } _ => parser_err!(format!("No infix parser for token {:?}", tok)), }, None => Ok(None), From eb4b5bc6864aa4ace9f63b95cf504aecc0a6d4d7 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Tue, 29 Jan 2019 17:49:46 +0300 Subject: [PATCH 03/11] Stop returning Option from parse_infix This reduces amount of boilerplate and avoids cloning the `expr` param. --- src/sqlparser.rs | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/src/sqlparser.rs b/src/sqlparser.rs index c3d626a6..878dd3d6 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -77,9 +77,7 @@ impl Parser { break; } - if let Some(infix_expr) = self.parse_infix(expr.clone(), next_precedence)? { - expr = infix_expr; - } + expr = self.parse_infix(expr, next_precedence)?; } Ok(expr) } @@ -102,9 +100,7 @@ impl Parser { break; } - if let Some(infix_expr) = self.parse_infix(expr.clone(), next_precedence)? 
{ - expr = infix_expr; - } + expr = self.parse_infix(expr, next_precedence)?; } Ok(expr) } @@ -248,37 +244,33 @@ impl Parser { } /// Parse an expression infix (typically an operator) - pub fn parse_infix( - &mut self, - expr: ASTNode, - precedence: u8, - ) -> Result, ParserError> { + pub fn parse_infix(&mut self, expr: ASTNode, precedence: u8) -> Result { debug!("parsing infix"); match self.next_token() { Some(tok) => match tok { Token::Keyword(ref k) if k == "IS" => { if self.parse_keywords(vec!["NULL"]) { - Ok(Some(ASTNode::SQLIsNull(Box::new(expr)))) + Ok(ASTNode::SQLIsNull(Box::new(expr))) } else if self.parse_keywords(vec!["NOT", "NULL"]) { - Ok(Some(ASTNode::SQLIsNotNull(Box::new(expr)))) + Ok(ASTNode::SQLIsNotNull(Box::new(expr))) } else { parser_err!("Invalid tokens after IS") } } Token::Keyword(ref k) if k == "NOT" => { if self.parse_keywords(vec!["LIKE"]) { - Ok(Some(ASTNode::SQLBinaryExpr { + Ok(ASTNode::SQLBinaryExpr { left: Box::new(expr), op: SQLOperator::NotLike, right: Box::new(self.parse_expr(precedence)?), - })) + }) } else { parser_err!("Invalid tokens after NOT") } } Token::DoubleColon => { let pg_cast = self.parse_pg_cast(expr)?; - Ok(Some(pg_cast)) + Ok(pg_cast) } Token::Keyword(_) | Token::Eq @@ -291,14 +283,16 @@ impl Parser { | Token::Minus | Token::Mult | Token::Mod - | Token::Div => Ok(Some(ASTNode::SQLBinaryExpr { + | Token::Div => Ok(ASTNode::SQLBinaryExpr { left: Box::new(expr), op: self.to_sql_operator(&tok)?, right: Box::new(self.parse_expr(precedence)?), - })), + }), _ => parser_err!(format!("No infix parser for token {:?}", tok)), }, - None => Ok(None), + // This is not supposed to happen, because of the precedence check + // in parse_expr. 
+ None => parser_err!("Unexpected EOF in parse_infix"), } } From 9a8b6a8e6421ca07fb69b0054721068cd42a9708 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Sat, 12 Jan 2019 19:45:08 +0300 Subject: [PATCH 04/11] Rework keyword/identifier parsing (1/8) Fold Token::{Keyword, Identifier, DoubleQuotedString} into one Token::SQLWord, which has the necessary information (was it a known keyword and/or was it quoted). This lets the parser easily accept DoubleQuotedString (a quoted identifier) everywhere it expects an Identifier in the same match arm. (To complete support of quoted identifiers, or "delimited identifiers" as the spec calls them, a TODO in parse_tablename() ought to be addressed.) As an aside, sqlite seems to be the only one supporting 'identifier' (which is rather hairy, since it can also be a string literal), and `identifier` seems only to be supported by MySQL. I didn't implement either one. This also allows the use of `parse`/`expect_keyword` machinery for non-reserved keywords: previously they relied on the keyword being a Token::Keyword, which wasn't a Token::Identifier, and so wasn't accepted as one. Now whether a keyword can be used as an identifier can be decided by the parser. (I didn't add a blacklist of "reserved" keywords, so that any keyword which doesn't have a special meaning in the parser could be used as an identifier. The list of keywords in the dialect could be re-used for that purpose at a later stage.)
--- src/dialect/generic_sql.rs | 2 +- src/dialect/postgresql.rs | 2 +- src/sqlast/mod.rs | 13 ++-- src/sqlast/table_key.rs | 8 ++- src/sqlparser.rs | 136 +++++++++++++++++------------------- src/sqltokenizer.rs | 115 ++++++++++++++++++++---------- tests/sqlparser_postgres.rs | 19 ++--- 7 files changed, 164 insertions(+), 131 deletions(-) diff --git a/src/dialect/generic_sql.rs b/src/dialect/generic_sql.rs index 0f18b723..0a546d38 100644 --- a/src/dialect/generic_sql.rs +++ b/src/dialect/generic_sql.rs @@ -12,7 +12,7 @@ impl Dialect for GenericSqlDialect { CHAR, CHARACTER, VARYING, LARGE, OBJECT, VARCHAR, CLOB, BINARY, VARBINARY, BLOB, FLOAT, REAL, DOUBLE, PRECISION, INT, INTEGER, SMALLINT, BIGINT, NUMERIC, DECIMAL, DEC, BOOLEAN, DATE, TIME, TIMESTAMP, CASE, WHEN, THEN, ELSE, END, JOIN, LEFT, RIGHT, FULL, - CROSS, OUTER, INNER, NATURAL, ON, USING, LIKE, + CROSS, OUTER, INNER, NATURAL, ON, USING, LIKE, CAST, ]; } diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index 66cb51c1..5535fb14 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -15,7 +15,7 @@ impl Dialect for PostgreSqlDialect { DATE, TIME, TIMESTAMP, VALUES, DEFAULT, ZONE, REGCLASS, TEXT, BYTEA, TRUE, FALSE, COPY, STDIN, PRIMARY, KEY, UNIQUE, UUID, ADD, CONSTRAINT, FOREIGN, REFERENCES, CASE, WHEN, THEN, ELSE, END, JOIN, LEFT, RIGHT, FULL, CROSS, OUTER, INNER, NATURAL, ON, USING, - LIKE, + LIKE, CAST, ]; } diff --git a/src/sqlast/mod.rs b/src/sqlast/mod.rs index 2bc026c5..5c5d8c39 100644 --- a/src/sqlast/mod.rs +++ b/src/sqlast/mod.rs @@ -25,15 +25,18 @@ pub use self::value::Value; pub use self::sql_operator::SQLOperator; +// This could be enhanced to remember the way the identifier was quoted +pub type SQLIdent = String; + /// SQL Abstract Syntax Tree (AST) #[derive(Debug, Clone, PartialEq)] pub enum ASTNode { /// Identifier e.g. table name or column name - SQLIdentifier(String), + SQLIdentifier(SQLIdent), /// Wildcard e.g. 
`*` SQLWildcard, /// Multi part identifier e.g. `myschema.dbo.mytable` - SQLCompoundIdentifier(Vec), + SQLCompoundIdentifier(Vec), /// Assigment e.g. `name = 'Fred'` in an UPDATE statement SQLAssignment(SQLAssignment), /// `IS NULL` expression @@ -93,7 +96,7 @@ pub enum ASTNode { /// TABLE table_name: String, /// COLUMNS - columns: Vec, + columns: Vec, /// VALUES (vector of rows to insert) values: Vec>, }, @@ -101,7 +104,7 @@ pub enum ASTNode { /// TABLE table_name: String, /// COLUMNS - columns: Vec, + columns: Vec, /// VALUES a vector of values to be copied values: Vec>, }, @@ -388,7 +391,7 @@ impl ToString for SQLOrderByExpr { /// SQL column definition #[derive(Debug, Clone, PartialEq)] pub struct SQLColumnDef { - pub name: String, + pub name: SQLIdent, pub data_type: SQLType, pub is_primary: bool, pub is_unique: bool, diff --git a/src/sqlast/table_key.rs b/src/sqlast/table_key.rs index 9dacc21b..f4ff70f4 100644 --- a/src/sqlast/table_key.rs +++ b/src/sqlast/table_key.rs @@ -1,3 +1,5 @@ +use super::SQLIdent; + #[derive(Debug, PartialEq, Clone)] pub enum AlterOperation { AddConstraint(TableKey), @@ -17,8 +19,8 @@ impl ToString for AlterOperation { #[derive(Debug, PartialEq, Clone)] pub struct Key { - pub name: String, - pub columns: Vec, + pub name: SQLIdent, + pub columns: Vec, } #[derive(Debug, PartialEq, Clone)] @@ -29,7 +31,7 @@ pub enum TableKey { ForeignKey { key: Key, foreign_table: String, - referred_columns: Vec, + referred_columns: Vec, }, } diff --git a/src/sqlparser.rs b/src/sqlparser.rs index 878dd3d6..35af605a 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -90,7 +90,7 @@ impl Parser { loop { // stop parsing on `NULL` | `NOT NULL` match self.peek_token() { - Some(Token::Keyword(ref k)) if k == "NOT" || k == "NULL" => break, + Some(Token::SQLWord(ref k)) if k.keyword == "NOT" || k.keyword == "NULL" => break, _ => {} } @@ -109,7 +109,7 @@ impl Parser { pub fn parse_prefix(&mut self) -> Result { match self.next_token() { Some(t) => match t { - 
Token::Keyword(k) => match k.to_uppercase().as_ref() { + Token::SQLWord(w) => match w.keyword.as_ref() { "SELECT" => Ok(self.parse_select()?), "CREATE" => Ok(self.parse_create()?), "DELETE" => Ok(self.parse_delete()?), @@ -121,38 +121,31 @@ impl Parser { self.parse_sql_value() } "CASE" => self.parse_case_expression(), + "CAST" => self.parse_cast_expression(), "NOT" => Ok(ASTNode::SQLUnary { operator: SQLOperator::Not, expr: Box::new(self.parse_expr(0)?), }), - _ => return parser_err!(format!("No prefix parser for keyword {}", k)), - }, - Token::Mult => Ok(ASTNode::SQLWildcard), - Token::Identifier(id) => { - if "CAST" == id.to_uppercase() { - self.parse_cast_expression() - } else { - match self.peek_token() { - Some(Token::LParen) => self.parse_function(&id), - Some(Token::Period) => { - let mut id_parts: Vec = vec![id]; - while self.peek_token() == Some(Token::Period) { - self.expect_token(&Token::Period)?; - match self.next_token() { - Some(Token::Identifier(id)) => id_parts.push(id), - _ => { - return parser_err!(format!( - "Error parsing compound identifier" - )) - } + _ => match self.peek_token() { + Some(Token::LParen) => self.parse_function(&w.value), + Some(Token::Period) => { + let mut id_parts: Vec = vec![w.value]; + while self.consume_token(&Token::Period) { + match self.next_token() { + Some(Token::SQLWord(w)) => id_parts.push(w.value), + _ => { + return parser_err!(format!( + "Error parsing compound identifier" + )); } } - Ok(ASTNode::SQLCompoundIdentifier(id_parts)) } - _ => Ok(ASTNode::SQLIdentifier(id)), + Ok(ASTNode::SQLCompoundIdentifier(id_parts)) } - } - } + _ => Ok(ASTNode::SQLIdentifier(w.value)), + }, + }, + Token::Mult => Ok(ASTNode::SQLWildcard), Token::Number(_) | Token::SingleQuotedString(_) => { self.prev_token(); self.parse_sql_value() @@ -248,7 +241,7 @@ impl Parser { debug!("parsing infix"); match self.next_token() { Some(tok) => match tok { - Token::Keyword(ref k) if k == "IS" => { + Token::SQLWord(ref k) if k.keyword == "IS" => { if 
self.parse_keywords(vec!["NULL"]) { Ok(ASTNode::SQLIsNull(Box::new(expr))) } else if self.parse_keywords(vec!["NOT", "NULL"]) { @@ -257,7 +250,7 @@ impl Parser { parser_err!("Invalid tokens after IS") } } - Token::Keyword(ref k) if k == "NOT" => { + Token::SQLWord(ref k) if k.keyword == "NOT" => { if self.parse_keywords(vec!["LIKE"]) { Ok(ASTNode::SQLBinaryExpr { left: Box::new(expr), @@ -272,7 +265,7 @@ impl Parser { let pg_cast = self.parse_pg_cast(expr)?; Ok(pg_cast) } - Token::Keyword(_) + Token::SQLWord(_) | Token::Eq | Token::Neq | Token::Gt @@ -310,10 +303,10 @@ impl Parser { &Token::Mult => Ok(SQLOperator::Multiply), &Token::Div => Ok(SQLOperator::Divide), &Token::Mod => Ok(SQLOperator::Modulus), - &Token::Keyword(ref k) if k == "AND" => Ok(SQLOperator::And), - &Token::Keyword(ref k) if k == "OR" => Ok(SQLOperator::Or), - //&Token::Keyword(ref k) if k == "NOT" => Ok(SQLOperator::Not), - &Token::Keyword(ref k) if k == "LIKE" => Ok(SQLOperator::Like), + &Token::SQLWord(ref k) if k.keyword == "AND" => Ok(SQLOperator::And), + &Token::SQLWord(ref k) if k.keyword == "OR" => Ok(SQLOperator::Or), + //&Token::SQLWord(ref k) if k.keyword == "NOT" => Ok(SQLOperator::Not), + &Token::SQLWord(ref k) if k.keyword == "LIKE" => Ok(SQLOperator::Like), _ => parser_err!(format!("Unsupported SQL operator {:?}", tok)), } } @@ -332,11 +325,11 @@ impl Parser { debug!("get_precedence() {:?}", tok); match tok { - &Token::Keyword(ref k) if k == "OR" => Ok(5), - &Token::Keyword(ref k) if k == "AND" => Ok(10), - &Token::Keyword(ref k) if k == "NOT" => Ok(15), - &Token::Keyword(ref k) if k == "IS" => Ok(15), - &Token::Keyword(ref k) if k == "LIKE" => Ok(20), + &Token::SQLWord(ref k) if k.keyword == "OR" => Ok(5), + &Token::SQLWord(ref k) if k.keyword == "AND" => Ok(10), + &Token::SQLWord(ref k) if k.keyword == "NOT" => Ok(15), + &Token::SQLWord(ref k) if k.keyword == "IS" => Ok(15), + &Token::SQLWord(ref k) if k.keyword == "LIKE" => Ok(20), &Token::Eq | &Token::Lt | &Token::LtEq | 
&Token::Neq | &Token::Gt | &Token::GtEq => { Ok(20) } @@ -435,13 +428,9 @@ impl Parser { #[must_use] pub fn parse_keyword(&mut self, expected: &'static str) -> bool { match self.peek_token() { - Some(Token::Keyword(k)) => { - if expected.eq_ignore_ascii_case(k.as_str()) { - self.next_token(); - true - } else { - false - } + Some(Token::SQLWord(ref k)) if expected.eq_ignore_ascii_case(&k.keyword) => { + self.next_token(); + true } _ => false, } @@ -512,7 +501,7 @@ impl Parser { let mut columns = vec![]; if self.consume_token(&Token::LParen) { loop { - if let Some(Token::Identifier(column_name)) = self.next_token() { + if let Some(Token::SQLWord(column_name)) = self.next_token() { if let Ok(data_type) = self.parse_data_type() { let is_primary = self.parse_keywords(vec!["PRIMARY", "KEY"]); let is_unique = self.parse_keyword("UNIQUE"); @@ -535,7 +524,7 @@ impl Parser { Some(Token::Comma) => { self.next_token(); columns.push(SQLColumnDef { - name: column_name, + name: column_name.value, data_type: data_type, allow_null, is_primary, @@ -546,7 +535,7 @@ impl Parser { Some(Token::RParen) => { self.next_token(); columns.push(SQLColumnDef { - name: column_name, + name: column_name.value, data_type: data_type, allow_null, is_primary, @@ -628,8 +617,8 @@ impl Parser { let operation: Result = if self.parse_keywords(vec!["ADD", "CONSTRAINT"]) { match self.next_token() { - Some(Token::Identifier(ref id)) => { - let table_key = self.parse_table_key(id)?; + Some(Token::SQLWord(ref id)) => { + let table_key = self.parse_table_key(&id.value)?; Ok(AlterOperation::AddConstraint(table_key)) } _ => { @@ -707,8 +696,10 @@ impl Parser { return Ok(values); } if let Some(token) = self.next_token() { - if token == Token::Identifier("N".to_string()) { - values.push(None); + if let Token::SQLWord(SQLWord { value: v, .. 
}) = token { + if v == "N" { + values.push(None); + } } } else { continue; @@ -727,11 +718,16 @@ impl Parser { match self.next_token() { Some(t) => { match t { - Token::Keyword(k) => match k.to_uppercase().as_ref() { + Token::SQLWord(k) => match k.keyword.as_ref() { "TRUE" => Ok(Value::Boolean(true)), "FALSE" => Ok(Value::Boolean(false)), "NULL" => Ok(Value::Null), - _ => return parser_err!(format!("No value parser for keyword {}", k)), + _ => { + return parser_err!(format!( + "No value parser for keyword {}", + k.keyword + )); + } }, //TODO: parse the timestamp here (see parse_timestamp_value()) Token::Number(ref n) if n.contains(".") => match n.parse::() { @@ -863,7 +859,7 @@ impl Parser { /// Parse a SQL datatype (in the context of a CREATE TABLE statement for example) pub fn parse_data_type(&mut self) -> Result { match self.next_token() { - Some(Token::Keyword(k)) => match k.to_uppercase().as_ref() { + Some(Token::SQLWord(k)) => match k.keyword.as_ref() { "BOOLEAN" => Ok(SQLType::Boolean), "FLOAT" => Ok(SQLType::Float(self.parse_optional_precision()?)), "REAL" => Ok(SQLType::Real), @@ -948,13 +944,12 @@ impl Parser { let (precision, scale) = self.parse_optional_precision_scale()?; Ok(SQLType::Decimal(precision, scale)) } - _ => parser_err!(format!("Invalid data type '{:?}'", k)), + _ => { + self.prev_token(); + let type_name = self.parse_tablename()?; // TODO: this actually reads a possibly schema-qualified name of a (custom) type + Ok(SQLType::Custom(type_name)) + } }, - Some(Token::Identifier(_)) => { - self.prev_token(); - let type_name = self.parse_tablename()?; // TODO: this actually reads a possibly schema-qualified name of a (custom) type - Ok(SQLType::Custom(type_name)) - } other => parser_err!(format!("Invalid data type: '{:?}'", other)), } } @@ -966,7 +961,7 @@ impl Parser { let token = &self.next_token(); match token { Some(token) => match token { - Token::Identifier(s) => { + Token::SQLWord(s) => { if expect_identifier { expect_identifier = false; 
idents.push(s.to_string()); @@ -1000,12 +995,13 @@ impl Parser { pub fn parse_tablename(&mut self) -> Result { let identifier = self.parse_compound_identifier(&Token::Period)?; match identifier { + // TODO: should store the compound identifier itself ASTNode::SQLCompoundIdentifier(idents) => Ok(idents.join(".")), other => parser_err!(format!("Expecting compound identifier, found: {:?}", other)), } } - pub fn parse_column_names(&mut self) -> Result, ParserError> { + pub fn parse_column_names(&mut self) -> Result, ParserError> { let identifier = self.parse_compound_identifier(&Token::Comma)?; match identifier { ASTNode::SQLCompoundIdentifier(idents) => Ok(idents), @@ -1188,7 +1184,7 @@ impl Parser { joins.push(join); continue; } - Some(Token::Keyword(kw)) if kw == "CROSS" => { + Some(Token::SQLWord(kw)) if kw.keyword == "CROSS" => { self.next_token(); self.expect_keyword("JOIN")?; let relation = self.parse_expr(0)?; @@ -1199,7 +1195,7 @@ impl Parser { joins.push(join); continue; } - Some(Token::Keyword(kw)) if kw == "NATURAL" => { + Some(Token::SQLWord(kw)) if kw.keyword == "NATURAL" => { self.next_token(); true } @@ -1208,7 +1204,7 @@ impl Parser { }; let join = match &self.peek_token() { - Some(Token::Keyword(kw)) if kw == "INNER" => { + Some(Token::SQLWord(kw)) if kw.keyword == "INNER" => { self.next_token(); self.expect_keyword("JOIN")?; Join { @@ -1216,14 +1212,14 @@ impl Parser { join_operator: JoinOperator::Inner(self.parse_join_constraint(natural)?), } } - Some(Token::Keyword(kw)) if kw == "JOIN" => { + Some(Token::SQLWord(kw)) if kw.keyword == "JOIN" => { self.next_token(); Join { relation: self.parse_expr(0)?, join_operator: JoinOperator::Inner(self.parse_join_constraint(natural)?), } } - Some(Token::Keyword(kw)) if kw == "LEFT" => { + Some(Token::SQLWord(kw)) if kw.keyword == "LEFT" => { self.next_token(); let _ = self.parse_keyword("OUTER"); self.expect_keyword("JOIN")?; @@ -1234,7 +1230,7 @@ impl Parser { ), } } - Some(Token::Keyword(kw)) if kw == 
"RIGHT" => { + Some(Token::SQLWord(kw)) if kw.keyword == "RIGHT" => { self.next_token(); let _ = self.parse_keyword("OUTER"); self.expect_keyword("JOIN")?; @@ -1245,7 +1241,7 @@ impl Parser { ), } } - Some(Token::Keyword(kw)) if kw == "FULL" => { + Some(Token::SQLWord(kw)) if kw.keyword == "FULL" => { self.next_token(); let _ = self.parse_keyword("OUTER"); self.expect_keyword("JOIN")?; diff --git a/src/sqltokenizer.rs b/src/sqltokenizer.rs index 50408822..d55daa26 100644 --- a/src/sqltokenizer.rs +++ b/src/sqltokenizer.rs @@ -26,18 +26,14 @@ use super::dialect::Dialect; /// SQL Token enumeration #[derive(Debug, Clone, PartialEq)] pub enum Token { - /// SQL identifier e.g. table or column name - Identifier(String), - /// SQL keyword e.g. Keyword("SELECT") - Keyword(String), + /// A keyword (like SELECT) or an optionally quoted SQL identifier + SQLWord(SQLWord), /// Numeric literal Number(String), /// A character that could not be tokenized Char(char), /// Single quoted string: i.e: 'string' SingleQuotedString(String), - /// Double quoted string: i.e: "string" - DoubleQuotedString(String), /// Comma Comma, /// Whitespace (space, tab, etc) @@ -93,12 +89,10 @@ pub enum Token { impl ToString for Token { fn to_string(&self) -> String { match self { - Token::Identifier(ref id) => id.to_string(), - Token::Keyword(ref k) => k.to_string(), + Token::SQLWord(ref w) => w.to_string(), Token::Number(ref n) => n.to_string(), Token::Char(ref c) => c.to_string(), Token::SingleQuotedString(ref s) => format!("'{}'", s), - Token::DoubleQuotedString(ref s) => format!("\"{}\"", s), Token::Comma => ",".to_string(), Token::Whitespace(ws) => ws.to_string(), Token::Eq => "=".to_string(), @@ -128,6 +122,49 @@ impl ToString for Token { } } +impl Token { + pub fn make_keyword(keyword: &str) -> Self { + Token::SQLWord(SQLWord { + value: keyword.to_string(), + quote_style: None, + keyword: keyword.to_uppercase().to_string(), + }) + } + pub fn make_word(word: &str, quote_style: Option) -> Self { + 
Token::SQLWord(SQLWord { + value: word.to_string(), + quote_style: quote_style, + keyword: "".to_string(), + }) + } +} + +/// A keyword (like SELECT) or an optionally quoted SQL identifier +#[derive(Debug, Clone, PartialEq)] +pub struct SQLWord { + /// The value of the token, without the enclosing quotes, and with the + /// escape sequences (if any) processed (TODO: escapes are not handled) + pub value: String, + /// An identifier can be "quoted" (<delimited identifier> in ANSI parlance). + /// The standard and most implementations allow using double quotes for this, + /// but some implementations support other quoting styles as well (e.g. \[MS SQL]) + pub quote_style: Option, + /// If the word was not quoted and it matched one of the known keywords, + /// this will have one of the values from dialect::keywords, otherwise empty + pub keyword: String, +} + +impl ToString for SQLWord { + fn to_string(&self) -> String { + match self.quote_style { + Some('"') => format!("\"{}\"", self.value), + Some('[') => format!("[{}]", self.value), + None => self.value.clone(), + _ => panic!("Unexpected quote_style!"), + } + } +} + #[derive(Debug, Clone, PartialEq)] pub enum Whitespace { Space, @@ -189,11 +226,10 @@ impl<'a> Tokenizer<'a> { } Token::Whitespace(Whitespace::Tab) => self.col += 4, - Token::Identifier(s) => self.col += s.len() as u64, - Token::Keyword(s) => self.col += s.len() as u64, + Token::SQLWord(w) if w.quote_style == None => self.col += w.value.len() as u64, + Token::SQLWord(w) if w.quote_style != None => self.col += w.value.len() as u64 + 2, Token::Number(s) => self.col += s.len() as u64, Token::SingleQuotedString(s) => self.col += s.len() as u64, - Token::DoubleQuotedString(s) => self.col += s.len() as u64, _ => self.col += 1, } @@ -234,14 +270,19 @@ impl<'a> Tokenizer<'a> { } let upper_str = s.to_uppercase(); if self.is_keyword(upper_str.as_str()) { - Ok(Some(Token::Keyword(upper_str))) + Ok(Some(Token::SQLWord(SQLWord { + value: s, + quote_style: None, + 
keyword: upper_str, + }))) } else { - Ok(Some(Token::Identifier(s))) + Ok(Some(Token::make_word(&s, None))) } } // string '\'' => { //TODO: handle escaped quotes in string + //TODO: handle newlines in string //TODO: handle EOF before terminating quote let mut s = String::new(); chars.next(); // consume @@ -275,7 +316,7 @@ impl<'a> Tokenizer<'a> { } } } - Ok(Some(Token::DoubleQuotedString(s))) + Ok(Some(Token::make_word(&s, Some('"')))) } // numbers '0'...'9' => { @@ -389,7 +430,7 @@ mod tests { let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ - Token::Keyword(String::from("SELECT")), + Token::make_keyword("SELECT"), Token::Whitespace(Whitespace::Space), Token::Number(String::from("1")), ]; @@ -405,9 +446,9 @@ mod tests { let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ - Token::Keyword(String::from("SELECT")), + Token::make_keyword("SELECT"), Token::Whitespace(Whitespace::Space), - Token::Identifier(String::from("sqrt")), + Token::make_word("sqrt", None), Token::LParen, Token::Number(String::from("1")), Token::RParen, @@ -424,23 +465,23 @@ mod tests { let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ - Token::Keyword(String::from("SELECT")), + Token::make_keyword("SELECT"), Token::Whitespace(Whitespace::Space), Token::Mult, Token::Whitespace(Whitespace::Space), - Token::Keyword(String::from("FROM")), + Token::make_keyword("FROM"), Token::Whitespace(Whitespace::Space), - Token::Identifier(String::from("customer")), + Token::make_word("customer", None), Token::Whitespace(Whitespace::Space), - Token::Keyword(String::from("WHERE")), + Token::make_keyword("WHERE"), Token::Whitespace(Whitespace::Space), - Token::Identifier(String::from("id")), + Token::make_word("id", None), Token::Whitespace(Whitespace::Space), Token::Eq, Token::Whitespace(Whitespace::Space), Token::Number(String::from("1")), Token::Whitespace(Whitespace::Space), - Token::Keyword(String::from("LIMIT")), + Token::make_keyword("LIMIT"), 
Token::Whitespace(Whitespace::Space), Token::Number(String::from("5")), ]; @@ -456,17 +497,17 @@ mod tests { let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ - Token::Keyword(String::from("SELECT")), + Token::make_keyword("SELECT"), Token::Whitespace(Whitespace::Space), Token::Mult, Token::Whitespace(Whitespace::Space), - Token::Keyword(String::from("FROM")), + Token::make_keyword("FROM"), Token::Whitespace(Whitespace::Space), - Token::Identifier(String::from("customer")), + Token::make_word("customer", None), Token::Whitespace(Whitespace::Space), - Token::Keyword(String::from("WHERE")), + Token::make_keyword("WHERE"), Token::Whitespace(Whitespace::Space), - Token::Identifier(String::from("salary")), + Token::make_word("salary", None), Token::Whitespace(Whitespace::Space), Token::Neq, Token::Whitespace(Whitespace::Space), @@ -491,7 +532,7 @@ mod tests { Token::Char('ط'), Token::Char('ف'), Token::Char('ى'), - Token::Identifier("h".to_string()), + Token::make_word("h", None), ]; compare(expected, tokens); } @@ -507,20 +548,20 @@ mod tests { let expected = vec![ Token::Whitespace(Whitespace::Newline), Token::Whitespace(Whitespace::Newline), - Token::Keyword("SELECT".into()), + Token::make_keyword("SELECT"), Token::Whitespace(Whitespace::Space), Token::Mult, Token::Whitespace(Whitespace::Space), - Token::Keyword("FROM".into()), + Token::make_keyword("FROM"), Token::Whitespace(Whitespace::Space), - Token::Keyword("TABLE".into()), + Token::make_keyword("table"), Token::Whitespace(Whitespace::Tab), Token::Char('م'), Token::Char('ص'), Token::Char('ط'), Token::Char('ف'), Token::Char('ى'), - Token::Identifier("h".to_string()), + Token::make_word("h", None), ]; compare(expected, tokens); } @@ -533,11 +574,11 @@ mod tests { let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ - Token::Identifier(String::from("a")), + Token::make_word("a", None), Token::Whitespace(Whitespace::Space), - Token::Keyword("IS".to_string()), + Token::make_keyword("IS"), 
Token::Whitespace(Whitespace::Space), - Token::Keyword("NULL".to_string()), + Token::make_keyword("NULL"), ]; compare(expected, tokens); diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 6b6598c6..55eb1271 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -13,20 +13,11 @@ fn test_prev_index() { let sql: &str = "SELECT version()"; let mut parser = parser(sql); assert_eq!(parser.prev_token(), None); - assert_eq!(parser.next_token(), Some(Token::Keyword("SELECT".into()))); - assert_eq!( - parser.next_token(), - Some(Token::Identifier("version".into())) - ); - assert_eq!( - parser.prev_token(), - Some(Token::Identifier("version".into())) - ); - assert_eq!( - parser.peek_token(), - Some(Token::Identifier("version".into())) - ); - assert_eq!(parser.prev_token(), Some(Token::Keyword("SELECT".into()))); + assert_eq!(parser.next_token(), Some(Token::make_keyword("SELECT"))); + assert_eq!(parser.next_token(), Some(Token::make_word("version", None))); + assert_eq!(parser.prev_token(), Some(Token::make_word("version", None))); + assert_eq!(parser.peek_token(), Some(Token::make_word("version", None))); + assert_eq!(parser.prev_token(), Some(Token::make_keyword("SELECT"))); assert_eq!(parser.prev_token(), None); } From f87230553efda04a1b768063f6501af05b7cb40b Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Wed, 30 Jan 2019 03:59:13 +0300 Subject: [PATCH 05/11] Remove dialect-specific keyword lists (2/8) Now populating SQLWord.keyword based on the list of globally supported keywords. 
--- src/dialect/ansi_sql.rs | 331 --------------------------------- src/dialect/generic_sql.rs | 14 -- src/dialect/keywords.rs | 364 ++++++++++++++++++++++++++++++++++++- src/dialect/mod.rs | 2 - src/dialect/postgresql.rs | 16 -- src/sqltokenizer.rs | 36 ++-- 6 files changed, 372 insertions(+), 391 deletions(-) diff --git a/src/dialect/ansi_sql.rs b/src/dialect/ansi_sql.rs index b91fdc6e..4026cf61 100644 --- a/src/dialect/ansi_sql.rs +++ b/src/dialect/ansi_sql.rs @@ -1,339 +1,8 @@ use dialect::Dialect; -use dialect::keywords::*; - pub struct AnsiSqlDialect {} impl Dialect for AnsiSqlDialect { - fn keywords(&self) -> Vec<&'static str> { - return vec![ - ABS, - ALL, - ALLOCATE, - ALTER, - AND, - ANY, - ARE, - ARRAY, - ARRAY_AGG, - ARRAY_MAX_CARDINALITY, - AS, - ASENSITIVE, - ASYMMETRIC, - AT, - ATOMIC, - AUTHORIZATION, - AVG, - BEGIN, - BEGIN_FRAME, - BEGIN_PARTITION, - BETWEEN, - BIGINT, - BINARY, - BLOB, - BOOLEAN, - BOTH, - BY, - CALL, - CALLED, - CARDINALITY, - CASCADED, - CASE, - CAST, - CEIL, - CEILING, - CHAR, - CHAR_LENGTH, - CHARACTER, - CHARACTER_LENGTH, - CHECK, - CLOB, - CLOSE, - COALESCE, - COLLATE, - COLLECT, - COLUMN, - COMMIT, - CONDITION, - CONNECT, - CONSTRAINT, - CONTAINS, - CONVERT, - CORR, - CORRESPONDING, - COUNT, - COVAR_POP, - COVAR_SAMP, - CREATE, - CROSS, - CUBE, - CUME_DIST, - CURRENT, - CURRENT_CATALOG, - CURRENT_DATE, - CURRENT_DEFAULT_TRANSFORM_GROUP, - CURRENT_PATH, - CURRENT_ROLE, - CURRENT_ROW, - CURRENT_SCHEMA, - CURRENT_TIME, - CURRENT_TIMESTAMP, - CURRENT_TRANSFORM_GROUP_FOR_TYPE, - CURRENT_USER, - CURSOR, - CYCLE, - DATE, - DAY, - DEALLOCATE, - DEC, - DECIMAL, - DECLARE, - DEFAULT, - DELETE, - DENSE_RANK, - DEREF, - DESCRIBE, - DETERMINISTIC, - DISCONNECT, - DISTINCT, - DOUBLE, - DROP, - DYNAMIC, - EACH, - ELEMENT, - ELSE, - END, - END_FRAME, - END_PARTITION, - END_EXEC, - EQUALS, - ESCAPE, - EVERY, - EXCEPT, - EXEC, - EXECUTE, - EXISTS, - EXP, - EXTERNAL, - EXTRACT, - FALSE, - FETCH, - FILTER, - FIRST_VALUE, - FLOAT, - FLOOR, - 
FOR, - FOREIGN, - FRAME_ROW, - FREE, - FROM, - FULL, - FUNCTION, - FUSION, - GET, - GLOBAL, - GRANT, - GROUP, - GROUPING, - GROUPS, - HAVING, - HOLD, - HOUR, - IDENTITY, - IN, - INDICATOR, - INNER, - INOUT, - INSENSITIVE, - INSERT, - INT, - INTEGER, - INTERSECT, - INTERSECTION, - INTERVAL, - INTO, - IS, - JOIN, - LAG, - LANGUAGE, - LARGE, - LAST_VALUE, - LATERAL, - LEAD, - LEADING, - LEFT, - LIKE, - LIKE_REGEX, - LN, - LOCAL, - LOCALTIME, - LOCALTIMESTAMP, - LOWER, - MATCH, - MAX, - MEMBER, - MERGE, - METHOD, - MIN, - MINUTE, - MOD, - MODIFIES, - MODULE, - MONTH, - MULTISET, - NATIONAL, - NATURAL, - NCHAR, - NCLOB, - NEW, - NO, - NONE, - NORMALIZE, - NOT, - NTH_VALUE, - NTILE, - NULL, - NULLIF, - NUMERIC, - OCTET_LENGTH, - OCCURRENCES_REGEX, - OF, - OFFSET, - OLD, - ON, - ONLY, - OPEN, - OR, - ORDER, - OUT, - OUTER, - OVER, - OVERLAPS, - OVERLAY, - PARAMETER, - PARTITION, - PERCENT, - PERCENT_RANK, - PERCENTILE_CONT, - PERCENTILE_DISC, - PERIOD, - PORTION, - POSITION, - POSITION_REGEX, - POWER, - PRECEDES, - PRECISION, - PREPARE, - PRIMARY, - PROCEDURE, - RANGE, - RANK, - READS, - REAL, - RECURSIVE, - REF, - REFERENCES, - REFERENCING, - REGR_AVGX, - REGR_AVGY, - REGR_COUNT, - REGR_INTERCEPT, - REGR_R2, - REGR_SLOPE, - REGR_SXX, - REGR_SXY, - REGR_SYY, - RELEASE, - RESULT, - RETURN, - RETURNS, - REVOKE, - RIGHT, - ROLLBACK, - ROLLUP, - ROW, - ROW_NUMBER, - ROWS, - SAVEPOINT, - SCOPE, - SCROLL, - SEARCH, - SECOND, - SELECT, - SENSITIVE, - SESSION_USER, - SET, - SIMILAR, - SMALLINT, - SOME, - SPECIFIC, - SPECIFICTYPE, - SQL, - SQLEXCEPTION, - SQLSTATE, - SQLWARNING, - SQRT, - START, - STATIC, - STDDEV_POP, - STDDEV_SAMP, - SUBMULTISET, - SUBSTRING, - SUBSTRING_REGEX, - SUCCEEDS, - SUM, - SYMMETRIC, - SYSTEM, - SYSTEM_TIME, - SYSTEM_USER, - TABLE, - TABLESAMPLE, - THEN, - TIME, - TIMESTAMP, - TIMEZONE_HOUR, - TIMEZONE_MINUTE, - TO, - TRAILING, - TRANSLATE, - TRANSLATE_REGEX, - TRANSLATION, - TREAT, - TRIGGER, - TRUNCATE, - TRIM, - TRIM_ARRAY, - TRUE, - UESCAPE, - 
UNION, - UNIQUE, - UNKNOWN, - UNNEST, - UPDATE, - UPPER, - USER, - USING, - VALUE, - VALUES, - VALUE_OF, - VAR_POP, - VAR_SAMP, - VARBINARY, - VARCHAR, - VARYING, - VERSIONING, - WHEN, - WHENEVER, - WHERE, - WIDTH_BUCKET, - WINDOW, - WITH, - WITHIN, - WITHOUT, - YEAR, - ]; - } - fn is_identifier_start(&self, ch: char) -> bool { (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') } diff --git a/src/dialect/generic_sql.rs b/src/dialect/generic_sql.rs index 0a546d38..54275d69 100644 --- a/src/dialect/generic_sql.rs +++ b/src/dialect/generic_sql.rs @@ -1,21 +1,7 @@ use dialect::Dialect; - -use dialect::keywords::*; pub struct GenericSqlDialect {} impl Dialect for GenericSqlDialect { - fn keywords(&self) -> Vec<&'static str> { - return vec![ - SELECT, FROM, WHERE, LIMIT, ORDER, GROUP, BY, HAVING, UNION, ALL, INSERT, INTO, UPDATE, - DELETE, IN, IS, NULL, SET, CREATE, EXTERNAL, TABLE, ASC, DESC, AND, OR, NOT, AS, - STORED, CSV, PARQUET, LOCATION, WITH, WITHOUT, HEADER, ROW, // SQL types - CHAR, CHARACTER, VARYING, LARGE, OBJECT, VARCHAR, CLOB, BINARY, VARBINARY, BLOB, FLOAT, - REAL, DOUBLE, PRECISION, INT, INTEGER, SMALLINT, BIGINT, NUMERIC, DECIMAL, DEC, - BOOLEAN, DATE, TIME, TIMESTAMP, CASE, WHEN, THEN, ELSE, END, JOIN, LEFT, RIGHT, FULL, - CROSS, OUTER, INNER, NATURAL, ON, USING, LIKE, CAST, - ]; - } - fn is_identifier_start(&self, ch: char) -> bool { (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '@' } diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index e4683724..42e4c6a5 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -1,12 +1,21 @@ -/// make a listing of keywords -/// with static str and their stringified value +///! This module defines a list of constants for every keyword that +/// can appear in SQLWord::keyword: +/// pub const KEYWORD = "KEYWORD" +/// and an `ALL_KEYWORDS` array with every keyword in it. 
+/// +/// This is not a list of *reserved* keywords: some of these can be +/// parsed as identifiers if the parser decides so. This means that +/// new keywords can be added here without affecting the parse result. +/// +/// As a matter of fact, most of these keywords are not used at all +/// and could be removed. + macro_rules! keyword { ($($ident:ident),*) => { - $(pub static $ident: &'static str = stringify!($ident);)* + $(pub const $ident: &'static str = stringify!($ident);)* } } -/// enumerate all the keywords here for all dialects to support in this project keyword!( ABS, ADD, @@ -352,4 +361,349 @@ keyword!( ); /// special case of keyword where the it is an invalid identifier -pub static END_EXEC: &'static str = "END-EXEC"; +pub const END_EXEC: &'static str = "END-EXEC"; + +pub const ALL_KEYWORDS: &'static [&'static str] = &[ + ABS, + ADD, + ASC, + ALL, + ALLOCATE, + ALTER, + AND, + ANY, + ARE, + ARRAY, + ARRAY_AGG, + ARRAY_MAX_CARDINALITY, + AS, + ASENSITIVE, + ASYMMETRIC, + AT, + ATOMIC, + AUTHORIZATION, + AVG, + BEGIN, + BEGIN_FRAME, + BEGIN_PARTITION, + BETWEEN, + BIGINT, + BINARY, + BLOB, + BOOLEAN, + BOTH, + BY, + BYTEA, + CALL, + CALLED, + CARDINALITY, + CASCADED, + CASE, + CAST, + CEIL, + CEILING, + CHAR, + CHAR_LENGTH, + CHARACTER, + CHARACTER_LENGTH, + CHECK, + CLOB, + CLOSE, + COALESCE, + COLLATE, + COLLECT, + COLUMN, + COMMIT, + CONDITION, + CONNECT, + CONSTRAINT, + CONTAINS, + CONVERT, + COPY, + CORR, + CORRESPONDING, + COUNT, + COVAR_POP, + COVAR_SAMP, + CREATE, + CROSS, + CSV, + CUBE, + CUME_DIST, + CURRENT, + CURRENT_CATALOG, + CURRENT_DATE, + CURRENT_DEFAULT_TRANSFORM_GROUP, + CURRENT_PATH, + CURRENT_ROLE, + CURRENT_ROW, + CURRENT_SCHEMA, + CURRENT_TIME, + CURRENT_TIMESTAMP, + CURRENT_TRANSFORM_GROUP_FOR_TYPE, + CURRENT_USER, + CURSOR, + CYCLE, + DATE, + DAY, + DEALLOCATE, + DEC, + DECIMAL, + DECLARE, + DEFAULT, + DELETE, + DENSE_RANK, + DEREF, + DESC, + DESCRIBE, + DETERMINISTIC, + DISCONNECT, + DISTINCT, + DOUBLE, + DROP, + DYNAMIC, + EACH, 
+ ELEMENT, + ELSE, + END, + END_FRAME, + END_PARTITION, + EQUALS, + ESCAPE, + EVERY, + EXCEPT, + EXEC, + EXECUTE, + EXISTS, + EXP, + EXTERNAL, + EXTRACT, + FALSE, + FETCH, + FILTER, + FIRST_VALUE, + FLOAT, + FLOOR, + FOR, + FOREIGN, + FRAME_ROW, + FREE, + FROM, + FULL, + FUNCTION, + FUSION, + GET, + GLOBAL, + GRANT, + GROUP, + GROUPING, + GROUPS, + HAVING, + HEADER, + HOLD, + HOUR, + IDENTITY, + IN, + INDICATOR, + INNER, + INOUT, + INSENSITIVE, + INSERT, + INT, + INTEGER, + INTERSECT, + INTERSECTION, + INTERVAL, + INTO, + IS, + JOIN, + KEY, + LAG, + LANGUAGE, + LARGE, + LAST_VALUE, + LATERAL, + LEAD, + LEADING, + LEFT, + LIKE, + LIKE_REGEX, + LIMIT, + LN, + LOCAL, + LOCALTIME, + LOCALTIMESTAMP, + LOCATION, + LOWER, + MATCH, + MAX, + MEMBER, + MERGE, + METHOD, + MIN, + MINUTE, + MOD, + MODIFIES, + MODULE, + MONTH, + MULTISET, + NATIONAL, + NATURAL, + NCHAR, + NCLOB, + NEW, + NO, + NONE, + NORMALIZE, + NOT, + NTH_VALUE, + NTILE, + NULL, + NULLIF, + NUMERIC, + OBJECT, + OCTET_LENGTH, + OCCURRENCES_REGEX, + OF, + OFFSET, + OLD, + ON, + ONLY, + OPEN, + OR, + ORDER, + OUT, + OUTER, + OVER, + OVERLAPS, + OVERLAY, + PARAMETER, + PARTITION, + PARQUET, + PERCENT, + PERCENT_RANK, + PERCENTILE_CONT, + PERCENTILE_DISC, + PERIOD, + PORTION, + POSITION, + POSITION_REGEX, + POWER, + PRECEDES, + PRECISION, + PREPARE, + PRIMARY, + PROCEDURE, + RANGE, + RANK, + READS, + REAL, + RECURSIVE, + REF, + REFERENCES, + REFERENCING, + REGCLASS, + REGR_AVGX, + REGR_AVGY, + REGR_COUNT, + REGR_INTERCEPT, + REGR_R2, + REGR_SLOPE, + REGR_SXX, + REGR_SXY, + REGR_SYY, + RELEASE, + RESULT, + RETURN, + RETURNS, + REVOKE, + RIGHT, + ROLLBACK, + ROLLUP, + ROW, + ROW_NUMBER, + ROWS, + SAVEPOINT, + SCOPE, + SCROLL, + SEARCH, + SECOND, + SELECT, + SENSITIVE, + SESSION_USER, + SET, + SIMILAR, + SMALLINT, + SOME, + SPECIFIC, + SPECIFICTYPE, + SQL, + SQLEXCEPTION, + SQLSTATE, + SQLWARNING, + SQRT, + START, + STATIC, + STDDEV_POP, + STDDEV_SAMP, + STDIN, + STORED, + SUBMULTISET, + SUBSTRING, + SUBSTRING_REGEX, 
+ SUCCEEDS, + SUM, + SYMMETRIC, + SYSTEM, + SYSTEM_TIME, + SYSTEM_USER, + TABLE, + TABLESAMPLE, + TEXT, + THEN, + TIME, + TIMESTAMP, + TIMEZONE_HOUR, + TIMEZONE_MINUTE, + TO, + TRAILING, + TRANSLATE, + TRANSLATE_REGEX, + TRANSLATION, + TREAT, + TRIGGER, + TRUNCATE, + TRIM, + TRIM_ARRAY, + TRUE, + UESCAPE, + UNION, + UNIQUE, + UNKNOWN, + UNNEST, + UPDATE, + UPPER, + USER, + USING, + UUID, + VALUE, + VALUES, + VALUE_OF, + VAR_POP, + VAR_SAMP, + VARBINARY, + VARCHAR, + VARYING, + VERSIONING, + WHEN, + WHENEVER, + WHERE, + WIDTH_BUCKET, + WINDOW, + WITH, + WITHIN, + WITHOUT, + YEAR, + ZONE, + END_EXEC, +]; diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 1a704f00..3298a1de 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -8,8 +8,6 @@ pub use self::generic_sql::GenericSqlDialect; pub use self::postgresql::PostgreSqlDialect; pub trait Dialect { - /// Get a list of keywords for this dialect - fn keywords(&self) -> Vec<&'static str>; /// Determine if a character is a valid identifier start character fn is_identifier_start(&self, ch: char) -> bool; /// Determine if a character is a valid identifier character diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index 5535fb14..2b64c1f0 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -1,24 +1,8 @@ use dialect::Dialect; -use dialect::keywords::*; - pub struct PostgreSqlDialect {} impl Dialect for PostgreSqlDialect { - fn keywords(&self) -> Vec<&'static str> { - return vec![ - ALTER, ONLY, SELECT, FROM, WHERE, LIMIT, ORDER, GROUP, BY, HAVING, UNION, ALL, INSERT, - INTO, UPDATE, DELETE, IN, IS, NULL, SET, CREATE, EXTERNAL, TABLE, ASC, DESC, AND, OR, - NOT, AS, STORED, CSV, WITH, WITHOUT, ROW, // SQL types - CHAR, CHARACTER, VARYING, LARGE, VARCHAR, CLOB, BINARY, VARBINARY, BLOB, FLOAT, REAL, - DOUBLE, PRECISION, INT, INTEGER, SMALLINT, BIGINT, NUMERIC, DECIMAL, DEC, BOOLEAN, - DATE, TIME, TIMESTAMP, VALUES, DEFAULT, ZONE, REGCLASS, TEXT, BYTEA, TRUE, FALSE, COPY, - 
STDIN, PRIMARY, KEY, UNIQUE, UUID, ADD, CONSTRAINT, FOREIGN, REFERENCES, CASE, WHEN, - THEN, ELSE, END, JOIN, LEFT, RIGHT, FULL, CROSS, OUTER, INNER, NATURAL, ON, USING, - LIKE, CAST, - ]; - } - fn is_identifier_start(&self, ch: char) -> bool { (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '@' } diff --git a/src/sqltokenizer.rs b/src/sqltokenizer.rs index d55daa26..0095e505 100644 --- a/src/sqltokenizer.rs +++ b/src/sqltokenizer.rs @@ -21,6 +21,7 @@ use std::iter::Peekable; use std::str::Chars; +use super::dialect::keywords::ALL_KEYWORDS; use super::dialect::Dialect; /// SQL Token enumeration @@ -124,17 +125,22 @@ impl ToString for Token { impl Token { pub fn make_keyword(keyword: &str) -> Self { - Token::SQLWord(SQLWord { - value: keyword.to_string(), - quote_style: None, - keyword: keyword.to_uppercase().to_string(), - }) + Token::make_word(keyword, None) } pub fn make_word(word: &str, quote_style: Option) -> Self { + let word_uppercase = word.to_uppercase(); + //TODO: need to reintroduce FnvHashSet at some point .. iterating over keywords is + // not fast but I want the simplicity for now while I experiment with pluggable + // dialects + let is_keyword = quote_style == None && ALL_KEYWORDS.contains(&word_uppercase.as_str()); Token::SQLWord(SQLWord { value: word.to_string(), quote_style: quote_style, - keyword: "".to_string(), + keyword: if is_keyword { + word_uppercase.to_string() + } else { + "".to_string() + }, }) } } @@ -205,13 +211,6 @@ impl<'a> Tokenizer<'a> { } } - fn is_keyword(&self, s: &str) -> bool { - //TODO: need to reintroduce FnvHashSet at some point .. 
iterating over keywords is - // not fast but I want the simplicity for now while I experiment with pluggable - // dialects - return self.dialect.keywords().contains(&s); - } - /// Tokenize the statement and produce a vector of tokens pub fn tokenize(&mut self) -> Result, TokenizerError> { let mut peekable = self.query.chars().peekable(); @@ -268,16 +267,7 @@ impl<'a> Tokenizer<'a> { break; } } - let upper_str = s.to_uppercase(); - if self.is_keyword(upper_str.as_str()) { - Ok(Some(Token::SQLWord(SQLWord { - value: s, - quote_style: None, - keyword: upper_str, - }))) - } else { - Ok(Some(Token::make_word(&s, None))) - } + Ok(Some(Token::make_word(&s, None))) } // string '\'' => { From 8c3479969f4b67f8aaceade56c4fb23d5b908c91 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Mon, 14 Jan 2019 00:42:07 +0300 Subject: [PATCH 06/11] Simplify by using expect_keyword / expect_token (3/8) ...instead of parse_keyword / consume_token - to reduce nesting of `if`s. (Follow-up to PR #35) --- src/sqlparser.rs | 113 ++++++++++++++++++++--------------------------- 1 file changed, 49 insertions(+), 64 deletions(-) diff --git a/src/sqlparser.rs b/src/sqlparser.rs index 35af605a..613690da 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -589,19 +589,16 @@ impl Parser { } else if is_unique_key { Ok(TableKey::UniqueKey(key)) } else if is_foreign_key { - if self.parse_keyword("REFERENCES") { - let foreign_table = self.parse_tablename()?; - self.expect_token(&Token::LParen)?; - let referred_columns = self.parse_column_names()?; - self.expect_token(&Token::RParen)?; - Ok(TableKey::ForeignKey { - key, - foreign_table, - referred_columns, - }) - } else { - parser_err!("Expecting references") - } + self.expect_keyword("REFERENCES")?; + let foreign_table = self.parse_tablename()?; + self.expect_token(&Token::LParen)?; + let referred_columns = self.parse_column_names()?; + self.expect_token(&Token::RParen)?; + Ok(TableKey::ForeignKey { + key, + foreign_table, + referred_columns, + 
}) } else { parser_err!(format!( "Expecting primary key, unique key, or foreign key, found: {:?}", @@ -611,39 +608,33 @@ impl Parser { } pub fn parse_alter(&mut self) -> Result { - if self.parse_keyword("TABLE") { - let _ = self.parse_keyword("ONLY"); - let table_name = self.parse_tablename()?; - let operation: Result = - if self.parse_keywords(vec!["ADD", "CONSTRAINT"]) { - match self.next_token() { - Some(Token::SQLWord(ref id)) => { - let table_key = self.parse_table_key(&id.value)?; - Ok(AlterOperation::AddConstraint(table_key)) - } - _ => { - return parser_err!(format!( - "Expecting identifier, found : {:?}", - self.peek_token() - )); - } + self.expect_keyword("TABLE")?; + let _ = self.parse_keyword("ONLY"); + let table_name = self.parse_tablename()?; + let operation: Result = + if self.parse_keywords(vec!["ADD", "CONSTRAINT"]) { + match self.next_token() { + Some(Token::SQLWord(ref id)) => { + let table_key = self.parse_table_key(&id.value)?; + Ok(AlterOperation::AddConstraint(table_key)) } - } else { - return parser_err!(format!( - "Expecting ADD CONSTRAINT, found :{:?}", - self.peek_token() - )); - }; - Ok(ASTNode::SQLAlterTable { - name: table_name, - operation: operation?, - }) - } else { - parser_err!(format!( - "Expecting TABLE after ALTER, found {:?}", - self.peek_token() - )) - } + _ => { + return parser_err!(format!( + "Expecting identifier, found : {:?}", + self.peek_token() + )); + } + } + } else { + return parser_err!(format!( + "Expecting ADD CONSTRAINT, found :{:?}", + self.peek_token() + )); + }; + Ok(ASTNode::SQLAlterTable { + name: table_name, + operation: operation?, + }) } /// Parse a copy statement @@ -1142,26 +1133,20 @@ impl Parser { let constraint = self.parse_expr(0)?; Ok(JoinConstraint::On(constraint)) } else if self.parse_keyword("USING") { - if self.consume_token(&Token::LParen) { - let attributes = self - .parse_expr_list()? 
- .into_iter() - .map(|ast_node| match ast_node { - ASTNode::SQLIdentifier(ident) => Ok(ident), - unexpected => { - parser_err!(format!("Expected identifier, found {:?}", unexpected)) - } - }) - .collect::, ParserError>>()?; + self.expect_token(&Token::LParen)?; + let attributes = self + .parse_expr_list()? + .into_iter() + .map(|ast_node| match ast_node { + ASTNode::SQLIdentifier(ident) => Ok(ident), + unexpected => { + parser_err!(format!("Expected identifier, found {:?}", unexpected)) + } + }) + .collect::, ParserError>>()?; - if self.consume_token(&Token::RParen) { - Ok(JoinConstraint::Using(attributes)) - } else { - parser_err!(format!("Expected token ')', found {:?}", self.peek_token())) - } - } else { - parser_err!(format!("Expected token '(', found {:?}", self.peek_token())) - } + self.expect_token(&Token::RParen)?; + Ok(JoinConstraint::Using(attributes)) } else { parser_err!(format!( "Unexpected token after JOIN: {:?}", From 991fd19b87df60d433fd903c3e13a31418426b09 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Sun, 13 Jan 2019 20:36:29 +0300 Subject: [PATCH 07/11] Stop nesting `match` in parse_compound_identifier (4/8) --- src/sqlparser.rs | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/src/sqlparser.rs b/src/sqlparser.rs index 613690da..ad5a1c97 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -951,30 +951,24 @@ impl Parser { loop { let token = &self.next_token(); match token { - Some(token) => match token { - Token::SQLWord(s) => { - if expect_identifier { - expect_identifier = false; - idents.push(s.to_string()); - } else { - self.prev_token(); - break; - } - } - token if token == separator => { - if expect_identifier { - return parser_err!(format!("Expecting identifier, found {:?}", token)); - } else { - expect_identifier = true; - continue; - } - } - _ => { + Some(Token::SQLWord(s)) => { + if expect_identifier { + expect_identifier = false; + idents.push(s.to_string()); + } else 
{ self.prev_token(); break; } - }, - None => { + } + Some(token) if token == separator => { + if expect_identifier { + return parser_err!(format!("Expecting identifier, found {:?}", token)); + } else { + expect_identifier = true; + continue; + } + } + _ => { self.prev_token(); break; } From 7bbf69f51390f2738af2ed6207f390ceddf91b64 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Sun, 13 Jan 2019 20:54:11 +0300 Subject: [PATCH 08/11] Further simplify parse_compound_identifier (5/8) This part changes behavior: - Fail when no identifier is found. - Avoid rewinding if EOF was hit right after the identifier. --- src/sqlparser.rs | 35 ++++++++++++++++++----------------- tests/sqlparser_postgres.rs | 7 +++++++ 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/src/sqlparser.rs b/src/sqlparser.rs index ad5a1c97..cd68c81c 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -945,36 +945,37 @@ impl Parser { } } + /// Parse one or more identifiers with the specified separator between them pub fn parse_compound_identifier(&mut self, separator: &Token) -> Result { let mut idents = vec![]; let mut expect_identifier = true; loop { let token = &self.next_token(); match token { - Some(Token::SQLWord(s)) => { - if expect_identifier { - expect_identifier = false; - idents.push(s.to_string()); - } else { - self.prev_token(); - break; - } + Some(Token::SQLWord(s)) if expect_identifier => { + expect_identifier = false; + idents.push(s.to_string()); } - Some(token) if token == separator => { - if expect_identifier { - return parser_err!(format!("Expecting identifier, found {:?}", token)); - } else { - expect_identifier = true; - continue; - } + Some(token) if token == separator && !expect_identifier => { + expect_identifier = true; + continue; } _ => { - self.prev_token(); + if token.is_some() { + self.prev_token(); + } break; } } } - Ok(ASTNode::SQLCompoundIdentifier(idents)) + if expect_identifier { + parser_err!(format!( + "Expecting identifier, found {:?}", + 
self.peek_token() + )) + } else { + Ok(ASTNode::SQLCompoundIdentifier(idents)) + } } pub fn parse_tablename(&mut self) -> Result { diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 55eb1271..11b8cb5f 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -103,6 +103,13 @@ fn parse_invalid_table_name() { assert!(ast.is_err()); } +#[test] +fn parse_no_table_name() { + let mut parser = parser(""); + let ast = parser.parse_tablename(); + assert!(ast.is_err()); +} + #[test] fn parse_insert_with_columns() { let sql = String::from("INSERT INTO public.customer (id, name, active) VALUES(1, 2, 3)"); From 536fa6e428d733071c352fbd105388834197385a Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Mon, 21 Jan 2019 01:28:41 +0300 Subject: [PATCH 09/11] Support `AS` table aliases (6/8) A "table factor" (name borrowed from the ANSI SQL grammar) is a table name or a derived table (subquery), followed by an optional `AS` and an optional alias. (The alias is *not* optional for subqueries, but we don't enforce that.) It can appear in the FROM/JOIN part of the query. This commit: - introduces ASTNode::TableFactor - changes the parser to populate SQLSelect::relation and Join::relation with ASTNode::TableFactor instead of the table name - changes the parser to only accept subqueries or identifiers, not arbitrary expressions in the "table factor" context - always parses the table name as SQLCompoundIdentifier (whether or not it was actually compound). 
--- src/sqlast/mod.rs | 16 +++++++++-- src/sqlparser.rs | 59 ++++++++++++++++++++++++++++++++------ tests/sqlparser_generic.rs | 59 +++++++++++++++++++++++++++++--------- 3 files changed, 111 insertions(+), 23 deletions(-) diff --git a/src/sqlast/mod.rs b/src/sqlast/mod.rs index 5c5d8c39..dbf4b9d4 100644 --- a/src/sqlast/mod.rs +++ b/src/sqlast/mod.rs @@ -72,12 +72,17 @@ pub enum ASTNode { results: Vec, else_result: Option>, }, + /// A table name or a parenthesized subquery with an optional alias + TableFactor { + relation: Box, // SQLNested or SQLCompoundIdentifier + alias: Option, + }, /// SELECT SQLSelect { /// projection expressions projection: Vec, /// FROM - relation: Option>, + relation: Option>, // TableFactor // JOIN joins: Vec, /// WHERE @@ -191,6 +196,13 @@ impl ToString for ASTNode { } s + " END" } + ASTNode::TableFactor { relation, alias } => { + if let Some(alias) = alias { + format!("{} AS {}", relation.to_string(), alias) + } else { + relation.to_string() + } + } ASTNode::SQLSelect { projection, relation, @@ -420,7 +432,7 @@ impl ToString for SQLColumnDef { #[derive(Debug, Clone, PartialEq)] pub struct Join { - pub relation: ASTNode, + pub relation: ASTNode, // TableFactor pub join_operator: JoinOperator, } diff --git a/src/sqlparser.rs b/src/sqlparser.rs index cd68c81c..3d0451b0 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -945,6 +945,34 @@ impl Parser { } } + /// Parse `AS identifier` (or simply `identifier` if it's not a reserved keyword) + /// Some examples with aliases: `SELECT 1 foo`, `SELECT COUNT(*) AS cnt`, + /// `SELECT ... FROM t1 foo, t2 bar`, `SELECT ... FROM (...) AS bar` + pub fn parse_optional_alias( + &mut self, + ) -> Result, ParserError> { + let after_as = self.parse_keyword("AS"); + let maybe_alias = self.next_token(); + match maybe_alias { + // Accept any identifier after `AS` (though many dialects have restrictions on + // keywords that may appear here). 
+ Some(Token::SQLWord(ref w)) if after_as => + { + // have to clone here until #![feature(bind_by_move_pattern_guards)] is enabled by default + Ok(Some(w.value.clone())) + } + ref not_an_ident if after_as => parser_err!(format!( + "Expected an identifier after AS, got {:?}", + not_an_ident + )), + Some(_not_an_ident) => { + self.prev_token(); + Ok(None) // no alias found + } + None => Ok(None), + } + } + /// Parse one or more identifiers with the specified separator between them pub fn parse_compound_identifier(&mut self, separator: &Token) -> Result { let mut idents = vec![]; @@ -1062,7 +1090,7 @@ impl Parser { let projection = self.parse_expr_list()?; let (relation, joins): (Option>, Vec) = if self.parse_keyword("FROM") { - let relation = Some(Box::new(self.parse_expr(0)?)); + let relation = Some(Box::new(self.parse_table_factor()?)); let joins = self.parse_joins()?; (relation, joins) } else { @@ -1121,6 +1149,21 @@ impl Parser { } } + /// A table name or a parenthesized subquery, followed by optional `[AS] alias` + pub fn parse_table_factor(&mut self) -> Result { + let relation = if self.consume_token(&Token::LParen) { + self.prev_token(); + self.parse_expr(0)? + } else { + self.parse_compound_identifier(&Token::Period)? 
+ }; + let alias = self.parse_optional_alias()?; + Ok(ASTNode::TableFactor { + relation: Box::new(relation), + alias, + }) + } + fn parse_join_constraint(&mut self, natural: bool) -> Result { if natural { Ok(JoinConstraint::Natural) @@ -1156,7 +1199,7 @@ impl Parser { let natural = match &self.peek_token() { Some(Token::Comma) => { self.next_token(); - let relation = self.parse_expr(0)?; + let relation = self.parse_table_factor()?; let join = Join { relation, join_operator: JoinOperator::Implicit, @@ -1167,7 +1210,7 @@ impl Parser { Some(Token::SQLWord(kw)) if kw.keyword == "CROSS" => { self.next_token(); self.expect_keyword("JOIN")?; - let relation = self.parse_expr(0)?; + let relation = self.parse_table_factor()?; let join = Join { relation, join_operator: JoinOperator::Cross, @@ -1188,14 +1231,14 @@ impl Parser { self.next_token(); self.expect_keyword("JOIN")?; Join { - relation: self.parse_expr(0)?, + relation: self.parse_table_factor()?, join_operator: JoinOperator::Inner(self.parse_join_constraint(natural)?), } } Some(Token::SQLWord(kw)) if kw.keyword == "JOIN" => { self.next_token(); Join { - relation: self.parse_expr(0)?, + relation: self.parse_table_factor()?, join_operator: JoinOperator::Inner(self.parse_join_constraint(natural)?), } } @@ -1204,7 +1247,7 @@ impl Parser { let _ = self.parse_keyword("OUTER"); self.expect_keyword("JOIN")?; Join { - relation: self.parse_expr(0)?, + relation: self.parse_table_factor()?, join_operator: JoinOperator::LeftOuter( self.parse_join_constraint(natural)?, ), @@ -1215,7 +1258,7 @@ impl Parser { let _ = self.parse_keyword("OUTER"); self.expect_keyword("JOIN")?; Join { - relation: self.parse_expr(0)?, + relation: self.parse_table_factor()?, join_operator: JoinOperator::RightOuter( self.parse_join_constraint(natural)?, ), @@ -1226,7 +1269,7 @@ impl Parser { let _ = self.parse_keyword("OUTER"); self.expect_keyword("JOIN")?; Join { - relation: self.parse_expr(0)?, + relation: self.parse_table_factor()?, join_operator: 
JoinOperator::FullOuter( self.parse_join_constraint(natural)?, ), diff --git a/tests/sqlparser_generic.rs b/tests/sqlparser_generic.rs index 04ba8754..d850089a 100644 --- a/tests/sqlparser_generic.rs +++ b/tests/sqlparser_generic.rs @@ -539,7 +539,10 @@ fn parse_implicit_join() { assert_eq!( joins[0], Join { - relation: ASTNode::SQLIdentifier("t2".to_string()), + relation: ASTNode::TableFactor { + relation: Box::new(ASTNode::SQLCompoundIdentifier(vec!["t2".to_string()])), + alias: None, + }, join_operator: JoinOperator::Implicit } ) @@ -558,7 +561,10 @@ fn parse_cross_join() { assert_eq!( joins[0], Join { - relation: ASTNode::SQLIdentifier("t2".to_string()), + relation: ASTNode::TableFactor { + relation: Box::new(ASTNode::SQLCompoundIdentifier(vec!["t2".to_string()])), + alias: None, + }, join_operator: JoinOperator::Cross } ) @@ -571,10 +577,14 @@ fn parse_cross_join() { fn parse_joins_on() { fn join_with_constraint( relation: impl Into, + alias: Option, f: impl Fn(JoinConstraint) -> JoinOperator, ) -> Join { Join { - relation: ASTNode::SQLIdentifier(relation.into()), + relation: ASTNode::TableFactor { + relation: Box::new(ASTNode::SQLCompoundIdentifier(vec![relation.into()])), + alias, + }, join_operator: f(JoinConstraint::On(ASTNode::SQLBinaryExpr { left: Box::new(ASTNode::SQLIdentifier("c1".into())), op: SQLOperator::Eq, @@ -582,21 +592,31 @@ fn parse_joins_on() { })), } } + // Test parsing of aliases + assert_eq!( + joins_from(verified("SELECT * FROM t1 JOIN t2 AS foo ON c1 = c2")), + vec![join_with_constraint( + "t2", + Some("foo".to_string()), + JoinOperator::Inner + )] + ); + // Test parsing of different join operators assert_eq!( joins_from(verified("SELECT * FROM t1 JOIN t2 ON c1 = c2")), - vec![join_with_constraint("t2", JoinOperator::Inner)] + vec![join_with_constraint("t2", None, JoinOperator::Inner)] ); assert_eq!( joins_from(verified("SELECT * FROM t1 LEFT JOIN t2 ON c1 = c2")), - vec![join_with_constraint("t2", JoinOperator::LeftOuter)] + 
vec![join_with_constraint("t2", None, JoinOperator::LeftOuter)] ); assert_eq!( joins_from(verified("SELECT * FROM t1 RIGHT JOIN t2 ON c1 = c2")), - vec![join_with_constraint("t2", JoinOperator::RightOuter)] + vec![join_with_constraint("t2", None, JoinOperator::RightOuter)] ); assert_eq!( joins_from(verified("SELECT * FROM t1 FULL JOIN t2 ON c1 = c2")), - vec![join_with_constraint("t2", JoinOperator::FullOuter)] + vec![join_with_constraint("t2", None, JoinOperator::FullOuter)] ); } @@ -604,29 +624,42 @@ fn parse_joins_on() { fn parse_joins_using() { fn join_with_constraint( relation: impl Into, + alias: Option, f: impl Fn(JoinConstraint) -> JoinOperator, ) -> Join { Join { - relation: ASTNode::SQLIdentifier(relation.into()), + relation: ASTNode::TableFactor { + relation: Box::new(ASTNode::SQLCompoundIdentifier(vec![relation.into()])), + alias, + }, join_operator: f(JoinConstraint::Using(vec!["c1".into()])), } } - + // Test parsing of aliases + assert_eq!( + joins_from(verified("SELECT * FROM t1 JOIN t2 AS foo USING(c1)")), + vec![join_with_constraint( + "t2", + Some("foo".to_string()), + JoinOperator::Inner + )] + ); + // Test parsing of different join operators assert_eq!( joins_from(verified("SELECT * FROM t1 JOIN t2 USING(c1)")), - vec![join_with_constraint("t2", JoinOperator::Inner)] + vec![join_with_constraint("t2", None, JoinOperator::Inner)] ); assert_eq!( joins_from(verified("SELECT * FROM t1 LEFT JOIN t2 USING(c1)")), - vec![join_with_constraint("t2", JoinOperator::LeftOuter)] + vec![join_with_constraint("t2", None, JoinOperator::LeftOuter)] ); assert_eq!( joins_from(verified("SELECT * FROM t1 RIGHT JOIN t2 USING(c1)")), - vec![join_with_constraint("t2", JoinOperator::RightOuter)] + vec![join_with_constraint("t2", None, JoinOperator::RightOuter)] ); assert_eq!( joins_from(verified("SELECT * FROM t1 FULL JOIN t2 USING(c1)")), - vec![join_with_constraint("t2", JoinOperator::FullOuter)] + vec![join_with_constraint("t2", None, JoinOperator::FullOuter)] ); } 
From 76ec175d20678771ee69e22b46b530d4b73217af Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Mon, 21 Jan 2019 01:28:59 +0300 Subject: [PATCH 10/11] Support table aliases without `AS` (7/8) ...as in `FROM foo bar WHERE bar.x > 1`. To avoid ambiguity as to whether a token is an alias or a keyword, we maintain a blacklist of keywords, that can follow a "table factor", to prevent parsing them as an alias. This "context-specific reserved keyword" approach lets us accept more SQL that's valid in some dialects, than a list of globally reserved keywords. Also some dialects (e.g. Oracle) apparently don't reserve some keywords (like JOIN), while presumably they won't accept them as an alias (`FROM foo JOIN` meaning `FROM foo AS JOIN`). --- src/dialect/keywords.rs | 27 +++++++++++++++++++-------- src/sqlparser.rs | 12 +++++++++--- tests/sqlparser_generic.rs | 8 ++++++++ 3 files changed, 36 insertions(+), 11 deletions(-) diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index 42e4c6a5..7c7eb494 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -1,14 +1,16 @@ -///! This module defines a list of constants for every keyword that +///! This module defines +/// 1) a list of constants for every keyword that /// can appear in SQLWord::keyword: /// pub const KEYWORD = "KEYWORD" -/// and an `ALL_KEYWORDS` array with every keyword in it. +/// 2) an `ALL_KEYWORDS` array with every keyword in it +/// This is not a list of *reserved* keywords: some of these can be +/// parsed as identifiers if the parser decides so. This means that +/// new keywords can be added here without affecting the parse result. /// -/// This is not a list of *reserved* keywords: some of these can be -/// parsed as identifiers if the parser decides so. This means that -/// new keywords can be added here without affecting the parse result. -/// -/// As a matter of fact, most of these keywords are not used at all -/// and could be removed. 
+/// As a matter of fact, most of these keywords are not used at all +/// and could be removed. +/// 3) a `RESERVED_FOR_TABLE_ALIAS` array with keywords reserved in a +/// "table alias" context. macro_rules! keyword { ($($ident:ident),*) => { @@ -707,3 +709,12 @@ pub const ALL_KEYWORDS: &'static [&'static str] = &[ ZONE, END_EXEC, ]; + +/// These keywords can't be used as a table alias, so that `FROM table_name alias` +/// can be parsed unambiguously without looking ahead. +pub const RESERVED_FOR_TABLE_ALIAS: &'static [&'static str] = &[ + WHERE, GROUP, ON, // keyword is 'reserved' in most dialects + JOIN, INNER, CROSS, FULL, LEFT, RIGHT, // not reserved in Oracle + NATURAL, USING, // not reserved in Oracle & MSSQL + // UNION, EXCEPT, INTERSECT, ORDER // TODO add these with tests. +]; diff --git a/src/sqlparser.rs b/src/sqlparser.rs index 3d0451b0..33b950de 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -14,6 +14,7 @@ //! SQL Parser +use super::dialect::keywords; use super::dialect::Dialect; use super::sqlast::*; use super::sqltokenizer::*; @@ -950,13 +951,18 @@ impl Parser { /// `SELECT ... FROM t1 foo, t2 bar`, `SELECT ... FROM (...) AS bar` pub fn parse_optional_alias( &mut self, + reserved_kwds: &[&str], ) -> Result, ParserError> { let after_as = self.parse_keyword("AS"); let maybe_alias = self.next_token(); match maybe_alias { // Accept any identifier after `AS` (though many dialects have restrictions on - // keywords that may appear here). - Some(Token::SQLWord(ref w)) if after_as => + // keywords that may appear here). If there's no `AS`: don't parse keywords, + // which may start a construct allowed in this position, to be parsed as aliases. + // (For example, in `FROM t1 JOIN` the `JOIN` will always be parsed as a keyword, + // not an alias.) 
+ Some(Token::SQLWord(ref w)) + if after_as || !reserved_kwds.contains(&w.keyword.as_str()) => { // have to clone here until #![feature(bind_by_move_pattern_guards)] is enabled by default Ok(Some(w.value.clone())) @@ -1157,7 +1163,7 @@ impl Parser { } else { self.parse_compound_identifier(&Token::Period)? }; - let alias = self.parse_optional_alias()?; + let alias = self.parse_optional_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; Ok(ASTNode::TableFactor { relation: Box::new(relation), alias, diff --git a/tests/sqlparser_generic.rs b/tests/sqlparser_generic.rs index d850089a..ec4e26f5 100644 --- a/tests/sqlparser_generic.rs +++ b/tests/sqlparser_generic.rs @@ -601,6 +601,10 @@ fn parse_joins_on() { JoinOperator::Inner )] ); + parses_to( + "SELECT * FROM t1 JOIN t2 foo ON c1 = c2", + "SELECT * FROM t1 JOIN t2 AS foo ON c1 = c2", + ); // Test parsing of different join operators assert_eq!( joins_from(verified("SELECT * FROM t1 JOIN t2 ON c1 = c2")), @@ -644,6 +648,10 @@ fn parse_joins_using() { JoinOperator::Inner )] ); + parses_to( + "SELECT * FROM t1 JOIN t2 foo USING(c1)", + "SELECT * FROM t1 JOIN t2 AS foo USING(c1)", + ); // Test parsing of different join operators assert_eq!( joins_from(verified("SELECT * FROM t1 JOIN t2 USING(c1)")), From 50b5724c39ad922814091caa84e921dfc948f018 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Mon, 21 Jan 2019 00:57:06 +0300 Subject: [PATCH 11/11] Don't parse ORDER BY as a table alias (8/8) --- src/dialect/keywords.rs | 2 +- tests/sqlparser_generic.rs | 48 ++++++++++++++++++++------------------ 2 files changed, 26 insertions(+), 24 deletions(-) diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index 7c7eb494..1a39fe44 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -716,5 +716,5 @@ pub const RESERVED_FOR_TABLE_ALIAS: &'static [&'static str] = &[ WHERE, GROUP, ON, // keyword is 'reserved' in most dialects JOIN, INNER, CROSS, FULL, LEFT, RIGHT, // not reserved in Oracle NATURAL, USING, // 
not reserved in Oracle & MSSQL - // UNION, EXCEPT, INTERSECT, ORDER // TODO add these with tests. + ORDER, // UNION, EXCEPT, INTERSECT, // TODO add these with tests. ]; diff --git a/tests/sqlparser_generic.rs b/tests/sqlparser_generic.rs index ec4e26f5..c57e1d53 100644 --- a/tests/sqlparser_generic.rs +++ b/tests/sqlparser_generic.rs @@ -229,31 +229,33 @@ fn parse_not_like() { #[test] fn parse_select_order_by() { - let sql = String::from( - "SELECT id, fname, lname FROM customer WHERE id < 5 ORDER BY lname ASC, fname DESC, id", - ); - match verified(&sql) { - ASTNode::SQLSelect { order_by, .. } => { - assert_eq!( - Some(vec![ - SQLOrderByExpr { - expr: Box::new(ASTNode::SQLIdentifier("lname".to_string())), - asc: Some(true), - }, - SQLOrderByExpr { - expr: Box::new(ASTNode::SQLIdentifier("fname".to_string())), - asc: Some(false), - }, - SQLOrderByExpr { - expr: Box::new(ASTNode::SQLIdentifier("id".to_string())), - asc: None, - }, - ]), - order_by - ); + fn chk(sql: &str) { + match verified(&sql) { + ASTNode::SQLSelect { order_by, .. } => { + assert_eq!( + Some(vec![ + SQLOrderByExpr { + expr: Box::new(ASTNode::SQLIdentifier("lname".to_string())), + asc: Some(true), + }, + SQLOrderByExpr { + expr: Box::new(ASTNode::SQLIdentifier("fname".to_string())), + asc: Some(false), + }, + SQLOrderByExpr { + expr: Box::new(ASTNode::SQLIdentifier("id".to_string())), + asc: None, + }, + ]), + order_by + ); + } + _ => assert!(false), } - _ => assert!(false), } + chk("SELECT id, fname, lname FROM customer WHERE id < 5 ORDER BY lname ASC, fname DESC, id"); + // make sure ORDER is not treated as an alias + chk("SELECT id, fname, lname FROM customer ORDER BY lname ASC, fname DESC, id"); } #[test]