From dce09f80542f0d0784af2f61240391fa0fa2c773 Mon Sep 17 00:00:00 2001
From: Nickolay Ponomarev
Date: Sun, 13 Jan 2019 00:47:40 +0300
Subject: [PATCH 1/6] Fix a mistake in merge conflict resolution earlier

---
 src/dialect/generic_sql.rs | 3 +--
 src/dialect/postgresql.rs  | 3 ++-
 src/sqlparser.rs           | 1 -
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/dialect/generic_sql.rs b/src/dialect/generic_sql.rs
index 3788a749..0f18b723 100644
--- a/src/dialect/generic_sql.rs
+++ b/src/dialect/generic_sql.rs
@@ -12,8 +12,7 @@ impl Dialect for GenericSqlDialect {
             CHAR, CHARACTER, VARYING, LARGE, OBJECT, VARCHAR, CLOB, BINARY, VARBINARY, BLOB, FLOAT,
             REAL, DOUBLE, PRECISION, INT, INTEGER, SMALLINT, BIGINT, NUMERIC, DECIMAL, DEC, BOOLEAN,
             DATE, TIME, TIMESTAMP, CASE, WHEN, THEN, ELSE, END, JOIN, LEFT, RIGHT, FULL,
-            CROSS, OUTER, INNER, NATURAL, ON, USING,
-            BOOLEAN, DATE, TIME, TIMESTAMP, CASE, WHEN, THEN, ELSE, END, LIKE,
+            CROSS, OUTER, INNER, NATURAL, ON, USING, LIKE,
         ];
     }
diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs
index 0c5f4fd6..66cb51c1 100644
--- a/src/dialect/postgresql.rs
+++ b/src/dialect/postgresql.rs
@@ -14,7 +14,8 @@ impl Dialect for PostgreSqlDialect {
             DOUBLE, PRECISION, INT, INTEGER, SMALLINT, BIGINT, NUMERIC, DECIMAL, DEC, BOOLEAN,
             DATE, TIME, TIMESTAMP, VALUES, DEFAULT, ZONE, REGCLASS, TEXT, BYTEA, TRUE, FALSE,
             COPY, STDIN, PRIMARY, KEY, UNIQUE, UUID, ADD, CONSTRAINT, FOREIGN, REFERENCES, CASE, WHEN,
-            THEN, ELSE, END, JOIN, LEFT, RIGHT, FULL, CROSS, OUTER, INNER, NATURAL, ON, USING, LIKE
+            THEN, ELSE, END, JOIN, LEFT, RIGHT, FULL, CROSS, OUTER, INNER, NATURAL, ON, USING,
+            LIKE,
         ];
     }
diff --git a/src/sqlparser.rs b/src/sqlparser.rs
index 67e0d790..6516a4a2 100644
--- a/src/sqlparser.rs
+++ b/src/sqlparser.rs
@@ -90,7 +90,6 @@ impl Parser {
         let mut expr = self.parse_prefix()?;
         debug!("prefix: {:?}", expr);
         loop {
-            // stop parsing on `NULL` | `NOT NULL`
             match self.peek_token() {
                 Some(Token::Keyword(ref k)) if k == "NOT" || k == "NULL" => break,
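
Note on the above: the second removed line only repeated keywords that already appear earlier
in the list, so the net change is that the duplicates are dropped and the one genuinely new
keyword, LIKE, is folded into the preceding line. Duplicates in a dialect keyword list are
redundant rather than harmful, but a cheap guard against reintroducing them would be a test
along these lines (hypothetical test, not part of this series; `keywords` stands in for the
dialect's actual list):

    #[test]
    fn dialect_keywords_have_no_duplicates() {
        use std::collections::HashSet;
        // Hypothetical stand-in for GenericSqlDialect::keywords().
        let keywords = vec!["CROSS", "OUTER", "INNER", "NATURAL", "ON", "USING", "LIKE"];
        let unique: HashSet<_> = keywords.iter().collect();
        assert_eq!(unique.len(), keywords.len(), "duplicate keyword in dialect list");
    }
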
From 89cfa9e5997ae017ae2fd5a1b8fe560ab22e18bd Mon Sep 17 00:00:00 2001
From: Nickolay Ponomarev
Date: Sat, 12 Jan 2019 20:43:51 +0300
Subject: [PATCH 2/6] Don't consume a second DoubleColon in parse_pg_cast

The function is invoked after a DoubleColon was already matched.
---
 src/sqlparser.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/sqlparser.rs b/src/sqlparser.rs
index 6516a4a2..7c7d5f68 100644
--- a/src/sqlparser.rs
+++ b/src/sqlparser.rs
@@ -246,7 +246,6 @@ impl Parser {
 
     /// Parse a postgresql casting style which is in the form of `expr::datatype`
     pub fn parse_pg_cast(&mut self, expr: ASTNode) -> Result<ASTNode, ParserError> {
-        let _ = self.consume_token(&Token::DoubleColon)?;
         Ok(ASTNode::SQLCast {
            expr: Box::new(expr),
            data_type: self.parse_data_type()?,
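
Note on the above: parse_pg_cast() is only reached once the caller has already matched the
`::` token, so consuming another DoubleColon inside the function would eat the token that
actually starts the target data type. A toy illustration of that bug class, independent of
the real Parser type (assumed token layout, runnable as-is):

    // The caller consumes "::"; the callee must not consume it again.
    fn parse_cast_type(tokens: &[&str], pos: usize) -> Result<(String, usize), String> {
        // A buggy variant would skip another "::" here before reading the type.
        let data_type = tokens.get(pos).ok_or("expected a data type after ::")?;
        Ok((data_type.to_string(), pos + 1))
    }

    fn main() -> Result<(), String> {
        let tokens = ["1", "::", "int"];
        let mut pos = 1; // "1" has been parsed as the expression
        pos += 1;        // "::" consumed by the caller, as in the parser's infix loop
        let (ty, _rest) = parse_cast_type(&tokens, pos)?;
        assert_eq!(ty, "int");
        Ok(())
    }
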
From de4ccd3cb7e89b533ccaae6038590188a44e7e0b Mon Sep 17 00:00:00 2001
From: Nickolay Ponomarev
Date: Sat, 12 Jan 2019 19:59:29 +0300
Subject: [PATCH 3/6] Fail when expected keyword is not found

Add #[must_use] to warn against unchecked results of parse_keyword/s in
the future.
---
 src/sqlparser.rs            | 17 ++++++++++-------
 tests/sqlparser_postgres.rs | 10 ++++++++++
 2 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/src/sqlparser.rs b/src/sqlparser.rs
index 7c7d5f68..988d4318 100644
--- a/src/sqlparser.rs
+++ b/src/sqlparser.rs
@@ -447,6 +447,7 @@ impl Parser {
     }
 
     /// Look for an expected keyword and consume it if it exists
+    #[must_use]
     pub fn parse_keyword(&mut self, expected: &'static str) -> bool {
         match self.peek_token() {
             Some(Token::Keyword(k)) => {
@@ -462,6 +463,7 @@ impl Parser {
     }
 
     /// Look for an expected sequence of keywords and consume them if they exist
+    #[must_use]
     pub fn parse_keywords(&mut self, keywords: Vec<&'static str>) -> bool {
         let index = self.index;
         for keyword in keywords {
@@ -475,6 +477,7 @@ impl Parser {
         true
     }
 
+    /// Bail out if the current token is not an expected keyword, or consume it if it is
     pub fn expect_keyword(&mut self, expected: &'static str) -> Result<(), ParserError> {
         if self.parse_keyword(expected) {
             Ok(())
@@ -667,8 +670,8 @@ impl Parser {
         } else {
             vec![]
         };
-        self.parse_keyword("FROM");
-        self.parse_keyword("STDIN");
+        self.expect_keyword("FROM")?;
+        self.expect_keyword("STDIN")?;
         self.consume_token(&Token::SemiColon)?;
         let values = self.parse_tsv()?;
         Ok(ASTNode::SQLCopy {
@@ -1230,7 +1233,7 @@ impl Parser {
             }
             Some(Token::Keyword(kw)) if kw == "LEFT" => {
                 self.next_token();
-                self.parse_keyword("OUTER");
+                let _ = self.parse_keyword("OUTER");
                 self.expect_keyword("JOIN")?;
                 Join {
                     relation: self.parse_expr(0)?,
@@ -1241,7 +1244,7 @@ impl Parser {
             }
             Some(Token::Keyword(kw)) if kw == "RIGHT" => {
                 self.next_token();
-                self.parse_keyword("OUTER");
+                let _ = self.parse_keyword("OUTER");
                 self.expect_keyword("JOIN")?;
                 Join {
                     relation: self.parse_expr(0)?,
@@ -1252,7 +1255,7 @@ impl Parser {
             }
             Some(Token::Keyword(kw)) if kw == "FULL" => {
                 self.next_token();
-                self.parse_keyword("OUTER");
+                let _ = self.parse_keyword("OUTER");
                 self.expect_keyword("JOIN")?;
                 Join {
                     relation: self.parse_expr(0)?,
@@ -1271,7 +1274,7 @@ impl Parser {
 
     /// Parse an INSERT statement
     pub fn parse_insert(&mut self) -> Result<ASTNode, ParserError> {
-        self.parse_keyword("INTO");
+        self.expect_keyword("INTO")?;
         let table_name = self.parse_tablename()?;
         let columns = if self.consume_token(&Token::LParen)? {
             let column_names = self.parse_column_names()?;
@@ -1280,7 +1283,7 @@ impl Parser {
         } else {
             vec![]
         };
-        self.parse_keyword("VALUES");
+        self.expect_keyword("VALUES")?;
         self.consume_token(&Token::LParen)?;
         let values = self.parse_expr_list()?;
         self.consume_token(&Token::RParen)?;
diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs
index e3412b2c..5c354a3b 100644
--- a/tests/sqlparser_postgres.rs
+++ b/tests/sqlparser_postgres.rs
@@ -221,6 +221,16 @@ fn parse_insert_with_columns() {
     }
 }
 
+#[test]
+fn parse_insert_invalid() {
+    let sql = String::from("INSERT public.customer (id, name, active) VALUES (1, 2, 3)");
+    let mut parser = parser(&sql);
+    match parser.parse() {
+        Err(_) => {}
+        _ => assert!(false),
+    }
+}
+
 #[test]
 fn parse_select_wildcard() {
     let sql = String::from("SELECT * FROM customer");
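
Note on the above: the patch separates the two intents cleanly. parse_keyword()/parse_keywords()
stay as "consume it if it happens to be there" helpers returning bool, and #[must_use] makes the
compiler warn whenever that bool is silently dropped; expect_keyword() is for keywords the
grammar requires and turns a miss into a ParserError. The new parse_insert_invalid test covers
the INSERT-without-INTO input that previously parsed. A self-contained sketch of the pattern
(toy types, not the real Parser):

    #[derive(Debug)]
    struct ParserError(String);

    struct Keywords { tokens: Vec<String>, index: usize }

    impl Keywords {
        /// Consume the keyword if it is next; callers must look at the result.
        #[must_use]
        fn parse_keyword(&mut self, expected: &str) -> bool {
            if self.tokens.get(self.index).map_or(false, |t| t.as_str() == expected) {
                self.index += 1;
                true
            } else {
                false
            }
        }

        /// Consume the keyword or fail, for keywords the grammar requires.
        fn expect_keyword(&mut self, expected: &str) -> Result<(), ParserError> {
            if self.parse_keyword(expected) {
                Ok(())
            } else {
                Err(ParserError(format!(
                    "Expected keyword {}, found {:?}",
                    expected,
                    self.tokens.get(self.index)
                )))
            }
        }
    }

    fn main() -> Result<(), ParserError> {
        // Remaining input after LEFT has been consumed: just "JOIN" (OUTER was omitted).
        let mut p = Keywords { tokens: vec!["JOIN".into()], index: 0 };
        let _ = p.parse_keyword("OUTER"); // optional: ignoring the bool is now explicit
        p.expect_keyword("JOIN")?;        // required: a miss becomes a ParserError
        Ok(())
    }
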
From b3ab4aca88f71bd988f77edbe4c54c0ce66a98f4 Mon Sep 17 00:00:00 2001
From: Nickolay Ponomarev
Date: Sat, 12 Jan 2019 20:38:14 +0300
Subject: [PATCH 4/6] Use expect_keyword() instead of consume_token() where appropriate

Before this, missing keywords THEN/WHEN/AS would be parsed as if they
were in the text, because the code didn't check the return value of
consume_token() - see the upcoming commit.
---
 src/sqlparser.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/sqlparser.rs b/src/sqlparser.rs
index 988d4318..2a08bc71 100644
--- a/src/sqlparser.rs
+++ b/src/sqlparser.rs
@@ -204,7 +204,7 @@ impl Parser {
         let mut else_result = None;
         loop {
             conditions.push(self.parse_expr(0)?);
-            self.consume_token(&Token::Keyword("THEN".to_string()))?;
+            self.expect_keyword("THEN")?;
             results.push(self.parse_expr(0)?);
             if self.parse_keywords(vec!["ELSE"]) {
                 else_result = Some(Box::new(self.parse_expr(0)?));
@@ -217,7 +217,7 @@ impl Parser {
             if self.parse_keywords(vec!["END"]) {
                 break;
             }
-            self.consume_token(&Token::Keyword("WHEN".to_string()))?;
+            self.expect_keyword("WHEN")?;
         }
         Ok(ASTNode::SQLCase {
             conditions,
@@ -235,7 +235,7 @@ impl Parser {
     pub fn parse_cast_expression(&mut self) -> Result<ASTNode, ParserError> {
         self.consume_token(&Token::LParen)?;
         let expr = self.parse_expr(0)?;
-        self.consume_token(&Token::Keyword("AS".to_string()))?;
+        self.expect_keyword("AS")?;
         let data_type = self.parse_data_type()?;
         self.consume_token(&Token::RParen)?;
         Ok(ASTNode::SQLCast {
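
Note on the above: the `?` on the old calls only caught the end-of-input case. When a
different token was present, consume_token() returned Ok(false), the bool was dropped, and
parsing carried on exactly as if THEN/WHEN/AS had been written, which is what the new
expect_keyword() calls prevent. A condensed illustration (toy functions, not the real parser):

    // Old shape: a "maybe" result whose `false` is easy to drop on the floor.
    fn consume_keyword(found: &str, expected: &str) -> Result<bool, String> {
        Ok(found == expected)
    }

    // New shape: a required keyword that is missing becomes a hard error.
    fn expect_keyword(found: &str, expected: &str) -> Result<(), String> {
        if consume_keyword(found, expected)? {
            Ok(())
        } else {
            Err(format!("Expected keyword {}, found {}", expected, found))
        }
    }

    fn main() {
        // Input is `CASE WHEN a ELSE b END`, i.e. the THEN is missing.
        let _dropped = consume_keyword("ELSE", "THEN");  // Ok(false): silently ignored before
        assert!(expect_keyword("ELSE", "THEN").is_err()); // now a parse failure
    }
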
From fd9e2818d71f38990e80f2053877c41bf7a60c7f Mon Sep 17 00:00:00 2001
From: Nickolay Ponomarev
Date: Sat, 12 Jan 2019 20:45:30 +0300
Subject: [PATCH 5/6] Introduce expect_token(), failing when the expected token was not found

---
 src/sqlparser.rs | 55 +++++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 33 insertions(+), 22 deletions(-)

diff --git a/src/sqlparser.rs b/src/sqlparser.rs
index 2a08bc71..e2900947 100644
--- a/src/sqlparser.rs
+++ b/src/sqlparser.rs
@@ -141,7 +141,7 @@ impl Parser {
                 Some(Token::Period) => {
                     let mut id_parts: Vec<String> = vec![id];
                     while self.peek_token() == Some(Token::Period) {
-                        self.consume_token(&Token::Period)?;
+                        self.expect_token(&Token::Period)?;
                         match self.next_token() {
                             Some(Token::Identifier(id)) => id_parts.push(id),
                             _ => {
@@ -166,9 +166,7 @@ impl Parser {
                 }
                 Token::LParen => {
                     let expr = self.parse();
-                    if !self.consume_token(&Token::RParen)? {
-                        return parser_err!(format!("expected token RParen"));
-                    }
+                    self.expect_token(&Token::RParen)?;
                     expr
                 }
                 _ => parser_err!(format!(
@@ -181,7 +179,7 @@ impl Parser {
     }
 
     pub fn parse_function(&mut self, id: &str) -> Result<ASTNode, ParserError> {
-        self.consume_token(&Token::LParen)?;
+        self.expect_token(&Token::LParen)?;
         if let Ok(true) = self.consume_token(&Token::RParen) {
             Ok(ASTNode::SQLFunction {
                 id: id.to_string(),
@@ -189,7 +187,7 @@ impl Parser {
             })
         } else {
             let args = self.parse_expr_list()?;
-            self.consume_token(&Token::RParen)?;
+            self.expect_token(&Token::RParen)?;
             Ok(ASTNode::SQLFunction {
                 id: id.to_string(),
                 args,
@@ -233,11 +231,11 @@ impl Parser {
 
     /// Parse a SQL CAST function e.g. `CAST(expr AS FLOAT)`
     pub fn parse_cast_expression(&mut self) -> Result<ASTNode, ParserError> {
-        self.consume_token(&Token::LParen)?;
+        self.expect_token(&Token::LParen)?;
         let expr = self.parse_expr(0)?;
         self.expect_keyword("AS")?;
         let data_type = self.parse_data_type()?;
-        self.consume_token(&Token::RParen)?;
+        self.expect_token(&Token::RParen)?;
         Ok(ASTNode::SQLCast {
             expr: Box::new(expr),
             data_type,
@@ -507,6 +505,19 @@ impl Parser {
         }
     }
 
+    /// Bail out if the current token is not an expected keyword, or consume it if it is
+    pub fn expect_token(&mut self, expected: &Token) -> Result<(), ParserError> {
+        if self.consume_token(expected)? {
+            Ok(())
+        } else {
+            parser_err!(format!(
+                "Expected token {:?}, found {:?}",
+                expected,
+                self.peek_token()
+            ))
+        }
+    }
+
     /// Parse a SQL CREATE statement
     pub fn parse_create(&mut self) -> Result<ASTNode, ParserError> {
         if self.parse_keywords(vec!["TABLE"]) {
@@ -591,9 +602,9 @@ impl Parser {
         let is_primary_key = self.parse_keywords(vec!["PRIMARY", "KEY"]);
         let is_unique_key = self.parse_keywords(vec!["UNIQUE", "KEY"]);
         let is_foreign_key = self.parse_keywords(vec!["FOREIGN", "KEY"]);
-        self.consume_token(&Token::LParen)?;
+        self.expect_token(&Token::LParen)?;
         let column_names = self.parse_column_names()?;
-        self.consume_token(&Token::RParen)?;
+        self.expect_token(&Token::RParen)?;
         let key = Key {
             name: constraint_name.to_string(),
             columns: column_names,
@@ -605,9 +616,9 @@ impl Parser {
         } else if is_foreign_key {
             if self.parse_keyword("REFERENCES") {
                 let foreign_table = self.parse_tablename()?;
-                self.consume_token(&Token::LParen)?;
+                self.expect_token(&Token::LParen)?;
                 let referred_columns = self.parse_column_names()?;
-                self.consume_token(&Token::RParen)?;
+                self.expect_token(&Token::RParen)?;
                 Ok(TableKey::ForeignKey {
                     key,
                     foreign_table,
@@ -665,14 +676,14 @@ impl Parser {
         let table_name = self.parse_tablename()?;
         let columns = if self.consume_token(&Token::LParen)? {
             let column_names = self.parse_column_names()?;
-            self.consume_token(&Token::RParen)?;
+            self.expect_token(&Token::RParen)?;
             column_names
         } else {
             vec![]
         };
         self.expect_keyword("FROM")?;
         self.expect_keyword("STDIN")?;
-        self.consume_token(&Token::SemiColon)?;
+        self.expect_token(&Token::SemiColon)?;
         let values = self.parse_tsv()?;
         Ok(ASTNode::SQLCopy {
             table_name,
@@ -853,9 +864,9 @@ impl Parser {
 
     pub fn parse_time(&mut self) -> Result<NaiveTime, ParserError> {
         let hour = self.parse_literal_int()?;
-        self.consume_token(&Token::Colon)?;
+        self.expect_token(&Token::Colon)?;
         let min = self.parse_literal_int()?;
-        self.consume_token(&Token::Colon)?;
+        self.expect_token(&Token::Colon)?;
         // On one hand, the SQL specs defines ::= ,
         // so it would be more correct to parse it as such
         let sec = self.parse_literal_double()?;
@@ -945,7 +956,7 @@ impl Parser {
             "REGCLASS" => Ok(SQLType::Regclass),
             "TEXT" => {
                 if let Ok(true) = self.consume_token(&Token::LBracket) {
-                    self.consume_token(&Token::RBracket)?;
+                    self.expect_token(&Token::RBracket)?;
                     Ok(SQLType::Array(Box::new(SQLType::Text)))
                 } else {
                     Ok(SQLType::Text)
@@ -1030,7 +1041,7 @@ impl Parser {
         if self.consume_token(&Token::LParen)? {
             let n = self.parse_literal_int()?;
             //TODO: check return value of reading rparen
-            self.consume_token(&Token::RParen)?;
+            self.expect_token(&Token::RParen)?;
             Ok(Some(n as usize))
         } else {
             Ok(None)
@@ -1047,7 +1058,7 @@ impl Parser {
             } else {
                 None
             };
-            self.consume_token(&Token::RParen)?;
+            self.expect_token(&Token::RParen)?;
             Ok((n as usize, scale))
         } else {
             parser_err!("Expecting `(`")
@@ -1278,15 +1289,15 @@ impl Parser {
         let table_name = self.parse_tablename()?;
         let columns = if self.consume_token(&Token::LParen)? {
             let column_names = self.parse_column_names()?;
-            self.consume_token(&Token::RParen)?;
+            self.expect_token(&Token::RParen)?;
             column_names
         } else {
             vec![]
         };
         self.expect_keyword("VALUES")?;
-        self.consume_token(&Token::LParen)?;
+        self.expect_token(&Token::LParen)?;
         let values = self.parse_expr_list()?;
-        self.consume_token(&Token::RParen)?;
+        self.expect_token(&Token::RParen)?;
         Ok(ASTNode::SQLInsert {
             table_name,
             columns,
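
Note on the above: expect_token() is the token-level twin of expect_keyword(). Call sites that
previously wrote consume_token(...)? and threw away the Ok(false) now fail with a message naming
both the expected and the actual token, while call sites that genuinely treat the token as
optional keep using consume_token(). The follow-up patch below then makes that split explicit by
having consume_token() return a plain #[must_use] bool. A self-contained sketch of the two
helpers side by side (toy token type, not the real parser):

    #[derive(Debug, PartialEq)]
    enum Token { LParen, RParen, Comma }

    struct Tokens { items: Vec<Token>, index: usize }

    impl Tokens {
        /// Consume the next token if it matches, otherwise leave it and return false.
        #[must_use]
        fn consume_token(&mut self, expected: &Token) -> bool {
            if self.items.get(self.index) == Some(expected) {
                self.index += 1;
                true
            } else {
                false
            }
        }

        /// Consume the next token or fail, for delimiters the grammar requires.
        fn expect_token(&mut self, expected: &Token) -> Result<(), String> {
            if self.consume_token(expected) {
                Ok(())
            } else {
                Err(format!("Expected token {:?}, found {:?}", expected, self.items.get(self.index)))
            }
        }
    }

    fn main() -> Result<(), String> {
        let mut t = Tokens { items: vec![Token::LParen, Token::RParen], index: 0 };
        t.expect_token(&Token::LParen)?;                // required delimiter
        let has_comma = t.consume_token(&Token::Comma); // optional, just a bool
        t.expect_token(&Token::RParen)?;                // a missing `)` is now a real error
        assert!(!has_comma);
        Ok(())
    }
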
From ae06dc79016051e81c0edc0c34fac66a618039c7 Mon Sep 17 00:00:00 2001
From: Nickolay Ponomarev
Date: Sat, 12 Jan 2019 20:56:25 +0300
Subject: [PATCH 6/6] Return bool from consume_token(), mark as #[must_use]

---
 src/sqlparser.rs | 42 +++++++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/src/sqlparser.rs b/src/sqlparser.rs
index e2900947..25043087 100644
--- a/src/sqlparser.rs
+++ b/src/sqlparser.rs
@@ -180,7 +180,7 @@ impl Parser {
 
     pub fn parse_function(&mut self, id: &str) -> Result<ASTNode, ParserError> {
         self.expect_token(&Token::LParen)?;
-        if let Ok(true) = self.consume_token(&Token::RParen) {
+        if self.consume_token(&Token::RParen) {
             Ok(ASTNode::SQLFunction {
                 id: id.to_string(),
                 args: vec![],
@@ -488,26 +488,25 @@ impl Parser {
         }
     }
 
-    //TODO: this function is inconsistent and sometimes returns bool and sometimes fails
-
-    /// Consume the next token if it matches the expected token, otherwise return an error
-    pub fn consume_token(&mut self, expected: &Token) -> Result<bool, ParserError> {
+    /// Consume the next token if it matches the expected token, otherwise return false
+    #[must_use]
+    pub fn consume_token(&mut self, expected: &Token) -> bool {
         match self.peek_token() {
             Some(ref t) => {
                 if *t == *expected {
                     self.next_token();
-                    Ok(true)
+                    true
                 } else {
-                    Ok(false)
+                    false
                 }
             }
-            other => parser_err!(format!("expected token {:?} but was {:?}", expected, other,)),
+            _ => false,
         }
     }
 
     /// Bail out if the current token is not an expected keyword, or consume it if it is
     pub fn expect_token(&mut self, expected: &Token) -> Result<(), ParserError> {
-        if self.consume_token(expected)? {
+        if self.consume_token(expected) {
             Ok(())
         } else {
             parser_err!(format!(
@@ -523,7 +522,7 @@ impl Parser {
             let table_name = self.parse_tablename()?;
             // parse optional column list (schema)
             let mut columns = vec![];
-            if self.consume_token(&Token::LParen)? {
+            if self.consume_token(&Token::LParen) {
                 loop {
                     if let Some(Token::Identifier(column_name)) = self.next_token() {
                         if let Ok(data_type) = self.parse_data_type() {
@@ -674,7 +673,7 @@ impl Parser {
     /// Parse a copy statement
     pub fn parse_copy(&mut self) -> Result<ASTNode, ParserError> {
         let table_name = self.parse_tablename()?;
-        let columns = if self.consume_token(&Token::LParen)? {
+        let columns = if self.consume_token(&Token::LParen) {
             let column_names = self.parse_column_names()?;
             self.expect_token(&Token::RParen)?;
             column_names
@@ -717,7 +716,7 @@ impl Parser {
                     content.clear();
                 }
                 Token::Backslash => {
-                    if let Ok(true) = self.consume_token(&Token::Period) {
+                    if self.consume_token(&Token::Period) {
                         return Ok(values);
                     }
                     if let Some(token) = self.next_token() {
@@ -842,9 +841,9 @@ impl Parser {
     }
 
     pub fn parse_date(&mut self, year: i64) -> Result<NaiveDate, ParserError> {
-        if let Ok(true) = self.consume_token(&Token::Minus) {
+        if self.consume_token(&Token::Minus) {
             let month = self.parse_literal_int()?;
-            if let Ok(true) = self.consume_token(&Token::Minus) {
+            if self.consume_token(&Token::Minus) {
                 let day = self.parse_literal_int()?;
                 let date = NaiveDate::from_ymd(year as i32, month as u32, day as u32);
                 Ok(date)
@@ -955,7 +954,7 @@ impl Parser {
             }
             "REGCLASS" => Ok(SQLType::Regclass),
             "TEXT" => {
-                if let Ok(true) = self.consume_token(&Token::LBracket) {
+                if self.consume_token(&Token::LBracket) {
+                    // Note: this is postgresql-specific
                     self.expect_token(&Token::RBracket)?;
                     Ok(SQLType::Array(Box::new(SQLType::Text)))
                 } else {
                     Ok(SQLType::Text)
@@ -1038,7 +1038,7 @@ impl Parser {
     }
 
     pub fn parse_optional_precision(&mut self) -> Result<Option<usize>, ParserError> {
-        if self.consume_token(&Token::LParen)? {
+        if self.consume_token(&Token::LParen) {
             let n = self.parse_literal_int()?;
             //TODO: check return value of reading rparen
             self.expect_token(&Token::RParen)?;
@@ -1051,9 +1051,9 @@ impl Parser {
     pub fn parse_optional_precision_scale(
         &mut self,
     ) -> Result<(usize, Option<usize>), ParserError> {
-        if self.consume_token(&Token::LParen)? {
+        if self.consume_token(&Token::LParen) {
             let n = self.parse_literal_int()?;
-            let scale = if let Ok(true) = self.consume_token(&Token::Comma) {
+            let scale = if self.consume_token(&Token::Comma) {
                 Some(self.parse_literal_int()? as usize)
             } else {
                 None
@@ -1165,7 +1165,7 @@ impl Parser {
             let constraint = self.parse_expr(0)?;
             Ok(JoinConstraint::On(constraint))
         } else if self.parse_keyword("USING") {
-            if self.consume_token(&Token::LParen)? {
+            if self.consume_token(&Token::LParen) {
                 let attributes = self
                     .parse_expr_list()?
                     .into_iter()
@@ -1177,7 +1177,7 @@ impl Parser {
                     })
                     .collect::<Result<Vec<String>, ParserError>>()?;
 
-                if self.consume_token(&Token::RParen)? {
+                if self.consume_token(&Token::RParen) {
                     Ok(JoinConstraint::Using(attributes))
                 } else {
                     parser_err!(format!("Expected token ')', found {:?}", self.peek_token()))
@@ -1287,7 +1287,7 @@ impl Parser {
     pub fn parse_insert(&mut self) -> Result<ASTNode, ParserError> {
         self.expect_keyword("INTO")?;
         let table_name = self.parse_tablename()?;
-        let columns = if self.consume_token(&Token::LParen)? {
+        let columns = if self.consume_token(&Token::LParen) {
             let column_names = self.parse_column_names()?;
             self.expect_token(&Token::RParen)?;
             column_names