From f55e3d5305fc306ffa045f51e70d2a10e0c517b7 Mon Sep 17 00:00:00 2001 From: Nikhil Benesch Date: Fri, 31 May 2019 18:18:28 -0400 Subject: [PATCH 1/2] Introduce a peek_nth_token method This will be used in a future commit, where looking ahead by two tokens is important. --- src/sqlparser.rs | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/src/sqlparser.rs b/src/sqlparser.rs index 0db160d5..109d55e8 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -561,10 +561,28 @@ impl Parser { /// Return first non-whitespace token that has not yet been processed pub fn peek_token(&self) -> Option { - if let Some(n) = self.til_non_whitespace() { - self.token_at(n) - } else { - None + self.peek_nth_token(0) + } + + /// Return nth non-whitespace token that has not yet been processed + pub fn peek_nth_token(&self, mut n: usize) -> Option { + let mut index = self.index; + loop { + match self.token_at(index) { + Some(Token::Whitespace(_)) => { + index += 1; + } + Some(token) => { + if n == 0 { + return Some(token); + } + index += 1; + n -= 1; + } + None => { + return None; + } + } } } @@ -582,24 +600,6 @@ impl Parser { } } - /// get the index for non whitepsace token - fn til_non_whitespace(&self) -> Option { - let mut index = self.index; - loop { - match self.token_at(index) { - Some(Token::Whitespace(_)) => { - index += 1; - } - Some(_) => { - return Some(index); - } - None => { - return None; - } - } - } - } - /// see the token at this index fn token_at(&self, n: usize) -> Option { if let Some(token) = self.tokens.get(n) { From 90bcf55a6a3ba48485891273ca087799ada57ba5 Mon Sep 17 00:00:00 2001 From: Nikhil Benesch Date: Mon, 27 May 2019 00:20:58 -0400 Subject: [PATCH 2/2] Fix the precedence of NOT LIKE NOT LIKE has the same precedence as the LIKE operator. The parser was previously assigning it the precedence of the unary NOT operator. NOT BETWEEN and NOT IN are treated similarly, as they are equivalent, from a precedence perspective, to NOT LIKE. The fix for this requires associating precedences with sequences of tokens, rather than single tokens, so that "NOT LIKE" and "NOT " can have different preferences. Perhaps surprisingly, this change is not very invasive. An alternative I considered involved adjusting the tokenizer to lex NOT, NOT LIKE, NOT BETWEEN, and NOT IN as separate tokens. This broke symmetry in strange ways, though, as NotLike, NotBetween, and NotIn gained dedicated tokens, while LIKE, BETWEEN, and IN remained as stringly identifiers. Fixes #81. --- src/sqlparser.rs | 71 +++++++++++++----------- tests/sqlparser_common.rs | 114 +++++++++++++++++++++++++++++--------- 2 files changed, 125 insertions(+), 60 deletions(-) diff --git a/src/sqlparser.rs b/src/sqlparser.rs index 109d55e8..beb76d71 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -190,13 +190,10 @@ impl Parser { } "CASE" => self.parse_case_expression(), "CAST" => self.parse_cast_expression(), - "NOT" => { - let p = self.get_precedence(&Token::make_keyword("NOT"))?; - Ok(ASTNode::SQLUnary { - operator: SQLOperator::Not, - expr: Box::new(self.parse_subexpr(p)?), - }) - } + "NOT" => Ok(ASTNode::SQLUnary { + operator: SQLOperator::Not, + expr: Box::new(self.parse_subexpr(Self::UNARY_NOT_PREC)?), + }), // Here `w` is a word, check if it's a part of a multi-part // identifier, a function call, or a simple identifier: _ => match self.peek_token() { @@ -230,7 +227,6 @@ impl Parser { }, // End of Token::SQLWord Token::Mult => Ok(ASTNode::SQLWildcard), tok @ Token::Minus | tok @ Token::Plus => { - let p = self.get_precedence(&tok)?; let operator = if tok == Token::Plus { SQLOperator::Plus } else { @@ -238,7 +234,7 @@ impl Parser { }; Ok(ASTNode::SQLUnary { operator, - expr: Box::new(self.parse_subexpr(p)?), + expr: Box::new(self.parse_subexpr(Self::PLUS_MINUS_PREC)?), }) } Token::Number(_) | Token::SingleQuotedString(_) | Token::NationalStringLiteral(_) => { @@ -510,10 +506,9 @@ impl Parser { pub fn parse_between(&mut self, expr: ASTNode, negated: bool) -> Result { // Stop parsing subexpressions for and on tokens with // precedence lower than that of `BETWEEN`, such as `AND`, `IS`, etc. - let prec = self.get_precedence(&Token::make_keyword("BETWEEN"))?; - let low = self.parse_subexpr(prec)?; + let low = self.parse_subexpr(Self::BETWEEN_PREC)?; self.expect_keyword("AND")?; - let high = self.parse_subexpr(prec)?; + let high = self.parse_subexpr(Self::BETWEEN_PREC)?; Ok(ASTNode::SQLBetween { expr: Box::new(expr), negated, @@ -530,35 +525,45 @@ impl Parser { }) } + const UNARY_NOT_PREC: u8 = 15; + const BETWEEN_PREC: u8 = 20; + const PLUS_MINUS_PREC: u8 = 30; + /// Get the precedence of the next token pub fn get_next_precedence(&self) -> Result { if let Some(token) = self.peek_token() { - self.get_precedence(&token) + debug!("get_precedence() {:?}", token); + + match &token { + Token::SQLWord(k) if k.keyword == "OR" => Ok(5), + Token::SQLWord(k) if k.keyword == "AND" => Ok(10), + Token::SQLWord(k) if k.keyword == "NOT" => match &self.peek_nth_token(1) { + // The precedence of NOT varies depending on keyword that + // follows it. If it is followed by IN, BETWEEN, or LIKE, + // it takes on the precedence of those tokens. Otherwise it + // takes on UNARY_NOT_PREC. + Some(Token::SQLWord(k)) if k.keyword == "IN" => Ok(Self::BETWEEN_PREC), + Some(Token::SQLWord(k)) if k.keyword == "BETWEEN" => Ok(Self::BETWEEN_PREC), + Some(Token::SQLWord(k)) if k.keyword == "LIKE" => Ok(Self::BETWEEN_PREC), + _ => Ok(Self::UNARY_NOT_PREC), + }, + Token::SQLWord(k) if k.keyword == "IS" => Ok(17), + Token::SQLWord(k) if k.keyword == "IN" => Ok(Self::BETWEEN_PREC), + Token::SQLWord(k) if k.keyword == "BETWEEN" => Ok(Self::BETWEEN_PREC), + Token::SQLWord(k) if k.keyword == "LIKE" => Ok(Self::BETWEEN_PREC), + Token::Eq | Token::Lt | Token::LtEq | Token::Neq | Token::Gt | Token::GtEq => { + Ok(20) + } + Token::Plus | Token::Minus => Ok(Self::PLUS_MINUS_PREC), + Token::Mult | Token::Div | Token::Mod => Ok(40), + Token::DoubleColon => Ok(50), + _ => Ok(0), + } } else { Ok(0) } } - /// Get the precedence of a token - pub fn get_precedence(&self, tok: &Token) -> Result { - debug!("get_precedence() {:?}", tok); - - match tok { - Token::SQLWord(k) if k.keyword == "OR" => Ok(5), - Token::SQLWord(k) if k.keyword == "AND" => Ok(10), - Token::SQLWord(k) if k.keyword == "NOT" => Ok(15), - Token::SQLWord(k) if k.keyword == "IS" => Ok(17), - Token::SQLWord(k) if k.keyword == "IN" => Ok(20), - Token::SQLWord(k) if k.keyword == "BETWEEN" => Ok(20), - Token::SQLWord(k) if k.keyword == "LIKE" => Ok(20), - Token::Eq | Token::Lt | Token::LtEq | Token::Neq | Token::Gt | Token::GtEq => Ok(20), - Token::Plus | Token::Minus => Ok(30), - Token::Mult | Token::Div | Token::Mod => Ok(40), - Token::DoubleColon => Ok(50), - _ => Ok(0), - } - } - /// Return first non-whitespace token that has not yet been processed pub fn peek_token(&self) -> Option { self.peek_nth_token(0) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 2139b34a..9ef87a00 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -413,38 +413,98 @@ fn parse_not_precedence() { operator: SQLOperator::Not, .. }); + + // NOT has lower precedence than BETWEEN, so the following parses as NOT (1 NOT BETWEEN 1 AND 2) + let sql = "NOT 1 NOT BETWEEN 1 AND 2"; + assert_eq!( + verified_expr(sql), + SQLUnary { + operator: SQLOperator::Not, + expr: Box::new(SQLBetween { + expr: Box::new(SQLValue(Value::Long(1))), + low: Box::new(SQLValue(Value::Long(1))), + high: Box::new(SQLValue(Value::Long(2))), + negated: true, + }), + }, + ); + + // NOT has lower precedence than LIKE, so the following parses as NOT ('a' NOT LIKE 'b') + let sql = "NOT 'a' NOT LIKE 'b'"; + assert_eq!( + verified_expr(sql), + SQLUnary { + operator: SQLOperator::Not, + expr: Box::new(SQLBinaryExpr { + left: Box::new(SQLValue(Value::SingleQuotedString("a".into()))), + op: SQLOperator::NotLike, + right: Box::new(SQLValue(Value::SingleQuotedString("b".into()))), + }), + }, + ); + + // NOT has lower precedence than IN, so the following parses as NOT (a NOT IN 'a') + let sql = "NOT a NOT IN ('a')"; + assert_eq!( + verified_expr(sql), + SQLUnary { + operator: SQLOperator::Not, + expr: Box::new(SQLInList { + expr: Box::new(SQLIdentifier("a".into())), + list: vec![SQLValue(Value::SingleQuotedString("a".into()))], + negated: true, + }), + }, + ); } #[test] fn parse_like() { - let sql = "SELECT * FROM customers WHERE name LIKE '%a'"; - let select = verified_only_select(sql); - assert_eq!( - ASTNode::SQLBinaryExpr { - left: Box::new(ASTNode::SQLIdentifier("name".to_string())), - op: SQLOperator::Like, - right: Box::new(ASTNode::SQLValue(Value::SingleQuotedString( - "%a".to_string() - ))), - }, - select.selection.unwrap() - ); -} + fn chk(negated: bool) { + let sql = &format!( + "SELECT * FROM customers WHERE name {}LIKE '%a'", + if negated { "NOT " } else { "" } + ); + let select = verified_only_select(sql); + assert_eq!( + ASTNode::SQLBinaryExpr { + left: Box::new(ASTNode::SQLIdentifier("name".to_string())), + op: if negated { + SQLOperator::NotLike + } else { + SQLOperator::Like + }, + right: Box::new(ASTNode::SQLValue(Value::SingleQuotedString( + "%a".to_string() + ))), + }, + select.selection.unwrap() + ); -#[test] -fn parse_not_like() { - let sql = "SELECT * FROM customers WHERE name NOT LIKE '%a'"; - let select = verified_only_select(sql); - assert_eq!( - ASTNode::SQLBinaryExpr { - left: Box::new(ASTNode::SQLIdentifier("name".to_string())), - op: SQLOperator::NotLike, - right: Box::new(ASTNode::SQLValue(Value::SingleQuotedString( - "%a".to_string() - ))), - }, - select.selection.unwrap() - ); + // This statement tests that LIKE and NOT LIKE have the same precedence. + // This was previously mishandled (#81). + let sql = &format!( + "SELECT * FROM customers WHERE name {}LIKE '%a' IS NULL", + if negated { "NOT " } else { "" } + ); + let select = verified_only_select(sql); + assert_eq!( + ASTNode::SQLIsNull(Box::new(ASTNode::SQLBinaryExpr { + left: Box::new(ASTNode::SQLIdentifier("name".to_string())), + op: if negated { + SQLOperator::NotLike + } else { + SQLOperator::Like + }, + right: Box::new(ASTNode::SQLValue(Value::SingleQuotedString( + "%a".to_string() + ))), + })), + select.selection.unwrap() + ); + } + chk(false); + chk(true); } #[test]