Fix the precedence of NOT LIKE

NOT LIKE has the same precedence as the LIKE operator. The parser was
previously assigning it the precedence of the unary NOT operator. NOT
BETWEEN and NOT IN are treated similarly, as they are equivalent, from a
precedence perspective, to NOT LIKE.

The fix for this requires associating precedences with sequences of
tokens, rather than single tokens, so that "NOT LIKE" and "NOT <expr>"
can have different precedences. Perhaps surprisingly, this change is not
very invasive.

An alternative I considered involved adjusting the tokenizer to lex
NOT, NOT LIKE, NOT BETWEEN, and NOT IN as separate tokens. This broke
symmetry in strange ways, though, as NotLike, NotBetween, and NotIn
gained dedicated tokens, while LIKE, BETWEEN, and IN remained as
stringly-typed identifiers.

Fixes #81.
This commit is contained in:
Nikhil Benesch 2019-05-27 00:20:58 -04:00
parent f55e3d5305
commit 90bcf55a6a
No known key found for this signature in database
GPG key ID: F7386C5DEADABA7F
2 changed files with 125 additions and 60 deletions

View file

@ -190,13 +190,10 @@ impl Parser {
}
"CASE" => self.parse_case_expression(),
"CAST" => self.parse_cast_expression(),
"NOT" => {
let p = self.get_precedence(&Token::make_keyword("NOT"))?;
Ok(ASTNode::SQLUnary {
operator: SQLOperator::Not,
expr: Box::new(self.parse_subexpr(p)?),
})
}
"NOT" => Ok(ASTNode::SQLUnary {
operator: SQLOperator::Not,
expr: Box::new(self.parse_subexpr(Self::UNARY_NOT_PREC)?),
}),
// Here `w` is a word, check if it's a part of a multi-part
// identifier, a function call, or a simple identifier:
_ => match self.peek_token() {
@ -230,7 +227,6 @@ impl Parser {
}, // End of Token::SQLWord
Token::Mult => Ok(ASTNode::SQLWildcard),
tok @ Token::Minus | tok @ Token::Plus => {
let p = self.get_precedence(&tok)?;
let operator = if tok == Token::Plus {
SQLOperator::Plus
} else {
@ -238,7 +234,7 @@ impl Parser {
};
Ok(ASTNode::SQLUnary {
operator,
expr: Box::new(self.parse_subexpr(p)?),
expr: Box::new(self.parse_subexpr(Self::PLUS_MINUS_PREC)?),
})
}
Token::Number(_) | Token::SingleQuotedString(_) | Token::NationalStringLiteral(_) => {
@ -510,10 +506,9 @@ impl Parser {
pub fn parse_between(&mut self, expr: ASTNode, negated: bool) -> Result<ASTNode, ParserError> {
// Stop parsing subexpressions for <low> and <high> on tokens with
// precedence lower than that of `BETWEEN`, such as `AND`, `IS`, etc.
let prec = self.get_precedence(&Token::make_keyword("BETWEEN"))?;
let low = self.parse_subexpr(prec)?;
let low = self.parse_subexpr(Self::BETWEEN_PREC)?;
self.expect_keyword("AND")?;
let high = self.parse_subexpr(prec)?;
let high = self.parse_subexpr(Self::BETWEEN_PREC)?;
Ok(ASTNode::SQLBetween {
expr: Box::new(expr),
negated,
@ -530,35 +525,45 @@ impl Parser {
})
}
const UNARY_NOT_PREC: u8 = 15;
const BETWEEN_PREC: u8 = 20;
const PLUS_MINUS_PREC: u8 = 30;
/// Get the precedence of the next token
pub fn get_next_precedence(&self) -> Result<u8, ParserError> {
if let Some(token) = self.peek_token() {
self.get_precedence(&token)
debug!("get_precedence() {:?}", token);
match &token {
Token::SQLWord(k) if k.keyword == "OR" => Ok(5),
Token::SQLWord(k) if k.keyword == "AND" => Ok(10),
Token::SQLWord(k) if k.keyword == "NOT" => match &self.peek_nth_token(1) {
// The precedence of NOT varies depending on the keyword that
// follows it. If it is followed by IN, BETWEEN, or LIKE,
// it takes on the precedence of those tokens. Otherwise it
// takes on UNARY_NOT_PREC.
Some(Token::SQLWord(k)) if k.keyword == "IN" => Ok(Self::BETWEEN_PREC),
Some(Token::SQLWord(k)) if k.keyword == "BETWEEN" => Ok(Self::BETWEEN_PREC),
Some(Token::SQLWord(k)) if k.keyword == "LIKE" => Ok(Self::BETWEEN_PREC),
_ => Ok(Self::UNARY_NOT_PREC),
},
Token::SQLWord(k) if k.keyword == "IS" => Ok(17),
Token::SQLWord(k) if k.keyword == "IN" => Ok(Self::BETWEEN_PREC),
Token::SQLWord(k) if k.keyword == "BETWEEN" => Ok(Self::BETWEEN_PREC),
Token::SQLWord(k) if k.keyword == "LIKE" => Ok(Self::BETWEEN_PREC),
Token::Eq | Token::Lt | Token::LtEq | Token::Neq | Token::Gt | Token::GtEq => {
Ok(20)
}
Token::Plus | Token::Minus => Ok(Self::PLUS_MINUS_PREC),
Token::Mult | Token::Div | Token::Mod => Ok(40),
Token::DoubleColon => Ok(50),
_ => Ok(0),
}
} else {
Ok(0)
}
}
/// Get the precedence of a token
pub fn get_precedence(&self, tok: &Token) -> Result<u8, ParserError> {
debug!("get_precedence() {:?}", tok);
match tok {
Token::SQLWord(k) if k.keyword == "OR" => Ok(5),
Token::SQLWord(k) if k.keyword == "AND" => Ok(10),
Token::SQLWord(k) if k.keyword == "NOT" => Ok(15),
Token::SQLWord(k) if k.keyword == "IS" => Ok(17),
Token::SQLWord(k) if k.keyword == "IN" => Ok(20),
Token::SQLWord(k) if k.keyword == "BETWEEN" => Ok(20),
Token::SQLWord(k) if k.keyword == "LIKE" => Ok(20),
Token::Eq | Token::Lt | Token::LtEq | Token::Neq | Token::Gt | Token::GtEq => Ok(20),
Token::Plus | Token::Minus => Ok(30),
Token::Mult | Token::Div | Token::Mod => Ok(40),
Token::DoubleColon => Ok(50),
_ => Ok(0),
}
}
/// Return first non-whitespace token that has not yet been processed
pub fn peek_token(&self) -> Option<Token> {
self.peek_nth_token(0)

View file

@ -413,38 +413,98 @@ fn parse_not_precedence() {
operator: SQLOperator::Not,
..
});
// NOT has lower precedence than BETWEEN, so the following parses as NOT (1 NOT BETWEEN 1 AND 2)
let sql = "NOT 1 NOT BETWEEN 1 AND 2";
assert_eq!(
verified_expr(sql),
SQLUnary {
operator: SQLOperator::Not,
expr: Box::new(SQLBetween {
expr: Box::new(SQLValue(Value::Long(1))),
low: Box::new(SQLValue(Value::Long(1))),
high: Box::new(SQLValue(Value::Long(2))),
negated: true,
}),
},
);
// NOT has lower precedence than LIKE, so the following parses as NOT ('a' NOT LIKE 'b')
let sql = "NOT 'a' NOT LIKE 'b'";
assert_eq!(
verified_expr(sql),
SQLUnary {
operator: SQLOperator::Not,
expr: Box::new(SQLBinaryExpr {
left: Box::new(SQLValue(Value::SingleQuotedString("a".into()))),
op: SQLOperator::NotLike,
right: Box::new(SQLValue(Value::SingleQuotedString("b".into()))),
}),
},
);
// NOT has lower precedence than IN, so the following parses as NOT (a NOT IN ('a'))
let sql = "NOT a NOT IN ('a')";
assert_eq!(
verified_expr(sql),
SQLUnary {
operator: SQLOperator::Not,
expr: Box::new(SQLInList {
expr: Box::new(SQLIdentifier("a".into())),
list: vec![SQLValue(Value::SingleQuotedString("a".into()))],
negated: true,
}),
},
);
}
#[test]
fn parse_like() {
let sql = "SELECT * FROM customers WHERE name LIKE '%a'";
let select = verified_only_select(sql);
assert_eq!(
ASTNode::SQLBinaryExpr {
left: Box::new(ASTNode::SQLIdentifier("name".to_string())),
op: SQLOperator::Like,
right: Box::new(ASTNode::SQLValue(Value::SingleQuotedString(
"%a".to_string()
))),
},
select.selection.unwrap()
);
}
fn chk(negated: bool) {
let sql = &format!(
"SELECT * FROM customers WHERE name {}LIKE '%a'",
if negated { "NOT " } else { "" }
);
let select = verified_only_select(sql);
assert_eq!(
ASTNode::SQLBinaryExpr {
left: Box::new(ASTNode::SQLIdentifier("name".to_string())),
op: if negated {
SQLOperator::NotLike
} else {
SQLOperator::Like
},
right: Box::new(ASTNode::SQLValue(Value::SingleQuotedString(
"%a".to_string()
))),
},
select.selection.unwrap()
);
#[test]
fn parse_not_like() {
let sql = "SELECT * FROM customers WHERE name NOT LIKE '%a'";
let select = verified_only_select(sql);
assert_eq!(
ASTNode::SQLBinaryExpr {
left: Box::new(ASTNode::SQLIdentifier("name".to_string())),
op: SQLOperator::NotLike,
right: Box::new(ASTNode::SQLValue(Value::SingleQuotedString(
"%a".to_string()
))),
},
select.selection.unwrap()
);
// This statement tests that LIKE and NOT LIKE have the same precedence.
// This was previously mishandled (#81).
let sql = &format!(
"SELECT * FROM customers WHERE name {}LIKE '%a' IS NULL",
if negated { "NOT " } else { "" }
);
let select = verified_only_select(sql);
assert_eq!(
ASTNode::SQLIsNull(Box::new(ASTNode::SQLBinaryExpr {
left: Box::new(ASTNode::SQLIdentifier("name".to_string())),
op: if negated {
SQLOperator::NotLike
} else {
SQLOperator::Like
},
right: Box::new(ASTNode::SQLValue(Value::SingleQuotedString(
"%a".to_string()
))),
})),
select.selection.unwrap()
);
}
chk(false);
chk(true);
}
#[test]