mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-08-04 06:18:17 +00:00
Fix the precedence of NOT LIKE
NOT LIKE has the same precedence as the LIKE operator. The parser was previously assigning it the precedence of the unary NOT operator. NOT BETWEEN and NOT IN are treated similarly, as they are equivalent, from a precedence perspective, to NOT LIKE. The fix for this requires associating precedences with sequences of tokens, rather than single tokens, so that "NOT LIKE" and "NOT <expr>" can have different preferences. Perhaps surprisingly, this change is not very invasive. An alternative I considered involved adjusting the tokenizer to lex NOT, NOT LIKE, NOT BETWEEN, and NOT IN as separate tokens. This broke symmetry in strange ways, though, as NotLike, NotBetween, and NotIn gained dedicated tokens, while LIKE, BETWEEN, and IN remained as stringly identifiers. Fixes #81.
This commit is contained in:
parent
f55e3d5305
commit
90bcf55a6a
2 changed files with 125 additions and 60 deletions
|
@ -190,13 +190,10 @@ impl Parser {
|
|||
}
|
||||
"CASE" => self.parse_case_expression(),
|
||||
"CAST" => self.parse_cast_expression(),
|
||||
"NOT" => {
|
||||
let p = self.get_precedence(&Token::make_keyword("NOT"))?;
|
||||
Ok(ASTNode::SQLUnary {
|
||||
operator: SQLOperator::Not,
|
||||
expr: Box::new(self.parse_subexpr(p)?),
|
||||
})
|
||||
}
|
||||
"NOT" => Ok(ASTNode::SQLUnary {
|
||||
operator: SQLOperator::Not,
|
||||
expr: Box::new(self.parse_subexpr(Self::UNARY_NOT_PREC)?),
|
||||
}),
|
||||
// Here `w` is a word, check if it's a part of a multi-part
|
||||
// identifier, a function call, or a simple identifier:
|
||||
_ => match self.peek_token() {
|
||||
|
@ -230,7 +227,6 @@ impl Parser {
|
|||
}, // End of Token::SQLWord
|
||||
Token::Mult => Ok(ASTNode::SQLWildcard),
|
||||
tok @ Token::Minus | tok @ Token::Plus => {
|
||||
let p = self.get_precedence(&tok)?;
|
||||
let operator = if tok == Token::Plus {
|
||||
SQLOperator::Plus
|
||||
} else {
|
||||
|
@ -238,7 +234,7 @@ impl Parser {
|
|||
};
|
||||
Ok(ASTNode::SQLUnary {
|
||||
operator,
|
||||
expr: Box::new(self.parse_subexpr(p)?),
|
||||
expr: Box::new(self.parse_subexpr(Self::PLUS_MINUS_PREC)?),
|
||||
})
|
||||
}
|
||||
Token::Number(_) | Token::SingleQuotedString(_) | Token::NationalStringLiteral(_) => {
|
||||
|
@ -510,10 +506,9 @@ impl Parser {
|
|||
pub fn parse_between(&mut self, expr: ASTNode, negated: bool) -> Result<ASTNode, ParserError> {
|
||||
// Stop parsing subexpressions for <low> and <high> on tokens with
|
||||
// precedence lower than that of `BETWEEN`, such as `AND`, `IS`, etc.
|
||||
let prec = self.get_precedence(&Token::make_keyword("BETWEEN"))?;
|
||||
let low = self.parse_subexpr(prec)?;
|
||||
let low = self.parse_subexpr(Self::BETWEEN_PREC)?;
|
||||
self.expect_keyword("AND")?;
|
||||
let high = self.parse_subexpr(prec)?;
|
||||
let high = self.parse_subexpr(Self::BETWEEN_PREC)?;
|
||||
Ok(ASTNode::SQLBetween {
|
||||
expr: Box::new(expr),
|
||||
negated,
|
||||
|
@ -530,35 +525,45 @@ impl Parser {
|
|||
})
|
||||
}
|
||||
|
||||
const UNARY_NOT_PREC: u8 = 15;
|
||||
const BETWEEN_PREC: u8 = 20;
|
||||
const PLUS_MINUS_PREC: u8 = 30;
|
||||
|
||||
/// Get the precedence of the next token
|
||||
pub fn get_next_precedence(&self) -> Result<u8, ParserError> {
|
||||
if let Some(token) = self.peek_token() {
|
||||
self.get_precedence(&token)
|
||||
debug!("get_precedence() {:?}", token);
|
||||
|
||||
match &token {
|
||||
Token::SQLWord(k) if k.keyword == "OR" => Ok(5),
|
||||
Token::SQLWord(k) if k.keyword == "AND" => Ok(10),
|
||||
Token::SQLWord(k) if k.keyword == "NOT" => match &self.peek_nth_token(1) {
|
||||
// The precedence of NOT varies depending on keyword that
|
||||
// follows it. If it is followed by IN, BETWEEN, or LIKE,
|
||||
// it takes on the precedence of those tokens. Otherwise it
|
||||
// takes on UNARY_NOT_PREC.
|
||||
Some(Token::SQLWord(k)) if k.keyword == "IN" => Ok(Self::BETWEEN_PREC),
|
||||
Some(Token::SQLWord(k)) if k.keyword == "BETWEEN" => Ok(Self::BETWEEN_PREC),
|
||||
Some(Token::SQLWord(k)) if k.keyword == "LIKE" => Ok(Self::BETWEEN_PREC),
|
||||
_ => Ok(Self::UNARY_NOT_PREC),
|
||||
},
|
||||
Token::SQLWord(k) if k.keyword == "IS" => Ok(17),
|
||||
Token::SQLWord(k) if k.keyword == "IN" => Ok(Self::BETWEEN_PREC),
|
||||
Token::SQLWord(k) if k.keyword == "BETWEEN" => Ok(Self::BETWEEN_PREC),
|
||||
Token::SQLWord(k) if k.keyword == "LIKE" => Ok(Self::BETWEEN_PREC),
|
||||
Token::Eq | Token::Lt | Token::LtEq | Token::Neq | Token::Gt | Token::GtEq => {
|
||||
Ok(20)
|
||||
}
|
||||
Token::Plus | Token::Minus => Ok(Self::PLUS_MINUS_PREC),
|
||||
Token::Mult | Token::Div | Token::Mod => Ok(40),
|
||||
Token::DoubleColon => Ok(50),
|
||||
_ => Ok(0),
|
||||
}
|
||||
} else {
|
||||
Ok(0)
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the precedence of a token
|
||||
pub fn get_precedence(&self, tok: &Token) -> Result<u8, ParserError> {
|
||||
debug!("get_precedence() {:?}", tok);
|
||||
|
||||
match tok {
|
||||
Token::SQLWord(k) if k.keyword == "OR" => Ok(5),
|
||||
Token::SQLWord(k) if k.keyword == "AND" => Ok(10),
|
||||
Token::SQLWord(k) if k.keyword == "NOT" => Ok(15),
|
||||
Token::SQLWord(k) if k.keyword == "IS" => Ok(17),
|
||||
Token::SQLWord(k) if k.keyword == "IN" => Ok(20),
|
||||
Token::SQLWord(k) if k.keyword == "BETWEEN" => Ok(20),
|
||||
Token::SQLWord(k) if k.keyword == "LIKE" => Ok(20),
|
||||
Token::Eq | Token::Lt | Token::LtEq | Token::Neq | Token::Gt | Token::GtEq => Ok(20),
|
||||
Token::Plus | Token::Minus => Ok(30),
|
||||
Token::Mult | Token::Div | Token::Mod => Ok(40),
|
||||
Token::DoubleColon => Ok(50),
|
||||
_ => Ok(0),
|
||||
}
|
||||
}
|
||||
|
||||
/// Return first non-whitespace token that has not yet been processed
|
||||
pub fn peek_token(&self) -> Option<Token> {
|
||||
self.peek_nth_token(0)
|
||||
|
|
|
@ -413,38 +413,98 @@ fn parse_not_precedence() {
|
|||
operator: SQLOperator::Not,
|
||||
..
|
||||
});
|
||||
|
||||
// NOT has lower precedence than BETWEEN, so the following parses as NOT (1 NOT BETWEEN 1 AND 2)
|
||||
let sql = "NOT 1 NOT BETWEEN 1 AND 2";
|
||||
assert_eq!(
|
||||
verified_expr(sql),
|
||||
SQLUnary {
|
||||
operator: SQLOperator::Not,
|
||||
expr: Box::new(SQLBetween {
|
||||
expr: Box::new(SQLValue(Value::Long(1))),
|
||||
low: Box::new(SQLValue(Value::Long(1))),
|
||||
high: Box::new(SQLValue(Value::Long(2))),
|
||||
negated: true,
|
||||
}),
|
||||
},
|
||||
);
|
||||
|
||||
// NOT has lower precedence than LIKE, so the following parses as NOT ('a' NOT LIKE 'b')
|
||||
let sql = "NOT 'a' NOT LIKE 'b'";
|
||||
assert_eq!(
|
||||
verified_expr(sql),
|
||||
SQLUnary {
|
||||
operator: SQLOperator::Not,
|
||||
expr: Box::new(SQLBinaryExpr {
|
||||
left: Box::new(SQLValue(Value::SingleQuotedString("a".into()))),
|
||||
op: SQLOperator::NotLike,
|
||||
right: Box::new(SQLValue(Value::SingleQuotedString("b".into()))),
|
||||
}),
|
||||
},
|
||||
);
|
||||
|
||||
// NOT has lower precedence than IN, so the following parses as NOT (a NOT IN 'a')
|
||||
let sql = "NOT a NOT IN ('a')";
|
||||
assert_eq!(
|
||||
verified_expr(sql),
|
||||
SQLUnary {
|
||||
operator: SQLOperator::Not,
|
||||
expr: Box::new(SQLInList {
|
||||
expr: Box::new(SQLIdentifier("a".into())),
|
||||
list: vec![SQLValue(Value::SingleQuotedString("a".into()))],
|
||||
negated: true,
|
||||
}),
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_like() {
|
||||
let sql = "SELECT * FROM customers WHERE name LIKE '%a'";
|
||||
let select = verified_only_select(sql);
|
||||
assert_eq!(
|
||||
ASTNode::SQLBinaryExpr {
|
||||
left: Box::new(ASTNode::SQLIdentifier("name".to_string())),
|
||||
op: SQLOperator::Like,
|
||||
right: Box::new(ASTNode::SQLValue(Value::SingleQuotedString(
|
||||
"%a".to_string()
|
||||
))),
|
||||
},
|
||||
select.selection.unwrap()
|
||||
);
|
||||
}
|
||||
fn chk(negated: bool) {
|
||||
let sql = &format!(
|
||||
"SELECT * FROM customers WHERE name {}LIKE '%a'",
|
||||
if negated { "NOT " } else { "" }
|
||||
);
|
||||
let select = verified_only_select(sql);
|
||||
assert_eq!(
|
||||
ASTNode::SQLBinaryExpr {
|
||||
left: Box::new(ASTNode::SQLIdentifier("name".to_string())),
|
||||
op: if negated {
|
||||
SQLOperator::NotLike
|
||||
} else {
|
||||
SQLOperator::Like
|
||||
},
|
||||
right: Box::new(ASTNode::SQLValue(Value::SingleQuotedString(
|
||||
"%a".to_string()
|
||||
))),
|
||||
},
|
||||
select.selection.unwrap()
|
||||
);
|
||||
|
||||
#[test]
|
||||
fn parse_not_like() {
|
||||
let sql = "SELECT * FROM customers WHERE name NOT LIKE '%a'";
|
||||
let select = verified_only_select(sql);
|
||||
assert_eq!(
|
||||
ASTNode::SQLBinaryExpr {
|
||||
left: Box::new(ASTNode::SQLIdentifier("name".to_string())),
|
||||
op: SQLOperator::NotLike,
|
||||
right: Box::new(ASTNode::SQLValue(Value::SingleQuotedString(
|
||||
"%a".to_string()
|
||||
))),
|
||||
},
|
||||
select.selection.unwrap()
|
||||
);
|
||||
// This statement tests that LIKE and NOT LIKE have the same precedence.
|
||||
// This was previously mishandled (#81).
|
||||
let sql = &format!(
|
||||
"SELECT * FROM customers WHERE name {}LIKE '%a' IS NULL",
|
||||
if negated { "NOT " } else { "" }
|
||||
);
|
||||
let select = verified_only_select(sql);
|
||||
assert_eq!(
|
||||
ASTNode::SQLIsNull(Box::new(ASTNode::SQLBinaryExpr {
|
||||
left: Box::new(ASTNode::SQLIdentifier("name".to_string())),
|
||||
op: if negated {
|
||||
SQLOperator::NotLike
|
||||
} else {
|
||||
SQLOperator::Like
|
||||
},
|
||||
right: Box::new(ASTNode::SQLValue(Value::SingleQuotedString(
|
||||
"%a".to_string()
|
||||
))),
|
||||
})),
|
||||
select.selection.unwrap()
|
||||
);
|
||||
}
|
||||
chk(false);
|
||||
chk(true);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue