Add support for the MATCH and REGEXP binary operators (#1840)

This commit is contained in:
Ophir LOJKINE 2025-05-09 01:48:23 +02:00 committed by GitHub
parent 6cd237ea43
commit 2182f7ea71
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 65 additions and 1 deletions

View file

@ -139,6 +139,11 @@ pub enum BinaryOperator {
DuckIntegerDivide,
/// MySQL [`DIV`](https://dev.mysql.com/doc/refman/8.0/en/arithmetic-functions.html) integer division
MyIntegerDivide,
/// MATCH operator, e.g. `a MATCH b` (SQLite-specific)
/// See <https://www.sqlite.org/lang_expr.html#the_like_glob_regexp_match_and_extract_operators>
Match,
/// REGEXP operator, e.g. `a REGEXP b` (SQLite-specific)
Regexp,
/// Support for custom operators (such as Postgres custom operators)
Custom(String),
/// Bitwise XOR, e.g. `a # b` (PostgreSQL-specific)
@ -350,6 +355,8 @@ impl fmt::Display for BinaryOperator {
BinaryOperator::BitwiseXor => f.write_str("^"),
BinaryOperator::DuckIntegerDivide => f.write_str("//"),
BinaryOperator::MyIntegerDivide => f.write_str("DIV"),
BinaryOperator::Match => f.write_str("MATCH"),
BinaryOperator::Regexp => f.write_str("REGEXP"),
BinaryOperator::Custom(s) => f.write_str(s),
BinaryOperator::PGBitwiseXor => f.write_str("#"),
BinaryOperator::PGBitwiseShiftLeft => f.write_str("<<"),

View file

@ -619,6 +619,7 @@ pub trait Dialect: Debug + Any {
Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(p!(Like)),
Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(p!(Like)),
Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(p!(Like)),
Token::Word(w) if w.keyword == Keyword::MATCH => Ok(p!(Like)),
Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(p!(Like)),
_ => Ok(self.prec_unknown()),
},
@ -630,6 +631,7 @@ pub trait Dialect: Debug + Any {
Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(p!(Like)),
Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(p!(Like)),
Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(p!(Like)),
Token::Word(w) if w.keyword == Keyword::MATCH => Ok(p!(Like)),
Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(p!(Like)),
Token::Word(w) if w.keyword == Keyword::OPERATOR => Ok(p!(Between)),
Token::Word(w) if w.keyword == Keyword::DIV => Ok(p!(MulDivModOp)),

View file

@ -15,7 +15,11 @@
// specific language governing permissions and limitations
// under the License.
use crate::ast::Statement;
#[cfg(not(feature = "std"))]
use alloc::boxed::Box;
use crate::ast::BinaryOperator;
use crate::ast::{Expr, Statement};
use crate::dialect::Dialect;
use crate::keywords::Keyword;
use crate::parser::{Parser, ParserError};
@ -70,6 +74,27 @@ impl Dialect for SQLiteDialect {
}
}
/// Parses the SQLite infix operators `REGEXP` and `MATCH`.
///
/// For each of the two keywords, `parser.parse_keyword` is tried in turn. On the
/// first match the right-hand operand is parsed with `parser.parse_expr()` and the
/// result is returned as `Some(Ok(Expr::BinaryOp { .. }))`, with a clone of `expr`
/// as the left operand and the corresponding [`BinaryOperator`] as `op`.
///
/// Returns `Some(Err(..))` if the right-hand operand fails to parse, and `None`
/// when neither keyword matches.
///
/// See <https://www.sqlite.org/lang_expr.html#the_like_glob_regexp_match_and_extract_operators>
fn parse_infix(
    &self,
    parser: &mut crate::parser::Parser,
    expr: &crate::ast::Expr,
    _precedence: u8,
) -> Option<Result<crate::ast::Expr, ParserError>> {
    for (keyword, op) in [
        (Keyword::REGEXP, BinaryOperator::Regexp),
        (Keyword::MATCH, BinaryOperator::Match),
    ] {
        if parser.parse_keyword(keyword) {
            // Propagate a failure to parse the right operand as a `ParserError`
            // instead of panicking: the input is user-supplied SQL, so something
            // like `a REGEXP` with nothing after it must be a recoverable error.
            //
            // NOTE(review): the right operand is parsed as a full expression and
            // `_precedence` is ignored, so `a MATCH b AND c` presumably groups as
            // `a MATCH (b AND c)` — confirm whether that is intended.
            return Some(parser.parse_expr().map(|rhs| Expr::BinaryOp {
                left: Box::new(expr.clone()),
                op,
                right: Box::new(rhs),
            }));
        }
    }
    None
}
fn supports_in_empty_list(&self) -> bool {
true
}

View file

@ -562,6 +562,36 @@ fn test_dollar_identifier_as_placeholder() {
}
}
#[test]
fn test_match_operator() {
    // `col MATCH 'pattern'` must parse to a plain binary operation whose operator
    // is `BinaryOperator::Match`.
    let expected = Expr::BinaryOp {
        left: Box::new(Expr::Identifier(Ident::new("col"))),
        op: BinaryOperator::Match,
        right: Box::new(Expr::Value(
            Value::SingleQuotedString("pattern".to_string()).with_empty_span(),
        )),
    };
    let parsed = sqlite().verified_expr("col MATCH 'pattern'");
    assert_eq!(parsed, expected);

    // The operator must also be accepted inside a complete SELECT statement.
    sqlite().verified_only_select("SELECT * FROM email WHERE email MATCH 'fts5'");
}
#[test]
fn test_regexp_operator() {
    // `col REGEXP 'pattern'` must parse to a plain binary operation whose operator
    // is `BinaryOperator::Regexp`.
    let expected = Expr::BinaryOp {
        left: Box::new(Expr::Identifier(Ident::new("col"))),
        op: BinaryOperator::Regexp,
        right: Box::new(Expr::Value(
            Value::SingleQuotedString("pattern".to_string()).with_empty_span(),
        )),
    };
    let parsed = sqlite().verified_expr("col REGEXP 'pattern'");
    assert_eq!(parsed, expected);

    // The operator must also be accepted inside a complete SELECT statement.
    sqlite().verified_only_select(r#"SELECT count(*) FROM messages WHERE msg_text REGEXP '\d+'"#);
}
/// Builds a [`TestedDialects`] containing only [`SQLiteDialect`], used by the
/// tests in this file to parse SQL with the SQLite dialect alone.
fn sqlite() -> TestedDialects {
TestedDialects::new(vec![Box::new(SQLiteDialect {})])
}