mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-09-22 13:42:31 +00:00
support regex match
This commit is contained in:
parent
e5991f3ae5
commit
80759a4deb
4 changed files with 102 additions and 2 deletions
|
@ -80,6 +80,10 @@ pub enum BinaryOperator {
|
||||||
PGBitwiseXor,
|
PGBitwiseXor,
|
||||||
PGBitwiseShiftLeft,
|
PGBitwiseShiftLeft,
|
||||||
PGBitwiseShiftRight,
|
PGBitwiseShiftRight,
|
||||||
|
PGRegexMatch,
|
||||||
|
PGRegexIMatch,
|
||||||
|
PGRegexNotMatch,
|
||||||
|
PGRegexNotIMatch,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Display for BinaryOperator {
|
impl fmt::Display for BinaryOperator {
|
||||||
|
@ -110,6 +114,10 @@ impl fmt::Display for BinaryOperator {
|
||||||
BinaryOperator::PGBitwiseXor => "#",
|
BinaryOperator::PGBitwiseXor => "#",
|
||||||
BinaryOperator::PGBitwiseShiftLeft => "<<",
|
BinaryOperator::PGBitwiseShiftLeft => "<<",
|
||||||
BinaryOperator::PGBitwiseShiftRight => ">>",
|
BinaryOperator::PGBitwiseShiftRight => ">>",
|
||||||
|
BinaryOperator::PGRegexMatch => "~",
|
||||||
|
BinaryOperator::PGRegexIMatch => "~*",
|
||||||
|
BinaryOperator::PGRegexNotMatch => "!~",
|
||||||
|
BinaryOperator::PGRegexNotIMatch => "!~*",
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -835,6 +835,10 @@ impl<'a> Parser<'a> {
|
||||||
Token::Sharp if dialect_of!(self is PostgreSqlDialect) => {
|
Token::Sharp if dialect_of!(self is PostgreSqlDialect) => {
|
||||||
Some(BinaryOperator::PGBitwiseXor)
|
Some(BinaryOperator::PGBitwiseXor)
|
||||||
}
|
}
|
||||||
|
Token::Tilde => Some(BinaryOperator::PGRegexMatch),
|
||||||
|
Token::TildeAsterisk => Some(BinaryOperator::PGRegexIMatch),
|
||||||
|
Token::ExclamationMarkTilde => Some(BinaryOperator::PGRegexNotMatch),
|
||||||
|
Token::ExclamationMarkTildeAsterisk => Some(BinaryOperator::PGRegexNotIMatch),
|
||||||
Token::Word(w) => match w.keyword {
|
Token::Word(w) => match w.keyword {
|
||||||
Keyword::AND => Some(BinaryOperator::And),
|
Keyword::AND => Some(BinaryOperator::And),
|
||||||
Keyword::OR => Some(BinaryOperator::Or),
|
Keyword::OR => Some(BinaryOperator::Or),
|
||||||
|
@ -993,6 +997,10 @@ impl<'a> Parser<'a> {
|
||||||
| Token::Gt
|
| Token::Gt
|
||||||
| Token::GtEq
|
| Token::GtEq
|
||||||
| Token::DoubleEq
|
| Token::DoubleEq
|
||||||
|
| Token::Tilde
|
||||||
|
| Token::TildeAsterisk
|
||||||
|
| Token::ExclamationMarkTilde
|
||||||
|
| Token::ExclamationMarkTildeAsterisk
|
||||||
| Token::Spaceship => Ok(20),
|
| Token::Spaceship => Ok(20),
|
||||||
Token::Pipe => Ok(21),
|
Token::Pipe => Ok(21),
|
||||||
Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(22),
|
Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(22),
|
||||||
|
|
|
@ -108,8 +108,14 @@ pub enum Token {
|
||||||
RArrow,
|
RArrow,
|
||||||
/// Sharp `#` used for PostgreSQL Bitwise XOR operator
|
/// Sharp `#` used for PostgreSQL Bitwise XOR operator
|
||||||
Sharp,
|
Sharp,
|
||||||
/// Tilde `~` used for PostgreSQL Bitwise NOT operator
|
/// Tilde `~` used for PostgreSQL Bitwise NOT operator or case sensitive regular expression match operator
|
||||||
Tilde,
|
Tilde,
|
||||||
|
/// `~*`, a case insensitive regular expression match operator in PostgreSQL
|
||||||
|
TildeAsterisk,
|
||||||
|
/// `!~`, a case sensitive negated regular expression match operator in PostgreSQL
|
||||||
|
ExclamationMarkTilde,
|
||||||
|
/// `!~*`, a case insensitive negated regular expression match operator in PostgreSQL
|
||||||
|
ExclamationMarkTildeAsterisk,
|
||||||
/// `<<`, a bitwise shift left operator in PostgreSQL
|
/// `<<`, a bitwise shift left operator in PostgreSQL
|
||||||
ShiftLeft,
|
ShiftLeft,
|
||||||
/// `>>`, a bitwise shift right operator in PostgreSQL
|
/// `>>`, a bitwise shift right operator in PostgreSQL
|
||||||
|
@ -171,6 +177,9 @@ impl fmt::Display for Token {
|
||||||
Token::ExclamationMark => f.write_str("!"),
|
Token::ExclamationMark => f.write_str("!"),
|
||||||
Token::DoubleExclamationMark => f.write_str("!!"),
|
Token::DoubleExclamationMark => f.write_str("!!"),
|
||||||
Token::Tilde => f.write_str("~"),
|
Token::Tilde => f.write_str("~"),
|
||||||
|
Token::TildeAsterisk => f.write_str("~*"),
|
||||||
|
Token::ExclamationMarkTilde => f.write_str("!~"),
|
||||||
|
Token::ExclamationMarkTildeAsterisk => f.write_str("!~*"),
|
||||||
Token::AtSign => f.write_str("@"),
|
Token::AtSign => f.write_str("@"),
|
||||||
Token::ShiftLeft => f.write_str("<<"),
|
Token::ShiftLeft => f.write_str("<<"),
|
||||||
Token::ShiftRight => f.write_str(">>"),
|
Token::ShiftRight => f.write_str(">>"),
|
||||||
|
@ -486,6 +495,14 @@ impl<'a> Tokenizer<'a> {
|
||||||
match chars.peek() {
|
match chars.peek() {
|
||||||
Some('=') => self.consume_and_return(chars, Token::Neq),
|
Some('=') => self.consume_and_return(chars, Token::Neq),
|
||||||
Some('!') => self.consume_and_return(chars, Token::DoubleExclamationMark),
|
Some('!') => self.consume_and_return(chars, Token::DoubleExclamationMark),
|
||||||
|
Some('~') => {
|
||||||
|
chars.next();
|
||||||
|
match chars.peek() {
|
||||||
|
Some('*') => self
|
||||||
|
.consume_and_return(chars, Token::ExclamationMarkTildeAsterisk),
|
||||||
|
_ => Ok(Some(Token::ExclamationMarkTilde)),
|
||||||
|
}
|
||||||
|
}
|
||||||
_ => Ok(Some(Token::ExclamationMark)),
|
_ => Ok(Some(Token::ExclamationMark)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -535,7 +552,13 @@ impl<'a> Tokenizer<'a> {
|
||||||
comment,
|
comment,
|
||||||
})))
|
})))
|
||||||
}
|
}
|
||||||
'~' => self.consume_and_return(chars, Token::Tilde),
|
'~' => {
|
||||||
|
chars.next(); // consume
|
||||||
|
match chars.peek() {
|
||||||
|
Some('*') => self.consume_and_return(chars, Token::TildeAsterisk),
|
||||||
|
_ => Ok(Some(Token::Tilde)),
|
||||||
|
}
|
||||||
|
}
|
||||||
'#' => self.consume_and_return(chars, Token::Sharp),
|
'#' => self.consume_and_return(chars, Token::Sharp),
|
||||||
'@' => self.consume_and_return(chars, Token::AtSign),
|
'@' => self.consume_and_return(chars, Token::AtSign),
|
||||||
other => self.consume_and_return(chars, Token::Char(other)),
|
other => self.consume_and_return(chars, Token::Char(other)),
|
||||||
|
@ -1111,6 +1134,45 @@ mod tests {
|
||||||
compare(expected, tokens);
|
compare(expected, tokens);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn tokenize_pg_regex_match() {
|
||||||
|
let sql = "SELECT col ~ '^a', col ~* '^a', col !~ '^a', col !~* '^a'";
|
||||||
|
let dialect = GenericDialect {};
|
||||||
|
let mut tokenizer = Tokenizer::new(&dialect, sql);
|
||||||
|
let tokens = tokenizer.tokenize().unwrap();
|
||||||
|
let expected = vec![
|
||||||
|
Token::make_keyword("SELECT"),
|
||||||
|
Token::Whitespace(Whitespace::Space),
|
||||||
|
Token::make_word("col", None),
|
||||||
|
Token::Whitespace(Whitespace::Space),
|
||||||
|
Token::Tilde,
|
||||||
|
Token::Whitespace(Whitespace::Space),
|
||||||
|
Token::SingleQuotedString("^a".into()),
|
||||||
|
Token::Comma,
|
||||||
|
Token::Whitespace(Whitespace::Space),
|
||||||
|
Token::make_word("col", None),
|
||||||
|
Token::Whitespace(Whitespace::Space),
|
||||||
|
Token::TildeAsterisk,
|
||||||
|
Token::Whitespace(Whitespace::Space),
|
||||||
|
Token::SingleQuotedString("^a".into()),
|
||||||
|
Token::Comma,
|
||||||
|
Token::Whitespace(Whitespace::Space),
|
||||||
|
Token::make_word("col", None),
|
||||||
|
Token::Whitespace(Whitespace::Space),
|
||||||
|
Token::ExclamationMarkTilde,
|
||||||
|
Token::Whitespace(Whitespace::Space),
|
||||||
|
Token::SingleQuotedString("^a".into()),
|
||||||
|
Token::Comma,
|
||||||
|
Token::Whitespace(Whitespace::Space),
|
||||||
|
Token::make_word("col", None),
|
||||||
|
Token::Whitespace(Whitespace::Space),
|
||||||
|
Token::ExclamationMarkTildeAsterisk,
|
||||||
|
Token::Whitespace(Whitespace::Space),
|
||||||
|
Token::SingleQuotedString("^a".into()),
|
||||||
|
];
|
||||||
|
compare(expected, tokens);
|
||||||
|
}
|
||||||
|
|
||||||
fn compare(expected: Vec<Token>, actual: Vec<Token>) {
|
fn compare(expected: Vec<Token>, actual: Vec<Token>) {
|
||||||
//println!("------------------------------");
|
//println!("------------------------------");
|
||||||
//println!("tokens = {:?}", actual);
|
//println!("tokens = {:?}", actual);
|
||||||
|
|
|
@ -647,6 +647,28 @@ fn parse_pg_postfix_factorial() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_pg_regex_match_ops() {
|
||||||
|
let pg_regex_match_ops = &[
|
||||||
|
("~", BinaryOperator::PGRegexMatch),
|
||||||
|
("~*", BinaryOperator::PGRegexIMatch),
|
||||||
|
("!~", BinaryOperator::PGRegexNotMatch),
|
||||||
|
("!~*", BinaryOperator::PGRegexNotIMatch),
|
||||||
|
];
|
||||||
|
|
||||||
|
for (str_op, op) in pg_regex_match_ops {
|
||||||
|
let select = pg().verified_only_select(&format!("SELECT 'abc' {} '^a'", &str_op));
|
||||||
|
assert_eq!(
|
||||||
|
SelectItem::UnnamedExpr(Expr::BinaryOp {
|
||||||
|
left: Box::new(Expr::Value(Value::SingleQuotedString("abc".into()))),
|
||||||
|
op: op.clone(),
|
||||||
|
right: Box::new(Expr::Value(Value::SingleQuotedString("^a".into()))),
|
||||||
|
}),
|
||||||
|
select.projection[0]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn pg() -> TestedDialects {
|
fn pg() -> TestedDialects {
|
||||||
TestedDialects {
|
TestedDialects {
|
||||||
dialects: vec![Box::new(PostgreSqlDialect {})],
|
dialects: vec![Box::new(PostgreSqlDialect {})],
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue