Merge pull request #328 from b41sh/regexp_match

Add support for PostgreSQL regex match
This commit is contained in:
Andrew Lamb 2021-08-20 13:54:24 -04:00 committed by GitHub
commit 783bc21d29
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 102 additions and 2 deletions

View file

@ -81,6 +81,10 @@ pub enum BinaryOperator {
PGBitwiseXor,
PGBitwiseShiftLeft,
PGBitwiseShiftRight,
PGRegexMatch,
PGRegexIMatch,
PGRegexNotMatch,
PGRegexNotIMatch,
}
impl fmt::Display for BinaryOperator {
@ -111,6 +115,10 @@ impl fmt::Display for BinaryOperator {
BinaryOperator::PGBitwiseXor => "#",
BinaryOperator::PGBitwiseShiftLeft => "<<",
BinaryOperator::PGBitwiseShiftRight => ">>",
BinaryOperator::PGRegexMatch => "~",
BinaryOperator::PGRegexIMatch => "~*",
BinaryOperator::PGRegexNotMatch => "!~",
BinaryOperator::PGRegexNotIMatch => "!~*",
})
}
}

View file

@ -862,6 +862,10 @@ impl<'a> Parser<'a> {
Token::Sharp if dialect_of!(self is PostgreSqlDialect) => {
Some(BinaryOperator::PGBitwiseXor)
}
Token::Tilde => Some(BinaryOperator::PGRegexMatch),
Token::TildeAsterisk => Some(BinaryOperator::PGRegexIMatch),
Token::ExclamationMarkTilde => Some(BinaryOperator::PGRegexNotMatch),
Token::ExclamationMarkTildeAsterisk => Some(BinaryOperator::PGRegexNotIMatch),
Token::Word(w) => match w.keyword {
Keyword::AND => Some(BinaryOperator::And),
Keyword::OR => Some(BinaryOperator::Or),
@ -1020,6 +1024,10 @@ impl<'a> Parser<'a> {
| Token::Gt
| Token::GtEq
| Token::DoubleEq
| Token::Tilde
| Token::TildeAsterisk
| Token::ExclamationMarkTilde
| Token::ExclamationMarkTildeAsterisk
| Token::Spaceship => Ok(20),
Token::Pipe => Ok(21),
Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(22),

View file

@ -109,8 +109,14 @@ pub enum Token {
RArrow,
/// Sharp `#` used for PostgreSQL Bitwise XOR operator
Sharp,
/// Tilde `~` used for PostgreSQL Bitwise NOT operator
/// Tilde `~`, used for the PostgreSQL bitwise NOT operator or the case-sensitive regular expression match operator
Tilde,
/// `~*`, a case-insensitive regular expression match operator in PostgreSQL
TildeAsterisk,
/// `!~`, a case-sensitive regular expression non-match operator in PostgreSQL
ExclamationMarkTilde,
/// `!~*`, a case-insensitive regular expression non-match operator in PostgreSQL
ExclamationMarkTildeAsterisk,
/// `<<`, a bitwise shift left operator in PostgreSQL
ShiftLeft,
/// `>>`, a bitwise shift right operator in PostgreSQL
@ -172,6 +178,9 @@ impl fmt::Display for Token {
Token::ExclamationMark => f.write_str("!"),
Token::DoubleExclamationMark => f.write_str("!!"),
Token::Tilde => f.write_str("~"),
Token::TildeAsterisk => f.write_str("~*"),
Token::ExclamationMarkTilde => f.write_str("!~"),
Token::ExclamationMarkTildeAsterisk => f.write_str("!~*"),
Token::AtSign => f.write_str("@"),
Token::ShiftLeft => f.write_str("<<"),
Token::ShiftRight => f.write_str(">>"),
@ -489,6 +498,14 @@ impl<'a> Tokenizer<'a> {
match chars.peek() {
Some('=') => self.consume_and_return(chars, Token::Neq),
Some('!') => self.consume_and_return(chars, Token::DoubleExclamationMark),
Some('~') => {
chars.next();
match chars.peek() {
Some('*') => self
.consume_and_return(chars, Token::ExclamationMarkTildeAsterisk),
_ => Ok(Some(Token::ExclamationMarkTilde)),
}
}
_ => Ok(Some(Token::ExclamationMark)),
}
}
@ -538,7 +555,13 @@ impl<'a> Tokenizer<'a> {
comment,
})))
}
'~' => self.consume_and_return(chars, Token::Tilde),
'~' => {
chars.next(); // consume
match chars.peek() {
Some('*') => self.consume_and_return(chars, Token::TildeAsterisk),
_ => Ok(Some(Token::Tilde)),
}
}
'#' => self.consume_and_return(chars, Token::Sharp),
'@' => self.consume_and_return(chars, Token::AtSign),
other => self.consume_and_return(chars, Token::Char(other)),
@ -1114,6 +1137,45 @@ mod tests {
compare(expected, tokens);
}
#[test]
fn tokenize_pg_regex_match() {
    // One projection item per regex operator (`~`, `~*`, `!~`, `!~*`), each applied
    // to the same operands so only the operator token differs between items.
    let query = "SELECT col ~ '^a', col ~* '^a', col !~ '^a', col !~* '^a'";
    let generic = GenericDialect {};
    let mut lexer = Tokenizer::new(&generic, query);
    let actual = lexer.tokenize().unwrap();

    // Operator tokens in the order they appear in `query`.
    let operators = vec![
        Token::Tilde,
        Token::TildeAsterisk,
        Token::ExclamationMarkTilde,
        Token::ExclamationMarkTildeAsterisk,
    ];

    // Expected stream: SELECT, then ` col <op> '^a'` per operator, with the items
    // separated by commas (no trailing comma after the last one).
    let mut expected = vec![Token::make_keyword("SELECT")];
    for (position, operator) in operators.into_iter().enumerate() {
        if position > 0 {
            expected.push(Token::Comma);
        }
        expected.extend(vec![
            Token::Whitespace(Whitespace::Space),
            Token::make_word("col", None),
            Token::Whitespace(Whitespace::Space),
            operator,
            Token::Whitespace(Whitespace::Space),
            Token::SingleQuotedString("^a".into()),
        ]);
    }

    compare(expected, actual);
}
fn compare(expected: Vec<Token>, actual: Vec<Token>) {
//println!("------------------------------");
//println!("tokens = {:?}", actual);

View file

@ -647,6 +647,28 @@ fn parse_pg_postfix_factorial() {
}
}
#[test]
fn parse_pg_regex_match_ops() {
    // SQL spelling of each regex operator paired with the `BinaryOperator`
    // variant the first projection item is expected to carry.
    let cases = vec![
        ("~", BinaryOperator::PGRegexMatch),
        ("~*", BinaryOperator::PGRegexIMatch),
        ("!~", BinaryOperator::PGRegexNotMatch),
        ("!~*", BinaryOperator::PGRegexNotIMatch),
    ];

    for (symbol, expected_op) in cases {
        // Run the statement through the PostgreSQL test dialects via
        // `verified_only_select` and inspect the single projected expression.
        let sql = format!("SELECT 'abc' {} '^a'", symbol);
        let select = pg().verified_only_select(&sql);

        let expected_item = SelectItem::UnnamedExpr(Expr::BinaryOp {
            left: Box::new(Expr::Value(Value::SingleQuotedString("abc".into()))),
            op: expected_op,
            right: Box::new(Expr::Value(Value::SingleQuotedString("^a".into()))),
        });
        assert_eq!(expected_item, select.projection[0]);
    }
}
fn pg() -> TestedDialects {
TestedDialects {
dialects: vec![Box::new(PostgreSqlDialect {})],