From b4699bd4a737a0b5daac3b8fc08169d84ea68d99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Wed, 3 Jun 2020 18:02:05 +0200 Subject: [PATCH] Support bitwise and, or, xor (#181) Operator precedence is coming from: https://cloud.google.com/bigquery/docs/reference/standard-sql/operators --- src/ast/operator.rs | 6 ++++++ src/parser.rs | 6 ++++++ src/tokenizer.rs | 38 +++++++++++++++++++++++++++++++++----- tests/sqlparser_common.rs | 21 +++++++++++++++++++++ 4 files changed, 66 insertions(+), 5 deletions(-) diff --git a/src/ast/operator.rs b/src/ast/operator.rs index 2d75c46f..c9f5eb2e 100644 --- a/src/ast/operator.rs +++ b/src/ast/operator.rs @@ -49,6 +49,9 @@ pub enum BinaryOperator { Or, Like, NotLike, + BitwiseOr, + BitwiseAnd, + BitwiseXor, } impl fmt::Display for BinaryOperator { @@ -70,6 +73,9 @@ impl fmt::Display for BinaryOperator { BinaryOperator::Or => "OR", BinaryOperator::Like => "LIKE", BinaryOperator::NotLike => "NOT LIKE", + BinaryOperator::BitwiseOr => "|", + BinaryOperator::BitwiseAnd => "&", + BinaryOperator::BitwiseXor => "^", }) } } diff --git a/src/parser.rs b/src/parser.rs index 00dd2494..0a50d2d7 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -578,6 +578,9 @@ impl Parser { Token::Mult => Some(BinaryOperator::Multiply), Token::Mod => Some(BinaryOperator::Modulus), Token::StringConcat => Some(BinaryOperator::StringConcat), + Token::Pipe => Some(BinaryOperator::BitwiseOr), + Token::Caret => Some(BinaryOperator::BitwiseXor), + Token::Ampersand => Some(BinaryOperator::BitwiseAnd), Token::Div => Some(BinaryOperator::Divide), Token::Word(ref k) => match k.keyword.as_ref() { "AND" => Some(BinaryOperator::And), @@ -708,6 +711,9 @@ impl Parser { Token::Eq | Token::Lt | Token::LtEq | Token::Neq | Token::Gt | Token::GtEq => { Ok(20) } + Token::Pipe => Ok(21), + Token::Caret => Ok(22), + Token::Ampersand => Ok(23), Token::Plus | Token::Minus => Ok(Self::PLUS_MINUS_PREC), Token::Mult | Token::Div | Token::Mod | Token::StringConcat => 
Ok(40), Token::DoubleColon => Ok(50), diff --git a/src/tokenizer.rs b/src/tokenizer.rs index f3504ffb..06c52c2c 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -84,8 +84,12 @@ pub enum Token { LBracket, /// Right bracket `]` RBracket, - /// Ampersand & + /// Ampersand `&` Ampersand, + /// Pipe `|` + Pipe, + /// Caret `^` + Caret, /// Left brace `{` LBrace, /// Right brace `}` @@ -125,6 +129,8 @@ impl fmt::Display for Token { Token::LBracket => f.write_str("["), Token::RBracket => f.write_str("]"), Token::Ampersand => f.write_str("&"), + Token::Caret => f.write_str("^"), + Token::Pipe => f.write_str("|"), Token::LBrace => f.write_str("{"), Token::RBrace => f.write_str("}"), } @@ -381,10 +387,8 @@ impl<'a> Tokenizer<'a> { chars.next(); // consume the '|' match chars.peek() { Some('|') => self.consume_and_return(chars, Token::StringConcat), - _ => Err(TokenizerError(format!( - "Expecting to see `||`. Bitwise or operator `|` is not supported. \nError at Line: {}, Col: {}", - self.line, self.col - ))), + // Bitwise or '|' operator + _ => Ok(Some(Token::Pipe)), } } '=' => self.consume_and_return(chars, Token::Eq), @@ -426,6 +430,7 @@ impl<'a> Tokenizer<'a> { '[' => self.consume_and_return(chars, Token::LBracket), ']' => self.consume_and_return(chars, Token::RBracket), '&' => self.consume_and_return(chars, Token::Ampersand), + '^' => self.consume_and_return(chars, Token::Caret), '{' => self.consume_and_return(chars, Token::LBrace), '}' => self.consume_and_return(chars, Token::RBrace), other => self.consume_and_return(chars, Token::Char(other)), @@ -594,6 +599,29 @@ mod tests { compare(expected, tokens); } + #[test] + fn tokenize_bitwise_op() { + let sql = String::from("SELECT one | two ^ three"); + let dialect = GenericDialect {}; + let mut tokenizer = Tokenizer::new(&dialect, &sql); + let tokens = tokenizer.tokenize().unwrap(); + + let expected = vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::make_word("one", None), 
Token::Whitespace(Whitespace::Space), + Token::Pipe, + Token::Whitespace(Whitespace::Space), + Token::make_word("two", None), + Token::Whitespace(Whitespace::Space), + Token::Caret, + Token::Whitespace(Whitespace::Space), + Token::make_word("three", None), + ]; + + compare(expected, tokens); + } #[test] fn tokenize_simple_select() { diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 34f8c589..1e1c54e1 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -680,6 +680,27 @@ fn parse_string_agg() { ); } +#[test] +fn parse_bitwise_ops() { + let bitwise_ops = &[ + ("^", BinaryOperator::BitwiseXor), + ("|", BinaryOperator::BitwiseOr), + ("&", BinaryOperator::BitwiseAnd), + ]; + + for (str_op, op) in bitwise_ops { + let select = verified_only_select(&format!("SELECT a {} b", &str_op)); + assert_eq!( + SelectItem::UnnamedExpr(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("a"))), + op: op.clone(), + right: Box::new(Expr::Identifier(Ident::new("b"))), + }), + select.projection[0] + ); + } +} + #[test] fn parse_between() { fn chk(negated: bool) {