Support Dialect level precedence, update Postgres Dialect to match Postgres (#1360)

Samuel Colvin 2024-08-06 12:49:37 +01:00 committed by GitHub
parent 8f8c96f87f
commit a5480ae498
6 changed files with 440 additions and 130 deletions
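The net effect of the change: the operator precedence table that used to be hard-coded as constants on `Parser` now lives on the `Dialect` trait (overridable `prec_*` accessors plus a `get_next_precedence_full` entry point), and `PostgreSqlDialect` supplies its own table matching real Postgres behaviour. A minimal sketch of how a downstream dialect could use the new hooks (the dialect name `MyDialect` and the choice of operator are hypothetical; the crate paths are assumed from the diff below):

```rust
use sqlparser::dialect::Dialect;
use sqlparser::parser::{Parser, ParserError};
use sqlparser::tokenizer::Token;

#[derive(Debug)]
struct MyDialect;

impl Dialect for MyDialect {
    fn is_identifier_start(&self, ch: char) -> bool {
        ch.is_ascii_alphabetic() || ch == '_'
    }
    fn is_identifier_part(&self, ch: char) -> bool {
        ch.is_ascii_alphanumeric() || ch == '_'
    }
    // Demote `||` (string concatenation) to the same level as `+`/`-`;
    // returning None for everything else falls back to the shared
    // `get_next_precedence_full` logic added in this commit.
    fn get_next_precedence(&self, parser: &Parser) -> Option<Result<u8, ParserError>> {
        match parser.peek_token().token {
            Token::StringConcat => Some(Ok(self.prec_plus_minus())),
            _ => None,
        }
    }
}

fn main() -> Result<(), ParserError> {
    let statements = Parser::parse_sql(&MyDialect, "SELECT 'a' || 'b' FROM t")?;
    println!("{statements:?}");
    Ok(())
}
```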

View file

@@ -151,7 +151,7 @@ pub enum BinaryOperator {
Arrow,
/// The `->>` operator.
///
/// On PostgreSQL, this operator that extracts a JSON object field or JSON
/// On PostgreSQL, this operator extracts a JSON object field or JSON
/// array element and converts it to text, for example `'{"a":"b"}'::json
/// ->> 'a'` or `'[1, 2, 3]'::json ->> 2`.
///

View file

@@ -24,12 +24,13 @@ mod redshift;
mod snowflake;
mod sqlite;
use crate::ast::{Expr, Statement};
use core::any::{Any, TypeId};
use core::fmt::Debug;
use core::iter::Peekable;
use core::str::Chars;
use log::debug;
pub use self::ansi::AnsiDialect;
pub use self::bigquery::BigQueryDialect;
pub use self::clickhouse::ClickHouseDialect;
@@ -43,8 +44,11 @@ pub use self::postgresql::PostgreSqlDialect;
pub use self::redshift::RedshiftSqlDialect;
pub use self::snowflake::SnowflakeDialect;
pub use self::sqlite::SQLiteDialect;
use crate::ast::{Expr, Statement};
pub use crate::keywords;
use crate::keywords::Keyword;
use crate::parser::{Parser, ParserError};
use crate::tokenizer::Token;
#[cfg(not(feature = "std"))]
use alloc::boxed::Box;
@@ -300,13 +304,172 @@ pub trait Dialect: Debug + Any {
// return None to fall back to the default behavior
None
}
/// Get the precedence of the next token. This "full" method means all precedence logic
/// remains in the dialect, while still allowing `get_next_precedence` to be overridden,
/// with the option to fall back to the default behavior.
///
/// Higher number => higher precedence
fn get_next_precedence_full(&self, parser: &Parser) -> Result<u8, ParserError> {
if let Some(precedence) = self.get_next_precedence(parser) {
return precedence;
}
let token = parser.peek_token();
debug!("get_next_precedence() {:?}", token);
match token.token {
Token::Word(w) if w.keyword == Keyword::OR => Ok(OR_PREC),
Token::Word(w) if w.keyword == Keyword::AND => Ok(AND_PREC),
Token::Word(w) if w.keyword == Keyword::XOR => Ok(XOR_PREC),
Token::Word(w) if w.keyword == Keyword::AT => {
match (
parser.peek_nth_token(1).token,
parser.peek_nth_token(2).token,
) {
(Token::Word(w), Token::Word(w2))
if w.keyword == Keyword::TIME && w2.keyword == Keyword::ZONE =>
{
Ok(AT_TZ_PREC)
}
_ => Ok(UNKNOWN_PREC),
}
}
Token::Word(w) if w.keyword == Keyword::NOT => match parser.peek_nth_token(1).token {
// The precedence of NOT varies depending on the keyword that
// follows it. If it is followed by IN, BETWEEN, or LIKE,
// it takes on the precedence of those tokens. Otherwise, it
// is not an infix operator, and therefore has zero
// precedence.
Token::Word(w) if w.keyword == Keyword::IN => Ok(BETWEEN_PREC),
Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(BETWEEN_PREC),
Token::Word(w) if w.keyword == Keyword::LIKE => Ok(LIKE_PREC),
Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(LIKE_PREC),
Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(LIKE_PREC),
Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(LIKE_PREC),
Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(LIKE_PREC),
_ => Ok(UNKNOWN_PREC),
},
Token::Word(w) if w.keyword == Keyword::IS => Ok(IS_PREC),
Token::Word(w) if w.keyword == Keyword::IN => Ok(BETWEEN_PREC),
Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(BETWEEN_PREC),
Token::Word(w) if w.keyword == Keyword::LIKE => Ok(LIKE_PREC),
Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(LIKE_PREC),
Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(LIKE_PREC),
Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(LIKE_PREC),
Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(LIKE_PREC),
Token::Word(w) if w.keyword == Keyword::OPERATOR => Ok(BETWEEN_PREC),
Token::Word(w) if w.keyword == Keyword::DIV => Ok(MUL_DIV_MOD_OP_PREC),
Token::Eq
| Token::Lt
| Token::LtEq
| Token::Neq
| Token::Gt
| Token::GtEq
| Token::DoubleEq
| Token::Tilde
| Token::TildeAsterisk
| Token::ExclamationMarkTilde
| Token::ExclamationMarkTildeAsterisk
| Token::DoubleTilde
| Token::DoubleTildeAsterisk
| Token::ExclamationMarkDoubleTilde
| Token::ExclamationMarkDoubleTildeAsterisk
| Token::Spaceship => Ok(EQ_PREC),
Token::Pipe => Ok(PIPE_PREC),
Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(CARET_PREC),
Token::Ampersand => Ok(AMPERSAND_PREC),
Token::Plus | Token::Minus => Ok(PLUS_MINUS_PREC),
Token::Mul | Token::Div | Token::DuckIntDiv | Token::Mod | Token::StringConcat => {
Ok(MUL_DIV_MOD_OP_PREC)
}
Token::DoubleColon
| Token::ExclamationMark
| Token::LBracket
| Token::Overlap
| Token::CaretAt => Ok(DOUBLE_COLON_PREC),
// Token::Colon if (self as dyn Dialect).is::<SnowflakeDialect>() => Ok(DOUBLE_COLON_PREC),
Token::Arrow
| Token::LongArrow
| Token::HashArrow
| Token::HashLongArrow
| Token::AtArrow
| Token::ArrowAt
| Token::HashMinus
| Token::AtQuestion
| Token::AtAt
| Token::Question
| Token::QuestionAnd
| Token::QuestionPipe
| Token::CustomBinaryOperator(_) => Ok(PG_OTHER_PREC),
_ => Ok(UNKNOWN_PREC),
}
}
/// Dialect-specific statement parser override
fn parse_statement(&self, _parser: &mut Parser) -> Option<Result<Statement, ParserError>> {
// return None to fall back to the default behavior
None
}
/// The following precedence values are used directly by `Parser` or in dialects,
/// so they have to be made public by the dialect.
fn prec_double_colon(&self) -> u8 {
DOUBLE_COLON_PREC
}
fn prec_mul_div_mod_op(&self) -> u8 {
MUL_DIV_MOD_OP_PREC
}
fn prec_plus_minus(&self) -> u8 {
PLUS_MINUS_PREC
}
fn prec_between(&self) -> u8 {
BETWEEN_PREC
}
fn prec_like(&self) -> u8 {
LIKE_PREC
}
fn prec_unary_not(&self) -> u8 {
UNARY_NOT_PREC
}
fn prec_unknown(&self) -> u8 {
UNKNOWN_PREC
}
}
// Define the lexical precedence of operators.
//
// Uses (APPROXIMATELY) <https://www.postgresql.org/docs/7.0/operators.htm#AEN2026> as a reference
// higher number = higher precedence
//
// NOTE: The pg documentation is incomplete, e.g. the AT TIME ZONE operator
// actually has higher precedence than addition.
// See <https://postgrespro.com/list/thread-id/2673331>.
const DOUBLE_COLON_PREC: u8 = 50;
const AT_TZ_PREC: u8 = 41;
const MUL_DIV_MOD_OP_PREC: u8 = 40;
const PLUS_MINUS_PREC: u8 = 30;
const XOR_PREC: u8 = 24;
const AMPERSAND_PREC: u8 = 23;
const CARET_PREC: u8 = 22;
const PIPE_PREC: u8 = 21;
const BETWEEN_PREC: u8 = 20;
const EQ_PREC: u8 = 20;
const LIKE_PREC: u8 = 19;
const IS_PREC: u8 = 17;
const PG_OTHER_PREC: u8 = 16;
const UNARY_NOT_PREC: u8 = 15;
const AND_PREC: u8 = 10;
const OR_PREC: u8 = 5;
const UNKNOWN_PREC: u8 = 0;
impl dyn Dialect {
#[inline]
pub fn is<T: Dialect>(&self) -> bool {

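The default table above keeps the same relative ordering the parser used before (higher number, tighter binding). A small sketch of what the new accessors expose, assuming `GenericDialect` keeps the trait defaults shown above:

```rust
use sqlparser::dialect::{Dialect, GenericDialect};

fn main() {
    let d = GenericDialect {};
    // `::` outranks `*`/`/`, which outrank `+`/`-`, which outrank
    // BETWEEN/LIKE, which outrank unary NOT, per the constants above.
    assert!(d.prec_double_colon() > d.prec_mul_div_mod_op());
    assert!(d.prec_mul_div_mod_op() > d.prec_plus_minus());
    assert!(d.prec_plus_minus() > d.prec_between());
    assert!(d.prec_between() > d.prec_like());
    assert!(d.prec_like() > d.prec_unary_not());
}
```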
View file

@@ -9,6 +9,7 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use log::debug;
use crate::ast::{CommentObject, Statement};
use crate::dialect::Dialect;
@@ -20,6 +21,23 @@ use crate::tokenizer::Token;
#[derive(Debug)]
pub struct PostgreSqlDialect {}
const DOUBLE_COLON_PREC: u8 = 140;
const BRACKET_PREC: u8 = 130;
const COLLATE_PREC: u8 = 120;
const AT_TZ_PREC: u8 = 110;
const CARET_PREC: u8 = 100;
const MUL_DIV_MOD_OP_PREC: u8 = 90;
const PLUS_MINUS_PREC: u8 = 80;
// there's no XOR operator in PostgreSQL, but support it here to avoid breaking tests
const XOR_PREC: u8 = 75;
const PG_OTHER_PREC: u8 = 70;
const BETWEEN_LIKE_PREC: u8 = 60;
const EQ_PREC: u8 = 50;
const IS_PREC: u8 = 40;
const NOT_PREC: u8 = 30;
const AND_PREC: u8 = 20;
const OR_PREC: u8 = 10;
impl Dialect for PostgreSqlDialect {
fn identifier_quote_style(&self, _identifier: &str) -> Option<char> {
Some('"')
@@ -67,6 +85,102 @@ impl Dialect for PostgreSqlDialect {
)
}
fn get_next_precedence(&self, parser: &Parser) -> Option<Result<u8, ParserError>> {
let token = parser.peek_token();
debug!("get_next_precedence() {:?}", token);
let precedence = match token.token {
Token::Word(w) if w.keyword == Keyword::OR => OR_PREC,
Token::Word(w) if w.keyword == Keyword::XOR => XOR_PREC,
Token::Word(w) if w.keyword == Keyword::AND => AND_PREC,
Token::Word(w) if w.keyword == Keyword::AT => {
match (
parser.peek_nth_token(1).token,
parser.peek_nth_token(2).token,
) {
(Token::Word(w), Token::Word(w2))
if w.keyword == Keyword::TIME && w2.keyword == Keyword::ZONE =>
{
AT_TZ_PREC
}
_ => self.prec_unknown(),
}
}
Token::Word(w) if w.keyword == Keyword::NOT => match parser.peek_nth_token(1).token {
// The precedence of NOT varies depending on the keyword that
// follows it. If it is followed by IN, BETWEEN, or LIKE,
// it takes on the precedence of those tokens. Otherwise, it
// is not an infix operator, and therefore has zero
// precedence.
Token::Word(w) if w.keyword == Keyword::IN => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::BETWEEN => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::LIKE => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::ILIKE => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::RLIKE => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::REGEXP => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::SIMILAR => BETWEEN_LIKE_PREC,
_ => self.prec_unknown(),
},
Token::Word(w) if w.keyword == Keyword::IS => IS_PREC,
Token::Word(w) if w.keyword == Keyword::IN => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::BETWEEN => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::LIKE => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::ILIKE => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::RLIKE => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::REGEXP => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::SIMILAR => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::OPERATOR => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::DIV => MUL_DIV_MOD_OP_PREC,
Token::Word(w) if w.keyword == Keyword::COLLATE => COLLATE_PREC,
Token::Eq
| Token::Lt
| Token::LtEq
| Token::Neq
| Token::Gt
| Token::GtEq
| Token::DoubleEq
| Token::Tilde
| Token::TildeAsterisk
| Token::ExclamationMarkTilde
| Token::ExclamationMarkTildeAsterisk
| Token::DoubleTilde
| Token::DoubleTildeAsterisk
| Token::ExclamationMarkDoubleTilde
| Token::ExclamationMarkDoubleTildeAsterisk
| Token::Spaceship => EQ_PREC,
Token::Caret => CARET_PREC,
Token::Plus | Token::Minus => PLUS_MINUS_PREC,
Token::Mul | Token::Div | Token::Mod => MUL_DIV_MOD_OP_PREC,
Token::DoubleColon => DOUBLE_COLON_PREC,
Token::LBracket => BRACKET_PREC,
Token::Arrow
| Token::LongArrow
| Token::HashArrow
| Token::HashLongArrow
| Token::AtArrow
| Token::ArrowAt
| Token::HashMinus
| Token::AtQuestion
| Token::AtAt
| Token::Question
| Token::QuestionAnd
| Token::QuestionPipe
| Token::ExclamationMark
| Token::Overlap
| Token::CaretAt
| Token::StringConcat
| Token::Sharp
| Token::ShiftRight
| Token::ShiftLeft
| Token::Pipe
| Token::Ampersand
| Token::CustomBinaryOperator(_) => PG_OTHER_PREC,
_ => self.prec_unknown(),
};
Some(Ok(precedence))
}
fn parse_statement(&self, parser: &mut Parser) -> Option<Result<Statement, ParserError>> {
if parser.parse_keyword(Keyword::COMMENT) {
Some(parse_comment(parser))
@@ -82,6 +196,26 @@ impl Dialect for PostgreSqlDialect {
fn supports_group_by_expr(&self) -> bool {
true
}
fn prec_mul_div_mod_op(&self) -> u8 {
MUL_DIV_MOD_OP_PREC
}
fn prec_plus_minus(&self) -> u8 {
PLUS_MINUS_PREC
}
fn prec_between(&self) -> u8 {
BETWEEN_LIKE_PREC
}
fn prec_like(&self) -> u8 {
BETWEEN_LIKE_PREC
}
fn prec_unary_not(&self) -> u8 {
NOT_PREC
}
}
pub fn parse_comment(parser: &mut Parser) -> Result<Statement, ParserError> {

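With the Postgres table above, JSON operators such as `->` (at `PG_OTHER_PREC`, 70) now bind tighter than `=` (at `EQ_PREC`, 50), mirroring Postgres itself and the tests added at the bottom of this commit. A short sketch of the observable effect:

```rust
use sqlparser::dialect::PostgreSqlDialect;
use sqlparser::parser::Parser;

fn main() {
    // Nests as (foo -> 'bar') = 'spam' rather than foo -> ('bar' = 'spam').
    let ast = Parser::parse_sql(&PostgreSqlDialect {}, "SELECT foo -> 'bar' = 'spam'")
        .expect("valid SQL");
    println!("{ast:?}");
}
```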
View file

@@ -145,6 +145,15 @@ impl Dialect for SnowflakeDialect {
None
}
fn get_next_precedence(&self, parser: &Parser) -> Option<Result<u8, ParserError>> {
let token = parser.peek_token();
// Snowflake supports the `:` operator (at `::`-level precedence), unlike other dialects
match token.token {
Token::Colon => Some(Ok(self.prec_double_colon())),
_ => None,
}
}
}
/// Parse snowflake create table statement.

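A hedged sketch of the Snowflake change: because `:` now reports `prec_double_colon()`, it binds ahead of comparison operators in expressions that traverse semi-structured values (the query and table name here are illustrative only):

```rust
use sqlparser::dialect::SnowflakeDialect;
use sqlparser::parser::Parser;

fn main() {
    // With `:` at `::`-level precedence this reads as (a:b) = 1, not a:(b = 1).
    let result = Parser::parse_sql(&SnowflakeDialect {}, "SELECT a:b = 1 FROM t");
    println!("{result:?}");
}
```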
View file

@@ -875,7 +875,7 @@ impl<'a> Parser<'a> {
/// Parse a new expression.
pub fn parse_expr(&mut self) -> Result<Expr, ParserError> {
let _guard = self.recursion_counter.try_decrease()?;
self.parse_subexpr(0)
self.parse_subexpr(self.dialect.prec_unknown())
}
/// Parse tokens until the precedence changes.
@@ -897,7 +897,7 @@ impl<'a> Parser<'a> {
}
pub fn parse_interval_expr(&mut self) -> Result<Expr, ParserError> {
let precedence = 0;
let precedence = self.dialect.prec_unknown();
let mut expr = self.parse_prefix()?;
loop {
@@ -918,9 +918,9 @@ impl<'a> Parser<'a> {
let token = self.peek_token();
match token.token {
Token::Word(w) if w.keyword == Keyword::AND => Ok(0),
Token::Word(w) if w.keyword == Keyword::OR => Ok(0),
Token::Word(w) if w.keyword == Keyword::XOR => Ok(0),
Token::Word(w) if w.keyword == Keyword::AND => Ok(self.dialect.prec_unknown()),
Token::Word(w) if w.keyword == Keyword::OR => Ok(self.dialect.prec_unknown()),
Token::Word(w) if w.keyword == Keyword::XOR => Ok(self.dialect.prec_unknown()),
_ => self.get_next_precedence(),
}
}
@@ -1079,7 +1079,7 @@ impl<'a> Parser<'a> {
self.parse_bigquery_struct_literal()
}
Keyword::PRIOR if matches!(self.state, ParserState::ConnectBy) => {
let expr = self.parse_subexpr(Self::PLUS_MINUS_PREC)?;
let expr = self.parse_subexpr(self.dialect.prec_plus_minus())?;
Ok(Expr::Prior(Box::new(expr)))
}
Keyword::MAP if self.peek_token() == Token::LBrace && self.dialect.support_map_literal_syntax() => {
@@ -1167,7 +1167,7 @@ impl<'a> Parser<'a> {
};
Ok(Expr::UnaryOp {
op,
expr: Box::new(self.parse_subexpr(Self::MUL_DIV_MOD_OP_PREC)?),
expr: Box::new(self.parse_subexpr(self.dialect.prec_mul_div_mod_op())?),
})
}
tok @ Token::DoubleExclamationMark
@@ -1187,7 +1187,7 @@ impl<'a> Parser<'a> {
};
Ok(Expr::UnaryOp {
op,
expr: Box::new(self.parse_subexpr(Self::PLUS_MINUS_PREC)?),
expr: Box::new(self.parse_subexpr(self.dialect.prec_plus_minus())?),
})
}
Token::EscapedStringLiteral(_) if dialect_of!(self is PostgreSqlDialect | GenericDialect) =>
@@ -1716,12 +1716,13 @@ impl<'a> Parser<'a> {
}
pub fn parse_position_expr(&mut self, ident: Ident) -> Result<Expr, ParserError> {
let between_prec = self.dialect.prec_between();
let position_expr = self.maybe_parse(|p| {
// PARSE SELECT POSITION('@' in field)
p.expect_token(&Token::LParen)?;
// Parse the subexpr till the IN keyword
let expr = p.parse_subexpr(Self::BETWEEN_PREC)?;
let expr = p.parse_subexpr(between_prec)?;
p.expect_keyword(Keyword::IN)?;
let from = p.parse_expr()?;
p.expect_token(&Token::RParen)?;
@@ -1963,12 +1964,12 @@ impl<'a> Parser<'a> {
}
_ => Ok(Expr::UnaryOp {
op: UnaryOperator::Not,
expr: Box::new(self.parse_subexpr(Self::UNARY_NOT_PREC)?),
expr: Box::new(self.parse_subexpr(self.dialect.prec_unary_not())?),
}),
},
_ => Ok(Expr::UnaryOp {
op: UnaryOperator::Not,
expr: Box::new(self.parse_subexpr(Self::UNARY_NOT_PREC)?),
expr: Box::new(self.parse_subexpr(self.dialect.prec_unary_not())?),
}),
}
}
@@ -2641,7 +2642,7 @@ impl<'a> Parser<'a> {
Ok(Expr::RLike {
negated,
expr: Box::new(expr),
pattern: Box::new(self.parse_subexpr(Self::LIKE_PREC)?),
pattern: Box::new(self.parse_subexpr(self.dialect.prec_like())?),
regexp,
})
} else if self.parse_keyword(Keyword::IN) {
@@ -2652,21 +2653,21 @@ impl<'a> Parser<'a> {
Ok(Expr::Like {
negated,
expr: Box::new(expr),
pattern: Box::new(self.parse_subexpr(Self::LIKE_PREC)?),
pattern: Box::new(self.parse_subexpr(self.dialect.prec_like())?),
escape_char: self.parse_escape_char()?,
})
} else if self.parse_keyword(Keyword::ILIKE) {
Ok(Expr::ILike {
negated,
expr: Box::new(expr),
pattern: Box::new(self.parse_subexpr(Self::LIKE_PREC)?),
pattern: Box::new(self.parse_subexpr(self.dialect.prec_like())?),
escape_char: self.parse_escape_char()?,
})
} else if self.parse_keywords(&[Keyword::SIMILAR, Keyword::TO]) {
Ok(Expr::SimilarTo {
negated,
expr: Box::new(expr),
pattern: Box::new(self.parse_subexpr(Self::LIKE_PREC)?),
pattern: Box::new(self.parse_subexpr(self.dialect.prec_like())?),
escape_char: self.parse_escape_char()?,
})
} else {
@@ -2941,9 +2942,9 @@ impl<'a> Parser<'a> {
pub fn parse_between(&mut self, expr: Expr, negated: bool) -> Result<Expr, ParserError> {
// Stop parsing subexpressions for <low> and <high> on tokens with
// precedence lower than that of `BETWEEN`, such as `AND`, `IS`, etc.
let low = self.parse_subexpr(Self::BETWEEN_PREC)?;
let low = self.parse_subexpr(self.dialect.prec_between())?;
self.expect_keyword(Keyword::AND)?;
let high = self.parse_subexpr(Self::BETWEEN_PREC)?;
let high = self.parse_subexpr(self.dialect.prec_between())?;
Ok(Expr::Between {
expr: Box::new(expr),
negated,
@@ -2962,118 +2963,9 @@ impl<'a> Parser<'a> {
})
}
// Use https://www.postgresql.org/docs/7.0/operators.htm#AEN2026 as a reference
// higher number = higher precedence
//
// NOTE: The pg documentation is incomplete, e.g. the AT TIME ZONE operator
// actually has higher precedence than addition.
// See https://postgrespro.com/list/thread-id/2673331.
const AT_TZ_PREC: u8 = 41;
const MUL_DIV_MOD_OP_PREC: u8 = 40;
const PLUS_MINUS_PREC: u8 = 30;
const XOR_PREC: u8 = 24;
const BETWEEN_PREC: u8 = 20;
const LIKE_PREC: u8 = 19;
const IS_PREC: u8 = 17;
const PG_OTHER_PREC: u8 = 16;
const UNARY_NOT_PREC: u8 = 15;
const AND_PREC: u8 = 10;
const OR_PREC: u8 = 5;
/// Get the precedence of the next token
pub fn get_next_precedence(&self) -> Result<u8, ParserError> {
// allow the dialect to override precedence logic
if let Some(precedence) = self.dialect.get_next_precedence(self) {
return precedence;
}
let token = self.peek_token();
debug!("get_next_precedence() {:?}", token);
let [token_0, token_1, token_2] = self.peek_tokens_with_location();
debug!("0: {token_0} 1: {token_1} 2: {token_2}");
match token.token {
Token::Word(w) if w.keyword == Keyword::OR => Ok(Self::OR_PREC),
Token::Word(w) if w.keyword == Keyword::AND => Ok(Self::AND_PREC),
Token::Word(w) if w.keyword == Keyword::XOR => Ok(Self::XOR_PREC),
Token::Word(w) if w.keyword == Keyword::AT => {
match (self.peek_nth_token(1).token, self.peek_nth_token(2).token) {
(Token::Word(w), Token::Word(w2))
if w.keyword == Keyword::TIME && w2.keyword == Keyword::ZONE =>
{
Ok(Self::AT_TZ_PREC)
}
_ => Ok(0),
}
}
Token::Word(w) if w.keyword == Keyword::NOT => match self.peek_nth_token(1).token {
// The precedence of NOT varies depending on keyword that
// follows it. If it is followed by IN, BETWEEN, or LIKE,
// it takes on the precedence of those tokens. Otherwise, it
// is not an infix operator, and therefore has zero
// precedence.
Token::Word(w) if w.keyword == Keyword::IN => Ok(Self::BETWEEN_PREC),
Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(Self::BETWEEN_PREC),
Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::LIKE_PREC),
Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(Self::LIKE_PREC),
Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(Self::LIKE_PREC),
Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(Self::LIKE_PREC),
Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(Self::LIKE_PREC),
_ => Ok(0),
},
Token::Word(w) if w.keyword == Keyword::IS => Ok(Self::IS_PREC),
Token::Word(w) if w.keyword == Keyword::IN => Ok(Self::BETWEEN_PREC),
Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(Self::BETWEEN_PREC),
Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::LIKE_PREC),
Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(Self::LIKE_PREC),
Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(Self::LIKE_PREC),
Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(Self::LIKE_PREC),
Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(Self::LIKE_PREC),
Token::Word(w) if w.keyword == Keyword::OPERATOR => Ok(Self::BETWEEN_PREC),
Token::Word(w) if w.keyword == Keyword::DIV => Ok(Self::MUL_DIV_MOD_OP_PREC),
Token::Eq
| Token::Lt
| Token::LtEq
| Token::Neq
| Token::Gt
| Token::GtEq
| Token::DoubleEq
| Token::Tilde
| Token::TildeAsterisk
| Token::ExclamationMarkTilde
| Token::ExclamationMarkTildeAsterisk
| Token::DoubleTilde
| Token::DoubleTildeAsterisk
| Token::ExclamationMarkDoubleTilde
| Token::ExclamationMarkDoubleTildeAsterisk
| Token::Spaceship => Ok(20),
Token::Pipe => Ok(21),
Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(22),
Token::Ampersand => Ok(23),
Token::Plus | Token::Minus => Ok(Self::PLUS_MINUS_PREC),
Token::Mul | Token::Div | Token::DuckIntDiv | Token::Mod | Token::StringConcat => {
Ok(Self::MUL_DIV_MOD_OP_PREC)
}
Token::DoubleColon => Ok(50),
Token::Colon if dialect_of!(self is SnowflakeDialect) => Ok(50),
Token::ExclamationMark => Ok(50),
Token::LBracket | Token::Overlap | Token::CaretAt => Ok(50),
Token::Arrow
| Token::LongArrow
| Token::HashArrow
| Token::HashLongArrow
| Token::AtArrow
| Token::ArrowAt
| Token::HashMinus
| Token::AtQuestion
| Token::AtAt
| Token::Question
| Token::QuestionAnd
| Token::QuestionPipe
| Token::CustomBinaryOperator(_) => Ok(Self::PG_OTHER_PREC),
_ => Ok(0),
}
self.dialect.get_next_precedence_full(self)
}
/// Return the first non-whitespace token that has not yet been processed
@@ -8051,7 +7943,7 @@ impl<'a> Parser<'a> {
format_clause: None,
})
} else {
let body = self.parse_boxed_query_body(0)?;
let body = self.parse_boxed_query_body(self.dialect.prec_unknown())?;
let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) {
let order_by_exprs = self.parse_comma_separated(Parser::parse_order_by_expr)?;

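After this change the parser itself carries no precedence constants: every `parse_subexpr` call site above asks the dialect, and `get_next_precedence` simply delegates to `get_next_precedence_full`. A small sketch through the public entry points (the SQL string is illustrative; the `try_with_sql`/`parse_expr` calls are assumed from the crate's existing API):

```rust
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::{Parser, ParserError};

fn main() -> Result<(), ParserError> {
    let dialect = GenericDialect {};
    // MUL_DIV_MOD_OP_PREC (40) beats PLUS_MINUS_PREC (30), so this nests as 1 + (2 * 3).
    let expr = Parser::new(&dialect).try_with_sql("1 + 2 * 3")?.parse_expr()?;
    println!("{expr:?}");
    Ok(())
}
```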
View file

@@ -4476,3 +4476,115 @@ fn test_unicode_string_literal() {
}
}
}
fn check_arrow_precedence(sql: &str, arrow_operator: BinaryOperator) {
assert_eq!(
pg().verified_stmt(sql),
Statement::Query(Box::new(Query {
with: None,
body: Box::new(SetExpr::Select(Box::new(Select {
distinct: None,
top: None,
projection: vec![SelectItem::UnnamedExpr(Expr::BinaryOp {
left: Box::new(Expr::BinaryOp {
left: Box::new(Expr::Identifier(Ident {
value: "foo".to_string(),
quote_style: None,
})),
op: arrow_operator,
right: Box::new(Expr::Value(Value::SingleQuotedString("bar".to_string()))),
}),
op: BinaryOperator::Eq,
right: Box::new(Expr::Value(Value::SingleQuotedString("spam".to_string()))),
})],
into: None,
from: vec![],
lateral_views: vec![],
prewhere: None,
selection: None,
group_by: GroupByExpr::Expressions(vec![], vec![]),
cluster_by: vec![],
distribute_by: vec![],
sort_by: vec![],
having: None,
named_window: vec![],
qualify: None,
window_before_qualify: false,
value_table_mode: None,
connect_by: None,
}))),
order_by: None,
limit: None,
limit_by: vec![],
offset: None,
fetch: None,
locks: vec![],
for_clause: None,
settings: None,
format_clause: None,
}))
)
}
#[test]
fn arrow_precedence() {
check_arrow_precedence("SELECT foo -> 'bar' = 'spam'", BinaryOperator::Arrow);
}
#[test]
fn long_arrow_precedence() {
check_arrow_precedence("SELECT foo ->> 'bar' = 'spam'", BinaryOperator::LongArrow);
}
#[test]
fn arrow_cast_precedence() {
// check this matches postgres where you would need `(foo -> 'bar')::TEXT`
let stmt = pg().verified_stmt("SELECT foo -> 'bar'::TEXT");
assert_eq!(
stmt,
Statement::Query(Box::new(Query {
with: None,
body: Box::new(SetExpr::Select(Box::new(Select {
distinct: None,
top: None,
projection: vec![SelectItem::UnnamedExpr(Expr::BinaryOp {
left: Box::new(Expr::Identifier(Ident {
value: "foo".to_string(),
quote_style: None,
})),
op: BinaryOperator::Arrow,
right: Box::new(Expr::Cast {
kind: CastKind::DoubleColon,
expr: Box::new(Expr::Value(Value::SingleQuotedString("bar".to_string()))),
data_type: DataType::Text,
format: None,
}),
})],
into: None,
from: vec![],
lateral_views: vec![],
prewhere: None,
selection: None,
group_by: GroupByExpr::Expressions(vec![], vec![]),
cluster_by: vec![],
distribute_by: vec![],
sort_by: vec![],
having: None,
named_window: vec![],
qualify: None,
window_before_qualify: false,
value_table_mode: None,
connect_by: None,
}))),
order_by: None,
limit: None,
limit_by: vec![],
offset: None,
fetch: None,
locks: vec![],
for_clause: None,
settings: None,
format_clause: None,
}))
)
}