mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-09-03 20:50:33 +00:00
Support Dialect
level precedence, update Postgres Dialect
to match Postgres (#1360)
This commit is contained in:
parent
8f8c96f87f
commit
a5480ae498
6 changed files with 440 additions and 130 deletions
|
@ -875,7 +875,7 @@ impl<'a> Parser<'a> {
|
|||
/// Parse a new expression.
|
||||
pub fn parse_expr(&mut self) -> Result<Expr, ParserError> {
|
||||
let _guard = self.recursion_counter.try_decrease()?;
|
||||
self.parse_subexpr(0)
|
||||
self.parse_subexpr(self.dialect.prec_unknown())
|
||||
}
|
||||
|
||||
/// Parse tokens until the precedence changes.
|
||||
|
@ -897,7 +897,7 @@ impl<'a> Parser<'a> {
|
|||
}
|
||||
|
||||
pub fn parse_interval_expr(&mut self) -> Result<Expr, ParserError> {
|
||||
let precedence = 0;
|
||||
let precedence = self.dialect.prec_unknown();
|
||||
let mut expr = self.parse_prefix()?;
|
||||
|
||||
loop {
|
||||
|
@ -918,9 +918,9 @@ impl<'a> Parser<'a> {
|
|||
let token = self.peek_token();
|
||||
|
||||
match token.token {
|
||||
Token::Word(w) if w.keyword == Keyword::AND => Ok(0),
|
||||
Token::Word(w) if w.keyword == Keyword::OR => Ok(0),
|
||||
Token::Word(w) if w.keyword == Keyword::XOR => Ok(0),
|
||||
Token::Word(w) if w.keyword == Keyword::AND => Ok(self.dialect.prec_unknown()),
|
||||
Token::Word(w) if w.keyword == Keyword::OR => Ok(self.dialect.prec_unknown()),
|
||||
Token::Word(w) if w.keyword == Keyword::XOR => Ok(self.dialect.prec_unknown()),
|
||||
_ => self.get_next_precedence(),
|
||||
}
|
||||
}
|
||||
|
@ -1079,7 +1079,7 @@ impl<'a> Parser<'a> {
|
|||
self.parse_bigquery_struct_literal()
|
||||
}
|
||||
Keyword::PRIOR if matches!(self.state, ParserState::ConnectBy) => {
|
||||
let expr = self.parse_subexpr(Self::PLUS_MINUS_PREC)?;
|
||||
let expr = self.parse_subexpr(self.dialect.prec_plus_minus())?;
|
||||
Ok(Expr::Prior(Box::new(expr)))
|
||||
}
|
||||
Keyword::MAP if self.peek_token() == Token::LBrace && self.dialect.support_map_literal_syntax() => {
|
||||
|
@ -1167,7 +1167,7 @@ impl<'a> Parser<'a> {
|
|||
};
|
||||
Ok(Expr::UnaryOp {
|
||||
op,
|
||||
expr: Box::new(self.parse_subexpr(Self::MUL_DIV_MOD_OP_PREC)?),
|
||||
expr: Box::new(self.parse_subexpr(self.dialect.prec_mul_div_mod_op())?),
|
||||
})
|
||||
}
|
||||
tok @ Token::DoubleExclamationMark
|
||||
|
@ -1187,7 +1187,7 @@ impl<'a> Parser<'a> {
|
|||
};
|
||||
Ok(Expr::UnaryOp {
|
||||
op,
|
||||
expr: Box::new(self.parse_subexpr(Self::PLUS_MINUS_PREC)?),
|
||||
expr: Box::new(self.parse_subexpr(self.dialect.prec_plus_minus())?),
|
||||
})
|
||||
}
|
||||
Token::EscapedStringLiteral(_) if dialect_of!(self is PostgreSqlDialect | GenericDialect) =>
|
||||
|
@ -1716,12 +1716,13 @@ impl<'a> Parser<'a> {
|
|||
}
|
||||
|
||||
pub fn parse_position_expr(&mut self, ident: Ident) -> Result<Expr, ParserError> {
|
||||
let between_prec = self.dialect.prec_between();
|
||||
let position_expr = self.maybe_parse(|p| {
|
||||
// PARSE SELECT POSITION('@' in field)
|
||||
p.expect_token(&Token::LParen)?;
|
||||
|
||||
// Parse the subexpr till the IN keyword
|
||||
let expr = p.parse_subexpr(Self::BETWEEN_PREC)?;
|
||||
let expr = p.parse_subexpr(between_prec)?;
|
||||
p.expect_keyword(Keyword::IN)?;
|
||||
let from = p.parse_expr()?;
|
||||
p.expect_token(&Token::RParen)?;
|
||||
|
@ -1963,12 +1964,12 @@ impl<'a> Parser<'a> {
|
|||
}
|
||||
_ => Ok(Expr::UnaryOp {
|
||||
op: UnaryOperator::Not,
|
||||
expr: Box::new(self.parse_subexpr(Self::UNARY_NOT_PREC)?),
|
||||
expr: Box::new(self.parse_subexpr(self.dialect.prec_unary_not())?),
|
||||
}),
|
||||
},
|
||||
_ => Ok(Expr::UnaryOp {
|
||||
op: UnaryOperator::Not,
|
||||
expr: Box::new(self.parse_subexpr(Self::UNARY_NOT_PREC)?),
|
||||
expr: Box::new(self.parse_subexpr(self.dialect.prec_unary_not())?),
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
@ -2641,7 +2642,7 @@ impl<'a> Parser<'a> {
|
|||
Ok(Expr::RLike {
|
||||
negated,
|
||||
expr: Box::new(expr),
|
||||
pattern: Box::new(self.parse_subexpr(Self::LIKE_PREC)?),
|
||||
pattern: Box::new(self.parse_subexpr(self.dialect.prec_like())?),
|
||||
regexp,
|
||||
})
|
||||
} else if self.parse_keyword(Keyword::IN) {
|
||||
|
@ -2652,21 +2653,21 @@ impl<'a> Parser<'a> {
|
|||
Ok(Expr::Like {
|
||||
negated,
|
||||
expr: Box::new(expr),
|
||||
pattern: Box::new(self.parse_subexpr(Self::LIKE_PREC)?),
|
||||
pattern: Box::new(self.parse_subexpr(self.dialect.prec_like())?),
|
||||
escape_char: self.parse_escape_char()?,
|
||||
})
|
||||
} else if self.parse_keyword(Keyword::ILIKE) {
|
||||
Ok(Expr::ILike {
|
||||
negated,
|
||||
expr: Box::new(expr),
|
||||
pattern: Box::new(self.parse_subexpr(Self::LIKE_PREC)?),
|
||||
pattern: Box::new(self.parse_subexpr(self.dialect.prec_like())?),
|
||||
escape_char: self.parse_escape_char()?,
|
||||
})
|
||||
} else if self.parse_keywords(&[Keyword::SIMILAR, Keyword::TO]) {
|
||||
Ok(Expr::SimilarTo {
|
||||
negated,
|
||||
expr: Box::new(expr),
|
||||
pattern: Box::new(self.parse_subexpr(Self::LIKE_PREC)?),
|
||||
pattern: Box::new(self.parse_subexpr(self.dialect.prec_like())?),
|
||||
escape_char: self.parse_escape_char()?,
|
||||
})
|
||||
} else {
|
||||
|
@ -2941,9 +2942,9 @@ impl<'a> Parser<'a> {
|
|||
pub fn parse_between(&mut self, expr: Expr, negated: bool) -> Result<Expr, ParserError> {
|
||||
// Stop parsing subexpressions for <low> and <high> on tokens with
|
||||
// precedence lower than that of `BETWEEN`, such as `AND`, `IS`, etc.
|
||||
let low = self.parse_subexpr(Self::BETWEEN_PREC)?;
|
||||
let low = self.parse_subexpr(self.dialect.prec_between())?;
|
||||
self.expect_keyword(Keyword::AND)?;
|
||||
let high = self.parse_subexpr(Self::BETWEEN_PREC)?;
|
||||
let high = self.parse_subexpr(self.dialect.prec_between())?;
|
||||
Ok(Expr::Between {
|
||||
expr: Box::new(expr),
|
||||
negated,
|
||||
|
@ -2962,118 +2963,9 @@ impl<'a> Parser<'a> {
|
|||
})
|
||||
}
|
||||
|
||||
// Use https://www.postgresql.org/docs/7.0/operators.htm#AEN2026 as a reference
|
||||
// higher number = higher precedence
|
||||
//
|
||||
// NOTE: The pg documentation is incomplete, e.g. the AT TIME ZONE operator
|
||||
// actually has higher precedence than addition.
|
||||
// See https://postgrespro.com/list/thread-id/2673331.
|
||||
const AT_TZ_PREC: u8 = 41;
|
||||
const MUL_DIV_MOD_OP_PREC: u8 = 40;
|
||||
const PLUS_MINUS_PREC: u8 = 30;
|
||||
const XOR_PREC: u8 = 24;
|
||||
const BETWEEN_PREC: u8 = 20;
|
||||
const LIKE_PREC: u8 = 19;
|
||||
const IS_PREC: u8 = 17;
|
||||
const PG_OTHER_PREC: u8 = 16;
|
||||
const UNARY_NOT_PREC: u8 = 15;
|
||||
const AND_PREC: u8 = 10;
|
||||
const OR_PREC: u8 = 5;
|
||||
|
||||
/// Get the precedence of the next token
|
||||
pub fn get_next_precedence(&self) -> Result<u8, ParserError> {
|
||||
// allow the dialect to override precedence logic
|
||||
if let Some(precedence) = self.dialect.get_next_precedence(self) {
|
||||
return precedence;
|
||||
}
|
||||
|
||||
let token = self.peek_token();
|
||||
debug!("get_next_precedence() {:?}", token);
|
||||
let [token_0, token_1, token_2] = self.peek_tokens_with_location();
|
||||
debug!("0: {token_0} 1: {token_1} 2: {token_2}");
|
||||
match token.token {
|
||||
Token::Word(w) if w.keyword == Keyword::OR => Ok(Self::OR_PREC),
|
||||
Token::Word(w) if w.keyword == Keyword::AND => Ok(Self::AND_PREC),
|
||||
Token::Word(w) if w.keyword == Keyword::XOR => Ok(Self::XOR_PREC),
|
||||
|
||||
Token::Word(w) if w.keyword == Keyword::AT => {
|
||||
match (self.peek_nth_token(1).token, self.peek_nth_token(2).token) {
|
||||
(Token::Word(w), Token::Word(w2))
|
||||
if w.keyword == Keyword::TIME && w2.keyword == Keyword::ZONE =>
|
||||
{
|
||||
Ok(Self::AT_TZ_PREC)
|
||||
}
|
||||
_ => Ok(0),
|
||||
}
|
||||
}
|
||||
|
||||
Token::Word(w) if w.keyword == Keyword::NOT => match self.peek_nth_token(1).token {
|
||||
// The precedence of NOT varies depending on keyword that
|
||||
// follows it. If it is followed by IN, BETWEEN, or LIKE,
|
||||
// it takes on the precedence of those tokens. Otherwise, it
|
||||
// is not an infix operator, and therefore has zero
|
||||
// precedence.
|
||||
Token::Word(w) if w.keyword == Keyword::IN => Ok(Self::BETWEEN_PREC),
|
||||
Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(Self::BETWEEN_PREC),
|
||||
Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::LIKE_PREC),
|
||||
Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(Self::LIKE_PREC),
|
||||
Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(Self::LIKE_PREC),
|
||||
Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(Self::LIKE_PREC),
|
||||
Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(Self::LIKE_PREC),
|
||||
_ => Ok(0),
|
||||
},
|
||||
Token::Word(w) if w.keyword == Keyword::IS => Ok(Self::IS_PREC),
|
||||
Token::Word(w) if w.keyword == Keyword::IN => Ok(Self::BETWEEN_PREC),
|
||||
Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(Self::BETWEEN_PREC),
|
||||
Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::LIKE_PREC),
|
||||
Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(Self::LIKE_PREC),
|
||||
Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(Self::LIKE_PREC),
|
||||
Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(Self::LIKE_PREC),
|
||||
Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(Self::LIKE_PREC),
|
||||
Token::Word(w) if w.keyword == Keyword::OPERATOR => Ok(Self::BETWEEN_PREC),
|
||||
Token::Word(w) if w.keyword == Keyword::DIV => Ok(Self::MUL_DIV_MOD_OP_PREC),
|
||||
Token::Eq
|
||||
| Token::Lt
|
||||
| Token::LtEq
|
||||
| Token::Neq
|
||||
| Token::Gt
|
||||
| Token::GtEq
|
||||
| Token::DoubleEq
|
||||
| Token::Tilde
|
||||
| Token::TildeAsterisk
|
||||
| Token::ExclamationMarkTilde
|
||||
| Token::ExclamationMarkTildeAsterisk
|
||||
| Token::DoubleTilde
|
||||
| Token::DoubleTildeAsterisk
|
||||
| Token::ExclamationMarkDoubleTilde
|
||||
| Token::ExclamationMarkDoubleTildeAsterisk
|
||||
| Token::Spaceship => Ok(20),
|
||||
Token::Pipe => Ok(21),
|
||||
Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(22),
|
||||
Token::Ampersand => Ok(23),
|
||||
Token::Plus | Token::Minus => Ok(Self::PLUS_MINUS_PREC),
|
||||
Token::Mul | Token::Div | Token::DuckIntDiv | Token::Mod | Token::StringConcat => {
|
||||
Ok(Self::MUL_DIV_MOD_OP_PREC)
|
||||
}
|
||||
Token::DoubleColon => Ok(50),
|
||||
Token::Colon if dialect_of!(self is SnowflakeDialect) => Ok(50),
|
||||
Token::ExclamationMark => Ok(50),
|
||||
Token::LBracket | Token::Overlap | Token::CaretAt => Ok(50),
|
||||
Token::Arrow
|
||||
| Token::LongArrow
|
||||
| Token::HashArrow
|
||||
| Token::HashLongArrow
|
||||
| Token::AtArrow
|
||||
| Token::ArrowAt
|
||||
| Token::HashMinus
|
||||
| Token::AtQuestion
|
||||
| Token::AtAt
|
||||
| Token::Question
|
||||
| Token::QuestionAnd
|
||||
| Token::QuestionPipe
|
||||
| Token::CustomBinaryOperator(_) => Ok(Self::PG_OTHER_PREC),
|
||||
_ => Ok(0),
|
||||
}
|
||||
self.dialect.get_next_precedence_full(self)
|
||||
}
|
||||
|
||||
/// Return the first non-whitespace token that has not yet been processed
|
||||
|
@ -8051,7 +7943,7 @@ impl<'a> Parser<'a> {
|
|||
format_clause: None,
|
||||
})
|
||||
} else {
|
||||
let body = self.parse_boxed_query_body(0)?;
|
||||
let body = self.parse_boxed_query_body(self.dialect.prec_unknown())?;
|
||||
|
||||
let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) {
|
||||
let order_by_exprs = self.parse_comma_separated(Parser::parse_order_by_expr)?;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue