mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-08-24 16:04:04 +00:00
Add support for arbitrary map access expr (#1179)
This commit is contained in:
parent
127be97369
commit
eda86d8ed7
5 changed files with 194 additions and 116 deletions
|
@ -374,6 +374,40 @@ pub enum CastFormat {
|
||||||
ValueAtTimeZone(Value, Value),
|
ValueAtTimeZone(Value, Value),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Represents the syntax/style used in a map access.
|
||||||
|
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||||
|
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||||
|
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
|
||||||
|
pub enum MapAccessSyntax {
|
||||||
|
/// Access using bracket notation. `mymap[mykey]`
|
||||||
|
Bracket,
|
||||||
|
/// Access using period notation. `mymap.mykey`
|
||||||
|
Period,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Expression used to access a value in a nested structure.
|
||||||
|
///
|
||||||
|
/// Example: `SAFE_OFFSET(0)` in
|
||||||
|
/// ```sql
|
||||||
|
/// SELECT mymap[SAFE_OFFSET(0)];
|
||||||
|
/// ```
|
||||||
|
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||||
|
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||||
|
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
|
||||||
|
pub struct MapAccessKey {
|
||||||
|
pub key: Expr,
|
||||||
|
pub syntax: MapAccessSyntax,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for MapAccessKey {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
match self.syntax {
|
||||||
|
MapAccessSyntax::Bracket => write!(f, "[{}]", self.key),
|
||||||
|
MapAccessSyntax::Period => write!(f, ".{}", self.key),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// An SQL expression of any type.
|
/// An SQL expression of any type.
|
||||||
///
|
///
|
||||||
/// The parser does not distinguish between expressions of different types
|
/// The parser does not distinguish between expressions of different types
|
||||||
|
@ -638,7 +672,7 @@ pub enum Expr {
|
||||||
/// <https://clickhouse.com/docs/en/sql-reference/data-types/map/>
|
/// <https://clickhouse.com/docs/en/sql-reference/data-types/map/>
|
||||||
MapAccess {
|
MapAccess {
|
||||||
column: Box<Expr>,
|
column: Box<Expr>,
|
||||||
keys: Vec<Expr>,
|
keys: Vec<MapAccessKey>,
|
||||||
},
|
},
|
||||||
/// Scalar function call e.g. `LEFT(foo, 5)`
|
/// Scalar function call e.g. `LEFT(foo, 5)`
|
||||||
Function(Function),
|
Function(Function),
|
||||||
|
@ -774,15 +808,7 @@ impl fmt::Display for Expr {
|
||||||
match self {
|
match self {
|
||||||
Expr::Identifier(s) => write!(f, "{s}"),
|
Expr::Identifier(s) => write!(f, "{s}"),
|
||||||
Expr::MapAccess { column, keys } => {
|
Expr::MapAccess { column, keys } => {
|
||||||
write!(f, "{column}")?;
|
write!(f, "{column}{}", display_separated(keys, ""))
|
||||||
for k in keys {
|
|
||||||
match k {
|
|
||||||
k @ Expr::Value(Value::Number(_, _)) => write!(f, "[{k}]")?,
|
|
||||||
Expr::Value(Value::SingleQuotedString(s)) => write!(f, "[\"{s}\"]")?,
|
|
||||||
_ => write!(f, "[{k}]")?,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
Expr::Wildcard => f.write_str("*"),
|
Expr::Wildcard => f.write_str("*"),
|
||||||
Expr::QualifiedWildcard(prefix) => write!(f, "{}.*", prefix),
|
Expr::QualifiedWildcard(prefix) => write!(f, "{}.*", prefix),
|
||||||
|
|
|
@ -2608,23 +2608,43 @@ impl<'a> Parser<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn parse_map_access(&mut self, expr: Expr) -> Result<Expr, ParserError> {
|
pub fn parse_map_access(&mut self, expr: Expr) -> Result<Expr, ParserError> {
|
||||||
let key = self.parse_map_key()?;
|
let key = self.parse_expr()?;
|
||||||
let tok = self.consume_token(&Token::RBracket);
|
self.expect_token(&Token::RBracket)?;
|
||||||
debug!("Tok: {}", tok);
|
|
||||||
let mut key_parts: Vec<Expr> = vec![key];
|
let mut keys = vec![MapAccessKey {
|
||||||
while self.consume_token(&Token::LBracket) {
|
key,
|
||||||
let key = self.parse_map_key()?;
|
syntax: MapAccessSyntax::Bracket,
|
||||||
let tok = self.consume_token(&Token::RBracket);
|
}];
|
||||||
debug!("Tok: {}", tok);
|
loop {
|
||||||
key_parts.push(key);
|
let key = match self.peek_token().token {
|
||||||
|
Token::LBracket => {
|
||||||
|
self.next_token(); // consume `[`
|
||||||
|
let key = self.parse_expr()?;
|
||||||
|
self.expect_token(&Token::RBracket)?;
|
||||||
|
MapAccessKey {
|
||||||
|
key,
|
||||||
|
syntax: MapAccessSyntax::Bracket,
|
||||||
}
|
}
|
||||||
match expr {
|
|
||||||
e @ Expr::Identifier(_) | e @ Expr::CompoundIdentifier(_) => Ok(Expr::MapAccess {
|
|
||||||
column: Box::new(e),
|
|
||||||
keys: key_parts,
|
|
||||||
}),
|
|
||||||
_ => Ok(expr),
|
|
||||||
}
|
}
|
||||||
|
// Access on BigQuery nested and repeated expressions can
|
||||||
|
// mix notations in the same expression.
|
||||||
|
// https://cloud.google.com/bigquery/docs/nested-repeated#query_nested_and_repeated_columns
|
||||||
|
Token::Period if dialect_of!(self is BigQueryDialect) => {
|
||||||
|
self.next_token(); // consume `.`
|
||||||
|
MapAccessKey {
|
||||||
|
key: self.parse_expr()?,
|
||||||
|
syntax: MapAccessSyntax::Period,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => break,
|
||||||
|
};
|
||||||
|
keys.push(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Expr::MapAccess {
|
||||||
|
column: Box::new(expr),
|
||||||
|
keys,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parses the parens following the `[ NOT ] IN` operator
|
/// Parses the parens following the `[ NOT ] IN` operator
|
||||||
|
@ -6329,31 +6349,6 @@ impl<'a> Parser<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse a map key string
|
|
||||||
pub fn parse_map_key(&mut self) -> Result<Expr, ParserError> {
|
|
||||||
let next_token = self.next_token();
|
|
||||||
match next_token.token {
|
|
||||||
// handle bigquery offset subscript operator which overlaps with OFFSET operator
|
|
||||||
Token::Word(Word { value, keyword, .. })
|
|
||||||
if (dialect_of!(self is BigQueryDialect) && keyword == Keyword::OFFSET) =>
|
|
||||||
{
|
|
||||||
self.parse_function(ObjectName(vec![Ident::new(value)]))
|
|
||||||
}
|
|
||||||
Token::Word(Word { value, keyword, .. }) if (keyword == Keyword::NoKeyword) => {
|
|
||||||
if self.peek_token() == Token::LParen {
|
|
||||||
return self.parse_function(ObjectName(vec![Ident::new(value)]));
|
|
||||||
}
|
|
||||||
Ok(Expr::Value(Value::SingleQuotedString(value)))
|
|
||||||
}
|
|
||||||
Token::SingleQuotedString(s) => Ok(Expr::Value(Value::SingleQuotedString(s))),
|
|
||||||
#[cfg(not(feature = "bigdecimal"))]
|
|
||||||
Token::Number(s, _) => Ok(Expr::Value(Value::Number(s, false))),
|
|
||||||
#[cfg(feature = "bigdecimal")]
|
|
||||||
Token::Number(s, _) => Ok(Expr::Value(Value::Number(s.parse().unwrap(), false))),
|
|
||||||
_ => self.expected("literal string, number or function", next_token),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Parse a SQL datatype (in the context of a CREATE TABLE statement for example)
|
/// Parse a SQL datatype (in the context of a CREATE TABLE statement for example)
|
||||||
pub fn parse_data_type(&mut self) -> Result<DataType, ParserError> {
|
pub fn parse_data_type(&mut self) -> Result<DataType, ParserError> {
|
||||||
let (ty, trailing_bracket) = self.parse_data_type_helper()?;
|
let (ty, trailing_bracket) = self.parse_data_type_helper()?;
|
||||||
|
|
|
@ -1402,40 +1402,49 @@ fn bigquery_and_generic() -> TestedDialects {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_map_access_offset() {
|
fn parse_map_access_expr() {
|
||||||
let sql = "SELECT d[offset(0)]";
|
let sql = "users[-1][safe_offset(2)].a.b";
|
||||||
let _select = bigquery().verified_only_select(sql);
|
let expr = bigquery().verified_expr(sql);
|
||||||
assert_eq!(
|
|
||||||
_select.projection[0],
|
fn map_access_key(key: Expr, syntax: MapAccessSyntax) -> MapAccessKey {
|
||||||
SelectItem::UnnamedExpr(Expr::MapAccess {
|
MapAccessKey { key, syntax }
|
||||||
column: Box::new(Expr::Identifier(Ident {
|
}
|
||||||
value: "d".to_string(),
|
let expected = Expr::MapAccess {
|
||||||
quote_style: None,
|
column: Expr::Identifier(Ident::new("users")).into(),
|
||||||
})),
|
keys: vec![
|
||||||
keys: vec![Expr::Function(Function {
|
map_access_key(
|
||||||
name: ObjectName(vec!["offset".into()]),
|
Expr::UnaryOp {
|
||||||
|
op: UnaryOperator::Minus,
|
||||||
|
expr: Expr::Value(number("1")).into(),
|
||||||
|
},
|
||||||
|
MapAccessSyntax::Bracket,
|
||||||
|
),
|
||||||
|
map_access_key(
|
||||||
|
Expr::Function(Function {
|
||||||
|
name: ObjectName(vec![Ident::new("safe_offset")]),
|
||||||
args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(
|
args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(
|
||||||
number("0")
|
number("2"),
|
||||||
))),],
|
)))],
|
||||||
null_treatment: None,
|
|
||||||
filter: None,
|
filter: None,
|
||||||
|
null_treatment: None,
|
||||||
over: None,
|
over: None,
|
||||||
distinct: false,
|
distinct: false,
|
||||||
special: false,
|
special: false,
|
||||||
order_by: vec![],
|
order_by: vec![],
|
||||||
})],
|
}),
|
||||||
})
|
MapAccessSyntax::Bracket,
|
||||||
);
|
),
|
||||||
|
map_access_key(
|
||||||
|
Expr::CompoundIdentifier(vec![Ident::new("a"), Ident::new("b")]),
|
||||||
|
MapAccessSyntax::Period,
|
||||||
|
),
|
||||||
|
],
|
||||||
|
};
|
||||||
|
assert_eq!(expr, expected);
|
||||||
|
|
||||||
// test other operators
|
let sql = "SELECT myfunc()[-1].a[SAFE_OFFSET(2)].b";
|
||||||
for sql in [
|
|
||||||
"SELECT d[SAFE_OFFSET(0)]",
|
|
||||||
"SELECT d[ORDINAL(0)]",
|
|
||||||
"SELECT d[SAFE_ORDINAL(0)]",
|
|
||||||
] {
|
|
||||||
bigquery().verified_only_select(sql);
|
bigquery().verified_only_select(sql);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_bigquery_trim() {
|
fn test_bigquery_trim() {
|
||||||
|
|
|
@ -39,12 +39,13 @@ fn parse_map_access_expr() {
|
||||||
value: "string_values".to_string(),
|
value: "string_values".to_string(),
|
||||||
quote_style: None,
|
quote_style: None,
|
||||||
})),
|
})),
|
||||||
keys: vec![Expr::Function(Function {
|
keys: vec![MapAccessKey {
|
||||||
|
key: Expr::Function(Function {
|
||||||
name: ObjectName(vec!["indexOf".into()]),
|
name: ObjectName(vec!["indexOf".into()]),
|
||||||
args: vec![
|
args: vec![
|
||||||
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(Ident::new(
|
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(
|
||||||
"string_names"
|
Ident::new("string_names")
|
||||||
)))),
|
))),
|
||||||
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(
|
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(
|
||||||
Value::SingleQuotedString("endpoint".to_string())
|
Value::SingleQuotedString("endpoint".to_string())
|
||||||
))),
|
))),
|
||||||
|
@ -55,7 +56,9 @@ fn parse_map_access_expr() {
|
||||||
distinct: false,
|
distinct: false,
|
||||||
special: false,
|
special: false,
|
||||||
order_by: vec![],
|
order_by: vec![],
|
||||||
})],
|
}),
|
||||||
|
syntax: MapAccessSyntax::Bracket
|
||||||
|
}],
|
||||||
})],
|
})],
|
||||||
into: None,
|
into: None,
|
||||||
from: vec![TableWithJoins {
|
from: vec![TableWithJoins {
|
||||||
|
@ -80,7 +83,8 @@ fn parse_map_access_expr() {
|
||||||
right: Box::new(BinaryOp {
|
right: Box::new(BinaryOp {
|
||||||
left: Box::new(MapAccess {
|
left: Box::new(MapAccess {
|
||||||
column: Box::new(Identifier(Ident::new("string_value"))),
|
column: Box::new(Identifier(Ident::new("string_value"))),
|
||||||
keys: vec![Expr::Function(Function {
|
keys: vec![MapAccessKey {
|
||||||
|
key: Expr::Function(Function {
|
||||||
name: ObjectName(vec![Ident::new("indexOf")]),
|
name: ObjectName(vec![Ident::new("indexOf")]),
|
||||||
args: vec![
|
args: vec![
|
||||||
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(
|
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(
|
||||||
|
@ -96,7 +100,9 @@ fn parse_map_access_expr() {
|
||||||
distinct: false,
|
distinct: false,
|
||||||
special: false,
|
special: false,
|
||||||
order_by: vec![],
|
order_by: vec![],
|
||||||
})],
|
}),
|
||||||
|
syntax: MapAccessSyntax::Bracket
|
||||||
|
}],
|
||||||
}),
|
}),
|
||||||
op: BinaryOperator::NotEq,
|
op: BinaryOperator::NotEq,
|
||||||
right: Box::new(Expr::Value(Value::SingleQuotedString("foo".to_string()))),
|
right: Box::new(Expr::Value(Value::SingleQuotedString("foo".to_string()))),
|
||||||
|
|
|
@ -8643,3 +8643,45 @@ fn test_buffer_reuse() {
|
||||||
p.parse_statements().unwrap();
|
p.parse_statements().unwrap();
|
||||||
let _ = p.into_tokens();
|
let _ = p.into_tokens();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_map_access_expr() {
|
||||||
|
let sql = "users[-1][safe_offset(2)]";
|
||||||
|
let dialects = TestedDialects {
|
||||||
|
dialects: vec![Box::new(BigQueryDialect {}), Box::new(ClickHouseDialect {})],
|
||||||
|
options: None,
|
||||||
|
};
|
||||||
|
let expr = dialects.verified_expr(sql);
|
||||||
|
let expected = Expr::MapAccess {
|
||||||
|
column: Expr::Identifier(Ident::new("users")).into(),
|
||||||
|
keys: vec![
|
||||||
|
MapAccessKey {
|
||||||
|
key: Expr::UnaryOp {
|
||||||
|
op: UnaryOperator::Minus,
|
||||||
|
expr: Expr::Value(number("1")).into(),
|
||||||
|
},
|
||||||
|
syntax: MapAccessSyntax::Bracket,
|
||||||
|
},
|
||||||
|
MapAccessKey {
|
||||||
|
key: Expr::Function(Function {
|
||||||
|
name: ObjectName(vec![Ident::new("safe_offset")]),
|
||||||
|
args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(
|
||||||
|
number("2"),
|
||||||
|
)))],
|
||||||
|
filter: None,
|
||||||
|
null_treatment: None,
|
||||||
|
over: None,
|
||||||
|
distinct: false,
|
||||||
|
special: false,
|
||||||
|
order_by: vec![],
|
||||||
|
}),
|
||||||
|
syntax: MapAccessSyntax::Bracket,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
};
|
||||||
|
assert_eq!(expr, expected);
|
||||||
|
|
||||||
|
for sql in ["users[1]", "a[array_length(b) - 1 + 2][c + 3][d * 4]"] {
|
||||||
|
let _ = dialects.verified_expr(sql);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue