Add support for arbitrary map access expr (#1179)

This commit is contained in:
Ifeanyi Ubah 2024-04-09 23:21:22 +02:00 committed by GitHub
parent 127be97369
commit eda86d8ed7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 194 additions and 116 deletions

View file

@ -374,6 +374,40 @@ pub enum CastFormat {
ValueAtTimeZone(Value, Value),
}
/// Represents the syntax/style used in a map access.
///
/// The variant decides how a [`MapAccessKey`] is rendered: `Bracket` wraps the
/// key in `[` and `]`, `Period` prefixes it with `.`.
///
/// `PartialOrd`/`Ord` are derived, so the declaration order of the variants is
/// also their sort order (`Bracket` before `Period`).
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum MapAccessSyntax {
/// Access using bracket notation. `mymap[mykey]`
Bracket,
/// Access using period notation. `mymap.mykey`
Period,
}
/// Expression used to access a value in a nested structure.
///
/// Example: `SAFE_OFFSET(0)` in
/// ```sql
/// SELECT mymap[SAFE_OFFSET(0)];
/// ```
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct MapAccessKey {
/// The expression selecting the element, i.e. the part written inside
/// `[...]` or after `.`.
pub key: Expr,
/// The notation the key was written in; it controls how the key is displayed.
pub syntax: MapAccessSyntax,
}
impl fmt::Display for MapAccessKey {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self.syntax {
MapAccessSyntax::Bracket => write!(f, "[{}]", self.key),
MapAccessSyntax::Period => write!(f, ".{}", self.key),
}
}
}
/// An SQL expression of any type.
///
/// The parser does not distinguish between expressions of different types
@ -638,7 +672,7 @@ pub enum Expr {
/// <https://clickhouse.com/docs/en/sql-reference/data-types/map/>
MapAccess {
column: Box<Expr>,
keys: Vec<Expr>,
keys: Vec<MapAccessKey>,
},
/// Scalar function call e.g. `LEFT(foo, 5)`
Function(Function),
@ -774,15 +808,7 @@ impl fmt::Display for Expr {
match self {
Expr::Identifier(s) => write!(f, "{s}"),
Expr::MapAccess { column, keys } => {
write!(f, "{column}")?;
for k in keys {
match k {
k @ Expr::Value(Value::Number(_, _)) => write!(f, "[{k}]")?,
Expr::Value(Value::SingleQuotedString(s)) => write!(f, "[\"{s}\"]")?,
_ => write!(f, "[{k}]")?,
}
}
Ok(())
write!(f, "{column}{}", display_separated(keys, ""))
}
Expr::Wildcard => f.write_str("*"),
Expr::QualifiedWildcard(prefix) => write!(f, "{}.*", prefix),

View file

@ -2608,23 +2608,43 @@ impl<'a> Parser<'a> {
}
/// Parses a chain of map/subscript accesses applied to `expr`.
///
/// The body starts by reading a key and then the closing `]`, so the opening
/// `[` has presumably been consumed by the caller already — TODO confirm.
/// Further `[key]` accesses (and, for BigQuery, `.key` accesses) are collected
/// in a loop and the result is returned as [`Expr::MapAccess`].
pub fn parse_map_access(&mut self, expr: Expr) -> Result<Expr, ParserError> {
let key = self.parse_map_key()?;
// The result of `consume_token` is only logged here, so a missing `]` is not
// rejected at this point.
let tok = self.consume_token(&Token::RBracket);
debug!("Tok: {}", tok);
let mut key_parts: Vec<Expr> = vec![key];
while self.consume_token(&Token::LBracket) {
let key = self.parse_map_key()?;
let tok = self.consume_token(&Token::RBracket);
debug!("Tok: {}", tok);
key_parts.push(key);
}
match expr {
e @ Expr::Identifier(_) | e @ Expr::CompoundIdentifier(_) => Ok(Expr::MapAccess {
column: Box::new(e),
keys: key_parts,
}),
_ => Ok(expr),
let key = self.parse_expr()?;
self.expect_token(&Token::RBracket)?;
let mut keys = vec![MapAccessKey {
key,
syntax: MapAccessSyntax::Bracket,
}];
loop {
let key = match self.peek_token().token {
Token::LBracket => {
self.next_token(); // consume `[`
let key = self.parse_expr()?;
self.expect_token(&Token::RBracket)?;
MapAccessKey {
key,
syntax: MapAccessSyntax::Bracket,
}
}
// Access on BigQuery nested and repeated expressions can
// mix notations in the same expression.
// https://cloud.google.com/bigquery/docs/nested-repeated#query_nested_and_repeated_columns
Token::Period if dialect_of!(self is BigQueryDialect) => {
self.next_token(); // consume `.`
// NOTE(review): a full expression is parsed after `.`, so the key
// presumably absorbs any operators that follow (e.g. `.a = 1`) —
// confirm this is intended.
MapAccessKey {
key: self.parse_expr()?,
syntax: MapAccessSyntax::Period,
}
}
_ => break,
};
keys.push(key);
}
Ok(Expr::MapAccess {
column: Box::new(expr),
keys,
})
}
/// Parses the parens following the `[ NOT ] IN` operator
@ -6329,31 +6349,6 @@ impl<'a> Parser<'a> {
}
}
/// Parse a map key string
///
/// Consumes the next token and turns it into an [`Expr`]:
/// - the `OFFSET` keyword (BigQuery dialect only) is parsed as a function call,
/// - a non-keyword word followed by `(` is parsed as a function call, otherwise
///   the word becomes a single-quoted string value,
/// - a single-quoted string or a number becomes the matching literal value.
///
/// Any other token is reported through `self.expected(...)`.
pub fn parse_map_key(&mut self) -> Result<Expr, ParserError> {
let next_token = self.next_token();
match next_token.token {
// handle bigquery offset subscript operator which overlaps with OFFSET operator
Token::Word(Word { value, keyword, .. })
if (dialect_of!(self is BigQueryDialect) && keyword == Keyword::OFFSET) =>
{
self.parse_function(ObjectName(vec![Ident::new(value)]))
}
Token::Word(Word { value, keyword, .. }) if (keyword == Keyword::NoKeyword) => {
// A word directly followed by `(` is a function call; a bare word is
// kept as a string key.
if self.peek_token() == Token::LParen {
return self.parse_function(ObjectName(vec![Ident::new(value)]));
}
Ok(Expr::Value(Value::SingleQuotedString(value)))
}
Token::SingleQuotedString(s) => Ok(Expr::Value(Value::SingleQuotedString(s))),
#[cfg(not(feature = "bigdecimal"))]
Token::Number(s, _) => Ok(Expr::Value(Value::Number(s, false))),
// NOTE(review): `unwrap` panics if the number text ever fails to parse —
// confirm the tokenizer can only produce parseable number strings.
#[cfg(feature = "bigdecimal")]
Token::Number(s, _) => Ok(Expr::Value(Value::Number(s.parse().unwrap(), false))),
_ => self.expected("literal string, number or function", next_token),
}
}
/// Parse a SQL datatype (in the context of a CREATE TABLE statement for example)
pub fn parse_data_type(&mut self) -> Result<DataType, ParserError> {
let (ty, trailing_bracket) = self.parse_data_type_helper()?;

View file

@ -1402,39 +1402,48 @@ fn bigquery_and_generic() -> TestedDialects {
}
// Checks that a BigQuery map access mixing bracket and period notation parses
// into the expected `Expr::MapAccess` tree.
#[test]
fn parse_map_access_offset() {
let sql = "SELECT d[offset(0)]";
let _select = bigquery().verified_only_select(sql);
assert_eq!(
_select.projection[0],
SelectItem::UnnamedExpr(Expr::MapAccess {
column: Box::new(Expr::Identifier(Ident {
value: "d".to_string(),
quote_style: None,
})),
keys: vec![Expr::Function(Function {
name: ObjectName(vec!["offset".into()]),
args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(
number("0")
))),],
null_treatment: None,
filter: None,
over: None,
distinct: false,
special: false,
order_by: vec![],
})],
})
);
fn parse_map_access_expr() {
let sql = "users[-1][safe_offset(2)].a.b";
let expr = bigquery().verified_expr(sql);
// test other operators
for sql in [
"SELECT d[SAFE_OFFSET(0)]",
"SELECT d[ORDINAL(0)]",
"SELECT d[SAFE_ORDINAL(0)]",
] {
bigquery().verified_only_select(sql);
// Local shorthand for building an expected key.
fn map_access_key(key: Expr, syntax: MapAccessSyntax) -> MapAccessKey {
MapAccessKey { key, syntax }
}
let expected = Expr::MapAccess {
column: Expr::Identifier(Ident::new("users")).into(),
keys: vec![
map_access_key(
Expr::UnaryOp {
op: UnaryOperator::Minus,
expr: Expr::Value(number("1")).into(),
},
MapAccessSyntax::Bracket,
),
map_access_key(
Expr::Function(Function {
name: ObjectName(vec![Ident::new("safe_offset")]),
args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(
number("2"),
)))],
filter: None,
null_treatment: None,
over: None,
distinct: false,
special: false,
order_by: vec![],
}),
MapAccessSyntax::Bracket,
),
map_access_key(
Expr::CompoundIdentifier(vec![Ident::new("a"), Ident::new("b")]),
MapAccessSyntax::Period,
),
],
};
assert_eq!(expr, expected);
// A function call result is used as the base of the access chain here.
let sql = "SELECT myfunc()[-1].a[SAFE_OFFSET(2)].b";
bigquery().verified_only_select(sql);
}
#[test]

View file

@ -39,23 +39,26 @@ fn parse_map_access_expr() {
value: "string_values".to_string(),
quote_style: None,
})),
keys: vec![Expr::Function(Function {
name: ObjectName(vec!["indexOf".into()]),
args: vec![
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(Ident::new(
"string_names"
)))),
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(
Value::SingleQuotedString("endpoint".to_string())
))),
],
null_treatment: None,
filter: None,
over: None,
distinct: false,
special: false,
order_by: vec![],
})],
keys: vec![MapAccessKey {
key: Expr::Function(Function {
name: ObjectName(vec!["indexOf".into()]),
args: vec![
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(
Ident::new("string_names")
))),
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(
Value::SingleQuotedString("endpoint".to_string())
))),
],
null_treatment: None,
filter: None,
over: None,
distinct: false,
special: false,
order_by: vec![],
}),
syntax: MapAccessSyntax::Bracket
}],
})],
into: None,
from: vec![TableWithJoins {
@ -80,23 +83,26 @@ fn parse_map_access_expr() {
right: Box::new(BinaryOp {
left: Box::new(MapAccess {
column: Box::new(Identifier(Ident::new("string_value"))),
keys: vec![Expr::Function(Function {
name: ObjectName(vec![Ident::new("indexOf")]),
args: vec![
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(
Ident::new("string_name")
))),
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(
Value::SingleQuotedString("app".to_string())
))),
],
null_treatment: None,
filter: None,
over: None,
distinct: false,
special: false,
order_by: vec![],
})],
keys: vec![MapAccessKey {
key: Expr::Function(Function {
name: ObjectName(vec![Ident::new("indexOf")]),
args: vec![
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(
Ident::new("string_name")
))),
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(
Value::SingleQuotedString("app".to_string())
))),
],
null_treatment: None,
filter: None,
over: None,
distinct: false,
special: false,
order_by: vec![],
}),
syntax: MapAccessSyntax::Bracket
}],
}),
op: BinaryOperator::NotEq,
right: Box::new(Expr::Value(Value::SingleQuotedString("foo".to_string()))),

View file

@ -8643,3 +8643,45 @@ fn test_buffer_reuse() {
p.parse_statements().unwrap();
let _ = p.into_tokens();
}
/// Bracket-style map access must parse to the same `Expr::MapAccess` tree for
/// both the BigQuery and ClickHouse dialects, with arbitrary expressions as keys.
#[test]
fn parse_map_access_expr() {
    // Wraps an expression into a bracket-notation key.
    let bracket = |key: Expr| MapAccessKey {
        key,
        syntax: MapAccessSyntax::Bracket,
    };

    let dialects = TestedDialects {
        dialects: vec![Box::new(BigQueryDialect {}), Box::new(ClickHouseDialect {})],
        options: None,
    };

    let parsed = dialects.verified_expr("users[-1][safe_offset(2)]");

    // First key: the unary expression `-1`.
    let negative_index = Expr::UnaryOp {
        op: UnaryOperator::Minus,
        expr: Box::new(Expr::Value(number("1"))),
    };
    // Second key: the function call `safe_offset(2)`.
    let safe_offset_call = Expr::Function(Function {
        name: ObjectName(vec![Ident::new("safe_offset")]),
        args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(
            number("2"),
        )))],
        filter: None,
        null_treatment: None,
        over: None,
        distinct: false,
        special: false,
        order_by: vec![],
    });
    let expected = Expr::MapAccess {
        column: Box::new(Expr::Identifier(Ident::new("users"))),
        keys: vec![bracket(negative_index), bracket(safe_offset_call)],
    };
    assert_eq!(parsed, expected);

    // Further inputs are only run through `verified_expr`; their AST is not compared.
    for extra_sql in ["users[1]", "a[array_length(b) - 1 + 2][c + 3][d * 4]"] {
        let _ = dialects.verified_expr(extra_sql);
    }
}