Consolidate MapAccess, and Subscript into CompoundExpr to handle the complex field access chain (#1551)

This commit is contained in:
Jax Liu 2024-12-22 22:28:44 +08:00 committed by GitHub
parent cd898cb6a4
commit 0647a4aa82
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 455 additions and 287 deletions

View file

@ -459,40 +459,6 @@ pub enum CastFormat {
ValueAtTimeZone(Value, Value),
}
/// Represents the syntax/style used in a map access.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum MapAccessSyntax {
/// Access using bracket notation. `mymap[mykey]`
Bracket,
/// Access using period notation. `mymap.mykey`
Period,
}
/// Expression used to access a value in a nested structure.
///
/// Example: `SAFE_OFFSET(0)` in
/// ```sql
/// SELECT mymap[SAFE_OFFSET(0)];
/// ```
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct MapAccessKey {
pub key: Expr,
pub syntax: MapAccessSyntax,
}
impl fmt::Display for MapAccessKey {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self.syntax {
MapAccessSyntax::Bracket => write!(f, "[{}]", self.key),
MapAccessSyntax::Period => write!(f, ".{}", self.key),
}
}
}
/// An element of a JSON path.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
@ -629,6 +595,28 @@ pub enum Expr {
Identifier(Ident),
/// Multi-part identifier, e.g. `table_alias.column` or `schema.table.col`
CompoundIdentifier(Vec<Ident>),
/// Multi-part expression access.
///
/// This structure represents an access chain in structured / nested types
/// such as maps, arrays, and lists:
/// - Array
/// - A 1-dim array `a[1]` will be represented like:
/// `CompoundFieldAccess(Ident('a'), vec![Subscript(1)]`
/// - A 2-dim array `a[1][2]` will be represented like:
/// `CompoundFieldAccess(Ident('a'), vec![Subscript(1), Subscript(2)]`
/// - Map or Struct (Bracket-style)
/// - A map `a['field1']` will be represented like:
/// `CompoundFieldAccess(Ident('a'), vec![Subscript('field')]`
/// - A 2-dim map `a['field1']['field2']` will be represented like:
/// `CompoundFieldAccess(Ident('a'), vec![Subscript('field1'), Subscript('field2')]`
/// - Struct (Dot-style) (only effect when the chain contains both subscript and expr)
/// - A struct access `a[field1].field2` will be represented like:
/// `CompoundFieldAccess(Ident('a'), vec![Subscript('field1'), Ident('field2')]`
/// - If a struct access likes `a.field1.field2`, it will be represented by CompoundIdentifier([a, field1, field2])
CompoundFieldAccess {
root: Box<Expr>,
access_chain: Vec<AccessExpr>,
},
/// Access data nested in a value containing semi-structured data, such as
/// the `VARIANT` type on Snowflake. for example `src:customer[0].name`.
///
@ -882,14 +870,6 @@ pub enum Expr {
data_type: DataType,
value: String,
},
/// Access a map-like object by field (e.g. `column['field']` or `column[4]`
/// Note that depending on the dialect, struct like accesses may be
/// parsed as [`Subscript`](Self::Subscript) or [`MapAccess`](Self::MapAccess)
/// <https://clickhouse.com/docs/en/sql-reference/data-types/map/>
MapAccess {
column: Box<Expr>,
keys: Vec<MapAccessKey>,
},
/// Scalar function call e.g. `LEFT(foo, 5)`
Function(Function),
/// Arbitrary expr method call
@ -978,11 +958,6 @@ pub enum Expr {
/// ```
/// [1]: https://duckdb.org/docs/sql/data_types/map#creating-maps
Map(Map),
/// An access of nested data using subscript syntax, for example `array[2]`.
Subscript {
expr: Box<Expr>,
subscript: Box<Subscript>,
},
/// An array expression e.g. `ARRAY[1, 2]`
Array(Array),
/// An interval expression e.g. `INTERVAL '1' YEAR`
@ -1099,6 +1074,27 @@ impl fmt::Display for Subscript {
}
}
/// An element of a [`Expr::CompoundFieldAccess`].
/// It can be an expression or a subscript.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum AccessExpr {
/// Accesses a field using dot notation, e.g. `foo.bar.baz`.
Dot(Expr),
/// Accesses a field or array element using bracket notation, e.g. `foo['bar']`.
Subscript(Subscript),
}
impl fmt::Display for AccessExpr {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
AccessExpr::Dot(expr) => write!(f, ".{}", expr),
AccessExpr::Subscript(subscript) => write!(f, "[{}]", subscript),
}
}
}
/// A lambda function.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
@ -1295,12 +1291,16 @@ impl fmt::Display for Expr {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Expr::Identifier(s) => write!(f, "{s}"),
Expr::MapAccess { column, keys } => {
write!(f, "{column}{}", display_separated(keys, ""))
}
Expr::Wildcard(_) => f.write_str("*"),
Expr::QualifiedWildcard(prefix, _) => write!(f, "{}.*", prefix),
Expr::CompoundIdentifier(s) => write!(f, "{}", display_separated(s, ".")),
Expr::CompoundFieldAccess { root, access_chain } => {
write!(f, "{}", root)?;
for field in access_chain {
write!(f, "{}", field)?;
}
Ok(())
}
Expr::IsTrue(ast) => write!(f, "{ast} IS TRUE"),
Expr::IsNotTrue(ast) => write!(f, "{ast} IS NOT TRUE"),
Expr::IsFalse(ast) => write!(f, "{ast} IS FALSE"),
@ -1720,12 +1720,6 @@ impl fmt::Display for Expr {
Expr::Map(map) => {
write!(f, "{map}")
}
Expr::Subscript {
expr,
subscript: key,
} => {
write!(f, "{expr}[{key}]")
}
Expr::Array(set) => {
write!(f, "{set}")
}