Improve parsing of JSON accesses on Postgres and Snowflake (#1215)

Co-authored-by: Ifeanyi Ubah <ify1992@yahoo.com>
This commit is contained in:
Joey Hain 2024-04-30 07:49:05 -07:00 committed by GitHub
parent 0606024353
commit 4bfa399919
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 432 additions and 199 deletions

View file

@ -51,7 +51,8 @@ pub use self::query::{
Top, TopQuantity, ValueTableMode, Values, WildcardAdditionalOptions, With,
};
pub use self::value::{
escape_quoted_string, DateTimeField, DollarQuotedString, TrimWhereField, Value,
escape_double_quote_string, escape_quoted_string, DateTimeField, DollarQuotedString,
TrimWhereField, Value,
};
use crate::ast::helpers::stmt_data_loading::{
@ -270,66 +271,6 @@ impl fmt::Display for Interval {
}
}
/// JsonOperator
#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum JsonOperator {
/// -> keeps the value as json
Arrow,
/// ->> keeps the value as text or int.
LongArrow,
/// #> Extracts JSON sub-object at the specified path
HashArrow,
/// #>> Extracts JSON sub-object at the specified path as text
HashLongArrow,
/// : Colon is used by Snowflake (Which is similar to LongArrow)
Colon,
/// jsonb @> jsonb -> boolean: Test whether left json contains the right json
AtArrow,
/// jsonb <@ jsonb -> boolean: Test whether right json contains the left json
ArrowAt,
/// jsonb #- text[] -> jsonb: Deletes the field or array element at the specified
/// path, where path elements can be either field keys or array indexes.
HashMinus,
/// jsonb @? jsonpath -> boolean: Does JSON path return any item for the specified
/// JSON value?
AtQuestion,
/// jsonb @@ jsonpath → boolean: Returns the result of a JSON path predicate check
/// for the specified JSON value. Only the first item of the result is taken into
/// account. If the result is not Boolean, then NULL is returned.
AtAt,
}
impl fmt::Display for JsonOperator {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
JsonOperator::Arrow => {
write!(f, "->")
}
JsonOperator::LongArrow => {
write!(f, "->>")
}
JsonOperator::HashArrow => {
write!(f, "#>")
}
JsonOperator::HashLongArrow => {
write!(f, "#>>")
}
JsonOperator::Colon => {
write!(f, ":")
}
JsonOperator::AtArrow => {
write!(f, "@>")
}
JsonOperator::ArrowAt => write!(f, "<@"),
JsonOperator::HashMinus => write!(f, "#-"),
JsonOperator::AtQuestion => write!(f, "@?"),
JsonOperator::AtAt => write!(f, "@@"),
}
}
}
/// A field definition within a struct.
///
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
@ -412,6 +353,59 @@ impl fmt::Display for MapAccessKey {
}
}
/// An element of a JSON path.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum JsonPathElem {
/// Accesses an object field using dot notation, e.g. `obj:foo.bar.baz`.
///
/// See <https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation>.
Dot { key: String, quoted: bool },
/// Accesses an object field or array element using bracket notation,
/// e.g. `obj['foo']`.
///
/// See <https://docs.snowflake.com/en/user-guide/querying-semistructured#bracket-notation>.
Bracket { key: Expr },
}
/// A JSON path.
///
/// See <https://docs.snowflake.com/en/user-guide/querying-semistructured>.
/// See <https://docs.databricks.com/en/sql/language-manual/sql-ref-json-path-expression.html>.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct JsonPath {
pub path: Vec<JsonPathElem>,
}
impl fmt::Display for JsonPath {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
for (i, elem) in self.path.iter().enumerate() {
match elem {
JsonPathElem::Dot { key, quoted } => {
if i == 0 {
write!(f, ":")?;
} else {
write!(f, ".")?;
}
if *quoted {
write!(f, "\"{}\"", escape_double_quote_string(key))?;
} else {
write!(f, "{key}")?;
}
}
JsonPathElem::Bracket { key } => {
write!(f, "[{key}]")?;
}
}
}
Ok(())
}
}
/// The syntax used for in a cast expression.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
@ -449,11 +443,16 @@ pub enum Expr {
Identifier(Ident),
/// Multi-part identifier, e.g. `table_alias.column` or `schema.table.col`
CompoundIdentifier(Vec<Ident>),
/// JSON access (postgres) eg: data->'tags'
/// Access data nested in a value containing semi-structured data, such as
/// the `VARIANT` type on Snowflake. for example `src:customer[0].name`.
///
/// See <https://docs.snowflake.com/en/user-guide/querying-semistructured>.
/// See <https://docs.databricks.com/en/sql/language-manual/functions/colonsign.html>.
JsonAccess {
left: Box<Expr>,
operator: JsonOperator,
right: Box<Expr>,
/// The value being queried.
value: Box<Expr>,
/// The path to the data to extract.
path: JsonPath,
},
/// CompositeAccess (postgres) eg: SELECT (information_schema._pg_expandarray(array['i','i'])).n
CompositeAccess {
@ -1224,16 +1223,8 @@ impl fmt::Display for Expr {
Expr::Array(set) => {
write!(f, "{set}")
}
Expr::JsonAccess {
left,
operator,
right,
} => {
if operator == &JsonOperator::Colon {
write!(f, "{left}{operator}{right}")
} else {
write!(f, "{left} {operator} {right}")
}
Expr::JsonAccess { value, path } => {
write!(f, "{value}{path}")
}
Expr::CompositeAccess { expr, key } => {
write!(f, "{expr}.{key}")