mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-10-14 07:52:02 +00:00
Improve parsing of JSON accesses on Postgres and Snowflake (#1215)
Co-authored-by: Ifeanyi Ubah <ify1992@yahoo.com>
This commit is contained in:
parent
0606024353
commit
4bfa399919
7 changed files with 432 additions and 199 deletions
141
src/ast/mod.rs
141
src/ast/mod.rs
|
@ -51,7 +51,8 @@ pub use self::query::{
|
|||
Top, TopQuantity, ValueTableMode, Values, WildcardAdditionalOptions, With,
|
||||
};
|
||||
pub use self::value::{
|
||||
escape_quoted_string, DateTimeField, DollarQuotedString, TrimWhereField, Value,
|
||||
escape_double_quote_string, escape_quoted_string, DateTimeField, DollarQuotedString,
|
||||
TrimWhereField, Value,
|
||||
};
|
||||
|
||||
use crate::ast::helpers::stmt_data_loading::{
|
||||
|
@ -270,66 +271,6 @@ impl fmt::Display for Interval {
|
|||
}
|
||||
}
|
||||
|
||||
/// JsonOperator
|
||||
#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
|
||||
pub enum JsonOperator {
|
||||
/// -> keeps the value as json
|
||||
Arrow,
|
||||
/// ->> keeps the value as text or int.
|
||||
LongArrow,
|
||||
/// #> Extracts JSON sub-object at the specified path
|
||||
HashArrow,
|
||||
/// #>> Extracts JSON sub-object at the specified path as text
|
||||
HashLongArrow,
|
||||
/// : Colon is used by Snowflake (Which is similar to LongArrow)
|
||||
Colon,
|
||||
/// jsonb @> jsonb -> boolean: Test whether left json contains the right json
|
||||
AtArrow,
|
||||
/// jsonb <@ jsonb -> boolean: Test whether right json contains the left json
|
||||
ArrowAt,
|
||||
/// jsonb #- text[] -> jsonb: Deletes the field or array element at the specified
|
||||
/// path, where path elements can be either field keys or array indexes.
|
||||
HashMinus,
|
||||
/// jsonb @? jsonpath -> boolean: Does JSON path return any item for the specified
|
||||
/// JSON value?
|
||||
AtQuestion,
|
||||
/// jsonb @@ jsonpath → boolean: Returns the result of a JSON path predicate check
|
||||
/// for the specified JSON value. Only the first item of the result is taken into
|
||||
/// account. If the result is not Boolean, then NULL is returned.
|
||||
AtAt,
|
||||
}
|
||||
|
||||
impl fmt::Display for JsonOperator {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
JsonOperator::Arrow => {
|
||||
write!(f, "->")
|
||||
}
|
||||
JsonOperator::LongArrow => {
|
||||
write!(f, "->>")
|
||||
}
|
||||
JsonOperator::HashArrow => {
|
||||
write!(f, "#>")
|
||||
}
|
||||
JsonOperator::HashLongArrow => {
|
||||
write!(f, "#>>")
|
||||
}
|
||||
JsonOperator::Colon => {
|
||||
write!(f, ":")
|
||||
}
|
||||
JsonOperator::AtArrow => {
|
||||
write!(f, "@>")
|
||||
}
|
||||
JsonOperator::ArrowAt => write!(f, "<@"),
|
||||
JsonOperator::HashMinus => write!(f, "#-"),
|
||||
JsonOperator::AtQuestion => write!(f, "@?"),
|
||||
JsonOperator::AtAt => write!(f, "@@"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A field definition within a struct.
|
||||
///
|
||||
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
|
||||
|
@ -412,6 +353,59 @@ impl fmt::Display for MapAccessKey {
|
|||
}
|
||||
}
|
||||
|
||||
/// An element of a JSON path.
|
||||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
|
||||
pub enum JsonPathElem {
|
||||
/// Accesses an object field using dot notation, e.g. `obj:foo.bar.baz`.
|
||||
///
|
||||
/// See <https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation>.
|
||||
Dot { key: String, quoted: bool },
|
||||
/// Accesses an object field or array element using bracket notation,
|
||||
/// e.g. `obj['foo']`.
|
||||
///
|
||||
/// See <https://docs.snowflake.com/en/user-guide/querying-semistructured#bracket-notation>.
|
||||
Bracket { key: Expr },
|
||||
}
|
||||
|
||||
/// A JSON path.
|
||||
///
|
||||
/// See <https://docs.snowflake.com/en/user-guide/querying-semistructured>.
|
||||
/// See <https://docs.databricks.com/en/sql/language-manual/sql-ref-json-path-expression.html>.
|
||||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
|
||||
pub struct JsonPath {
|
||||
pub path: Vec<JsonPathElem>,
|
||||
}
|
||||
|
||||
impl fmt::Display for JsonPath {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
for (i, elem) in self.path.iter().enumerate() {
|
||||
match elem {
|
||||
JsonPathElem::Dot { key, quoted } => {
|
||||
if i == 0 {
|
||||
write!(f, ":")?;
|
||||
} else {
|
||||
write!(f, ".")?;
|
||||
}
|
||||
|
||||
if *quoted {
|
||||
write!(f, "\"{}\"", escape_double_quote_string(key))?;
|
||||
} else {
|
||||
write!(f, "{key}")?;
|
||||
}
|
||||
}
|
||||
JsonPathElem::Bracket { key } => {
|
||||
write!(f, "[{key}]")?;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// The syntax used for in a cast expression.
|
||||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
|
@ -449,11 +443,16 @@ pub enum Expr {
|
|||
Identifier(Ident),
|
||||
/// Multi-part identifier, e.g. `table_alias.column` or `schema.table.col`
|
||||
CompoundIdentifier(Vec<Ident>),
|
||||
/// JSON access (postgres) eg: data->'tags'
|
||||
/// Access data nested in a value containing semi-structured data, such as
|
||||
/// the `VARIANT` type on Snowflake. for example `src:customer[0].name`.
|
||||
///
|
||||
/// See <https://docs.snowflake.com/en/user-guide/querying-semistructured>.
|
||||
/// See <https://docs.databricks.com/en/sql/language-manual/functions/colonsign.html>.
|
||||
JsonAccess {
|
||||
left: Box<Expr>,
|
||||
operator: JsonOperator,
|
||||
right: Box<Expr>,
|
||||
/// The value being queried.
|
||||
value: Box<Expr>,
|
||||
/// The path to the data to extract.
|
||||
path: JsonPath,
|
||||
},
|
||||
/// CompositeAccess (postgres) eg: SELECT (information_schema._pg_expandarray(array['i','i'])).n
|
||||
CompositeAccess {
|
||||
|
@ -1224,16 +1223,8 @@ impl fmt::Display for Expr {
|
|||
Expr::Array(set) => {
|
||||
write!(f, "{set}")
|
||||
}
|
||||
Expr::JsonAccess {
|
||||
left,
|
||||
operator,
|
||||
right,
|
||||
} => {
|
||||
if operator == &JsonOperator::Colon {
|
||||
write!(f, "{left}{operator}{right}")
|
||||
} else {
|
||||
write!(f, "{left} {operator} {right}")
|
||||
}
|
||||
Expr::JsonAccess { value, path } => {
|
||||
write!(f, "{value}{path}")
|
||||
}
|
||||
Expr::CompositeAccess { expr, key } => {
|
||||
write!(f, "{expr}.{key}")
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue