Improve parsing of JSON accesses on Postgres and Snowflake (#1215)

Co-authored-by: Ifeanyi Ubah <ify1992@yahoo.com>
This commit is contained in:
Joey Hain 2024-04-30 07:49:05 -07:00 committed by GitHub
parent 0606024353
commit 4bfa399919
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 432 additions and 199 deletions

View file

@ -51,7 +51,8 @@ pub use self::query::{
Top, TopQuantity, ValueTableMode, Values, WildcardAdditionalOptions, With,
};
pub use self::value::{
escape_quoted_string, DateTimeField, DollarQuotedString, TrimWhereField, Value,
escape_double_quote_string, escape_quoted_string, DateTimeField, DollarQuotedString,
TrimWhereField, Value,
};
use crate::ast::helpers::stmt_data_loading::{
@ -270,66 +271,6 @@ impl fmt::Display for Interval {
}
}
/// Operators that can appear between the two operands of a JSON access
/// or JSON predicate expression.
#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum JsonOperator {
    /// `->` keeps the value as json
    Arrow,
    /// `->>` keeps the value as text or int.
    LongArrow,
    /// `#>` extracts the JSON sub-object at the specified path
    HashArrow,
    /// `#>>` extracts the JSON sub-object at the specified path as text
    HashLongArrow,
    /// `:` is used by Snowflake (similar to `LongArrow`)
    Colon,
    /// `jsonb @> jsonb -> boolean`: tests whether the left json contains the
    /// right json
    AtArrow,
    /// `jsonb <@ jsonb -> boolean`: tests whether the right json contains the
    /// left json
    ArrowAt,
    /// `jsonb #- text[] -> jsonb`: deletes the field or array element at the
    /// specified path, where path elements can be either field keys or array
    /// indexes.
    HashMinus,
    /// `jsonb @? jsonpath -> boolean`: does the JSON path return any item for
    /// the specified JSON value?
    AtQuestion,
    /// `jsonb @@ jsonpath -> boolean`: returns the result of a JSON path
    /// predicate check for the specified JSON value. Only the first item of
    /// the result is taken into account. If the result is not Boolean, then
    /// NULL is returned.
    AtAt,
}

impl fmt::Display for JsonOperator {
    /// Writes the operator exactly as it is spelled in SQL text.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Look up the textual symbol first, then emit it in a single place.
        let symbol = match self {
            JsonOperator::Arrow => "->",
            JsonOperator::LongArrow => "->>",
            JsonOperator::HashArrow => "#>",
            JsonOperator::HashLongArrow => "#>>",
            JsonOperator::Colon => ":",
            JsonOperator::AtArrow => "@>",
            JsonOperator::ArrowAt => "<@",
            JsonOperator::HashMinus => "#-",
            JsonOperator::AtQuestion => "@?",
            JsonOperator::AtAt => "@@",
        };
        f.write_str(symbol)
    }
}
/// A field definition within a struct.
///
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
@ -412,6 +353,59 @@ impl fmt::Display for MapAccessKey {
}
}
/// An element of a JSON path.
///
/// A path is a sequence of these elements; each one is either a named
/// object-field access or a bracketed access whose key is an arbitrary
/// expression.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum JsonPathElem {
/// Accesses an object field using dot notation, e.g. `obj:foo.bar.baz`.
///
/// `key` holds the field name without any surrounding quotes, and `quoted`
/// records whether the name was written inside double quotes in the source
/// SQL. When displayed, a quoted key is wrapped in double quotes again
/// (with its content escaped) and an unquoted key is written as-is.
///
/// See <https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation>.
Dot { key: String, quoted: bool },
/// Accesses an object field or array element using bracket notation,
/// e.g. `obj['foo']`.
///
/// `key` is the expression that appears between the brackets.
///
/// See <https://docs.snowflake.com/en/user-guide/querying-semistructured#bracket-notation>.
Bracket { key: Expr },
}
/// A JSON path.
///
/// When displayed, a `Dot` element in first position is introduced by `:`,
/// any later `Dot` element by `.`, and a `Bracket` element is written as
/// `[key]`.
///
/// See <https://docs.snowflake.com/en/user-guide/querying-semistructured>.
/// See <https://docs.databricks.com/en/sql/language-manual/sql-ref-json-path-expression.html>.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct JsonPath {
/// The path elements, in the order they are written out.
pub path: Vec<JsonPathElem>,
}
impl fmt::Display for JsonPath {
    /// Writes the path in its SQL surface syntax.
    ///
    /// A dot-notation element is prefixed with `:` when it is the very first
    /// element of the path and with `.` otherwise; bracket elements are
    /// written as `[key]`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut at_start = true;
        for segment in &self.path {
            match segment {
                JsonPathElem::Dot { key, quoted } => {
                    // Only the leading element uses the colon separator.
                    f.write_str(if at_start { ":" } else { "." })?;
                    if *quoted {
                        write!(f, "\"{}\"", escape_double_quote_string(key))?;
                    } else {
                        write!(f, "{key}")?;
                    }
                }
                JsonPathElem::Bracket { key } => write!(f, "[{key}]")?,
            }
            at_start = false;
        }
        Ok(())
    }
}
/// The syntax used in a cast expression.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
@ -449,11 +443,16 @@ pub enum Expr {
Identifier(Ident),
/// Multi-part identifier, e.g. `table_alias.column` or `schema.table.col`
CompoundIdentifier(Vec<Ident>),
/// JSON access (postgres) eg: data->'tags'
/// Access data nested in a value containing semi-structured data, such as
/// the `VARIANT` type on Snowflake, for example `src:customer[0].name`.
///
/// See <https://docs.snowflake.com/en/user-guide/querying-semistructured>.
/// See <https://docs.databricks.com/en/sql/language-manual/functions/colonsign.html>.
JsonAccess {
left: Box<Expr>,
operator: JsonOperator,
right: Box<Expr>,
/// The value being queried.
value: Box<Expr>,
/// The path to the data to extract.
path: JsonPath,
},
/// CompositeAccess (postgres) eg: SELECT (information_schema._pg_expandarray(array['i','i'])).n
CompositeAccess {
@ -1224,16 +1223,8 @@ impl fmt::Display for Expr {
Expr::Array(set) => {
write!(f, "{set}")
}
Expr::JsonAccess {
left,
operator,
right,
} => {
if operator == &JsonOperator::Colon {
write!(f, "{left}{operator}{right}")
} else {
write!(f, "{left} {operator} {right}")
}
Expr::JsonAccess { value, path } => {
write!(f, "{value}{path}")
}
Expr::CompositeAccess { expr, key } => {
write!(f, "{expr}.{key}")

View file

@ -141,6 +141,79 @@ pub enum BinaryOperator {
PGNotILikeMatch,
/// String "starts with", eg: `a ^@ b` (PostgreSQL-specific)
PGStartsWith,
/// The `->` operator.
///
/// On PostgreSQL, this operator extracts a JSON object field or array
/// element, for example `'{"a":"b"}'::json -> 'a'` or `'[1, 2, 3]'::json
/// -> 2`.
///
/// See <https://www.postgresql.org/docs/current/functions-json.html>.
Arrow,
/// The `->>` operator.
///
/// On PostgreSQL, this operator extracts a JSON object field or JSON
/// array element and converts it to text, for example `'{"a":"b"}'::json
/// ->> 'a'` or `'[1, 2, 3]'::json ->> 2`.
///
/// See <https://www.postgresql.org/docs/current/functions-json.html>.
LongArrow,
/// The `#>` operator.
///
/// On PostgreSQL, this operator extracts a JSON sub-object at the specified
/// path, for example:
///
/// ```notrust
/// '{"a": {"b": ["foo","bar"]}}'::json #> '{a,b,1}'
/// ```
///
/// See <https://www.postgresql.org/docs/current/functions-json.html>.
HashArrow,
/// The `#>>` operator.
///
/// A PostgreSQL-specific operator that extracts the JSON sub-object at the
/// specified path as text, for example:
///
/// ```notrust
/// '{"a": {"b": ["foo","bar"]}}'::json #>> '{a,b,1}'
/// ```
///
/// See <https://www.postgresql.org/docs/current/functions-json.html>.
HashLongArrow,
/// The `@@` operator.
///
/// On PostgreSQL, this is used for JSON and text searches.
///
/// See <https://www.postgresql.org/docs/current/functions-json.html>.
/// See <https://www.postgresql.org/docs/current/functions-textsearch.html>.
AtAt,
/// The `@>` operator.
///
/// On PostgreSQL, this tests whether the left operand contains the right
/// operand; it is defined for JSON and text search values.
///
/// See <https://www.postgresql.org/docs/current/functions-json.html>.
/// See <https://www.postgresql.org/docs/current/functions-textsearch.html>.
AtArrow,
/// The `<@` operator.
///
/// On PostgreSQL, this tests whether the left operand is contained in the
/// right operand; it is defined for JSON and text search values.
///
/// See <https://www.postgresql.org/docs/current/functions-json.html>.
/// See <https://www.postgresql.org/docs/current/functions-textsearch.html>.
ArrowAt,
/// The `#-` operator.
///
/// On PostgreSQL, this operator is used to delete a field or array element
/// at a specified path.
///
/// See <https://www.postgresql.org/docs/current/functions-json.html>.
HashMinus,
/// The `@?` operator.
///
/// On PostgreSQL, this operator is used to check the given JSON path
/// returns an item for the JSON value.
///
/// See <https://www.postgresql.org/docs/current/functions-json.html>.
AtQuestion,
/// PostgreSQL-specific custom operator.
///
/// See [CREATE OPERATOR](https://www.postgresql.org/docs/current/sql-createoperator.html)
@ -187,6 +260,15 @@ impl fmt::Display for BinaryOperator {
BinaryOperator::PGNotLikeMatch => f.write_str("!~~"),
BinaryOperator::PGNotILikeMatch => f.write_str("!~~*"),
BinaryOperator::PGStartsWith => f.write_str("^@"),
BinaryOperator::Arrow => f.write_str("->"),
BinaryOperator::LongArrow => f.write_str("->>"),
BinaryOperator::HashArrow => f.write_str("#>"),
BinaryOperator::HashLongArrow => f.write_str("#>>"),
BinaryOperator::AtAt => f.write_str("@@"),
BinaryOperator::AtArrow => f.write_str("@>"),
BinaryOperator::ArrowAt => f.write_str("<@"),
BinaryOperator::HashMinus => f.write_str("#-"),
BinaryOperator::AtQuestion => f.write_str("@?"),
BinaryOperator::PGCustomBinaryOperator(idents) => {
write!(f, "OPERATOR({})", display_separated(idents, "."))
}

View file

@ -65,8 +65,6 @@ pub enum Value {
Null,
/// `?` or `$` Prepared statement arg placeholder
Placeholder(String),
/// Add support of snowflake field:key - key should be a value
UnQuotedString(String),
}
impl fmt::Display for Value {
@ -85,7 +83,6 @@ impl fmt::Display for Value {
Value::RawStringLiteral(v) => write!(f, "R'{v}'"),
Value::Null => write!(f, "NULL"),
Value::Placeholder(v) => write!(f, "{v}"),
Value::UnQuotedString(v) => write!(f, "{v}"),
}
}
}

View file

@ -2346,6 +2346,16 @@ impl<'a> Parser<'a> {
Token::DoubleTildeAsterisk => Some(BinaryOperator::PGILikeMatch),
Token::ExclamationMarkDoubleTilde => Some(BinaryOperator::PGNotLikeMatch),
Token::ExclamationMarkDoubleTildeAsterisk => Some(BinaryOperator::PGNotILikeMatch),
Token::Arrow => Some(BinaryOperator::Arrow),
Token::LongArrow => Some(BinaryOperator::LongArrow),
Token::HashArrow => Some(BinaryOperator::HashArrow),
Token::HashLongArrow => Some(BinaryOperator::HashLongArrow),
Token::AtArrow => Some(BinaryOperator::AtArrow),
Token::ArrowAt => Some(BinaryOperator::ArrowAt),
Token::HashMinus => Some(BinaryOperator::HashMinus),
Token::AtQuestion => Some(BinaryOperator::AtQuestion),
Token::AtAt => Some(BinaryOperator::AtAt),
Token::Word(w) => match w.keyword {
Keyword::AND => Some(BinaryOperator::And),
Keyword::OR => Some(BinaryOperator::Or),
@ -2539,42 +2549,16 @@ impl<'a> Parser<'a> {
} else if Token::LBracket == tok {
if dialect_of!(self is PostgreSqlDialect | GenericDialect) {
// parse index
return self.parse_array_index(expr);
self.parse_array_index(expr)
} else if dialect_of!(self is SnowflakeDialect) {
self.prev_token();
self.parse_json_access(expr)
} else {
self.parse_map_access(expr)
}
self.parse_map_access(expr)
} else if Token::Colon == tok {
Ok(Expr::JsonAccess {
left: Box::new(expr),
operator: JsonOperator::Colon,
right: Box::new(Expr::Value(self.parse_value()?)),
})
} else if Token::Arrow == tok
|| Token::LongArrow == tok
|| Token::HashArrow == tok
|| Token::HashLongArrow == tok
|| Token::AtArrow == tok
|| Token::ArrowAt == tok
|| Token::HashMinus == tok
|| Token::AtQuestion == tok
|| Token::AtAt == tok
{
let operator = match tok.token {
Token::Arrow => JsonOperator::Arrow,
Token::LongArrow => JsonOperator::LongArrow,
Token::HashArrow => JsonOperator::HashArrow,
Token::HashLongArrow => JsonOperator::HashLongArrow,
Token::AtArrow => JsonOperator::AtArrow,
Token::ArrowAt => JsonOperator::ArrowAt,
Token::HashMinus => JsonOperator::HashMinus,
Token::AtQuestion => JsonOperator::AtQuestion,
Token::AtAt => JsonOperator::AtAt,
_ => unreachable!(),
};
Ok(Expr::JsonAccess {
left: Box::new(expr),
operator,
right: Box::new(self.parse_expr()?),
})
} else if dialect_of!(self is SnowflakeDialect | GenericDialect) && Token::Colon == tok {
self.prev_token();
self.parse_json_access(expr)
} else {
// Can only happen if `get_next_precedence` got out of sync with this function
parser_err!(
@ -2608,6 +2592,60 @@ impl<'a> Parser<'a> {
})
}
/// Parses a single object key of a dot-notation JSON path segment.
///
/// Consumes one token. An unquoted or double-quoted word, or a
/// double-quoted string token, produces a [`JsonPathElem::Dot`]; any other
/// token is reported as an error expecting a "variant object key name".
fn parse_json_path_object_key(&mut self) -> Result<JsonPathElem, ParserError> {
    let token = self.next_token();
    let (key, quoted) = match token.token {
        Token::Word(Word {
            value,
            // path segments in SF dot notation can be unquoted or double quoted
            quote_style: style @ (Some('"') | None),
            // some experimentation suggests that snowflake permits
            // any keyword here unquoted.
            keyword: _,
        }) => (value, style.is_some()),
        // This token should never be generated on snowflake or generic
        // dialects, but we handle it just in case this is used on future
        // dialects.
        Token::DoubleQuotedString(key) => (key, true),
        _ => return self.expected("variant object key name", token),
    };
    Ok(JsonPathElem::Dot { key, quoted })
}
/// Parses the JSON path that follows `expr` and wraps both in an
/// [`Expr::JsonAccess`].
///
/// Path elements are consumed greedily: a `:` is accepted only as the very
/// first element, a `.` only after at least one element has been read, and
/// `[ <expr> ]` at any position. The first token that fits none of these is
/// pushed back and ends the path.
fn parse_json_access(&mut self, expr: Expr) -> Result<Expr, ParserError> {
    let mut segments = Vec::new();
    loop {
        let next = self.next_token().token;
        let segment = match next {
            Token::Colon if segments.is_empty() => self.parse_json_path_object_key()?,
            Token::Period if !segments.is_empty() => self.parse_json_path_object_key()?,
            Token::LBracket => {
                let key = self.parse_expr()?;
                self.expect_token(&Token::RBracket)?;
                JsonPathElem::Bracket { key }
            }
            _ => {
                // Not part of the path: un-read the token and stop.
                self.prev_token();
                break;
            }
        };
        segments.push(segment);
    }

    // Callers only enter here when a `:` or `[` is pending, so at least one
    // element must have been collected.
    debug_assert!(!segments.is_empty());
    Ok(Expr::JsonAccess {
        value: Box::new(expr),
        path: JsonPath { path: segments },
    })
}
pub fn parse_map_access(&mut self, expr: Expr) -> Result<Expr, ParserError> {
let key = self.parse_expr()?;
self.expect_token(&Token::RBracket)?;
@ -2711,6 +2749,7 @@ impl<'a> Parser<'a> {
}
// use https://www.postgresql.org/docs/7.0/operators.htm#AEN2026 as a reference
// higher number = higher precedence
const MUL_DIV_MOD_OP_PREC: u8 = 40;
const PLUS_MINUS_PREC: u8 = 30;
const XOR_PREC: u8 = 24;
@ -2718,6 +2757,7 @@ impl<'a> Parser<'a> {
const BETWEEN_PREC: u8 = 20;
const LIKE_PREC: u8 = 19;
const IS_PREC: u8 = 17;
const PG_OTHER_PREC: u8 = 16;
const UNARY_NOT_PREC: u8 = 15;
const AND_PREC: u8 = 10;
const OR_PREC: u8 = 5;
@ -2802,18 +2842,16 @@ impl<'a> Parser<'a> {
Token::DoubleColon => Ok(50),
Token::Colon => Ok(50),
Token::ExclamationMark => Ok(50),
Token::LBracket
Token::LBracket | Token::Overlap | Token::CaretAt => Ok(50),
Token::Arrow
| Token::LongArrow
| Token::Arrow
| Token::Overlap
| Token::CaretAt
| Token::HashArrow
| Token::HashLongArrow
| Token::AtArrow
| Token::ArrowAt
| Token::HashMinus
| Token::AtQuestion
| Token::AtAt => Ok(50),
| Token::AtAt => Ok(Self::PG_OTHER_PREC),
_ => Ok(0),
}
}
@ -6236,17 +6274,6 @@ impl<'a> Parser<'a> {
},
)?,
},
// Case when Snowflake Semi-structured data like key:value
Keyword::NoKeyword
| Keyword::LOCATION
| Keyword::TYPE
| Keyword::DATE
| Keyword::START
| Keyword::END
if dialect_of!(self is SnowflakeDialect | GenericDialect) =>
{
Ok(Value::UnQuotedString(w.value))
}
_ => self.expected(
"a concrete value",
TokenWithLocation {

View file

@ -1375,25 +1375,25 @@ fn pg_and_generic() -> TestedDialects {
#[test]
fn parse_json_ops_without_colon() {
use self::JsonOperator;
let binary_ops = &[
("->", JsonOperator::Arrow, all_dialects()),
("->>", JsonOperator::LongArrow, all_dialects()),
("#>", JsonOperator::HashArrow, pg_and_generic()),
("#>>", JsonOperator::HashLongArrow, pg_and_generic()),
("@>", JsonOperator::AtArrow, all_dialects()),
("<@", JsonOperator::ArrowAt, all_dialects()),
("#-", JsonOperator::HashMinus, pg_and_generic()),
("@?", JsonOperator::AtQuestion, all_dialects()),
("@@", JsonOperator::AtAt, all_dialects()),
use self::BinaryOperator::*;
let binary_ops = [
("->", Arrow, all_dialects()),
("->>", LongArrow, all_dialects()),
("#>", HashArrow, pg_and_generic()),
("#>>", HashLongArrow, pg_and_generic()),
("@>", AtArrow, all_dialects()),
("<@", ArrowAt, all_dialects()),
("#-", HashMinus, pg_and_generic()),
("@?", AtQuestion, all_dialects()),
("@@", AtAt, all_dialects()),
];
for (str_op, op, dialects) in binary_ops {
let select = dialects.verified_only_select(&format!("SELECT a {} b", &str_op));
assert_eq!(
SelectItem::UnnamedExpr(Expr::JsonAccess {
SelectItem::UnnamedExpr(Expr::BinaryOp {
left: Box::new(Expr::Identifier(Ident::new("a"))),
operator: *op,
op,
right: Box::new(Expr::Identifier(Ident::new("b"))),
}),
select.projection[0]

View file

@ -2235,9 +2235,9 @@ fn test_json() {
let sql = "SELECT params ->> 'name' FROM events";
let select = pg().verified_only_select(sql);
assert_eq!(
SelectItem::UnnamedExpr(Expr::JsonAccess {
SelectItem::UnnamedExpr(Expr::BinaryOp {
left: Box::new(Expr::Identifier(Ident::new("params"))),
operator: JsonOperator::LongArrow,
op: BinaryOperator::LongArrow,
right: Box::new(Expr::Value(Value::SingleQuotedString("name".to_string()))),
}),
select.projection[0]
@ -2246,9 +2246,9 @@ fn test_json() {
let sql = "SELECT params -> 'name' FROM events";
let select = pg().verified_only_select(sql);
assert_eq!(
SelectItem::UnnamedExpr(Expr::JsonAccess {
SelectItem::UnnamedExpr(Expr::BinaryOp {
left: Box::new(Expr::Identifier(Ident::new("params"))),
operator: JsonOperator::Arrow,
op: BinaryOperator::Arrow,
right: Box::new(Expr::Value(Value::SingleQuotedString("name".to_string()))),
}),
select.projection[0]
@ -2257,15 +2257,55 @@ fn test_json() {
let sql = "SELECT info -> 'items' ->> 'product' FROM orders";
let select = pg().verified_only_select(sql);
assert_eq!(
SelectItem::UnnamedExpr(Expr::JsonAccess {
left: Box::new(Expr::Identifier(Ident::new("info"))),
operator: JsonOperator::Arrow,
right: Box::new(Expr::JsonAccess {
left: Box::new(Expr::Value(Value::SingleQuotedString("items".to_string()))),
operator: JsonOperator::LongArrow,
right: Box::new(Expr::Value(Value::SingleQuotedString(
"product".to_string()
)))
SelectItem::UnnamedExpr(Expr::BinaryOp {
left: Box::new(Expr::BinaryOp {
left: Box::new(Expr::Identifier(Ident::new("info"))),
op: BinaryOperator::Arrow,
right: Box::new(Expr::Value(Value::SingleQuotedString("items".to_string())))
}),
op: BinaryOperator::LongArrow,
right: Box::new(Expr::Value(Value::SingleQuotedString(
"product".to_string()
))),
}),
select.projection[0]
);
// the RHS can be a number (array element access)
let sql = "SELECT obj -> 42";
let select = pg().verified_only_select(sql);
assert_eq!(
SelectItem::UnnamedExpr(Expr::BinaryOp {
left: Box::new(Expr::Identifier(Ident::new("obj"))),
op: BinaryOperator::Arrow,
right: Box::new(Expr::Value(number("42"))),
}),
select.projection[0]
);
// the RHS can be an identifier
let sql = "SELECT obj -> key";
let select = pg().verified_only_select(sql);
assert_eq!(
SelectItem::UnnamedExpr(Expr::BinaryOp {
left: Box::new(Expr::Identifier(Ident::new("obj"))),
op: BinaryOperator::Arrow,
right: Box::new(Expr::Identifier(Ident::new("key"))),
}),
select.projection[0]
);
// -> operator has lower precedence than arithmetic ops
let sql = "SELECT obj -> 3 * 2";
let select = pg().verified_only_select(sql);
assert_eq!(
SelectItem::UnnamedExpr(Expr::BinaryOp {
left: Box::new(Expr::Identifier(Ident::new("obj"))),
op: BinaryOperator::Arrow,
right: Box::new(Expr::BinaryOp {
left: Box::new(Expr::Value(number("3"))),
op: BinaryOperator::Multiply,
right: Box::new(Expr::Value(number("2"))),
}),
}),
select.projection[0]
@ -2274,9 +2314,9 @@ fn test_json() {
let sql = "SELECT info #> '{a,b,c}' FROM orders";
let select = pg().verified_only_select(sql);
assert_eq!(
SelectItem::UnnamedExpr(Expr::JsonAccess {
SelectItem::UnnamedExpr(Expr::BinaryOp {
left: Box::new(Expr::Identifier(Ident::new("info"))),
operator: JsonOperator::HashArrow,
op: BinaryOperator::HashArrow,
right: Box::new(Expr::Value(Value::SingleQuotedString(
"{a,b,c}".to_string()
))),
@ -2287,9 +2327,9 @@ fn test_json() {
let sql = "SELECT info #>> '{a,b,c}' FROM orders";
let select = pg().verified_only_select(sql);
assert_eq!(
SelectItem::UnnamedExpr(Expr::JsonAccess {
SelectItem::UnnamedExpr(Expr::BinaryOp {
left: Box::new(Expr::Identifier(Ident::new("info"))),
operator: JsonOperator::HashLongArrow,
op: BinaryOperator::HashLongArrow,
right: Box::new(Expr::Value(Value::SingleQuotedString(
"{a,b,c}".to_string()
))),
@ -2300,9 +2340,9 @@ fn test_json() {
let sql = "SELECT info FROM orders WHERE info @> '{\"a\": 1}'";
let select = pg().verified_only_select(sql);
assert_eq!(
Expr::JsonAccess {
Expr::BinaryOp {
left: Box::new(Expr::Identifier(Ident::new("info"))),
operator: JsonOperator::AtArrow,
op: BinaryOperator::AtArrow,
right: Box::new(Expr::Value(Value::SingleQuotedString(
"{\"a\": 1}".to_string()
))),
@ -2313,11 +2353,11 @@ fn test_json() {
let sql = "SELECT info FROM orders WHERE '{\"a\": 1}' <@ info";
let select = pg().verified_only_select(sql);
assert_eq!(
Expr::JsonAccess {
Expr::BinaryOp {
left: Box::new(Expr::Value(Value::SingleQuotedString(
"{\"a\": 1}".to_string()
))),
operator: JsonOperator::ArrowAt,
op: BinaryOperator::ArrowAt,
right: Box::new(Expr::Identifier(Ident::new("info"))),
},
select.selection.unwrap(),
@ -2326,9 +2366,9 @@ fn test_json() {
let sql = "SELECT info #- ARRAY['a', 'b'] FROM orders";
let select = pg().verified_only_select(sql);
assert_eq!(
SelectItem::UnnamedExpr(Expr::JsonAccess {
SelectItem::UnnamedExpr(Expr::BinaryOp {
left: Box::new(Expr::Identifier(Ident::from("info"))),
operator: JsonOperator::HashMinus,
op: BinaryOperator::HashMinus,
right: Box::new(Expr::Array(Array {
elem: vec![
Expr::Value(Value::SingleQuotedString("a".to_string())),
@ -2343,9 +2383,9 @@ fn test_json() {
let sql = "SELECT info FROM orders WHERE info @? '$.a'";
let select = pg().verified_only_select(sql);
assert_eq!(
Expr::JsonAccess {
Expr::BinaryOp {
left: Box::new(Expr::Identifier(Ident::from("info"))),
operator: JsonOperator::AtQuestion,
op: BinaryOperator::AtQuestion,
right: Box::new(Expr::Value(Value::SingleQuotedString("$.a".to_string())),),
},
select.selection.unwrap(),
@ -2354,9 +2394,9 @@ fn test_json() {
let sql = "SELECT info FROM orders WHERE info @@ '$.a'";
let select = pg().verified_only_select(sql);
assert_eq!(
Expr::JsonAccess {
Expr::BinaryOp {
left: Box::new(Expr::Identifier(Ident::from("info"))),
operator: JsonOperator::AtAt,
op: BinaryOperator::AtAt,
right: Box::new(Expr::Value(Value::SingleQuotedString("$.a".to_string())),),
},
select.selection.unwrap(),

View file

@ -183,71 +183,167 @@ fn parse_lateral_flatten() {
snowflake().verified_only_select(r#"SELECT emp.employee_ID, emp.last_name, index, value AS project_name FROM employees AS emp, LATERAL FLATTEN(INPUT => emp.project_names) AS proj_names"#);
}
// https://docs.snowflake.com/en/user-guide/querying-semistructured
#[test]
fn parse_json_using_colon() {
fn parse_semi_structured_data_traversal() {
// most basic case
let sql = "SELECT a:b FROM t";
let select = snowflake().verified_only_select(sql);
assert_eq!(
SelectItem::UnnamedExpr(Expr::JsonAccess {
left: Box::new(Expr::Identifier(Ident::new("a"))),
operator: JsonOperator::Colon,
right: Box::new(Expr::Value(Value::UnQuotedString("b".to_string()))),
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![JsonPathElem::Dot {
key: "b".to_owned(),
quoted: false
}]
},
}),
select.projection[0]
);
let sql = "SELECT a:type FROM t";
// identifier can be quoted
let sql = r#"SELECT a:"my long object key name" FROM t"#;
let select = snowflake().verified_only_select(sql);
assert_eq!(
SelectItem::UnnamedExpr(Expr::JsonAccess {
left: Box::new(Expr::Identifier(Ident::new("a"))),
operator: JsonOperator::Colon,
right: Box::new(Expr::Value(Value::UnQuotedString("type".to_string()))),
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![JsonPathElem::Dot {
key: "my long object key name".to_owned(),
quoted: true
}]
},
}),
select.projection[0]
);
let sql = "SELECT a:location FROM t";
// expressions are allowed in bracket notation
let sql = r#"SELECT a[2 + 2] FROM t"#;
let select = snowflake().verified_only_select(sql);
assert_eq!(
SelectItem::UnnamedExpr(Expr::JsonAccess {
left: Box::new(Expr::Identifier(Ident::new("a"))),
operator: JsonOperator::Colon,
right: Box::new(Expr::Value(Value::UnQuotedString("location".to_string()))),
}),
select.projection[0]
);
let sql = "SELECT a:date FROM t";
let select = snowflake().verified_only_select(sql);
assert_eq!(
SelectItem::UnnamedExpr(Expr::JsonAccess {
left: Box::new(Expr::Identifier(Ident::new("a"))),
operator: JsonOperator::Colon,
right: Box::new(Expr::Value(Value::UnQuotedString("date".to_string()))),
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![JsonPathElem::Bracket {
key: Expr::BinaryOp {
left: Box::new(Expr::Value(number("2"))),
op: BinaryOperator::Plus,
right: Box::new(Expr::Value(number("2")))
},
}]
},
}),
select.projection[0]
);
snowflake().verified_stmt("SELECT a:b::INT FROM t");
let sql = "SELECT a:start, a:end FROM t";
// unquoted keywords are permitted in the object key
let sql = "SELECT a:select, a:from FROM t";
let select = snowflake().verified_only_select(sql);
assert_eq!(
vec![
SelectItem::UnnamedExpr(Expr::JsonAccess {
left: Box::new(Expr::Identifier(Ident::new("a"))),
operator: JsonOperator::Colon,
right: Box::new(Expr::Value(Value::UnQuotedString("start".to_string()))),
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![JsonPathElem::Dot {
key: "select".to_owned(),
quoted: false
}]
},
}),
SelectItem::UnnamedExpr(Expr::JsonAccess {
left: Box::new(Expr::Identifier(Ident::new("a"))),
operator: JsonOperator::Colon,
right: Box::new(Expr::Value(Value::UnQuotedString("end".to_string()))),
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![JsonPathElem::Dot {
key: "from".to_owned(),
quoted: false
}]
},
})
],
select.projection
);
// multiple levels can be traversed
// https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
let sql = r#"SELECT a:foo."bar".baz"#;
let select = snowflake().verified_only_select(sql);
assert_eq!(
vec![SelectItem::UnnamedExpr(Expr::JsonAccess {
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![
JsonPathElem::Dot {
key: "foo".to_owned(),
quoted: false,
},
JsonPathElem::Dot {
key: "bar".to_owned(),
quoted: true,
},
JsonPathElem::Dot {
key: "baz".to_owned(),
quoted: false,
}
]
},
})],
select.projection
);
// dot and bracket notation can be mixed (starting with : case)
// https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
let sql = r#"SELECT a:foo[0].bar"#;
let select = snowflake().verified_only_select(sql);
assert_eq!(
vec![SelectItem::UnnamedExpr(Expr::JsonAccess {
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![
JsonPathElem::Dot {
key: "foo".to_owned(),
quoted: false,
},
JsonPathElem::Bracket {
key: Expr::Value(number("0")),
},
JsonPathElem::Dot {
key: "bar".to_owned(),
quoted: false,
}
]
},
})],
select.projection
);
// dot and bracket notation can be mixed (starting with bracket case)
// https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
let sql = r#"SELECT a[0].foo.bar"#;
let select = snowflake().verified_only_select(sql);
assert_eq!(
vec![SelectItem::UnnamedExpr(Expr::JsonAccess {
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![
JsonPathElem::Bracket {
key: Expr::Value(number("0")),
},
JsonPathElem::Dot {
key: "foo".to_owned(),
quoted: false,
},
JsonPathElem::Dot {
key: "bar".to_owned(),
quoted: false,
}
]
},
})],
select.projection
);
}
#[test]