Improve parsing of JSON accesses on Postgres and Snowflake (#1215)

Co-authored-by: Ifeanyi Ubah <ify1992@yahoo.com>
This commit is contained in:
Joey Hain 2024-04-30 07:49:05 -07:00 committed by GitHub
parent 0606024353
commit 4bfa399919
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 432 additions and 199 deletions

View file

@@ -183,71 +183,167 @@ fn parse_lateral_flatten() {
snowflake().verified_only_select(r#"SELECT emp.employee_ID, emp.last_name, index, value AS project_name FROM employees AS emp, LATERAL FLATTEN(INPUT => emp.project_names) AS proj_names"#);
}
// https://docs.snowflake.com/en/user-guide/querying-semistructured
// Exercises Snowflake semi-structured access syntax (`:`, `.` and `[...]`)
// through `snowflake().verified_only_select` / `snowflake().verified_stmt`,
// comparing the parsed projection items against hand-built `Expr::JsonAccess`
// values.
//
// NOTE(review): this span reads like a rendered diff whose +/- markers were
// lost: two `fn` headers follow each other, and several struct literals carry
// both a `left`/`operator`/`right` field set and a `value`/`path` field set.
// It is presumably not compilable as shown — confirm against the real test
// file before relying on any single line.
#[test]
// NOTE(review): presumably the pre-change name of this test — confirm.
fn parse_json_using_colon() {
fn parse_semi_structured_data_traversal() {
// most basic case
let sql = "SELECT a:b FROM t";
let select = snowflake().verified_only_select(sql);
assert_eq!(
SelectItem::UnnamedExpr(Expr::JsonAccess {
// NOTE(review): `left`/`operator`/`right` look like the pre-change shape of
// `Expr::JsonAccess`; `value`/`path` right below look like its replacement.
// The same pairing recurs in later literals in this function — confirm.
left: Box::new(Expr::Identifier(Ident::new("a"))),
operator: JsonOperator::Colon,
right: Box::new(Expr::Value(Value::UnQuotedString("b".to_string()))),
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![JsonPathElem::Dot {
key: "b".to_owned(),
quoted: false
}]
},
}),
select.projection[0]
);
// NOTE(review): this binding is shadowed by the next `let sql` before it is
// ever used — likely a removed line.
let sql = "SELECT a:type FROM t";
// identifier can be quoted
let sql = r#"SELECT a:"my long object key name" FROM t"#;
let select = snowflake().verified_only_select(sql);
assert_eq!(
SelectItem::UnnamedExpr(Expr::JsonAccess {
left: Box::new(Expr::Identifier(Ident::new("a"))),
operator: JsonOperator::Colon,
right: Box::new(Expr::Value(Value::UnQuotedString("type".to_string()))),
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![JsonPathElem::Dot {
key: "my long object key name".to_owned(),
quoted: true
}]
},
}),
select.projection[0]
);
// NOTE(review): shadowed by the next `let sql` before use — likely a removed line.
let sql = "SELECT a:location FROM t";
// expressions are allowed in bracket notation
let sql = r#"SELECT a[2 + 2] FROM t"#;
let select = snowflake().verified_only_select(sql);
assert_eq!(
SelectItem::UnnamedExpr(Expr::JsonAccess {
// NOTE(review): the `location` and `date` keys below do not occur in the
// bracket-notation SQL above; these lines presumably belong to assertions
// from the pre-change test. The `Bracket` path further down is the value
// that matches `a[2 + 2]` — confirm.
left: Box::new(Expr::Identifier(Ident::new("a"))),
operator: JsonOperator::Colon,
right: Box::new(Expr::Value(Value::UnQuotedString("location".to_string()))),
}),
select.projection[0]
);
let sql = "SELECT a:date FROM t";
let select = snowflake().verified_only_select(sql);
assert_eq!(
SelectItem::UnnamedExpr(Expr::JsonAccess {
left: Box::new(Expr::Identifier(Ident::new("a"))),
operator: JsonOperator::Colon,
right: Box::new(Expr::Value(Value::UnQuotedString("date".to_string()))),
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![JsonPathElem::Bracket {
key: Expr::BinaryOp {
left: Box::new(Expr::Value(number("2"))),
op: BinaryOperator::Plus,
right: Box::new(Expr::Value(number("2")))
},
}]
},
}),
select.projection[0]
);
// Traversal followed by a `::INT` cast; only passed to `verified_stmt`, with
// no AST comparison here.
snowflake().verified_stmt("SELECT a:b::INT FROM t");
// NOTE(review): shadowed by the next `let sql` before use — likely a removed line.
let sql = "SELECT a:start, a:end FROM t";
// unquoted keywords are permitted in the object key
let sql = "SELECT a:select, a:from FROM t";
let select = snowflake().verified_only_select(sql);
assert_eq!(
vec![
SelectItem::UnnamedExpr(Expr::JsonAccess {
left: Box::new(Expr::Identifier(Ident::new("a"))),
operator: JsonOperator::Colon,
right: Box::new(Expr::Value(Value::UnQuotedString("start".to_string()))),
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![JsonPathElem::Dot {
key: "select".to_owned(),
quoted: false
}]
},
}),
SelectItem::UnnamedExpr(Expr::JsonAccess {
left: Box::new(Expr::Identifier(Ident::new("a"))),
operator: JsonOperator::Colon,
right: Box::new(Expr::Value(Value::UnQuotedString("end".to_string()))),
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![JsonPathElem::Dot {
key: "from".to_owned(),
quoted: false
}]
},
})
],
select.projection
);
// multiple levels can be traversed
// https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
let sql = r#"SELECT a:foo."bar".baz"#;
let select = snowflake().verified_only_select(sql);
// Expected path has three `Dot` elements; only the middle one (`bar`) is
// marked as quoted, mirroring the quoting in the SQL text.
assert_eq!(
vec![SelectItem::UnnamedExpr(Expr::JsonAccess {
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![
JsonPathElem::Dot {
key: "foo".to_owned(),
quoted: false,
},
JsonPathElem::Dot {
key: "bar".to_owned(),
quoted: true,
},
JsonPathElem::Dot {
key: "baz".to_owned(),
quoted: false,
}
]
},
})],
select.projection
);
// dot and bracket notation can be mixed (starting with : case)
// https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
let sql = r#"SELECT a:foo[0].bar"#;
let select = snowflake().verified_only_select(sql);
// Expected path order follows the SQL: Dot(foo), Bracket(0), Dot(bar).
assert_eq!(
vec![SelectItem::UnnamedExpr(Expr::JsonAccess {
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![
JsonPathElem::Dot {
key: "foo".to_owned(),
quoted: false,
},
JsonPathElem::Bracket {
key: Expr::Value(number("0")),
},
JsonPathElem::Dot {
key: "bar".to_owned(),
quoted: false,
}
]
},
})],
select.projection
);
// dot and bracket notation can be mixed (starting with bracket case)
// https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
let sql = r#"SELECT a[0].foo.bar"#;
let select = snowflake().verified_only_select(sql);
// Expected path order follows the SQL: Bracket(0), Dot(foo), Dot(bar).
assert_eq!(
vec![SelectItem::UnnamedExpr(Expr::JsonAccess {
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![
JsonPathElem::Bracket {
key: Expr::Value(number("0")),
},
JsonPathElem::Dot {
key: "foo".to_owned(),
quoted: false,
},
JsonPathElem::Dot {
key: "bar".to_owned(),
quoted: false,
}
]
},
})],
select.projection
);
}
#[test]