This commit is contained in:
Samyak Sarnayak 2025-12-20 23:06:40 +05:30 committed by GitHub
commit a13a117fa2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 151 additions and 116 deletions

View file

@ -759,6 +759,13 @@ pub trait Dialect: Debug + Any {
Token::DoubleColon | Token::ExclamationMark | Token::LBracket | Token::CaretAt => {
Ok(p!(DoubleColon))
}
Token::Colon => match parser.peek_nth_token(1).token {
// When colon is followed by a string or a number, it's usually in MAP syntax.
Token::SingleQuotedString(_) | Token::Number(_, _) => Ok(self.prec_unknown()),
// In other cases, it's used in semi-structured data traversal like in variant or JSON
// string columns. See `JsonAccess`.
_ => Ok(p!(Colon)),
},
Token::Arrow
| Token::LongArrow
| Token::HashArrow
@ -812,6 +819,7 @@ pub trait Dialect: Debug + Any {
Precedence::Ampersand => 23,
Precedence::Caret => 22,
Precedence::Pipe => 21,
Precedence::Colon => 21,
Precedence::Between => 20,
Precedence::Eq => 20,
Precedence::Like => 19,
@ -1232,6 +1240,7 @@ pub enum Precedence {
Ampersand,
Caret,
Pipe,
Colon,
Between,
Eq,
Like,

View file

@ -160,6 +160,15 @@ impl Dialect for MsSqlDialect {
None
}
}
/// Dialect-specific precedence override for the next token.
///
/// Returns `Some(Ok(self.prec_unknown()))` when the upcoming token is a `:`,
/// and `None` for every other token.
/// NOTE(review): `None` presumably lets the caller fall back to the default
/// precedence rules — confirm against the `Dialect` trait.
fn get_next_precedence(&self, parser: &Parser) -> Option<Result<u8, ParserError>> {
    if matches!(parser.peek_token().token, Token::Colon) {
        // lowest prec to prevent it from turning into a binary op
        Some(Ok(self.prec_unknown()))
    } else {
        None
    }
}
}
impl MsSqlDialect {

View file

@ -136,6 +136,8 @@ impl Dialect for PostgreSqlDialect {
| Token::ShiftRight
| Token::ShiftLeft
| Token::CustomBinaryOperator(_) => Some(Ok(PG_OTHER_PREC)),
// lowest prec to prevent it from turning into a binary op
Token::Colon => Some(Ok(self.prec_unknown())),
_ => None,
}
}

View file

@ -3847,7 +3847,8 @@ impl<'a> Parser<'a> {
let lower_bound = if self.consume_token(&Token::Colon) {
None
} else {
Some(self.parse_expr()?)
// parse expr until we hit a colon (or any token with lower precedence)
Some(self.parse_subexpr(self.dialect.prec_value(Precedence::Colon))?)
};
// check for end
@ -3875,7 +3876,8 @@ impl<'a> Parser<'a> {
stride: None,
});
} else {
Some(self.parse_expr()?)
// parse expr until we hit a colon (or any token with lower precedence)
Some(self.parse_subexpr(self.dialect.prec_value(Precedence::Colon))?)
};
// check for end
@ -3892,7 +3894,8 @@ impl<'a> Parser<'a> {
let stride = if self.consume_token(&Token::RBracket) {
None
} else {
Some(self.parse_expr()?)
// parse expr until we hit a colon (or any token with lower precedence)
Some(self.parse_subexpr(self.dialect.prec_value(Precedence::Colon))?)
};
if stride.is_some() {

View file

@ -17972,3 +17972,126 @@ fn parse_select_parenthesized_wildcard() {
assert_eq!(select2.projection.len(), 1);
assert!(matches!(select2.projection[0], SelectItem::Wildcard(_)));
}
// https://docs.snowflake.com/en/user-guide/querying-semistructured
#[test]
fn parse_semi_structured_data_traversal() {
    // Builds a dot-notation path element with the given key and quoting flag.
    fn dot(key: &str, quoted: bool) -> JsonPathElem {
        JsonPathElem::Dot {
            key: key.to_owned(),
            quoted,
        }
    }

    // Builds the unnamed projection item for a JSON access rooted at identifier `a`.
    fn access_on_a(path: Vec<JsonPathElem>) -> SelectItem {
        SelectItem::UnnamedExpr(Expr::JsonAccess {
            value: Box::new(Expr::Identifier(Ident::new("a"))),
            path: JsonPath { path },
        })
    }

    // Every case below is checked against both of these dialects.
    let dialects = TestedDialects::new(vec![
        Box::new(GenericDialect {}),
        Box::new(SnowflakeDialect {}),
    ]);

    // most basic case
    let basic = dialects.verified_only_select("SELECT a:b FROM t");
    assert_eq!(access_on_a(vec![dot("b", false)]), basic.projection[0]);

    // identifier can be quoted
    let quoted_key =
        dialects.verified_only_select(r#"SELECT a:"my long object key name" FROM t"#);
    assert_eq!(
        access_on_a(vec![dot("my long object key name", true)]),
        quoted_key.projection[0]
    );

    // a cast can directly follow the traversal
    dialects.verified_stmt("SELECT a:b::INT FROM t");

    // unquoted keywords are permitted in the object key
    let keyword_keys = dialects.verified_only_select("SELECT a:select, a:from FROM t");
    assert_eq!(
        vec![
            access_on_a(vec![dot("select", false)]),
            access_on_a(vec![dot("from", false)]),
        ],
        keyword_keys.projection
    );

    // multiple levels can be traversed
    // https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
    let multi_level = dialects.verified_only_select(r#"SELECT a:foo."bar".baz"#);
    assert_eq!(
        vec![access_on_a(vec![
            dot("foo", false),
            dot("bar", true),
            dot("baz", false),
        ])],
        multi_level.projection
    );

    // dot and bracket notation can be mixed (starting with : case)
    // https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
    let mixed = dialects.verified_only_select(r#"SELECT a:foo[0].bar"#);
    assert_eq!(
        vec![access_on_a(vec![
            dot("foo", false),
            JsonPathElem::Bracket {
                key: Expr::value(number("0")),
            },
            dot("bar", false),
        ])],
        mixed.projection
    );
}

View file

@ -1265,37 +1265,8 @@ fn parse_lateral_flatten() {
// https://docs.snowflake.com/en/user-guide/querying-semistructured
#[test]
fn parse_semi_structured_data_traversal() {
// most basic case
let sql = "SELECT a:b FROM t";
let select = snowflake().verified_only_select(sql);
assert_eq!(
SelectItem::UnnamedExpr(Expr::JsonAccess {
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![JsonPathElem::Dot {
key: "b".to_owned(),
quoted: false
}]
},
}),
select.projection[0]
);
// identifier can be quoted
let sql = r#"SELECT a:"my long object key name" FROM t"#;
let select = snowflake().verified_only_select(sql);
assert_eq!(
SelectItem::UnnamedExpr(Expr::JsonAccess {
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![JsonPathElem::Dot {
key: "my long object key name".to_owned(),
quoted: true
}]
},
}),
select.projection[0]
);
// see `tests/sqlparser_common.rs` -> `parse_semi_structured_data_traversal` for more test
// cases. This test only has Snowflake-specific syntax like array access.
// expressions are allowed in bracket notation
let sql = r#"SELECT a[2 + 2] FROM t"#;
@ -1316,88 +1287,6 @@ fn parse_semi_structured_data_traversal() {
select.projection[0]
);
snowflake().verified_stmt("SELECT a:b::INT FROM t");
// unquoted keywords are permitted in the object key
let sql = "SELECT a:select, a:from FROM t";
let select = snowflake().verified_only_select(sql);
assert_eq!(
vec![
SelectItem::UnnamedExpr(Expr::JsonAccess {
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![JsonPathElem::Dot {
key: "select".to_owned(),
quoted: false
}]
},
}),
SelectItem::UnnamedExpr(Expr::JsonAccess {
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![JsonPathElem::Dot {
key: "from".to_owned(),
quoted: false
}]
},
})
],
select.projection
);
// multiple levels can be traversed
// https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
let sql = r#"SELECT a:foo."bar".baz"#;
let select = snowflake().verified_only_select(sql);
assert_eq!(
vec![SelectItem::UnnamedExpr(Expr::JsonAccess {
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![
JsonPathElem::Dot {
key: "foo".to_owned(),
quoted: false,
},
JsonPathElem::Dot {
key: "bar".to_owned(),
quoted: true,
},
JsonPathElem::Dot {
key: "baz".to_owned(),
quoted: false,
}
]
},
})],
select.projection
);
// dot and bracket notation can be mixed (starting with : case)
// https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
let sql = r#"SELECT a:foo[0].bar"#;
let select = snowflake().verified_only_select(sql);
assert_eq!(
vec![SelectItem::UnnamedExpr(Expr::JsonAccess {
value: Box::new(Expr::Identifier(Ident::new("a"))),
path: JsonPath {
path: vec![
JsonPathElem::Dot {
key: "foo".to_owned(),
quoted: false,
},
JsonPathElem::Bracket {
key: Expr::value(number("0")),
},
JsonPathElem::Dot {
key: "bar".to_owned(),
quoted: false,
}
]
},
})],
select.projection
);
// dot and bracket notation can be mixed (starting with bracket case)
// https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
let sql = r#"SELECT a[0].foo.bar"#;