mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-09-22 13:42:31 +00:00
Support for Postgres array slice syntax (#1290)
Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
This commit is contained in:
parent
80c03f5c6a
commit
afa5f08db9
5 changed files with 356 additions and 61 deletions
|
@ -679,7 +679,7 @@ pub enum Expr {
|
|||
},
|
||||
/// Access a map-like object by field (e.g. `column['field']` or `column[4]`).
|
||||
/// Note that depending on the dialect, struct like accesses may be
|
||||
/// parsed as [`ArrayIndex`](Self::ArrayIndex) or [`MapAccess`](Self::MapAccess)
|
||||
/// parsed as [`Subscript`](Self::Subscript) or [`MapAccess`](Self::MapAccess)
|
||||
/// <https://clickhouse.com/docs/en/sql-reference/data-types/map/>
|
||||
MapAccess {
|
||||
column: Box<Expr>,
|
||||
|
@ -746,10 +746,10 @@ pub enum Expr {
|
|||
/// ```
|
||||
/// [1]: https://duckdb.org/docs/sql/data_types/struct#creating-structs
|
||||
Dictionary(Vec<DictionaryField>),
|
||||
/// An array index expression e.g. `(ARRAY[1, 2])[1]` or `(current_schemas(FALSE))[1]`
|
||||
ArrayIndex {
|
||||
obj: Box<Expr>,
|
||||
indexes: Vec<Expr>,
|
||||
/// An access of nested data using subscript syntax, for example `array[2]`.
|
||||
Subscript {
|
||||
expr: Box<Expr>,
|
||||
subscript: Box<Subscript>,
|
||||
},
|
||||
/// An array expression e.g. `ARRAY[1, 2]`
|
||||
Array(Array),
|
||||
|
@ -805,6 +805,68 @@ pub enum Expr {
|
|||
Lambda(LambdaFunction),
|
||||
}
|
||||
|
||||
/// The contents inside the `[` and `]` in a subscript expression.
|
||||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
|
||||
pub enum Subscript {
|
||||
/// Accesses the element of the array at the given index.
|
||||
Index { index: Expr },
|
||||
|
||||
/// Accesses a slice of an array on PostgreSQL, e.g.
|
||||
///
|
||||
/// ```plaintext
|
||||
/// => select (array[1,2,3,4,5,6])[2:5];
|
||||
/// -----------
|
||||
/// {2,3,4,5}
|
||||
/// ```
|
||||
///
|
||||
/// The lower and/or upper bound can be omitted to slice from the start or
|
||||
/// end of the array respectively.
|
||||
///
|
||||
/// See <https://www.postgresql.org/docs/current/arrays.html#ARRAYS-ACCESSING>.
|
||||
///
|
||||
/// Also supports an optional "stride" as the last element (this is not
|
||||
/// supported by postgres), e.g.
|
||||
///
|
||||
/// ```plaintext
|
||||
/// => select (array[1,2,3,4,5,6])[1:6:2];
|
||||
/// -----------
|
||||
/// {1,3,5}
|
||||
/// ```
|
||||
Slice {
|
||||
lower_bound: Option<Expr>,
|
||||
upper_bound: Option<Expr>,
|
||||
stride: Option<Expr>,
|
||||
},
|
||||
}
|
||||
|
||||
impl fmt::Display for Subscript {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Subscript::Index { index } => write!(f, "{index}"),
|
||||
Subscript::Slice {
|
||||
lower_bound,
|
||||
upper_bound,
|
||||
stride,
|
||||
} => {
|
||||
if let Some(lower) = lower_bound {
|
||||
write!(f, "{lower}")?;
|
||||
}
|
||||
write!(f, ":")?;
|
||||
if let Some(upper) = upper_bound {
|
||||
write!(f, "{upper}")?;
|
||||
}
|
||||
if let Some(stride) = stride {
|
||||
write!(f, ":")?;
|
||||
write!(f, "{stride}")?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A lambda function.
|
||||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
|
@ -1251,12 +1313,11 @@ impl fmt::Display for Expr {
|
|||
Expr::Dictionary(fields) => {
|
||||
write!(f, "{{{}}}", display_comma_separated(fields))
|
||||
}
|
||||
Expr::ArrayIndex { obj, indexes } => {
|
||||
write!(f, "{obj}")?;
|
||||
for i in indexes {
|
||||
write!(f, "[{i}]")?;
|
||||
}
|
||||
Ok(())
|
||||
Expr::Subscript {
|
||||
expr,
|
||||
subscript: key,
|
||||
} => {
|
||||
write!(f, "{expr}[{key}]")
|
||||
}
|
||||
Expr::Array(set) => {
|
||||
write!(f, "{set}")
|
||||
|
|
|
@ -2544,8 +2544,7 @@ impl<'a> Parser<'a> {
|
|||
})
|
||||
} else if Token::LBracket == tok {
|
||||
if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) {
|
||||
// parse index
|
||||
self.parse_array_index(expr)
|
||||
self.parse_subscript(expr)
|
||||
} else if dialect_of!(self is SnowflakeDialect) {
|
||||
self.prev_token();
|
||||
self.parse_json_access(expr)
|
||||
|
@ -2573,18 +2572,87 @@ impl<'a> Parser<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn parse_array_index(&mut self, expr: Expr) -> Result<Expr, ParserError> {
|
||||
let index = self.parse_expr()?;
|
||||
self.expect_token(&Token::RBracket)?;
|
||||
let mut indexes: Vec<Expr> = vec![index];
|
||||
while self.consume_token(&Token::LBracket) {
|
||||
let index = self.parse_expr()?;
|
||||
self.expect_token(&Token::RBracket)?;
|
||||
indexes.push(index);
|
||||
/// Parses an array subscript like
|
||||
/// * `[:]`
|
||||
/// * `[l]`
|
||||
/// * `[l:]`
|
||||
/// * `[:u]`
|
||||
/// * `[l:u]`
|
||||
/// * `[l:u:s]`
|
||||
///
|
||||
/// Parser is right after `[`
|
||||
fn parse_subscript_inner(&mut self) -> Result<Subscript, ParserError> {
|
||||
// at either `<lower>:(rest)` or `:(rest)]`
|
||||
let lower_bound = if self.consume_token(&Token::Colon) {
|
||||
None
|
||||
} else {
|
||||
Some(self.parse_expr()?)
|
||||
};
|
||||
|
||||
// check for end
|
||||
if self.consume_token(&Token::RBracket) {
|
||||
if let Some(lower_bound) = lower_bound {
|
||||
return Ok(Subscript::Index { index: lower_bound });
|
||||
};
|
||||
return Ok(Subscript::Slice {
|
||||
lower_bound,
|
||||
upper_bound: None,
|
||||
stride: None,
|
||||
});
|
||||
}
|
||||
Ok(Expr::ArrayIndex {
|
||||
obj: Box::new(expr),
|
||||
indexes,
|
||||
|
||||
// consume the `:`
|
||||
if lower_bound.is_some() {
|
||||
self.expect_token(&Token::Colon)?;
|
||||
}
|
||||
|
||||
// we are now at either `]`, `<upper>(rest)]`
|
||||
let upper_bound = if self.consume_token(&Token::RBracket) {
|
||||
return Ok(Subscript::Slice {
|
||||
lower_bound,
|
||||
upper_bound: None,
|
||||
stride: None,
|
||||
});
|
||||
} else {
|
||||
Some(self.parse_expr()?)
|
||||
};
|
||||
|
||||
// check for end
|
||||
if self.consume_token(&Token::RBracket) {
|
||||
return Ok(Subscript::Slice {
|
||||
lower_bound,
|
||||
upper_bound,
|
||||
stride: None,
|
||||
});
|
||||
}
|
||||
|
||||
// we are now at `:]` or `:stride]`
|
||||
self.expect_token(&Token::Colon)?;
|
||||
let stride = if self.consume_token(&Token::RBracket) {
|
||||
None
|
||||
} else {
|
||||
Some(self.parse_expr()?)
|
||||
};
|
||||
|
||||
if stride.is_some() {
|
||||
self.expect_token(&Token::RBracket)?;
|
||||
}
|
||||
|
||||
Ok(Subscript::Slice {
|
||||
lower_bound,
|
||||
upper_bound,
|
||||
stride,
|
||||
})
|
||||
}
|
||||
|
||||
/// Parses an array subscript like `[1:3]`
|
||||
///
|
||||
/// Parser is right after `[`
|
||||
pub fn parse_subscript(&mut self, expr: Expr) -> Result<Expr, ParserError> {
|
||||
let subscript = self.parse_subscript_inner()?;
|
||||
Ok(Expr::Subscript {
|
||||
expr: Box::new(expr),
|
||||
subscript: Box::new(subscript),
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -2838,7 +2906,7 @@ impl<'a> Parser<'a> {
|
|||
Ok(Self::MUL_DIV_MOD_OP_PREC)
|
||||
}
|
||||
Token::DoubleColon => Ok(50),
|
||||
Token::Colon => Ok(50),
|
||||
Token::Colon if dialect_of!(self is SnowflakeDialect) => Ok(50),
|
||||
Token::ExclamationMark => Ok(50),
|
||||
Token::LBracket | Token::Overlap | Token::CaretAt => Ok(50),
|
||||
Token::Arrow
|
||||
|
|
|
@ -528,8 +528,8 @@ fn test_array_index() {
|
|||
_ => panic!("Expected an expression with alias"),
|
||||
};
|
||||
assert_eq!(
|
||||
&Expr::ArrayIndex {
|
||||
obj: Box::new(Expr::Array(Array {
|
||||
&Expr::Subscript {
|
||||
expr: Box::new(Expr::Array(Array {
|
||||
elem: vec![
|
||||
Expr::Value(Value::SingleQuotedString("a".to_owned())),
|
||||
Expr::Value(Value::SingleQuotedString("b".to_owned())),
|
||||
|
@ -537,7 +537,9 @@ fn test_array_index() {
|
|||
],
|
||||
named: false
|
||||
})),
|
||||
indexes: vec![Expr::Value(number("3"))]
|
||||
subscript: Box::new(Subscript::Index {
|
||||
index: Expr::Value(number("3"))
|
||||
})
|
||||
},
|
||||
expr
|
||||
);
|
||||
|
|
|
@ -1873,9 +1873,11 @@ fn parse_array_index_expr() {
|
|||
let sql = "SELECT foo[0] FROM foos";
|
||||
let select = pg_and_generic().verified_only_select(sql);
|
||||
assert_eq!(
|
||||
&Expr::ArrayIndex {
|
||||
obj: Box::new(Expr::Identifier(Ident::new("foo"))),
|
||||
indexes: vec![num[0].clone()],
|
||||
&Expr::Subscript {
|
||||
expr: Box::new(Expr::Identifier(Ident::new("foo"))),
|
||||
subscript: Box::new(Subscript::Index {
|
||||
index: num[0].clone()
|
||||
}),
|
||||
},
|
||||
expr_from_projection(only(&select.projection)),
|
||||
);
|
||||
|
@ -1883,9 +1885,16 @@ fn parse_array_index_expr() {
|
|||
let sql = "SELECT foo[0][0] FROM foos";
|
||||
let select = pg_and_generic().verified_only_select(sql);
|
||||
assert_eq!(
|
||||
&Expr::ArrayIndex {
|
||||
obj: Box::new(Expr::Identifier(Ident::new("foo"))),
|
||||
indexes: vec![num[0].clone(), num[0].clone()],
|
||||
&Expr::Subscript {
|
||||
expr: Box::new(Expr::Subscript {
|
||||
expr: Box::new(Expr::Identifier(Ident::new("foo"))),
|
||||
subscript: Box::new(Subscript::Index {
|
||||
index: num[0].clone()
|
||||
}),
|
||||
}),
|
||||
subscript: Box::new(Subscript::Index {
|
||||
index: num[0].clone()
|
||||
}),
|
||||
},
|
||||
expr_from_projection(only(&select.projection)),
|
||||
);
|
||||
|
@ -1893,19 +1902,27 @@ fn parse_array_index_expr() {
|
|||
let sql = r#"SELECT bar[0]["baz"]["fooz"] FROM foos"#;
|
||||
let select = pg_and_generic().verified_only_select(sql);
|
||||
assert_eq!(
|
||||
&Expr::ArrayIndex {
|
||||
obj: Box::new(Expr::Identifier(Ident::new("bar"))),
|
||||
indexes: vec![
|
||||
num[0].clone(),
|
||||
Expr::Identifier(Ident {
|
||||
value: "baz".to_string(),
|
||||
quote_style: Some('"')
|
||||
&Expr::Subscript {
|
||||
expr: Box::new(Expr::Subscript {
|
||||
expr: Box::new(Expr::Subscript {
|
||||
expr: Box::new(Expr::Identifier(Ident::new("bar"))),
|
||||
subscript: Box::new(Subscript::Index {
|
||||
index: num[0].clone()
|
||||
})
|
||||
}),
|
||||
Expr::Identifier(Ident {
|
||||
subscript: Box::new(Subscript::Index {
|
||||
index: Expr::Identifier(Ident {
|
||||
value: "baz".to_string(),
|
||||
quote_style: Some('"')
|
||||
})
|
||||
})
|
||||
}),
|
||||
subscript: Box::new(Subscript::Index {
|
||||
index: Expr::Identifier(Ident {
|
||||
value: "fooz".to_string(),
|
||||
quote_style: Some('"')
|
||||
})
|
||||
],
|
||||
})
|
||||
},
|
||||
expr_from_projection(only(&select.projection)),
|
||||
);
|
||||
|
@ -1913,26 +1930,33 @@ fn parse_array_index_expr() {
|
|||
let sql = "SELECT (CAST(ARRAY[ARRAY[2, 3]] AS INT[][]))[1][2]";
|
||||
let select = pg_and_generic().verified_only_select(sql);
|
||||
assert_eq!(
|
||||
&Expr::ArrayIndex {
|
||||
obj: Box::new(Expr::Nested(Box::new(Expr::Cast {
|
||||
kind: CastKind::Cast,
|
||||
expr: Box::new(Expr::Array(Array {
|
||||
elem: vec![Expr::Array(Array {
|
||||
elem: vec![num[2].clone(), num[3].clone(),],
|
||||
&Expr::Subscript {
|
||||
expr: Box::new(Expr::Subscript {
|
||||
expr: Box::new(Expr::Nested(Box::new(Expr::Cast {
|
||||
kind: CastKind::Cast,
|
||||
expr: Box::new(Expr::Array(Array {
|
||||
elem: vec![Expr::Array(Array {
|
||||
elem: vec![num[2].clone(), num[3].clone(),],
|
||||
named: true,
|
||||
})],
|
||||
named: true,
|
||||
})],
|
||||
named: true,
|
||||
})),
|
||||
data_type: DataType::Array(ArrayElemTypeDef::SquareBracket(
|
||||
Box::new(DataType::Array(ArrayElemTypeDef::SquareBracket(
|
||||
Box::new(DataType::Int(None)),
|
||||
})),
|
||||
data_type: DataType::Array(ArrayElemTypeDef::SquareBracket(
|
||||
Box::new(DataType::Array(ArrayElemTypeDef::SquareBracket(
|
||||
Box::new(DataType::Int(None)),
|
||||
None
|
||||
))),
|
||||
None
|
||||
))),
|
||||
None
|
||||
)),
|
||||
format: None,
|
||||
}))),
|
||||
indexes: vec![num[1].clone(), num[2].clone()],
|
||||
)),
|
||||
format: None,
|
||||
}))),
|
||||
subscript: Box::new(Subscript::Index {
|
||||
index: num[1].clone()
|
||||
}),
|
||||
}),
|
||||
subscript: Box::new(Subscript::Index {
|
||||
index: num[2].clone()
|
||||
}),
|
||||
},
|
||||
expr_from_projection(only(&select.projection)),
|
||||
);
|
||||
|
@ -1948,6 +1972,116 @@ fn parse_array_index_expr() {
|
|||
);
|
||||
}
|
||||
|
||||
/// Checks that each supported subscript shape parses (and round-trips) to the
/// expected [`Subscript`] value for the Postgres and generic dialects.
#[test]
fn parse_array_subscript() {
    // Small builders to keep the table below readable.
    let num = |n: &str| Expr::Value(number(n));
    let slice = |lower_bound: Option<Expr>, upper_bound: Option<Expr>, stride: Option<Expr>| {
        Subscript::Slice {
            lower_bound,
            upper_bound,
            stride,
        }
    };
    // Builds `array_length(arr) - <n>`.
    let array_length_minus = |n: &str| Expr::BinaryOp {
        left: Box::new(call(
            "array_length",
            [Expr::Identifier(Ident::new("arr"))],
        )),
        op: BinaryOperator::Minus,
        right: Box::new(Expr::Value(number(n))),
    };

    let cases = [
        (
            "(ARRAY[1, 2, 3, 4, 5, 6])[2]",
            Subscript::Index { index: num("2") },
        ),
        (
            "(ARRAY[1, 2, 3, 4, 5, 6])[foo]",
            Subscript::Index {
                index: Expr::Identifier(Ident::new("foo")),
            },
        ),
        (
            "(ARRAY[1, 2, 3, 4, 5, 6])[2:5]",
            slice(Some(num("2")), Some(num("5")), None),
        ),
        (
            "(ARRAY[1, 2, 3, 4, 5, 6])[2:5:3]",
            slice(Some(num("2")), Some(num("5")), Some(num("3"))),
        ),
        (
            "arr[array_length(arr) - 3:array_length(arr) - 1]",
            slice(
                Some(array_length_minus("3")),
                Some(array_length_minus("1")),
                None,
            ),
        ),
        (
            "(ARRAY[1, 2, 3, 4, 5, 6])[:5]",
            slice(None, Some(num("5")), None),
        ),
        (
            "(ARRAY[1, 2, 3, 4, 5, 6])[2:]",
            slice(Some(num("2")), None, None),
        ),
        ("(ARRAY[1, 2, 3, 4, 5, 6])[:]", slice(None, None, None)),
    ];

    for (sql, expect) in cases {
        match pg_and_generic().verified_expr(sql) {
            Expr::Subscript { subscript, .. } => assert_eq!(expect, *subscript),
            _ => panic!("expected subscript expr"),
        }
    }

    // Chained slices only need to round-trip.
    pg_and_generic().verified_expr("schedule[:2][2:]");
}
|
||||
|
||||
/// Checks that consecutive subscripts nest: the later subscript wraps the
/// expression produced by the earlier one.
#[test]
fn parse_array_multi_subscript() {
    let actual = pg_and_generic().verified_expr("make_array(1, 2, 3)[1:2][2]");

    let num = |n: &str| Expr::Value(number(n));

    // Innermost: `make_array(1, 2, 3)[1:2]`.
    let sliced = Expr::Subscript {
        expr: Box::new(call("make_array", vec![num("1"), num("2"), num("3")])),
        subscript: Box::new(Subscript::Slice {
            lower_bound: Some(num("1")),
            upper_bound: Some(num("2")),
            stride: None,
        }),
    };
    // Outermost: `<sliced>[2]`.
    let expected = Expr::Subscript {
        expr: Box::new(sliced),
        subscript: Box::new(Subscript::Index { index: num("2") }),
    };

    assert_eq!(expected, actual);
}
|
||||
|
||||
#[test]
|
||||
fn parse_create_index() {
|
||||
let sql = "CREATE INDEX IF NOT EXISTS my_index ON my_table(col1,col2)";
|
||||
|
|
|
@ -394,6 +394,36 @@ fn parse_semi_structured_data_traversal() {
|
|||
})],
|
||||
select.projection
|
||||
);
|
||||
|
||||
// a json access used as a key to another json access
|
||||
assert_eq!(
|
||||
snowflake().verified_expr("a[b:c]"),
|
||||
Expr::JsonAccess {
|
||||
value: Box::new(Expr::Identifier(Ident::new("a"))),
|
||||
path: JsonPath {
|
||||
path: vec![JsonPathElem::Bracket {
|
||||
key: Expr::JsonAccess {
|
||||
value: Box::new(Expr::Identifier(Ident::new("b"))),
|
||||
path: JsonPath {
|
||||
path: vec![JsonPathElem::Dot {
|
||||
key: "c".to_owned(),
|
||||
quoted: false
|
||||
}]
|
||||
}
|
||||
}
|
||||
}]
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
// unquoted object keys cannot start with a digit
|
||||
assert_eq!(
|
||||
snowflake()
|
||||
.parse_sql_statements("SELECT a:42")
|
||||
.unwrap_err()
|
||||
.to_string(),
|
||||
"sql parser error: Expected variant object key name, found: 42"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue