mirror of
				https://github.com/apache/datafusion-sqlparser-rs.git
				synced 2025-10-31 07:14:20 +00:00 
			
		
		
		
	Support for Postgres array slice syntax (#1290)
Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
This commit is contained in:
		
							parent
							
								
									80c03f5c6a
								
							
						
					
					
						commit
						afa5f08db9
					
				
					 5 changed files with 356 additions and 61 deletions
				
			
		|  | @ -679,7 +679,7 @@ pub enum Expr { | |||
|     }, | ||||
|     /// Access a map-like object by field (e.g. `column['field']` or `column[4]`).
 | ||||
|     /// Note that depending on the dialect, struct-like accesses may be
 | ||||
|     /// parsed as [`ArrayIndex`](Self::ArrayIndex) or [`MapAccess`](Self::MapAccess)
 | ||||
|     /// parsed as [`Subscript`](Self::Subscript) or [`MapAccess`](Self::MapAccess)
 | ||||
|     /// <https://clickhouse.com/docs/en/sql-reference/data-types/map/>
 | ||||
|     MapAccess { | ||||
|         column: Box<Expr>, | ||||
|  | @ -746,10 +746,10 @@ pub enum Expr { | |||
|     /// ```
 | ||||
|     /// [1]: https://duckdb.org/docs/sql/data_types/struct#creating-structs
 | ||||
|     Dictionary(Vec<DictionaryField>), | ||||
|     /// An array index expression e.g. `(ARRAY[1, 2])[1]` or `(current_schemas(FALSE))[1]`
 | ||||
|     ArrayIndex { | ||||
|         obj: Box<Expr>, | ||||
|         indexes: Vec<Expr>, | ||||
|     /// An access of nested data using subscript syntax, for example `array[2]`.
 | ||||
|     Subscript { | ||||
|         expr: Box<Expr>, | ||||
|         subscript: Box<Subscript>, | ||||
|     }, | ||||
|     /// An array expression e.g. `ARRAY[1, 2]`
 | ||||
|     Array(Array), | ||||
|  | @ -805,6 +805,68 @@ pub enum Expr { | |||
|     Lambda(LambdaFunction), | ||||
| } | ||||
| 
 | ||||
| /// The contents inside the `[` and `]` in a subscript expression.
 | ||||
| #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] | ||||
| #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] | ||||
| #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] | ||||
| pub enum Subscript { | ||||
|     /// Accesses the element of the array at the given index.
 | ||||
|     Index { index: Expr }, | ||||
| 
 | ||||
|     /// Accesses a slice of an array on PostgreSQL, e.g.
 | ||||
|     ///
 | ||||
|     /// ```plaintext
 | ||||
|     /// => select (array[1,2,3,4,5,6])[2:5];
 | ||||
|     /// -----------
 | ||||
|     /// {2,3,4,5}
 | ||||
|     /// ```
 | ||||
|     ///
 | ||||
|     /// The lower and/or upper bound can be omitted to slice from the start or
 | ||||
|     /// end of the array respectively.
 | ||||
|     ///
 | ||||
|     /// See <https://www.postgresql.org/docs/current/arrays.html#ARRAYS-ACCESSING>.
 | ||||
|     ///
 | ||||
|     /// Also supports an optional "stride" as the last element (this is not
 | ||||
|     /// supported by PostgreSQL), e.g.
 | ||||
|     ///
 | ||||
|     /// ```plaintext
 | ||||
|     /// => select (array[1,2,3,4,5,6])[1:6:2];
 | ||||
|     /// -----------
 | ||||
|     /// {1,3,5}
 | ||||
|     /// ```
 | ||||
|     Slice { | ||||
|         lower_bound: Option<Expr>, | ||||
|         upper_bound: Option<Expr>, | ||||
|         stride: Option<Expr>, | ||||
|     }, | ||||
| } | ||||
| 
 | ||||
| impl fmt::Display for Subscript { | ||||
|     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||
|         match self { | ||||
|             Subscript::Index { index } => write!(f, "{index}"), | ||||
|             Subscript::Slice { | ||||
|                 lower_bound, | ||||
|                 upper_bound, | ||||
|                 stride, | ||||
|             } => { | ||||
|                 if let Some(lower) = lower_bound { | ||||
|                     write!(f, "{lower}")?; | ||||
|                 } | ||||
|                 write!(f, ":")?; | ||||
|                 if let Some(upper) = upper_bound { | ||||
|                     write!(f, "{upper}")?; | ||||
|                 } | ||||
|                 if let Some(stride) = stride { | ||||
|                     write!(f, ":")?; | ||||
|                     write!(f, "{stride}")?; | ||||
|                 } | ||||
|                 Ok(()) | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| /// A lambda function.
 | ||||
| #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] | ||||
| #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] | ||||
|  | @ -1251,12 +1313,11 @@ impl fmt::Display for Expr { | |||
|             Expr::Dictionary(fields) => { | ||||
|                 write!(f, "{{{}}}", display_comma_separated(fields)) | ||||
|             } | ||||
|             Expr::ArrayIndex { obj, indexes } => { | ||||
|                 write!(f, "{obj}")?; | ||||
|                 for i in indexes { | ||||
|                     write!(f, "[{i}]")?; | ||||
|                 } | ||||
|                 Ok(()) | ||||
|             Expr::Subscript { | ||||
|                 expr, | ||||
|                 subscript: key, | ||||
|             } => { | ||||
|                 write!(f, "{expr}[{key}]") | ||||
|             } | ||||
|             Expr::Array(set) => { | ||||
|                 write!(f, "{set}") | ||||
|  |  | |||
|  | @ -2544,8 +2544,7 @@ impl<'a> Parser<'a> { | |||
|             }) | ||||
|         } else if Token::LBracket == tok { | ||||
|             if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) { | ||||
|                 // parse index
 | ||||
|                 self.parse_array_index(expr) | ||||
|                 self.parse_subscript(expr) | ||||
|             } else if dialect_of!(self is SnowflakeDialect) { | ||||
|                 self.prev_token(); | ||||
|                 self.parse_json_access(expr) | ||||
|  | @ -2573,18 +2572,87 @@ impl<'a> Parser<'a> { | |||
|         } | ||||
|     } | ||||
| 
 | ||||
|     pub fn parse_array_index(&mut self, expr: Expr) -> Result<Expr, ParserError> { | ||||
|         let index = self.parse_expr()?; | ||||
|         self.expect_token(&Token::RBracket)?; | ||||
|         let mut indexes: Vec<Expr> = vec![index]; | ||||
|         while self.consume_token(&Token::LBracket) { | ||||
|             let index = self.parse_expr()?; | ||||
|             self.expect_token(&Token::RBracket)?; | ||||
|             indexes.push(index); | ||||
|     /// Parses an array subscript like
 | ||||
|     /// * `[:]`
 | ||||
|     /// * `[l]`
 | ||||
|     /// * `[l:]`
 | ||||
|     /// * `[:u]`
 | ||||
|     /// * `[l:u]`
 | ||||
|     /// * `[l:u:s]`
 | ||||
|     ///
 | ||||
|     /// The parser must be positioned right after the opening `[`.
 | ||||
|     fn parse_subscript_inner(&mut self) -> Result<Subscript, ParserError> { | ||||
|         // we are at either `<lower>(rest)]` or `:(rest)]`
 | ||||
|         let lower_bound = if self.consume_token(&Token::Colon) { | ||||
|             None | ||||
|         } else { | ||||
|             Some(self.parse_expr()?) | ||||
|         }; | ||||
| 
 | ||||
|         // check for end
 | ||||
|         if self.consume_token(&Token::RBracket) { | ||||
|             if let Some(lower_bound) = lower_bound { | ||||
|                 return Ok(Subscript::Index { index: lower_bound }); | ||||
|             }; | ||||
|             return Ok(Subscript::Slice { | ||||
|                 lower_bound, | ||||
|                 upper_bound: None, | ||||
|                 stride: None, | ||||
|             }); | ||||
|         } | ||||
|         Ok(Expr::ArrayIndex { | ||||
|             obj: Box::new(expr), | ||||
|             indexes, | ||||
| 
 | ||||
|         // consume the `:`
 | ||||
|         if lower_bound.is_some() { | ||||
|             self.expect_token(&Token::Colon)?; | ||||
|         } | ||||
| 
 | ||||
|         // we are now at either `]` or `<upper>(rest)]`
 | ||||
|         let upper_bound = if self.consume_token(&Token::RBracket) { | ||||
|             return Ok(Subscript::Slice { | ||||
|                 lower_bound, | ||||
|                 upper_bound: None, | ||||
|                 stride: None, | ||||
|             }); | ||||
|         } else { | ||||
|             Some(self.parse_expr()?) | ||||
|         }; | ||||
| 
 | ||||
|         // check for end
 | ||||
|         if self.consume_token(&Token::RBracket) { | ||||
|             return Ok(Subscript::Slice { | ||||
|                 lower_bound, | ||||
|                 upper_bound, | ||||
|                 stride: None, | ||||
|             }); | ||||
|         } | ||||
| 
 | ||||
|         // we are now at `:]` or `:stride]`
 | ||||
|         self.expect_token(&Token::Colon)?; | ||||
|         let stride = if self.consume_token(&Token::RBracket) { | ||||
|             None | ||||
|         } else { | ||||
|             Some(self.parse_expr()?) | ||||
|         }; | ||||
| 
 | ||||
|         if stride.is_some() { | ||||
|             self.expect_token(&Token::RBracket)?; | ||||
|         } | ||||
| 
 | ||||
|         Ok(Subscript::Slice { | ||||
|             lower_bound, | ||||
|             upper_bound, | ||||
|             stride, | ||||
|         }) | ||||
|     } | ||||
| 
 | ||||
|     /// Parses an array subscript like `[1:3]`
 | ||||
|     ///
 | ||||
|     /// The parser must be positioned right after the opening `[`.
 | ||||
|     pub fn parse_subscript(&mut self, expr: Expr) -> Result<Expr, ParserError> { | ||||
|         let subscript = self.parse_subscript_inner()?; | ||||
|         Ok(Expr::Subscript { | ||||
|             expr: Box::new(expr), | ||||
|             subscript: Box::new(subscript), | ||||
|         }) | ||||
|     } | ||||
| 
 | ||||
|  | @ -2838,7 +2906,7 @@ impl<'a> Parser<'a> { | |||
|                 Ok(Self::MUL_DIV_MOD_OP_PREC) | ||||
|             } | ||||
|             Token::DoubleColon => Ok(50), | ||||
|             Token::Colon => Ok(50), | ||||
|             Token::Colon if dialect_of!(self is SnowflakeDialect) => Ok(50), | ||||
|             Token::ExclamationMark => Ok(50), | ||||
|             Token::LBracket | Token::Overlap | Token::CaretAt => Ok(50), | ||||
|             Token::Arrow | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Joey Hain
						Joey Hain