Support for Postgres array slice syntax (#1290)

Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
2025-10-31 07:14:20 +00:00 · 2024-05-31 14:38:35 -07:00 · 2024-05-31 14:38:35 -07:00 · afa5f08db9
commit afa5f08db9
parent 80c03f5c6a
5 changed files with 356 additions and 61 deletions
--- a/src/ast/mod.rs
+++ b/src/ast/mod.rs
@ -679,7 +679,7 @@ pub enum Expr {
    },
    /// Access a map-like object by field (e.g. `column['field']` or `column[4]`).
    /// Note that depending on the dialect, struct like accesses may be
-    /// parsed as [`ArrayIndex`](Self::ArrayIndex) or [`MapAccess`](Self::MapAccess)
+    /// parsed as [`Subscript`](Self::Subscript) or [`MapAccess`](Self::MapAccess)
    /// <https://clickhouse.com/docs/en/sql-reference/data-types/map/>
    MapAccess {
        column: Box<Expr>,
@ -746,10 +746,10 @@ pub enum Expr {
    /// ```
    /// [1]: https://duckdb.org/docs/sql/data_types/struct#creating-structs
    Dictionary(Vec<DictionaryField>),
-    /// An array index expression e.g. `(ARRAY[1, 2])[1]` or `(current_schemas(FALSE))[1]`
-    ArrayIndex {
-        obj: Box<Expr>,
-        indexes: Vec<Expr>,
+    /// An access of nested data using subscript syntax, for example `array[2]`.
+    Subscript {
+        expr: Box<Expr>,
+        subscript: Box<Subscript>,
    },
    /// An array expression e.g. `ARRAY[1, 2]`
    Array(Array),
@ -805,6 +805,68 @@ pub enum Expr {
    Lambda(LambdaFunction),
 }

+/// The contents inside the `[` and `]` in a subscript expression: either a single index or a slice.
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+pub enum Subscript {
+    /// Accesses the element of the array at the given index, e.g. `array[2]`.
+    Index { index: Expr },
+
+    /// Accesses a slice of an array on PostgreSQL, e.g.
+    ///
+    /// ```plaintext
+    /// => select (array[1,2,3,4,5,6])[2:5];
+    /// -----------
+    /// {2,3,4,5}
+    /// ```
+    ///
+    /// The lower and/or upper bound can be omitted to slice from the start or
+    /// end of the array respectively.
+    ///
+    /// See <https://www.postgresql.org/docs/current/arrays.html#ARRAYS-ACCESSING>.
+    ///
+    /// Also supports an optional "stride" as the last element (this is not
+    /// supported by PostgreSQL), e.g.
+    ///
+    /// ```plaintext
+    /// => select (array[1,2,3,4,5,6])[1:6:2];
+    /// -----------
+    /// {1,3,5}
+    /// ```
+    Slice {
+        lower_bound: Option<Expr>, // expression before the first `:`; `None` when omitted
+        upper_bound: Option<Expr>, // expression after the first `:`; `None` when omitted
+        stride: Option<Expr>, // expression after the second `:`; `None` when omitted
+    },
+}
+
+impl fmt::Display for Subscript { // renders only the text that goes between the brackets
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Subscript::Index { index } => write!(f, "{index}"), // index only; no `[` or `]` is written here
+            Subscript::Slice {
+                lower_bound,
+                upper_bound,
+                stride,
+            } => {
+                if let Some(lower) = lower_bound {
+                    write!(f, "{lower}")?;
+                }
+                write!(f, ":")?; // the first `:` is always written for a slice
+                if let Some(upper) = upper_bound {
+                    write!(f, "{upper}")?;
+                }
+                if let Some(stride) = stride { // a second `:` is written only together with a stride
+                    write!(f, ":")?;
+                    write!(f, "{stride}")?;
+                }
+                Ok(())
+            }
+        }
+    }
+}
+
 /// A lambda function.
 #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
@ -1251,12 +1313,11 @@ impl fmt::Display for Expr {
            Expr::Dictionary(fields) => {
                write!(f, "{{{}}}", display_comma_separated(fields))
            }
-            Expr::ArrayIndex { obj, indexes } => {
-                write!(f, "{obj}")?;
-                for i in indexes {
-                    write!(f, "[{i}]")?;
-                }
-                Ok(())
+            Expr::Subscript {
+                expr,
+                subscript: key,
+            } => {
+                write!(f, "{expr}[{key}]")
            }
            Expr::Array(set) => {
                write!(f, "{set}")
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@ -2544,8 +2544,7 @@ impl<'a> Parser<'a> {
            })
        } else if Token::LBracket == tok {
            if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) {
-                // parse index
-                self.parse_array_index(expr)
+                self.parse_subscript(expr)
            } else if dialect_of!(self is SnowflakeDialect) {
                self.prev_token();
                self.parse_json_access(expr)
@ -2573,18 +2572,87 @@ impl<'a> Parser<'a> {
        }
    }

-    pub fn parse_array_index(&mut self, expr: Expr) -> Result<Expr, ParserError> {
-        let index = self.parse_expr()?;
-        self.expect_token(&Token::RBracket)?;
-        let mut indexes: Vec<Expr> = vec![index];
-        while self.consume_token(&Token::LBracket) {
-            let index = self.parse_expr()?;
-            self.expect_token(&Token::RBracket)?;
-            indexes.push(index);
+    /// Parses the contents of an array subscript, i.e. one of
+    /// * `[:]`
+    /// * `[l]`
+    /// * `[l:]`
+    /// * `[:u]`
+    /// * `[l:u]`
+    /// * `[l:u:s]`
+    ///
+    /// The parser must be right after the opening `[`; the closing `]` is consumed on success.
+    fn parse_subscript_inner(&mut self) -> Result<Subscript, ParserError> {
+        // at either `<lower>(rest)]` or `:(rest)]`
+        let lower_bound = if self.consume_token(&Token::Colon) {
+            None
+        } else {
+            Some(self.parse_expr()?)
+        };
+
+        // a `]` here means either `[l]` (plain index) or `[:]` (slice without bounds)
+        if self.consume_token(&Token::RBracket) {
+            if let Some(lower_bound) = lower_bound {
+                return Ok(Subscript::Index { index: lower_bound });
+            };
+            return Ok(Subscript::Slice {
+                lower_bound,
+                upper_bound: None,
+                stride: None,
+            });
        }
-        Ok(Expr::ArrayIndex {
-            obj: Box::new(expr),
-            indexes,
+
+        // a lower bound was parsed, so the `:` that follows it has not been consumed yet
+        if lower_bound.is_some() {
+            self.expect_token(&Token::Colon)?;
+        }
+
+        // we are now at either `]` or `<upper>(rest)]`
+        let upper_bound = if self.consume_token(&Token::RBracket) {
+            return Ok(Subscript::Slice {
+                lower_bound,
+                upper_bound: None,
+                stride: None,
+            });
+        } else {
+            Some(self.parse_expr()?)
+        };
+
+        // a `]` here ends a slice without a stride: `[l:u]` or `[:u]`
+        if self.consume_token(&Token::RBracket) {
+            return Ok(Subscript::Slice {
+                lower_bound,
+                upper_bound,
+                stride: None,
+            });
+        }
+
+        // we are now at `:]` or `:stride]`
+        self.expect_token(&Token::Colon)?;
+        let stride = if self.consume_token(&Token::RBracket) {
+            None
+        } else {
+            Some(self.parse_expr()?)
+        };
+
+        if stride.is_some() { // when the stride is absent, the `]` was already consumed above
+            self.expect_token(&Token::RBracket)?;
+        }
+
+        Ok(Subscript::Slice {
+            lower_bound,
+            upper_bound,
+            stride,
+        })
+    }
+
+    /// Parses an array subscript like `[1:3]` and returns it, applied to `expr`, as an `Expr::Subscript`.
+    ///
+    /// The parser must be right after the opening `[`; the closing `]` is consumed on success.
+    pub fn parse_subscript(&mut self, expr: Expr) -> Result<Expr, ParserError> {
+        let subscript = self.parse_subscript_inner()?;
+        Ok(Expr::Subscript {
+            expr: Box::new(expr),
+            subscript: Box::new(subscript),
        })
    }

@ -2838,7 +2906,7 @@ impl<'a> Parser<'a> {
                Ok(Self::MUL_DIV_MOD_OP_PREC)
            }
            Token::DoubleColon => Ok(50),
-            Token::Colon => Ok(50),
+            Token::Colon if dialect_of!(self is SnowflakeDialect) => Ok(50),
            Token::ExclamationMark => Ok(50),
            Token::LBracket | Token::Overlap | Token::CaretAt => Ok(50),
            Token::Arrow