Extend support for INDEX parsing (#1707)

Co-authored-by: Ifeanyi Ubah <ify1992@yahoo.com>
This commit is contained in:
Luca Cappelletti 2025-03-04 06:59:39 +01:00 committed by GitHub
parent d5dbe86da9
commit 6ec5223f50
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 404 additions and 60 deletions

View file

@ -1174,13 +1174,20 @@ impl fmt::Display for KeyOrIndexDisplay {
/// [1]: https://dev.mysql.com/doc/refman/8.0/en/create-table.html
/// [2]: https://dev.mysql.com/doc/refman/8.0/en/create-index.html
/// [3]: https://www.postgresql.org/docs/14/sql-createindex.html
#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum IndexType {
BTree,
Hash,
// TODO add Postgresql's possible indexes
GIN,
GiST,
SPGiST,
BRIN,
Bloom,
/// Users may define their own index types, which would
/// not be covered by the above variants.
Custom(Ident),
}
impl fmt::Display for IndexType {
@ -1188,6 +1195,12 @@ impl fmt::Display for IndexType {
match self {
Self::BTree => write!(f, "BTREE"),
Self::Hash => write!(f, "HASH"),
Self::GIN => write!(f, "GIN"),
Self::GiST => write!(f, "GIST"),
Self::SPGiST => write!(f, "SPGIST"),
Self::BRIN => write!(f, "BRIN"),
Self::Bloom => write!(f, "BLOOM"),
Self::Custom(name) => write!(f, "{}", name),
}
}
}

View file

@ -34,12 +34,31 @@ pub use super::ddl::{ColumnDef, TableConstraint};
use super::{
display_comma_separated, display_separated, query::InputFormatClause, Assignment, ClusteredBy,
CommentDef, Expr, FileFormat, FromTable, HiveDistributionStyle, HiveFormat, HiveIOFormat,
HiveRowFormat, Ident, InsertAliases, MysqlInsertPriority, ObjectName, OnCommit, OnInsert,
OneOrManyWithParens, OrderByExpr, Query, RowAccessPolicy, SelectItem, Setting, SqlOption,
SqliteOnConflict, StorageSerializationPolicy, TableEngine, TableObject, TableWithJoins, Tag,
WrappedCollection,
HiveRowFormat, Ident, IndexType, InsertAliases, MysqlInsertPriority, ObjectName, OnCommit,
OnInsert, OneOrManyWithParens, OrderByExpr, Query, RowAccessPolicy, SelectItem, Setting,
SqlOption, SqliteOnConflict, StorageSerializationPolicy, TableEngine, TableObject,
TableWithJoins, Tag, WrappedCollection,
};
/// Index column type.
///
/// One entry of the column list of a `CREATE INDEX` statement: an ordering
/// expression, optionally followed by an operator class.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct IndexColumn {
/// The indexed expression together with its ordering options.
pub column: OrderByExpr,
/// Operator class written after the expression, if any (e.g. `gin_trgm_ops`).
pub operator_class: Option<Ident>,
}
impl Display for IndexColumn {
    /// Writes the column and, when an operator class is set, a single space
    /// followed by that operator class.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match &self.operator_class {
            Some(op_class) => write!(f, "{} {}", self.column, op_class),
            None => write!(f, "{}", self.column),
        }
    }
}
/// CREATE INDEX statement.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
@ -49,8 +68,8 @@ pub struct CreateIndex {
pub name: Option<ObjectName>,
#[cfg_attr(feature = "visitor", visit(with = "visit_relation"))]
pub table_name: ObjectName,
pub using: Option<Ident>,
pub columns: Vec<OrderByExpr>,
pub using: Option<IndexType>,
pub columns: Vec<IndexColumn>,
pub unique: bool,
pub concurrently: bool,
pub if_not_exists: bool,

View file

@ -58,7 +58,7 @@ pub use self::ddl::{
ReferentialAction, TableConstraint, TagsColumnOption, UserDefinedTypeCompositeAttributeDef,
UserDefinedTypeRepresentation, ViewColumnDef,
};
pub use self::dml::{CreateIndex, CreateTable, Delete, Insert};
pub use self::dml::{CreateIndex, CreateTable, Delete, IndexColumn, Insert};
pub use self::operator::{BinaryOperator, UnaryOperator};
pub use self::query::{
AfterMatchSkip, ConnectBy, Cte, CteAsMaterialized, Distinct, EmptyMatchesMode,
@ -91,6 +91,7 @@ pub use self::value::{
use crate::ast::helpers::key_value_options::KeyValueOptions;
use crate::ast::helpers::stmt_data_loading::{StageLoadSelectItem, StageParamsObject};
#[cfg(feature = "visitor")]
pub use visitor::*;

View file

@ -704,7 +704,7 @@ impl Spanned for CreateIndex {
let CreateIndex {
name,
table_name,
using,
using: _,
columns,
unique: _, // bool
concurrently: _, // bool
@ -719,8 +719,7 @@ impl Spanned for CreateIndex {
name.iter()
.map(|i| i.span())
.chain(core::iter::once(table_name.span()))
.chain(using.iter().map(|i| i.span))
.chain(columns.iter().map(|i| i.span()))
.chain(columns.iter().map(|i| i.column.span()))
.chain(include.iter().map(|i| i.span))
.chain(with.iter().map(|i| i.span()))
.chain(predicate.iter().map(|i| i.span())),

View file

@ -137,11 +137,13 @@ define_keywords!(
BIT,
BLOB,
BLOCK,
BLOOM,
BLOOMFILTER,
BOOL,
BOOLEAN,
BOTH,
BOX,
BRIN,
BROWSE,
BTREE,
BUCKET,
@ -386,6 +388,8 @@ define_keywords!(
GENERATED,
GEOGRAPHY,
GET,
GIN,
GIST,
GLOBAL,
GRANT,
GRANTED,
@ -805,6 +809,7 @@ define_keywords!(
SPATIAL,
SPECIFIC,
SPECIFICTYPE,
SPGIST,
SQL,
SQLEXCEPTION,
SQLSTATE,

View file

@ -3955,6 +3955,18 @@ impl<'a> Parser<'a> {
true
}
/// Checks the current token against each of the given `keywords`, in slice
/// order, without consuming it. Returns the first keyword that matches, or
/// [`None`] when none of them do.
#[must_use]
pub fn peek_one_of_keywords(&self, keywords: &[Keyword]) -> Option<Keyword> {
    keywords
        .iter()
        .copied()
        .find(|&candidate| self.peek_keyword(candidate))
}
/// If the current token is one of the given `keywords`, consume the token
/// and return the keyword that matches. Otherwise, no tokens are consumed
/// and returns [`None`].
@ -6406,12 +6418,13 @@ impl<'a> Parser<'a> {
};
let table_name = self.parse_object_name(false)?;
let using = if self.parse_keyword(Keyword::USING) {
Some(self.parse_identifier()?)
Some(self.parse_index_type()?)
} else {
None
};
self.expect_token(&Token::LParen)?;
let columns = self.parse_comma_separated(Parser::parse_order_by_expr)?;
let columns = self.parse_comma_separated(Parser::parse_create_index_expr)?;
self.expect_token(&Token::RParen)?;
let include = if self.parse_keyword(Keyword::INCLUDE) {
@ -7629,16 +7642,30 @@ impl<'a> Parser<'a> {
}
pub fn parse_index_type(&mut self) -> Result<IndexType, ParserError> {
if self.parse_keyword(Keyword::BTREE) {
Ok(IndexType::BTree)
Ok(if self.parse_keyword(Keyword::BTREE) {
IndexType::BTree
} else if self.parse_keyword(Keyword::HASH) {
Ok(IndexType::Hash)
IndexType::Hash
} else if self.parse_keyword(Keyword::GIN) {
IndexType::GIN
} else if self.parse_keyword(Keyword::GIST) {
IndexType::GiST
} else if self.parse_keyword(Keyword::SPGIST) {
IndexType::SPGiST
} else if self.parse_keyword(Keyword::BRIN) {
IndexType::BRIN
} else if self.parse_keyword(Keyword::BLOOM) {
IndexType::Bloom
} else {
self.expected("index type {BTREE | HASH}", self.peek_token())
}
IndexType::Custom(self.parse_identifier()?)
})
}
/// Parse [USING {BTREE | HASH}]
/// Optionally parse the `USING` keyword, followed by an [IndexType]
/// Example:
/// ```sql
/// USING BTREE (name, age DESC)
/// ```
pub fn parse_optional_using_then_index_type(
&mut self,
) -> Result<Option<IndexType>, ParserError> {
@ -13631,10 +13658,42 @@ impl<'a> Parser<'a> {
}
}
/// Parse an expression, optionally followed by ASC or DESC (used in ORDER BY)
/// Parse an [OrderByExpr] expression.
///
/// Operator-class parsing is disabled here, so only the order-by part of
/// the inner result is returned.
pub fn parse_order_by_expr(&mut self) -> Result<OrderByExpr, ParserError> {
    let (order_by, _operator_class) = self.parse_order_by_expr_inner(false)?;
    Ok(order_by)
}
/// Parse an [IndexColumn]: an order-by expression that may be followed by
/// an operator class.
pub fn parse_create_index_expr(&mut self) -> Result<IndexColumn, ParserError> {
    let (column, operator_class) = self.parse_order_by_expr_inner(true)?;
    Ok(IndexColumn {
        column,
        operator_class,
    })
}
fn parse_order_by_expr_inner(
&mut self,
with_operator_class: bool,
) -> Result<(OrderByExpr, Option<Ident>), ParserError> {
let expr = self.parse_expr()?;
let operator_class: Option<Ident> = if with_operator_class {
// If none of the following keywords is present, try to parse an
// identifier as the operator class.
if self
.peek_one_of_keywords(&[Keyword::ASC, Keyword::DESC, Keyword::NULLS, Keyword::WITH])
.is_some()
{
None
} else {
self.maybe_parse(|parser| parser.parse_identifier())?
}
} else {
None
};
let options = self.parse_order_by_options()?;
let with_fill = if dialect_of!(self is ClickHouseDialect | GenericDialect)
@ -13645,11 +13704,14 @@ impl<'a> Parser<'a> {
None
};
Ok(OrderByExpr {
expr,
options,
with_fill,
})
Ok((
OrderByExpr {
expr,
options,
with_fill,
},
operator_class,
))
}
fn parse_order_by_options(&mut self) -> Result<OrderByOptions, ParserError> {

View file

@ -8842,22 +8842,28 @@ fn ensure_multiple_dialects_are_tested() {
#[test]
fn parse_create_index() {
let sql = "CREATE UNIQUE INDEX IF NOT EXISTS idx_name ON test(name,age DESC)";
let indexed_columns = vec![
OrderByExpr {
expr: Expr::Identifier(Ident::new("name")),
options: OrderByOptions {
asc: None,
nulls_first: None,
let indexed_columns: Vec<IndexColumn> = vec![
IndexColumn {
operator_class: None,
column: OrderByExpr {
expr: Expr::Identifier(Ident::new("name")),
with_fill: None,
options: OrderByOptions {
asc: None,
nulls_first: None,
},
},
with_fill: None,
},
OrderByExpr {
expr: Expr::Identifier(Ident::new("age")),
options: OrderByOptions {
asc: Some(false),
nulls_first: None,
IndexColumn {
operator_class: None,
column: OrderByExpr {
expr: Expr::Identifier(Ident::new("age")),
with_fill: None,
options: OrderByOptions {
asc: Some(false),
nulls_first: None,
},
},
with_fill: None,
},
];
match verified_stmt(sql) {
@ -8881,23 +8887,29 @@ fn parse_create_index() {
#[test]
fn test_create_index_with_using_function() {
let sql = "CREATE UNIQUE INDEX IF NOT EXISTS idx_name ON test USING btree (name,age DESC)";
let indexed_columns = vec![
OrderByExpr {
expr: Expr::Identifier(Ident::new("name")),
options: OrderByOptions {
asc: None,
nulls_first: None,
let sql = "CREATE UNIQUE INDEX IF NOT EXISTS idx_name ON test USING BTREE (name,age DESC)";
let indexed_columns: Vec<IndexColumn> = vec![
IndexColumn {
operator_class: None,
column: OrderByExpr {
expr: Expr::Identifier(Ident::new("name")),
with_fill: None,
options: OrderByOptions {
asc: None,
nulls_first: None,
},
},
with_fill: None,
},
OrderByExpr {
expr: Expr::Identifier(Ident::new("age")),
options: OrderByOptions {
asc: Some(false),
nulls_first: None,
IndexColumn {
operator_class: None,
column: OrderByExpr {
expr: Expr::Identifier(Ident::new("age")),
with_fill: None,
options: OrderByOptions {
asc: Some(false),
nulls_first: None,
},
},
with_fill: None,
},
];
match verified_stmt(sql) {
@ -8916,7 +8928,7 @@ fn test_create_index_with_using_function() {
}) => {
assert_eq!("idx_name", name.to_string());
assert_eq!("test", table_name.to_string());
assert_eq!("btree", using.unwrap().to_string());
assert_eq!("BTREE", using.unwrap().to_string());
assert_eq!(indexed_columns, columns);
assert!(unique);
assert!(!concurrently);
@ -8931,13 +8943,16 @@ fn test_create_index_with_using_function() {
#[test]
fn test_create_index_with_with_clause() {
let sql = "CREATE UNIQUE INDEX title_idx ON films(title) WITH (fillfactor = 70, single_param)";
let indexed_columns = vec![OrderByExpr {
expr: Expr::Identifier(Ident::new("title")),
options: OrderByOptions {
asc: None,
nulls_first: None,
let indexed_columns: Vec<IndexColumn> = vec![IndexColumn {
column: OrderByExpr {
expr: Expr::Identifier(Ident::new("title")),
options: OrderByOptions {
asc: None,
nulls_first: None,
},
with_fill: None,
},
with_fill: None,
operator_class: None,
}];
let with_parameters = vec![
Expr::BinaryOp {

View file

@ -2509,6 +2509,236 @@ fn parse_create_anonymous_index() {
}
}
#[test]
/// Test to verify the correctness of parsing the `CREATE INDEX` statement with optional operator classes.
///
/// Every index type in `indices` is combined with every entry of `operator_classes`, and each
/// combination is checked in both a single-column and a multi-column statement.
///
/// # Implementation details
///
/// At this time, since the parser library is not intended to take care of the semantics of the SQL statements,
/// there is no way to verify the correctness of the operator classes, nor whether they are valid for the given
/// index type. This test is only intended to verify that the parser can correctly parse the statement. For this
/// reason, the test includes a `totally_not_valid` operator class.
fn parse_create_indices_with_operator_classes() {
// Index types under test: built-in variants plus a user-defined (custom) one.
let indices = [
IndexType::GIN,
IndexType::GiST,
IndexType::SPGiST,
IndexType::Custom("CustomIndexType".into()),
];
// Operator classes under test; `None` means nothing is written after the expression.
let operator_classes: [Option<Ident>; 4] = [
None,
Some("gin_trgm_ops".into()),
Some("gist_trgm_ops".into()),
Some("totally_not_valid".into()),
];
for expected_index_type in indices {
for expected_operator_class in &operator_classes {
// Statement whose only index column is the function call, followed by the
// operator class (preceded by a space) when one is set.
let single_column_sql_statement = format!(
"CREATE INDEX the_index_name ON users USING {expected_index_type} (concat_users_name(first_name, last_name){})",
expected_operator_class.as_ref().map(|oc| format!(" {}", oc))
.unwrap_or_default()
);
// Same as above, but with a plain column placed before the function call.
let multi_column_sql_statement = format!(
"CREATE INDEX the_index_name ON users USING {expected_index_type} (column_name,concat_users_name(first_name, last_name){})",
expected_operator_class.as_ref().map(|oc| format!(" {}", oc))
.unwrap_or_default()
);
// AST expected for the function-call column, carrying the operator class under test.
let expected_function_column = IndexColumn {
column: OrderByExpr {
expr: Expr::Function(Function {
name: ObjectName(vec![ObjectNamePart::Identifier(Ident {
value: "concat_users_name".to_owned(),
quote_style: None,
span: Span::empty(),
})]),
uses_odbc_syntax: false,
parameters: FunctionArguments::None,
args: FunctionArguments::List(FunctionArgumentList {
duplicate_treatment: None,
args: vec![
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(
Ident {
value: "first_name".to_owned(),
quote_style: None,
span: Span::empty(),
},
))),
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(
Ident {
value: "last_name".to_owned(),
quote_style: None,
span: Span::empty(),
},
))),
],
clauses: vec![],
}),
filter: None,
null_treatment: None,
over: None,
within_group: vec![],
}),
options: OrderByOptions {
asc: None,
nulls_first: None,
},
with_fill: None,
},
operator_class: expected_operator_class.clone(),
};
// Single-column form: the function-call column must be the first entry.
match pg().verified_stmt(&single_column_sql_statement) {
Statement::CreateIndex(CreateIndex {
name: Some(ObjectName(name)),
table_name: ObjectName(table_name),
using: Some(using),
columns,
unique: false,
concurrently: false,
if_not_exists: false,
include,
nulls_distinct: None,
with,
predicate: None,
}) => {
assert_eq_vec(&["the_index_name"], &name);
assert_eq_vec(&["users"], &table_name);
assert_eq!(expected_index_type, using);
assert_eq!(expected_function_column, columns[0],);
assert!(include.is_empty());
assert!(with.is_empty());
}
_ => unreachable!(),
}
// Multi-column form: a plain column without operator class first, then the
// function-call column.
match pg().verified_stmt(&multi_column_sql_statement) {
Statement::CreateIndex(CreateIndex {
name: Some(ObjectName(name)),
table_name: ObjectName(table_name),
using: Some(using),
columns,
unique: false,
concurrently: false,
if_not_exists: false,
include,
nulls_distinct: None,
with,
predicate: None,
}) => {
assert_eq_vec(&["the_index_name"], &name);
assert_eq_vec(&["users"], &table_name);
assert_eq!(expected_index_type, using);
assert_eq!(
IndexColumn {
column: OrderByExpr {
expr: Expr::Identifier(Ident {
value: "column_name".to_owned(),
quote_style: None,
span: Span::empty()
}),
options: OrderByOptions {
asc: None,
nulls_first: None,
},
with_fill: None,
},
operator_class: None
},
columns[0],
);
assert_eq!(expected_function_column, columns[1],);
assert!(include.is_empty());
assert!(with.is_empty());
}
_ => unreachable!(),
}
}
}
}
#[test]
fn parse_create_bloom() {
    // A BLOOM index over three columns with four `name = value` entries in WITH (...).
    let sql =
        "CREATE INDEX bloomidx ON tbloom USING BLOOM (i1,i2,i3) WITH (length = 80, col1 = 2, col2 = 2, col3 = 4)";
    // Each WITH entry is expected as an equality between an identifier and a number.
    let expected_with: Vec<Expr> = [("length", "80"), ("col1", "2"), ("col2", "2"), ("col3", "4")]
        .iter()
        .map(|&(param, value)| Expr::BinaryOp {
            left: Box::new(Expr::Identifier(Ident::new(param))),
            op: BinaryOperator::Eq,
            right: Box::new(Expr::Value(number(value).into())),
        })
        .collect();
    if let Statement::CreateIndex(CreateIndex {
        name: Some(ObjectName(name)),
        table_name: ObjectName(table_name),
        using: Some(using),
        columns,
        unique: false,
        concurrently: false,
        if_not_exists: false,
        include,
        nulls_distinct: None,
        with,
        predicate: None,
    }) = pg().verified_stmt(sql)
    {
        assert_eq_vec(&["bloomidx"], &name);
        assert_eq_vec(&["tbloom"], &table_name);
        assert_eq!(IndexType::Bloom, using);
        assert_eq_vec(&["i1", "i2", "i3"], &columns);
        assert!(include.is_empty());
        assert_eq!(expected_with, with);
    } else {
        unreachable!()
    }
}
#[test]
fn parse_create_brin() {
    // A BRIN index over a single column, with no INCLUDE or WITH clause.
    let sql = "CREATE INDEX brin_sensor_data_recorded_at ON sensor_data USING BRIN (recorded_at)";
    if let Statement::CreateIndex(CreateIndex {
        name: Some(ObjectName(name)),
        table_name: ObjectName(table_name),
        using: Some(using),
        columns,
        unique: false,
        concurrently: false,
        if_not_exists: false,
        include,
        nulls_distinct: None,
        with,
        predicate: None,
    }) = pg().verified_stmt(sql)
    {
        assert_eq_vec(&["brin_sensor_data_recorded_at"], &name);
        assert_eq_vec(&["sensor_data"], &table_name);
        assert_eq!(IndexType::BRIN, using);
        assert_eq_vec(&["recorded_at"], &columns);
        assert!(include.is_empty());
        assert!(with.is_empty());
    } else {
        unreachable!()
    }
}
#[test]
fn parse_create_index_concurrently() {
let sql = "CREATE INDEX CONCURRENTLY IF NOT EXISTS my_index ON my_table(col1,col2)";