Fix incorrect usage of indexes with non-contiguous columns

Due to the left-prefix rule of indexes, for an index key to be usable,
it needs to:

- Use the columns in contiguous order (0, 1, 2...)
  * eg if WHERE refers to cols 0 and 2, only 0 can be used
- Stop at the first range operator
  * eg if WHERE: col1 = 5 AND col2 > 5 AND col3 = 5, only col1 and col2
    can be used.

This wasn't properly tested, and resulted in simulator failures. Added
some regression tests for this behavior.
This commit is contained in:
Jussi Saurio 2025-06-10 15:21:26 +03:00
parent 9770ffb902
commit 547ca6cf2a
2 changed files with 337 additions and 12 deletions

View file

@ -348,18 +348,13 @@ pub fn constraints_from_where_clause(
// Deduplicate by position, keeping first occurrence (which will be equality if one exists, since the constraints vec is sorted that way)
candidate.refs.dedup_by_key(|cref| cref.index_col_pos);
// Truncate at first gap in positions -- again, index columns must be consumed in contiguous order.
let mut last_pos = 0;
let mut i = 0;
for cref in candidate.refs.iter() {
if cref.index_col_pos != last_pos {
if cref.index_col_pos != last_pos + 1 {
break;
}
last_pos = cref.index_col_pos;
}
i += 1;
}
candidate.refs.truncate(i);
let contiguous_len = candidate
.refs
.iter()
.enumerate()
.take_while(|(i, cref)| cref.index_col_pos == *i)
.count();
candidate.refs.truncate(contiguous_len);
// Truncate after the first inequality, since the left-prefix rule of indexes requires that all constraints but the last one must be equalities;
// again see: https://www.solarwinds.com/blog/the-left-prefix-index-rule

View file

@ -1258,6 +1258,336 @@ mod tests {
}
}
#[test]
/// Test that [compute_best_join_order] figures out that the index can't be used when only the second column is referenced
fn test_index_second_column_only() {
let mut joined_tables = Vec::new();
let mut table_id_counter = TableRefIdCounter::new();
// Create a table with two columns
let table = _create_btree_table("t1", _create_column_list(&["x", "y"], Type::Integer));
// Create a two-column index on (x,y)
let index = Arc::new(Index {
name: "idx_xy".to_string(),
table_name: "t1".to_string(),
columns: vec![
IndexColumn {
name: "x".to_string(),
order: SortOrder::Asc,
pos_in_table: 0,
collation: None,
},
IndexColumn {
name: "y".to_string(),
order: SortOrder::Asc,
pos_in_table: 1,
collation: None,
},
],
unique: false,
root_page: 2,
ephemeral: false,
has_rowid: true,
});
let mut available_indexes = HashMap::new();
available_indexes.insert("t1".to_string(), vec![index]);
joined_tables.push(JoinedTable {
table: Table::BTree(table),
internal_id: table_id_counter.next(),
op: Operation::Scan {
iter_dir: IterationDirection::Forwards,
index: None,
},
identifier: "t1".to_string(),
join_info: None,
col_used_mask: ColumnUsedMask::new(),
});
// Create where clause that only references second column
let where_clause = vec![WhereTerm {
expr: Expr::Binary(
Box::new(Expr::Column {
database: None,
table: joined_tables[0].internal_id,
column: 1,
is_rowid_alias: false,
}),
ast::Operator::Equals,
Box::new(Expr::Literal(ast::Literal::Numeric(5.to_string()))),
),
from_outer_join: None,
consumed: Cell::new(false),
}];
let table_references = TableReferences::new(joined_tables, vec![]);
let access_methods_arena = RefCell::new(Vec::new());
let table_constraints =
constraints_from_where_clause(&where_clause, &table_references, &available_indexes)
.unwrap();
let BestJoinOrderResult { best_plan, .. } = compute_best_join_order(
table_references.joined_tables(),
None,
&table_constraints,
&access_methods_arena,
)
.unwrap()
.unwrap();
// Verify access method is a scan, not a seek, because the index can't be used when only the second column is referenced
let access_method = &access_methods_arena.borrow()[best_plan.data[0].1];
assert!(access_method.is_scan());
}
#[test]
/// Test that an index with a gap in referenced columns (e.g. index on (a,b,c), where clause on a and c)
/// only uses the prefix before the gap.
fn test_index_skips_middle_column() {
let mut table_id_counter = TableRefIdCounter::new();
let mut joined_tables = Vec::new();
let mut available_indexes = HashMap::new();
let columns = _create_column_list(&["c1", "c2", "c3"], Type::Integer);
let table = _create_btree_table("t1", columns);
let index = Arc::new(Index {
name: "idx1".to_string(),
table_name: "t1".to_string(),
columns: vec![
IndexColumn {
name: "c1".to_string(),
order: SortOrder::Asc,
pos_in_table: 0,
collation: None,
},
IndexColumn {
name: "c2".to_string(),
order: SortOrder::Asc,
pos_in_table: 1,
collation: None,
},
IndexColumn {
name: "c3".to_string(),
order: SortOrder::Asc,
pos_in_table: 2,
collation: None,
},
],
unique: false,
root_page: 2,
ephemeral: false,
has_rowid: true,
});
available_indexes.insert("t1".to_string(), vec![index]);
joined_tables.push(JoinedTable {
table: Table::BTree(table),
internal_id: table_id_counter.next(),
op: Operation::Scan {
iter_dir: IterationDirection::Forwards,
index: None,
},
identifier: "t1".to_string(),
join_info: None,
col_used_mask: ColumnUsedMask::new(),
});
// Create where clause that references first and third columns
let where_clause = vec![
WhereTerm {
expr: Expr::Binary(
Box::new(Expr::Column {
database: None,
table: joined_tables[0].internal_id,
column: 0, // c1
is_rowid_alias: false,
}),
ast::Operator::Equals,
Box::new(Expr::Literal(ast::Literal::Numeric(5.to_string()))),
),
from_outer_join: None,
consumed: Cell::new(false),
},
WhereTerm {
expr: Expr::Binary(
Box::new(Expr::Column {
database: None,
table: joined_tables[0].internal_id,
column: 2, // c3
is_rowid_alias: false,
}),
ast::Operator::Equals,
Box::new(Expr::Literal(ast::Literal::Numeric(7.to_string()))),
),
from_outer_join: None,
consumed: Cell::new(false),
},
];
let table_references = TableReferences::new(joined_tables, vec![]);
let access_methods_arena = RefCell::new(Vec::new());
let table_constraints =
constraints_from_where_clause(&where_clause, &table_references, &available_indexes)
.unwrap();
let BestJoinOrderResult { best_plan, .. } = compute_best_join_order(
table_references.joined_tables(),
None,
&table_constraints,
&access_methods_arena,
)
.unwrap()
.unwrap();
// Verify access method is a seek, and only uses the first column of the index
let access_method = &access_methods_arena.borrow()[best_plan.data[0].1];
assert!(!access_method.is_scan());
assert!(access_method
.index
.as_ref()
.is_some_and(|i| i.name == "idx1"));
assert!(access_method.constraint_refs.len() == 1);
let constraint =
&table_constraints[0].constraints[access_method.constraint_refs[0].constraint_vec_pos];
assert!(constraint.operator == ast::Operator::Equals);
assert!(constraint.table_col_pos == 0); // c1
}
#[test]
/// Test that an index seek stops after a range operator.
/// e.g. index on (a,b,c), where clause a=1, b>2, c=3. Only a and b should be used for seek.
fn test_index_stops_at_range_operator() {
let mut table_id_counter = TableRefIdCounter::new();
let mut joined_tables = Vec::new();
let mut available_indexes = HashMap::new();
let columns = _create_column_list(&["c1", "c2", "c3"], Type::Integer);
let table = _create_btree_table("t1", columns);
let index = Arc::new(Index {
name: "idx1".to_string(),
table_name: "t1".to_string(),
columns: vec![
IndexColumn {
name: "c1".to_string(),
order: SortOrder::Asc,
pos_in_table: 0,
collation: None,
},
IndexColumn {
name: "c2".to_string(),
order: SortOrder::Asc,
pos_in_table: 1,
collation: None,
},
IndexColumn {
name: "c3".to_string(),
order: SortOrder::Asc,
pos_in_table: 2,
collation: None,
},
],
root_page: 2,
ephemeral: false,
has_rowid: true,
unique: false,
});
available_indexes.insert("t1".to_string(), vec![index]);
joined_tables.push(JoinedTable {
table: Table::BTree(table),
internal_id: table_id_counter.next(),
op: Operation::Scan {
iter_dir: IterationDirection::Forwards,
index: None,
},
identifier: "t1".to_string(),
join_info: None,
col_used_mask: ColumnUsedMask::new(),
});
// Create where clause: c1 = 5 AND c2 > 10 AND c3 = 7
let where_clause = vec![
WhereTerm {
expr: Expr::Binary(
Box::new(Expr::Column {
database: None,
table: joined_tables[0].internal_id,
column: 0, // c1
is_rowid_alias: false,
}),
ast::Operator::Equals,
Box::new(Expr::Literal(ast::Literal::Numeric(5.to_string()))),
),
from_outer_join: None,
consumed: Cell::new(false),
},
WhereTerm {
expr: Expr::Binary(
Box::new(Expr::Column {
database: None,
table: joined_tables[0].internal_id,
column: 1, // c2
is_rowid_alias: false,
}),
ast::Operator::Greater,
Box::new(Expr::Literal(ast::Literal::Numeric(10.to_string()))),
),
from_outer_join: None,
consumed: Cell::new(false),
},
WhereTerm {
expr: Expr::Binary(
Box::new(Expr::Column {
database: None,
table: joined_tables[0].internal_id,
column: 2, // c3
is_rowid_alias: false,
}),
ast::Operator::Equals,
Box::new(Expr::Literal(ast::Literal::Numeric(7.to_string()))),
),
from_outer_join: None,
consumed: Cell::new(false),
},
];
let table_references = TableReferences::new(joined_tables, vec![]);
let access_methods_arena = RefCell::new(Vec::new());
let table_constraints =
constraints_from_where_clause(&where_clause, &table_references, &available_indexes)
.unwrap();
let BestJoinOrderResult { best_plan, .. } = compute_best_join_order(
table_references.joined_tables(),
None,
&table_constraints,
&access_methods_arena,
)
.unwrap()
.unwrap();
// Verify access method is a seek, and uses the first two columns of the index.
// The third column can't be used because the second is a range query.
let access_method = &access_methods_arena.borrow()[best_plan.data[0].1];
assert!(!access_method.is_scan());
assert!(access_method
.index
.as_ref()
.is_some_and(|i| i.name == "idx1"));
assert!(access_method.constraint_refs.len() == 2);
let constraint =
&table_constraints[0].constraints[access_method.constraint_refs[0].constraint_vec_pos];
assert!(constraint.operator == ast::Operator::Equals);
assert!(constraint.table_col_pos == 0); // c1
let constraint =
&table_constraints[0].constraints[access_method.constraint_refs[1].constraint_vec_pos];
assert!(constraint.operator == ast::Operator::Greater);
assert!(constraint.table_col_pos == 1); // c2
}
fn _create_column(c: &TestColumn) -> Column {
Column {
name: Some(c.name.clone()),