Merge 'Fix bug: we cant remove order by terms from the head of the list' from Jussi Saurio

we had an incorrect optimization in `eliminate_orderby_like_groupby()`
where it could remove e.g. the first term of the ORDER BY if it matched
the first GROUP BY term and the result set was naturally ordered by that
term. this is invalid. see e.g.:
```sql
main branch - BAD: removes the `ORDER BY id` term because the results are naturally ordered by id.
However, this results in sorting the entire thing by last name only!

limbo> select id, last_name, count(1) from users GROUP BY 1,2 order by id, last_name desc limit 3;
┌──────┬───────────┬───────────┐
│ id   │ last_name │ count (1) │
├──────┼───────────┼───────────┤
│ 6235 │ Zuniga    │         1 │
├──────┼───────────┼───────────┤
│ 8043 │ Zuniga    │         1 │
├──────┼───────────┼───────────┤
│  944 │ Zimmerman │         1 │
└──────┴───────────┴───────────┘

after fix - GOOD:

limbo> select id, last_name, count(1) from users GROUP BY 1,2 order by id, last_name desc limit 3;
┌────┬───────────┬───────────┐
│ id │ last_name │ count (1) │
├────┼───────────┼───────────┤
│  1 │ Foster    │         1 │
├────┼───────────┼───────────┤
│  2 │ Salazar   │         1 │
├────┼───────────┼───────────┤
│  3 │ Perry     │         1 │
└────┴───────────┴───────────┘

I also refactored sorters to always use the ast `SortOrder` instead of boolean vectors, and use the `compare_immutable()` utility we use inside btrees too.

Closes #1365
This commit is contained in:
Jussi Saurio 2025-05-03 12:48:08 +03:00
commit 306e097950
13 changed files with 98 additions and 176 deletions

View file

@ -1,11 +1,10 @@
use std::rc::Rc;
use limbo_sqlite3_parser::ast;
use limbo_sqlite3_parser::ast::{self, SortOrder};
use crate::{
function::AggFunc,
schema::{Column, PseudoTable},
types::{OwnedValue, Record},
util::exprs_are_equivalent,
vdbe::{
builder::{CursorType, ProgramBuilder},
@ -74,15 +73,10 @@ pub fn init_group_by(
let label_subrtn_acc_clear = program.allocate_label();
let mut order = Vec::new();
const ASCENDING: i64 = 0;
for _ in group_by.exprs.iter() {
order.push(OwnedValue::Integer(ASCENDING));
}
program.emit_insn(Insn::SorterOpen {
cursor_id: sort_cursor,
columns: non_aggregate_count + plan.aggregates.len(),
order: Record::new(order),
order: (0..group_by.exprs.len()).map(|_| SortOrder::Asc).collect(),
});
program.add_comment(program.offset(), "clear group by abort flag");

View file

@ -3,13 +3,11 @@ use std::sync::Arc;
use crate::{
schema::{BTreeTable, Column, Index, IndexColumn, PseudoTable, Schema},
storage::pager::CreateBTreeFlags,
types::Record,
util::normalize_ident,
vdbe::{
builder::{CursorType, ProgramBuilder, QueryMode},
insn::{IdxInsertFlags, Insn, RegisterOrLiteral},
},
OwnedValue,
};
use limbo_sqlite3_parser::ast::{self, Expr, Id, SortOrder, SortedColumn};
@ -114,21 +112,12 @@ pub fn translate_create_index(
);
// determine the order of the columns in the index for the sorter
let order = idx
.columns
.iter()
.map(|c| {
OwnedValue::Integer(match c.order {
SortOrder::Asc => 0,
SortOrder::Desc => 1,
})
})
.collect();
let order = idx.columns.iter().map(|c| c.order.clone()).collect();
// open the sorter and the pseudo table
program.emit_insn(Insn::SorterOpen {
cursor_id: sorter_cursor_id,
columns: columns.len(),
order: Record::new(order),
order,
});
let content_reg = program.alloc_register();
program.emit_insn(Insn::OpenPseudo {

View file

@ -13,8 +13,8 @@ use crate::{
use super::{
emitter::Resolver,
plan::{
DeletePlan, Direction, EvalAt, GroupBy, IterationDirection, Operation, Plan, Search,
SeekDef, SeekKey, SelectPlan, TableReference, UpdatePlan, WhereTerm,
DeletePlan, EvalAt, GroupBy, IterationDirection, Operation, Plan, Search, SeekDef, SeekKey,
SelectPlan, TableReference, UpdatePlan, WhereTerm,
},
planner::determine_where_to_eval_expr,
};
@ -112,59 +112,36 @@ fn eliminate_orderby_like_groupby(plan: &mut SelectPlan) -> Result<()> {
}
let order_by_clauses = plan.order_by.as_mut().unwrap();
// TODO: let's make the group by sorter aware of the order by directions so we dont need to skip
// descending terms.
if order_by_clauses
.iter()
.any(|(_, dir)| matches!(dir, SortOrder::Desc))
{
return Ok(());
}
let group_by_clauses = plan.group_by.as_mut().unwrap();
let mut group_by_insert_position = 0;
let mut order_index = 0;
// This function optimizes query execution by eliminating duplicate expressions between ORDER BY and GROUP BY clauses
// When the same column appears in both clauses, we can avoid redundant sorting operations
// The function reorders GROUP BY expressions and removes redundant ORDER BY expressions to ensure consistent ordering
while order_index < order_by_clauses.len() {
let (order_expr, direction) = &order_by_clauses[order_index];
// Skip descending orders as they require separate sorting
if matches!(direction, Direction::Descending) {
order_index += 1;
continue;
}
// Check if the current ORDER BY expression matches any expression in the GROUP BY clause
if let Some(group_expr_position) = group_by_clauses
// all order by terms must be in the group by clause for order by to be eliminated
if !order_by_clauses.iter().all(|(o_expr, _)| {
group_by_clauses
.exprs
.iter()
.position(|expr| exprs_are_equivalent(expr, order_expr))
{
// If we found a matching expression in GROUP BY, we need to ensure it's in the correct position
// to preserve the ordering specified by ORDER BY clauses
// Move the matching GROUP BY expression to the current insertion position
// This effectively "bubbles up" the expression to maintain proper ordering
if group_expr_position != group_by_insert_position {
let mut current_position = group_expr_position;
// Swap expressions to move the matching one to the correct position
while current_position > group_by_insert_position {
group_by_clauses
.exprs
.swap(current_position, current_position - 1);
current_position -= 1;
}
}
group_by_insert_position += 1;
// Remove this expression from ORDER BY since it's now handled by GROUP BY
order_by_clauses.remove(order_index);
// Note: We don't increment order_index here because removal shifts all elements
} else {
// If not found in GROUP BY, move to next ORDER BY expression
order_index += 1;
}
}
if order_by_clauses.is_empty() {
plan.order_by = None
.any(|g_expr| exprs_are_equivalent(g_expr, o_expr))
}) {
return Ok(());
}
// reorder group by terms so that they match the order by terms
// this way the group by sorter will effectively do the order by sorter's job and
// we can remove the order by clause
group_by_clauses.exprs.sort_by_key(|g_expr| {
order_by_clauses
.iter()
.position(|(o_expr, _)| exprs_are_equivalent(o_expr, g_expr))
.unwrap_or(usize::MAX)
});
plan.order_by = None;
Ok(())
}
@ -173,7 +150,7 @@ fn eliminate_orderby_like_groupby(plan: &mut SelectPlan) -> Result<()> {
fn eliminate_unnecessary_orderby(
table_references: &mut [TableReference],
available_indexes: &HashMap<String, Vec<Arc<Index>>>,
order_by: &mut Option<Vec<(ast::Expr, Direction)>>,
order_by: &mut Option<Vec<(ast::Expr, SortOrder)>>,
group_by: &Option<GroupBy>,
) -> Result<bool> {
let Some(order) = order_by else {
@ -206,8 +183,8 @@ fn eliminate_unnecessary_orderby(
// Special case: if ordering by just the rowid, we can remove the ORDER BY clause
if order.len() == 1 && order[0].0.is_rowid_alias_of(0) {
*iter_dir = match order[0].1 {
Direction::Ascending => IterationDirection::Forwards,
Direction::Descending => IterationDirection::Backwards,
SortOrder::Asc => IterationDirection::Forwards,
SortOrder::Desc => IterationDirection::Backwards,
};
*order_by = None;
return Ok(true);
@ -248,10 +225,10 @@ fn eliminate_unnecessary_orderby(
// If they don't, we must iterate the index in backwards order.
let index_direction = &matching_index.columns.first().as_ref().unwrap().order;
*iter_dir = match (index_direction, order[0].1) {
(SortOrder::Asc, Direction::Ascending) | (SortOrder::Desc, Direction::Descending) => {
(SortOrder::Asc, SortOrder::Asc) | (SortOrder::Desc, SortOrder::Desc) => {
IterationDirection::Forwards
}
(SortOrder::Asc, Direction::Descending) | (SortOrder::Desc, Direction::Ascending) => {
(SortOrder::Asc, SortOrder::Desc) | (SortOrder::Desc, SortOrder::Asc) => {
IterationDirection::Backwards
}
};
@ -266,12 +243,10 @@ fn eliminate_unnecessary_orderby(
let mut all_match_reverse = true;
for (i, (_, direction)) in order.iter().enumerate() {
match (&matching_index.columns[i].order, direction) {
(SortOrder::Asc, Direction::Ascending)
| (SortOrder::Desc, Direction::Descending) => {
(SortOrder::Asc, SortOrder::Asc) | (SortOrder::Desc, SortOrder::Desc) => {
all_match_reverse = false;
}
(SortOrder::Asc, Direction::Descending)
| (SortOrder::Desc, Direction::Ascending) => {
(SortOrder::Asc, SortOrder::Desc) | (SortOrder::Desc, SortOrder::Asc) => {
all_match_forward = false;
}
}
@ -299,7 +274,7 @@ fn use_indexes(
table_references: &mut [TableReference],
available_indexes: &HashMap<String, Vec<Arc<Index>>>,
where_clause: &mut Vec<WhereTerm>,
order_by: &mut Option<Vec<(ast::Expr, Direction)>>,
order_by: &mut Option<Vec<(ast::Expr, SortOrder)>>,
group_by: &Option<GroupBy>,
) -> Result<()> {
// Try to use indexes for eliminating ORDER BY clauses

View file

@ -1,10 +1,9 @@
use std::rc::Rc;
use limbo_sqlite3_parser::ast;
use limbo_sqlite3_parser::ast::{self, SortOrder};
use crate::{
schema::{Column, PseudoTable},
types::{OwnedValue, Record},
util::exprs_are_equivalent,
vdbe::{
builder::{CursorType, ProgramBuilder},
@ -16,7 +15,7 @@ use crate::{
use super::{
emitter::TranslateCtx,
expr::translate_expr,
plan::{Direction, ResultSetColumn, SelectPlan},
plan::{ResultSetColumn, SelectPlan},
result_row::{emit_offset, emit_result_row_and_limit},
};
@ -33,21 +32,17 @@ pub struct SortMetadata {
pub fn init_order_by(
program: &mut ProgramBuilder,
t_ctx: &mut TranslateCtx,
order_by: &[(ast::Expr, Direction)],
order_by: &[(ast::Expr, SortOrder)],
) -> Result<()> {
let sort_cursor = program.alloc_cursor_id(None, CursorType::Sorter);
t_ctx.meta_sort = Some(SortMetadata {
sort_cursor,
reg_sorter_data: program.alloc_register(),
});
let mut order = Vec::new();
for (_, direction) in order_by.iter() {
order.push(OwnedValue::Integer(*direction as i64));
}
program.emit_insn(Insn::SorterOpen {
cursor_id: sort_cursor,
columns: order_by.len(),
order: Record::new(order),
order: order_by.iter().map(|(_, direction)| *direction).collect(),
});
Ok(())
}
@ -257,7 +252,7 @@ pub fn sorter_insert(
///
/// If any result columns can be skipped, this returns list of 2-tuples of (SkippedResultColumnIndex: usize, ResultColumnIndexInOrderBySorter: usize)
pub fn order_by_deduplicate_result_columns(
order_by: &[(ast::Expr, Direction)],
order_by: &[(ast::Expr, SortOrder)],
result_columns: &[ResultSetColumn],
) -> Option<Vec<(usize, usize)>> {
let mut result_column_remapping: Option<Vec<(usize, usize)>> = None;

View file

@ -267,7 +267,7 @@ pub struct SelectPlan {
/// group by clause
pub group_by: Option<GroupBy>,
/// order by clause
pub order_by: Option<Vec<(ast::Expr, Direction)>>,
pub order_by: Option<Vec<(ast::Expr, SortOrder)>>,
/// all the aggregates collected from the result columns, order by, and (TODO) having clauses
pub aggregates: Vec<Aggregate>,
/// limit clause
@ -290,7 +290,7 @@ pub struct DeletePlan {
/// where clause split into a vec at 'AND' boundaries.
pub where_clause: Vec<WhereTerm>,
/// order by clause
pub order_by: Option<Vec<(ast::Expr, Direction)>>,
pub order_by: Option<Vec<(ast::Expr, SortOrder)>>,
/// limit clause
pub limit: Option<isize>,
/// offset clause
@ -308,7 +308,7 @@ pub struct UpdatePlan {
// (colum index, new value) pairs
pub set_clauses: Vec<(usize, ast::Expr)>,
pub where_clause: Vec<WhereTerm>,
pub order_by: Option<Vec<(ast::Expr, Direction)>>,
pub order_by: Option<Vec<(ast::Expr, SortOrder)>>,
pub limit: Option<isize>,
pub offset: Option<isize>,
// TODO: optional RETURNING clause
@ -681,21 +681,6 @@ pub enum Search {
},
}
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum Direction {
Ascending,
Descending,
}
impl Display for Direction {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
match self {
Direction::Ascending => write!(f, "ASC"),
Direction::Descending => write!(f, "DESC"),
}
}
}
#[derive(Clone, Debug, PartialEq)]
pub struct Aggregate {
pub func: AggFunc,
@ -873,7 +858,16 @@ impl fmt::Display for UpdatePlan {
if let Some(order_by) = &self.order_by {
writeln!(f, "ORDER BY:")?;
for (expr, dir) in order_by {
writeln!(f, " - {} {}", expr, dir)?;
writeln!(
f,
" - {} {}",
expr,
if *dir == SortOrder::Asc {
"ASC"
} else {
"DESC"
}
)?;
}
}
if let Some(limit) = self.limit {

View file

@ -3,7 +3,7 @@ use super::plan::{select_star, Operation, Search, SelectQueryType};
use super::planner::Scope;
use crate::function::{AggFunc, ExtFunc, Func};
use crate::translate::optimizer::optimize_plan;
use crate::translate::plan::{Aggregate, Direction, GroupBy, Plan, ResultSetColumn, SelectPlan};
use crate::translate::plan::{Aggregate, GroupBy, Plan, ResultSetColumn, SelectPlan};
use crate::translate::planner::{
bind_column_references, break_predicate_at_and_boundaries, parse_from, parse_limit,
parse_where, resolve_aggregates,
@ -368,13 +368,7 @@ pub fn prepare_select_plan<'a>(
)?;
resolve_aggregates(&o.expr, &mut plan.aggregates);
key.push((
o.expr,
o.order.map_or(Direction::Ascending, |o| match o {
ast::SortOrder::Asc => Direction::Ascending,
ast::SortOrder::Desc => Direction::Descending,
}),
));
key.push((o.expr, o.order.unwrap_or(ast::SortOrder::Asc)));
}
plan.order_by = Some(key);
}

View file

@ -11,8 +11,7 @@ use limbo_sqlite3_parser::ast::{self, Expr, ResultColumn, SortOrder, Update};
use super::emitter::emit_program;
use super::optimizer::optimize_plan;
use super::plan::{
ColumnUsedMask, Direction, IterationDirection, Plan, ResultSetColumn, TableReference,
UpdatePlan,
ColumnUsedMask, IterationDirection, Plan, ResultSetColumn, TableReference, UpdatePlan,
};
use super::planner::bind_column_references;
use super::planner::{parse_limit, parse_where};
@ -155,17 +154,7 @@ pub fn prepare_update_plan(schema: &Schema, body: &mut Update) -> crate::Result<
let order_by = body.order_by.as_ref().map(|order| {
order
.iter()
.map(|o| {
(
o.expr.clone(),
o.order
.map(|s| match s {
SortOrder::Asc => Direction::Ascending,
SortOrder::Desc => Direction::Descending,
})
.unwrap_or(Direction::Ascending),
)
})
.map(|o| (o.expr.clone(), o.order.unwrap_or(SortOrder::Asc)))
.collect()
});
// Parse the WHERE clause

View file

@ -1079,6 +1079,14 @@ impl IndexKeySortOrder {
IndexKeySortOrder(spec)
}
pub fn from_list(order: &[SortOrder]) -> Self {
let mut spec = 0;
for (i, order) in order.iter().enumerate() {
spec |= ((*order == SortOrder::Desc) as u64) << i;
}
IndexKeySortOrder(spec)
}
pub fn default() -> Self {
Self(0)
}

View file

@ -2698,14 +2698,6 @@ pub fn op_sorter_open(
else {
unreachable!("unexpected Insn {:?}", insn)
};
let order = order
.get_values()
.iter()
.map(|v| match v {
OwnedValue::Integer(i) => *i == 0,
_ => unreachable!(),
})
.collect();
let cursor = Sorter::new(order);
let mut cursors = state.cursors.borrow_mut();
cursors

View file

@ -1,3 +1,5 @@
use limbo_sqlite3_parser::ast::SortOrder;
use crate::vdbe::{builder::CursorType, insn::RegisterOrLiteral};
use super::{Insn, InsnReference, OwnedValue, Program};
@ -876,17 +878,10 @@ pub fn insn_to_str(
} => {
let _p4 = String::new();
let to_print: Vec<String> = order
.get_values()
.iter()
.map(|v| match v {
OwnedValue::Integer(i) => {
if *i == 0 {
"B".to_string()
} else {
"-B".to_string()
}
}
_ => unreachable!(),
SortOrder::Asc => "B".to_string(),
SortOrder::Desc => "-B".to_string(),
})
.collect();
(

View file

@ -7,9 +7,9 @@ use super::{execute, AggFunc, BranchOffset, CursorID, FuncCtx, InsnFunction, Pag
use crate::{
schema::BTreeTable,
storage::{pager::CreateBTreeFlags, wal::CheckpointMode},
types::Record,
};
use limbo_macros::Description;
use limbo_sqlite3_parser::ast::SortOrder;
/// Flags provided to comparison instructions (e.g. Eq, Ne) which determine behavior related to NULL values.
#[derive(Clone, Copy, Debug, Default)]
@ -586,9 +586,9 @@ pub enum Insn {
/// Open a sorter.
SorterOpen {
cursor_id: CursorID, // P1
columns: usize, // P2
order: Record, // P4. 0 if ASC and 1 if DESC
cursor_id: CursorID, // P1
columns: usize, // P2
order: Vec<SortOrder>, // P4.
},
/// Insert a row into the sorter.

View file

@ -1,18 +1,21 @@
use crate::types::ImmutableRecord;
use std::cmp::Ordering;
use limbo_sqlite3_parser::ast::SortOrder;
use crate::types::{compare_immutable, ImmutableRecord, IndexKeySortOrder};
pub struct Sorter {
records: Vec<ImmutableRecord>,
current: Option<ImmutableRecord>,
order: Vec<bool>,
order: IndexKeySortOrder,
key_len: usize,
}
impl Sorter {
pub fn new(order: Vec<bool>) -> Self {
pub fn new(order: &[SortOrder]) -> Self {
Self {
records: Vec::new(),
current: None,
order,
key_len: order.len(),
order: IndexKeySortOrder::from_list(order),
}
}
pub fn is_empty(&self) -> bool {
@ -26,24 +29,11 @@ impl Sorter {
// We do the sorting here since this is what is called by the SorterSort instruction
pub fn sort(&mut self) {
self.records.sort_by(|a, b| {
let cmp_by_idx = |idx: usize, ascending: bool| {
let a = &a.get_value(idx);
let b = &b.get_value(idx);
if ascending {
a.cmp(b)
} else {
b.cmp(a)
}
};
let mut cmp_ret = Ordering::Equal;
for (idx, &is_asc) in self.order.iter().enumerate() {
cmp_ret = cmp_by_idx(idx, is_asc);
if cmp_ret != Ordering::Equal {
break;
}
}
cmp_ret
compare_immutable(
&a.values[..self.key_len],
&b.values[..self.key_len],
self.order,
)
});
self.records.reverse();
self.next()

View file

@ -185,3 +185,10 @@ William|111}
do_execsql_test group_by_column_number {
select u.first_name, count(1) from users u group by 1 limit 1;
} {Aaron|41}
# There was a regression where we incorrectly removed SOME order by terms and left others in place, which is invalid and results in wrong rows being returned.
do_execsql_test groupby_orderby_removal_regression_test {
select id, last_name, count(1) from users GROUP BY 1,2 order by id, last_name desc limit 3;
} {1|Foster|1
2|Salazar|1
3|Perry|1}