Rename to Distinctness + add distinctness information to SelectPlan

This commit is contained in:
Jussi Saurio 2025-05-18 11:18:22 +03:00
parent c1e31b0213
commit d5386439e8
6 changed files with 56 additions and 53 deletions

View file

@ -12,7 +12,7 @@ use crate::{
use super::{
emitter::{Resolver, TranslateCtx},
expr::translate_expr,
plan::{AggDistinctness, Aggregate, SelectPlan, TableReference},
plan::{Aggregate, Distinctness, SelectPlan, TableReference},
result_row::emit_select_result,
};
@ -64,7 +64,7 @@ pub fn emit_ungrouped_aggregation<'a>(
/// This is used in both GROUP BY and non-GROUP BY aggregations to jump over
/// the AggStep that would otherwise accumulate the same value multiple times.
pub fn handle_distinct(program: &mut ProgramBuilder, agg: &Aggregate, agg_arg_reg: usize) {
let AggDistinctness::Distinct { ctx } = &agg.distinctness else {
let Distinctness::Distinct { ctx } = &agg.distinctness else {
return;
};
let distinct_agg_ctx = ctx

View file

@ -20,7 +20,7 @@ use super::{
emitter::{Resolver, TranslateCtx},
expr::{translate_condition_expr, translate_expr, ConditionMetadata},
order_by::order_by_sorter_insert,
plan::{AggDistinctness, Aggregate, GroupBy, SelectPlan, TableReference},
plan::{Aggregate, Distinctness, GroupBy, SelectPlan, TableReference},
result_row::emit_select_result,
};
@ -576,7 +576,7 @@ pub fn group_by_process_single_group(
agg_result_reg,
&t_ctx.resolver,
)?;
if let AggDistinctness::Distinct { ctx } = &agg.distinctness {
if let Distinctness::Distinct { ctx } = &agg.distinctness {
let ctx = ctx
.as_ref()
.expect("distinct aggregate context not populated");
@ -924,7 +924,7 @@ pub fn group_by_emit_row_phase<'a>(
plan.aggregates
.iter()
.filter_map(|agg| {
if let AggDistinctness::Distinct { ctx } = &agg.distinctness {
if let Distinctness::Distinct { ctx } = &agg.distinctness {
Some(ctx)
} else {
None

View file

@ -6,7 +6,7 @@ use std::sync::Arc;
use crate::{
schema::{Index, IndexColumn, Table},
translate::{
plan::{AggDistinctness, DistinctAggCtx},
plan::{DistinctCtx, Distinctness},
result_row::emit_select_result,
},
types::SeekOp,
@ -116,8 +116,8 @@ pub fn init_loop(
is_table: false,
});
}
agg.distinctness = AggDistinctness::Distinct {
ctx: Some(DistinctAggCtx {
agg.distinctness = Distinctness::Distinct {
ctx: Some(DistinctCtx {
cursor_id,
ephemeral_index_name: index_name,
label_on_conflict: program.allocate_label(),
@ -763,7 +763,7 @@ fn emit_loop_source<'a>(
reg,
&t_ctx.resolver,
)?;
if let AggDistinctness::Distinct { ctx } = &agg.distinctness {
if let Distinctness::Distinct { ctx } = &agg.distinctness {
let ctx = ctx
.as_ref()
.expect("distinct aggregate context not populated");

View file

@ -292,6 +292,42 @@ impl Default for JoinOrderMember {
}
}
#[derive(Debug, Clone, PartialEq)]
/// Whether a query or an aggregate function call is DISTINCT or not.
///
/// The same type is stored on both `Aggregate` and `SelectPlan`.
pub enum Distinctness {
/// Not DISTINCT: either no keyword was given or an explicit ALL was used.
NonDistinct,
/// DISTINCT was requested.
/// `ctx` is `None` when the value is first built by `from_ast`; once set, it holds
/// the translation context used for handling duplicates.
Distinct { ctx: Option<DistinctCtx> },
}
impl Distinctness {
    /// Builds a planner-level `Distinctness` from the optional AST keyword.
    ///
    /// Only an explicit `DISTINCT` yields `Self::Distinct`, and its context starts
    /// out unpopulated (`ctx: None`). An explicit `ALL` and a missing keyword both
    /// map to `Self::NonDistinct`.
    pub fn from_ast(distinctness: Option<&ast::Distinctness>) -> Self {
        match distinctness {
            Some(ast::Distinctness::Distinct) => Self::Distinct { ctx: None },
            Some(ast::Distinctness::All) | None => Self::NonDistinct,
        }
    }

    /// Returns `true` for the `Distinct` variant, whether or not its context is set.
    pub fn is_distinct(&self) -> bool {
        match self {
            Self::Distinct { .. } => true,
            Self::NonDistinct => false,
        }
    }
}
/// Translation context for handling DISTINCT deduplication.
#[derive(Debug, Clone, PartialEq)]
pub struct DistinctCtx {
/// The cursor ID of the ephemeral index opened for the purpose of deduplicating results.
pub cursor_id: usize,
/// The name of the ephemeral index, needed to look up the cursor ID.
pub ephemeral_index_name: String,
/// The label for the on-conflict branch.
/// When a duplicate is found, the program will jump to the offset this label points to.
pub label_on_conflict: BranchOffset,
}
#[derive(Debug, Clone)]
pub struct SelectPlan {
/// List of table references in loop order, outermost first.
@ -317,6 +353,8 @@ pub struct SelectPlan {
pub contains_constant_false_condition: bool,
/// query type (top level or subquery)
pub query_type: SelectQueryType,
/// whether the query is DISTINCT
pub distinctness: Distinctness,
}
impl SelectPlan {
@ -800,46 +838,11 @@ pub enum Search {
}
/// Whether an aggregate function call is DISTINCT or not.
// NOTE(review): this is the pre-rename side of the diff; the commit title says the type
// is renamed to `Distinctness`.
#[derive(Debug, Clone, PartialEq)]
pub enum AggDistinctness {
/// The aggregate is not a DISTINCT aggregate.
NonDistinct,
/// The aggregate is a DISTINCT aggregate.
/// `ctx` is `None` when the value is first built by `from_ast`.
Distinct { ctx: Option<DistinctAggCtx> },
}
impl AggDistinctness {
    /// Builds an `AggDistinctness` from the optional AST keyword.
    ///
    /// Only an explicit `DISTINCT` yields `Self::Distinct`, and its context starts
    /// out unpopulated (`ctx: None`). An explicit `ALL` and a missing keyword both
    /// map to `Self::NonDistinct`.
    pub fn from_ast(distinctness: Option<&ast::Distinctness>) -> Self {
        match distinctness {
            Some(ast::Distinctness::Distinct) => Self::Distinct { ctx: None },
            Some(ast::Distinctness::All) | None => Self::NonDistinct,
        }
    }

    /// Returns `true` for the `Distinct` variant, whether or not its context is set.
    pub fn is_distinct(&self) -> bool {
        match self {
            Self::Distinct { .. } => true,
            Self::NonDistinct => false,
        }
    }
}
/// Translation context for handling DISTINCT aggregates.
#[derive(Debug, Clone, PartialEq)]
pub struct DistinctAggCtx {
/// The cursor ID of the ephemeral index opened for the distinct aggregate.
/// This is used to track the distinct values and avoid duplicates.
pub cursor_id: usize,
/// The name of the ephemeral index opened for the distinct aggregate.
pub ephemeral_index_name: String,
/// The label for the on-conflict branch.
/// When a duplicate is found, the program will jump to the offset this label points to.
pub label_on_conflict: BranchOffset,
}
/// An aggregate function call found in a query.
#[derive(Clone, Debug, PartialEq)]
pub struct Aggregate {
/// The aggregate function being called.
pub func: AggFunc,
/// The argument expressions passed to the function.
pub args: Vec<ast::Expr>,
/// The full expression this aggregate was resolved from.
pub original_expr: ast::Expr,
// NOTE(review): the two `distinctness` lines below look like the before/after sides of the
// `AggDistinctness` -> `Distinctness` rename in this diff; the struct can only carry one of them.
/// Whether the aggregate is a DISTINCT aggregate.
pub distinctness: AggDistinctness,
pub distinctness: Distinctness,
}
impl Aggregate {

View file

@ -1,6 +1,6 @@
use super::{
plan::{
AggDistinctness, Aggregate, ColumnUsedMask, EvalAt, IterationDirection, JoinInfo,
Aggregate, ColumnUsedMask, Distinctness, EvalAt, IterationDirection, JoinInfo,
JoinOrderMember, Operation, Plan, ResultSetColumn, SelectPlan, SelectQueryType,
TableReference, WhereTerm,
},
@ -41,7 +41,7 @@ pub fn resolve_aggregates(expr: &Expr, aggs: &mut Vec<Aggregate>) -> Result<bool
};
match Func::resolve_function(normalize_ident(name.0.as_str()).as_str(), args_count) {
Ok(Func::Agg(f)) => {
let distinctness = AggDistinctness::from_ast(distinctness.as_ref());
let distinctness = Distinctness::from_ast(distinctness.as_ref());
let num_args = args.as_ref().map_or(0, |args| args.len());
if distinctness.is_distinct() && num_args != 1 {
crate::bail_parse_error!(
@ -75,7 +75,7 @@ pub fn resolve_aggregates(expr: &Expr, aggs: &mut Vec<Aggregate>) -> Result<bool
func: f,
args: vec![],
original_expr: expr.clone(),
distinctness: AggDistinctness::NonDistinct,
distinctness: Distinctness::NonDistinct,
});
Ok(true)
} else {

View file

@ -1,7 +1,5 @@
use super::emitter::{emit_program, TranslateCtx};
use super::plan::{
select_star, AggDistinctness, JoinOrderMember, Operation, Search, SelectQueryType,
};
use super::plan::{select_star, Distinctness, JoinOrderMember, Operation, Search, SelectQueryType};
use super::planner::Scope;
use crate::function::{AggFunc, ExtFunc, Func};
use crate::schema::Table;
@ -56,6 +54,7 @@ pub fn prepare_select_plan<'a>(
from,
where_clause,
group_by,
distinctness,
..
} = *select_inner;
let col_count = columns.len();
@ -111,6 +110,7 @@ pub fn prepare_select_plan<'a>(
offset: None,
contains_constant_false_condition: false,
query_type: SelectQueryType::TopLevel,
distinctness: Distinctness::from_ast(distinctness.as_ref()),
};
let mut aggregate_expressions = Vec::new();
@ -174,7 +174,7 @@ pub fn prepare_select_plan<'a>(
} else {
0
};
let distinctness = AggDistinctness::from_ast(distinctness.as_ref());
let distinctness = Distinctness::from_ast(distinctness.as_ref());
if distinctness.is_distinct() && args_count != 1 {
crate::bail_parse_error!("DISTINCT aggregate functions must have exactly one argument");
}
@ -287,7 +287,7 @@ pub fn prepare_select_plan<'a>(
"1".to_string(),
))],
original_expr: expr.clone(),
distinctness: AggDistinctness::NonDistinct,
distinctness: Distinctness::NonDistinct,
};
aggregate_expressions.push(agg.clone());
plan.result_columns.push(ResultSetColumn {