mirror of
https://github.com/tursodatabase/limbo.git
synced 2025-08-04 18:18:03 +00:00
Add stable internal_id property to TableReference
Currently our "table id"/"table no"/"table idx" references always use the direct index of the `TableReference` in the plan, e.g. in `SelectPlan::table_references`. For example:

```rust
Expr::Column { table: 0, column: 3, .. }
```

refers to the 0'th table in the `table_references` list.

This is a fragile approach because it assumes the `table_references` list is stable for the lifetime of the query processing. This has so far been the case, but there exist certain query transformations, e.g. subquery unnesting, that may fold new table references from a subquery (which has its own table ref list) into the table reference list of the parent. If such a transformation is made, then potentially all of the `Expr::Column` references to tables will become invalid. Consider this example:

```sql
-- Assume tables: users(id, age), orders(user_id, amount)

-- Get total amount spent per user on orders over $100
SELECT u.id, sub.total
FROM users u
JOIN (SELECT user_id, SUM(amount) as total
      FROM orders o
      WHERE o.amount > 100
      GROUP BY o.user_id) sub
WHERE u.id = sub.user_id

-- Before subquery unnesting:
-- Main query table_references: [users, sub]
-- u.id refers to table 0, column 0
-- sub.total refers to table 1, column 1
--
-- Subquery table_references: [orders]
-- o.user_id refers to table 0, column 0
-- o.amount refers to table 0, column 1
--
-- After unnesting and folding subquery tables into main query,
-- the query might look like this:

SELECT u.id, SUM(o.amount) as total
FROM users u JOIN orders o ON u.id = o.user_id
WHERE o.amount > 100
GROUP BY u.id;

-- Main query table_references: [users, orders]
-- u.id refers to table index 0 (correct)
-- o.amount refers to table index 0 (incorrect, should be 1)
-- o.user_id refers to table index 0 (incorrect, should be 1)
```

We could of course traverse every expression in the subquery and rewrite the table indexes to be correct, but if we instead use stable identifiers for each table reference, then all the column references will continue to be correct. Hence, this PR introduces a `TableInternalId` used in `TableReference` as well as `Expr::Column` and `Expr::Rowid` so that this kind of query transformation can happen with less pain.
This commit is contained in:
parent
b7d2173e99
commit
7c07c09300
19 changed files with 552 additions and 278 deletions
|
@ -5,7 +5,7 @@ use std::{
|
|||
sync::Arc,
|
||||
};
|
||||
|
||||
use limbo_sqlite3_parser::ast;
|
||||
use limbo_sqlite3_parser::ast::{self, TableInternalId};
|
||||
|
||||
use crate::{
|
||||
fast_lock::SpinLock,
|
||||
|
@ -19,10 +19,28 @@ use crate::{
|
|||
},
|
||||
Connection, VirtualTable,
|
||||
};
|
||||
pub struct TableRefIdCounter {
|
||||
next_free: TableInternalId,
|
||||
}
|
||||
|
||||
impl TableRefIdCounter {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
next_free: TableInternalId::default(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn next(&mut self) -> ast::TableInternalId {
|
||||
let id = self.next_free;
|
||||
self.next_free += 1;
|
||||
id
|
||||
}
|
||||
}
|
||||
|
||||
use super::{BranchOffset, CursorID, Insn, InsnFunction, InsnReference, JumpTarget, Program};
|
||||
#[allow(dead_code)]
|
||||
pub struct ProgramBuilder {
|
||||
pub table_reference_counter: TableRefIdCounter,
|
||||
next_free_register: usize,
|
||||
next_free_cursor_id: usize,
|
||||
/// Instruction, the function to execute it with, and its original index in the vector.
|
||||
|
@ -90,6 +108,7 @@ pub struct ProgramBuilderOpts {
|
|||
impl ProgramBuilder {
|
||||
pub fn new(opts: ProgramBuilderOpts) -> Self {
|
||||
Self {
|
||||
table_reference_counter: TableRefIdCounter::new(),
|
||||
next_free_register: 1,
|
||||
next_free_cursor_id: 0,
|
||||
insns: Vec::with_capacity(opts.approx_num_insns),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue