Merge 'Initial pass at UPDATE support' from Preston Thorpe

This PR is to support `Update` queries. Follows sqlite behavior as much
as possible.
### limbo
```console
limbo> create table t (a,b,c);
limbo> explain update t set a = 1 where b = 2;
addr  opcode             p1    p2    p3    p4             p5  comment
----  -----------------  ----  ----  ----  -------------  --  -------
0     Init               0     18    0                    0   Start at 18
1     OpenWriteAsync     0     2     0                    0
2     OpenWriteAwait     0     0     0                    0
3     RewindAsync        0     0     0                    0
4     RewindAwait        0     17    0                    0   Rewind table t
5       Column           0     1     4                    0   r[4]=t.b
6       Ne               4     5     15                   0   if r[4]!=r[5] goto 15
7       RowId            0     6     0                    0   r[6]=t.rowid
8       IsNull           6     17    0                    0   if (r[6]==NULL) goto 17
9       Integer          1     1     0                    0   r[1]=1
10      Column           0     1     2                    0   r[2]=t.b
11      Column           0     2     3                    0   r[3]=t.c
12      MakeRecord       1     3     7                    0   r[7]=mkrec(r[1..3])
13      InsertAsync      0     7     6                    0
14      InsertAwait      0     0     0                    0
15    NextAsync          0     0     0                    0
16    NextAwait          0     5     0                    0
17    Halt               0     0     0                    0
18    Transaction        0     1     0                    0   write=true
19    Integer            2     5     0                    0   r[5]=2
20    Goto               0     1     0                    0
```
### sqlite
```console
sqlite> explain update t set a = 1 where b = 2;
addr  opcode         p1    p2    p3    p4             p5  comment
----  -------------  ----  ----  ----  -------------  --  -------------
0     Init           0     15    0                    0   Start at 15
1     Null           0     1     2                    0   r[1..2]=NULL
2     Noop           1     0     1                    0
3     OpenWrite      0     2     0     2              0   root=2 iDb=0; t
4     Rewind         0     14    0                    0
5       Column         0     1     5                    0   r[5]= cursor 0 column 1
6       Ne             6     13    5     BINARY-8       81  if r[5]!=r[6] goto 13
7       Rowid          0     2     0                    0   r[2]= rowid of 0
8       IsNull         2     14    0                    0   if r[2]==NULL goto 14
9       Integer        1     3     0                    0   r[3]=1
10      Column         0     1     4                    0   r[4]= cursor 0 column 1
11      MakeRecord     3     2     1                    0   r[1]=mkrec(r[3..4])
12      Insert         0     1     2     t              7   intkey=r[2] data=r[1]
13    Next           0     5     0                    1
14    Halt           0     0     0                    0
15    Transaction    0     1     1     0              1   usesStmtJournal=0
16    Integer        2     6     0                    0   r[6]=2
17    Goto           0     1     0                    0
```

Closes #1130
This commit is contained in:
Pekka Enberg 2025-03-24 09:19:22 +02:00
commit 31bbc5144a
8 changed files with 516 additions and 15 deletions

View file

@ -91,7 +91,7 @@ Limbo aims to be fully compatible with SQLite, with opt-in features not supporte
| SELECT ... OUTER JOIN | Partial | no RIGHT JOIN |
| SELECT ... JOIN USING | Yes | |
| SELECT ... NATURAL JOIN | Yes | |
| UPDATE | No | |
| UPDATE | Yes | |
| UPSERT | No | |
| VACUUM | No | |
| WITH clause | Partial | No RECURSIVE, no MATERIALIZED, only SELECT supported in CTEs |

View file

@ -820,8 +820,27 @@ impl BTreeCursor {
(self.find_cell(page, int_key), page.page_type())
};
// TODO: if overwrite drop cell
// if the cell index is less than the total cells, check: if its an existing
// rowid, we are going to update / overwrite the cell
if cell_idx < page.get_contents().cell_count() {
if let BTreeCell::TableLeafCell(tbl_leaf) = page.get_contents().cell_get(
cell_idx,
self.pager.clone(),
payload_overflow_threshold_max(page_type, self.usable_space() as u16),
payload_overflow_threshold_min(page_type, self.usable_space() as u16),
self.usable_space(),
)? {
if tbl_leaf._rowid == int_key {
tracing::debug!("insert_into_page: found exact match with cell_idx={cell_idx}, overwriting");
self.overwrite_cell(page.clone(), cell_idx, record)?;
self.state
.mut_write_info()
.expect("expected write info")
.state = WriteState::Finish;
continue;
}
}
}
// insert cell
let mut cell_payload: Vec<u8> = Vec::new();
fill_cell_payload(
@ -2200,6 +2219,108 @@ impl BTreeCursor {
pub fn table_id(&self) -> usize {
self.root_page
}
pub fn overwrite_cell(
&mut self,
page_ref: PageRef,
cell_idx: usize,
record: &Record,
) -> Result<CursorResult<()>> {
// build the new payload
let page_type = page_ref.get().contents.as_ref().unwrap().page_type();
let mut new_payload = Vec::new();
fill_cell_payload(
page_type,
self.rowid.get(),
&mut new_payload,
record,
self.usable_space() as u16,
self.pager.clone(),
);
// figure out old cell offset & size
let (old_offset, old_local_size) = {
let page = page_ref.get().contents.as_ref().unwrap();
page.cell_get_raw_region(
cell_idx,
payload_overflow_threshold_max(page_type, self.usable_space() as u16),
payload_overflow_threshold_min(page_type, self.usable_space() as u16),
self.usable_space(),
)
};
// if it all fits in local space and old_local_size is enough, do an in-place overwrite
if new_payload.len() <= old_local_size {
self.overwrite_content(
page_ref.clone(),
old_offset,
&new_payload,
0,
new_payload.len(),
)?;
// if there's leftover local space (old_local_size > new_payload.len()), zero it or free it
let remaining = old_local_size - new_payload.len();
if remaining > 0 {
let buf = page_ref.get().contents.as_mut().unwrap().as_ptr();
for i in 0..remaining {
buf[old_offset + new_payload.len() + i] = 0;
}
}
Ok(CursorResult::Ok(()))
} else {
// doesn't fit, drop it and insert a new one
drop_cell(
page_ref.get_contents(),
cell_idx,
self.usable_space() as u16,
)?;
insert_into_cell(
page_ref.get_contents(),
&new_payload,
cell_idx,
self.usable_space() as u16,
)?;
Ok(CursorResult::Ok(()))
}
}
pub fn overwrite_content(
&mut self,
page_ref: PageRef,
dest_offset: usize,
new_payload: &[u8],
src_offset: usize,
amount: usize,
) -> Result<CursorResult<()>> {
return_if_locked!(page_ref);
page_ref.set_dirty();
self.pager.add_dirty(page_ref.get().id);
let buf = page_ref.get().contents.as_mut().unwrap().as_ptr();
// if new_payload doesn't have enough data, we fill with zeros
let n_data = new_payload.len().saturating_sub(src_offset);
if n_data == 0 {
// everything is zeros
for i in 0..amount {
if buf[dest_offset + i] != 0 {
buf[dest_offset + i] = 0;
}
}
} else {
let copy_len = n_data.min(amount);
// copy the overlapping portion
buf[dest_offset..dest_offset + copy_len]
.copy_from_slice(&new_payload[src_offset..src_offset + copy_len]);
// if copy_len < amount => fill remainder with 0
if copy_len < amount {
for i in copy_len..amount {
buf[dest_offset + i] = 0;
}
}
}
Ok(CursorResult::Ok(()))
}
}
impl PageStack {

View file

@ -1871,12 +1871,10 @@ pub fn translate_expr(
}
}
ast::Expr::FunctionCallStar { .. } => todo!(),
ast::Expr::Id(id) => {
crate::bail_parse_error!(
"no such column: {} - should this be a string literal in single-quotes?",
id.0
)
}
ast::Expr::Id(id) => crate::bail_parse_error!(
"no such column: {} - should this be a string literal in single-quotes?",
id.0
),
ast::Expr::Column {
database: _,
table,
@ -2205,7 +2203,7 @@ fn translate_like_base(
dest: target_register,
func: FuncCtx {
func: Func::Scalar(func),
arg_count: arg_count,
arg_count,
},
});
}

View file

@ -23,6 +23,7 @@ pub(crate) mod result_row;
pub(crate) mod select;
pub(crate) mod subquery;
pub(crate) mod transaction;
pub(crate) mod update;
use crate::fast_lock::SpinLock;
use crate::schema::Schema;
@ -41,6 +42,7 @@ use std::fmt::Display;
use std::rc::{Rc, Weak};
use std::sync::Arc;
use transaction::{translate_tx_begin, translate_tx_commit};
use update::translate_update;
/// Translate SQL statement into bytecode program.
pub fn translate(
@ -109,7 +111,7 @@ pub fn translate(
ast::Stmt::Rollback { .. } => bail_parse_error!("ROLLBACK not supported yet"),
ast::Stmt::Savepoint(_) => bail_parse_error!("SAVEPOINT not supported yet"),
ast::Stmt::Select(select) => translate_select(query_mode, schema, *select, syms)?,
ast::Stmt::Update { .. } => bail_parse_error!("UPDATE not supported yet"),
ast::Stmt::Update(mut update) => translate_update(query_mode, schema, &mut update, syms)?,
ast::Stmt::Vacuum(_, _) => bail_parse_error!("VACUUM not supported yet"),
ast::Stmt::Insert(insert) => {
let Insert {

216
core/translate/update.rs Normal file
View file

@ -0,0 +1,216 @@
use crate::translate::plan::Operation;
use crate::{
bail_parse_error,
schema::{Schema, Table},
util::normalize_ident,
vdbe::{
builder::{CursorType, ProgramBuilder, ProgramBuilderOpts, QueryMode},
insn::Insn,
},
SymbolTable,
};
use limbo_sqlite3_parser::ast::Update;
use super::planner::bind_column_references;
use super::{
emitter::Resolver,
expr::{translate_condition_expr, translate_expr, ConditionMetadata},
plan::TableReference,
};
/*
* Update is simple. By default we scan the table, and for each row, we check the WHERE
* clause. If it evaluates to true, we build the new record with the updated value and insert.
*
* EXAMPLE:
*
sqlite> explain update t set a = 100 where b = 5;
addr opcode p1 p2 p3 p4 p5 comment
---- ------------- ---- ---- ---- ------------- -- -------------
0 Init 0 16 0 0 Start at 16
1 Null 0 1 2 0 r[1..2]=NULL
2 Noop 1 0 1 0
3 OpenWrite 0 2 0 3 0 root=2 iDb=0; t
4 Rewind 0 15 0 0
5 Column 0 1 6 0 r[6]= cursor 0 column 1
6 Ne 7 14 6 BINARY-8 81 if r[6]!=r[7] goto 14
7 Rowid 0 2 0 0 r[2]= rowid of 0
8 IsNull 2 15 0 0 if r[2]==NULL goto 15
9 Integer 100 3 0 0 r[3]=100
10 Column 0 1 4 0 r[4]= cursor 0 column 1
11 Column 0 2 5 0 r[5]= cursor 0 column 2
12 MakeRecord 3 3 1 0 r[1]=mkrec(r[3..5])
13 Insert 0 1 2 t 7 intkey=r[2] data=r[1]
14 Next 0 5 0 1
15 Halt 0 0 0 0
16 Transaction 0 1 1 0 1 usesStmtJournal=0
17 Integer 5 7 0 0 r[7]=5
18 Goto 0 1 0 0
*/
pub fn translate_update(
query_mode: QueryMode,
schema: &Schema,
body: &mut Update,
syms: &SymbolTable,
) -> crate::Result<ProgramBuilder> {
// TODO: freestyling these numbers
let mut program = ProgramBuilder::new(ProgramBuilderOpts {
query_mode,
num_cursors: 1,
approx_num_insns: 20,
approx_num_labels: 4,
});
if body.with.is_some() {
bail_parse_error!("WITH clause is not supported");
}
if body.or_conflict.is_some() {
bail_parse_error!("ON CONFLICT clause is not supported");
}
let table_name = &body.tbl_name.name;
let table = match schema.get_table(table_name.0.as_str()) {
Some(table) => table,
None => bail_parse_error!("Parse error: no such table: {}", table_name),
};
if let Table::Virtual(_) = table.as_ref() {
bail_parse_error!("vtable update not yet supported");
}
let resolver = Resolver::new(syms);
let init_label = program.allocate_label();
program.emit_insn(Insn::Init {
target_pc: init_label,
});
let start_offset = program.offset();
let Some(btree_table) = table.btree() else {
crate::bail_corrupt_error!("Parse error: no such table: {}", table_name);
};
let cursor_id = program.alloc_cursor_id(
Some(table_name.0.clone()),
CursorType::BTreeTable(btree_table.clone()),
);
let root_page = btree_table.root_page;
program.emit_insn(Insn::OpenWriteAsync {
cursor_id,
root_page,
});
program.emit_insn(Insn::OpenWriteAwait {});
let end_label = program.allocate_label();
program.emit_insn(Insn::RewindAsync { cursor_id });
program.emit_insn(Insn::RewindAwait {
cursor_id,
pc_if_empty: end_label,
});
let first_col_reg = program.alloc_registers(btree_table.columns.len());
let referenced_tables = vec![TableReference {
table: Table::BTree(btree_table.clone()),
identifier: table_name.0.clone(),
op: Operation::Scan { iter_dir: None },
join_info: None,
}];
// store the (col_index, Expr value) of each 'Set'
// if a column declared here isn't found: error
let mut update_idxs = Vec::with_capacity(body.sets.len());
for s in body.sets.iter_mut() {
let ident = normalize_ident(s.col_names[0].0.as_str());
if let Some((i, _)) = btree_table.columns.iter().enumerate().find(|(_, col)| {
col.name
.as_ref()
.unwrap_or(&String::new())
.eq_ignore_ascii_case(&ident)
}) {
bind_column_references(&mut s.expr, &referenced_tables, None)?;
update_idxs.push((i, &s.expr));
} else {
bail_parse_error!("column {} not found", ident);
}
}
let loop_start = program.offset();
let skip_label = program.allocate_label();
if let Some(where_clause) = body.where_clause.as_mut() {
bind_column_references(where_clause, &referenced_tables, None)?;
translate_condition_expr(
&mut program,
&referenced_tables,
where_clause,
ConditionMetadata {
jump_if_condition_is_true: false,
jump_target_when_true: crate::vdbe::BranchOffset::Placeholder,
jump_target_when_false: skip_label,
},
&resolver,
)?;
}
let rowid_reg = program.alloc_register();
program.emit_insn(Insn::RowId {
cursor_id,
dest: rowid_reg,
});
// if no rowid, we're done
program.emit_insn(Insn::IsNull {
reg: rowid_reg,
target_pc: end_label,
});
// we scan a column at a time, loading either the column's values, or the new value
// from the Set expression, into registers so we can emit a MakeRecord and update the row.
for idx in 0..btree_table.columns.len() {
if let Some((idx, expr)) = update_idxs.iter().find(|(i, _)| *i == idx) {
let target_reg = first_col_reg + idx;
translate_expr(
&mut program,
Some(&referenced_tables),
expr,
target_reg,
&resolver,
)?;
} else {
program.emit_insn(Insn::Column {
cursor_id,
column: idx,
dest: first_col_reg + idx,
});
}
}
let record_reg = program.alloc_register();
program.emit_insn(Insn::MakeRecord {
start_reg: first_col_reg,
count: btree_table.columns.len(),
dest_reg: record_reg,
});
program.emit_insn(Insn::InsertAsync {
cursor: cursor_id,
key_reg: rowid_reg,
record_reg,
flag: 0,
});
program.emit_insn(Insn::InsertAwait { cursor_id });
// label for false `WHERE` clause: proceed to next row
program.resolve_label(skip_label, program.offset());
program.emit_insn(Insn::NextAsync { cursor_id });
program.emit_insn(Insn::NextAwait {
cursor_id,
pc_if_next: loop_start,
});
// cleanup/halt
program.resolve_label(end_label, program.offset());
program.emit_insn(Insn::Halt {
err_code: 0,
description: String::new(),
});
program.resolve_label(init_label, program.offset());
program.emit_insn(Insn::Transaction { write: true });
program.emit_constant_insns();
program.emit_insn(Insn::Goto {
target_pc: start_offset,
});
program.table_references = referenced_tables.clone();
Ok(program)
}

View file

@ -1063,8 +1063,7 @@ impl Program {
let rowid = {
let mut index_cursor = state.get_cursor(index_cursor_id);
let index_cursor = index_cursor.as_btree_mut();
let rowid = index_cursor.rowid()?;
rowid
index_cursor.rowid()?
};
let mut table_cursor = state.get_cursor(table_cursor_id);
let table_cursor = table_cursor.as_btree_mut();
@ -1805,8 +1804,7 @@ impl Program {
unreachable!();
};
if (matches!(func, AggFunc::Count) && matches!(col, OwnedValue::Null))
== false
if !(matches!(func, AggFunc::Count) && matches!(col, OwnedValue::Null))
{
*count += 1;
};

View file

@ -26,3 +26,4 @@ source $testdir/total-changes.test
source $testdir/offset.test
source $testdir/scalar-functions-printf.test
source $testdir/transactions.test
source $testdir/update.test

165
testing/update.test Executable file
View file

@ -0,0 +1,165 @@
#!/usr/bin/env tclsh
set testdir [file dirname $argv0]
source $testdir/tester.tcl
do_execsql_test_on_specific_db {:memory:} basic-update {
create table temp (t1 integer);
insert into temp values (1);
update temp set t1 = 2;
select * from temp;
} {2}
do_execsql_test_on_specific_db {:memory:} update-mul {
create table temp (t1 integer);
insert into temp values (1),(2.0),('3'),('4.0');
update temp set t1 = 2;
select * from temp;
} {2
2
2
2}
do_execsql_test_on_specific_db {:memory:} update-where {
create table temp (a,b,c);
insert into temp values (1,2,33);
insert into temp values (1,22,33);
insert into temp values (1,22,33);
update temp set a = 6 where b = 2;
select * from temp;
} {6|2|33
1|22|33
1|22|33}
do_execsql_test_on_specific_db {:memory:} update-where-2 {
create table temp (a,b,c);
insert into temp values (1,22,33);
insert into temp values (1,22,33);
insert into temp values (1,22,33);
insert into temp values (6,22,33);
insert into temp values (1,22,33);
insert into temp values (1,22,33);
update temp set b = 100000 where a = 6;
select b from temp where a = 6;
} {100000}
do_execsql_test_on_specific_db {:memory:} update-all-many {
create table temp (a,b,c);
insert into temp values (1,22,33),(1,22,33),(1,22,33),(1,22,33),(1,22,33),(1,22,33),(1,22,33),(1,22,33);
update temp set a = 1234234234234234;
select COUNT(*) from temp where a = 1234234234234234;
} {8}
do_execsql_test_on_specific_db {:memory:} update-large-small {
create table temp (a,b,c);
insert into temp values (randomblob(1024), 1, 2);
update temp set a = 'a';
select * from temp;
} {a|1|2}
do_execsql_test_on_specific_db {:memory:} update-null {
create table temp (a,b,c);
insert into temp values (NULL, 1, 2);
insert into temp values (3, 4, 5);
update temp set a = 10 where a IS NULL;
select * from temp;
} {10|1|2
3|4|5}
do_execsql_test_on_specific_db {:memory:} update-mixed-types {
create table temp (a);
insert into temp values (1);
insert into temp values ('2');
insert into temp values (3.0);
insert into temp values (X'01');
update temp set a = 'updated';
select * from temp;
} {updated
updated
updated
updated}
do_execsql_test_on_specific_db {:memory:} update-with-expression {
create table temp (a, b);
insert into temp values (1, 10);
insert into temp values (2, 20);
insert into temp values (3, 30);
update temp set a = b * 3 / 2;
select * from temp;
} {15|10
30|20
45|30}
do_execsql_test_on_specific_db {:memory:} update-self-reference {
create table temp (a, b);
insert into temp values (1, 10);
insert into temp values (2, 20);
update temp set a = b where a = 1;
select * from temp;
} {10|10
2|20}
do_execsql_test_on_specific_db {:memory:} update-self-ref-all {
create table temp (a, b, c);
insert into temp values (1, 2, 3);
insert into temp values (4, 5, 6);
update temp set a = b;
select * from temp;
} {2|2|3
5|5|6}
do_execsql_test_on_specific_db {:memory:} update-large-text {
create table temp (a);
insert into temp values ('short');
update temp set a = 'this is a very large text field that exceeds the default cell size and should trigger overflow handling in the btree implementation';
select * from temp;
} {{this is a very large text field that exceeds the default cell size and should trigger overflow handling in the btree implementation}}
do_execsql_test_on_specific_db {:memory:} update-with-null-condition {
create table temp (a, b);
insert into temp values (NULL, 1);
insert into temp values (2, NULL);
insert into temp values (3, 4);
update temp set a = 99 where b IS NULL;
select * from temp;
} {|1
99|
3|4}
do_execsql_test_on_specific_db {:memory:} update-to-null {
create table temp (a, b);
insert into temp values (1, 10);
insert into temp values (2, 20);
update temp set a = NULL where b = 10;
select * from temp;
} {|10
2|20}
do_execsql_test_on_specific_db {:memory:} update-with-randomblob {
create table temp (a, b);
insert into temp values (randomblob(100), 1);
update temp set a = randomblob(200);
select length(a) from temp;
} {200}
do_execsql_test_on_specific_db {:memory:} update-multiple-columns {
create table temp (a, b, c);
insert into temp values (1, 2, 3);
insert into temp values (4, 5, 6);
update temp set a = 10, b = 20, c = 30;
select * from temp;
} {10|20|30
10|20|30}
do_execsql_test_on_specific_db {:memory:} update-true-expr {
create table temp (a, b, c);
insert into temp values (1, 2, 3);
insert into temp values (4, 5, 6);
update temp set a = 10, b = 20, c = 30 where 1;
select * from temp;
} {10|20|30
10|20|30}