mirror of
https://github.com/tursodatabase/limbo.git
synced 2025-08-04 01:58:16 +00:00
482 lines
16 KiB
Rust
482 lines
16 KiB
Rust
use std::{
|
|
cell::Cell,
|
|
collections::HashMap,
|
|
rc::{Rc, Weak},
|
|
sync::Arc,
|
|
};
|
|
|
|
use limbo_sqlite3_parser::ast;
|
|
|
|
use crate::{
|
|
fast_lock::SpinLock,
|
|
parameters::Parameters,
|
|
schema::{BTreeTable, Index, PseudoTable},
|
|
storage::sqlite3_ondisk::DatabaseHeader,
|
|
translate::plan::{ResultSetColumn, TableReference},
|
|
Connection, VirtualTable,
|
|
};
|
|
|
|
use super::{BranchOffset, CursorID, Insn, InsnFunction, InsnReference, Program};
|
|
#[allow(dead_code)]
|
|
pub struct ProgramBuilder {
|
|
next_free_register: usize,
|
|
next_free_cursor_id: usize,
|
|
insns: Vec<(Insn, InsnFunction)>,
|
|
// for temporarily storing instructions that will be put after Transaction opcode
|
|
constant_insns: Vec<(Insn, InsnFunction)>,
|
|
// Vector of labels which must be assigned to next emitted instruction
|
|
next_insn_labels: Vec<BranchOffset>,
|
|
// Cursors that are referenced by the program. Indexed by CursorID.
|
|
pub cursor_ref: Vec<(Option<String>, CursorType)>,
|
|
/// A vector where index=label number, value=resolved offset. Resolved in build().
|
|
label_to_resolved_offset: Vec<Option<InsnReference>>,
|
|
// Bitmask of cursors that have emitted a SeekRowid instruction.
|
|
seekrowid_emitted_bitmask: u64,
|
|
// map of instruction index to manual comment (used in EXPLAIN only)
|
|
comments: Option<HashMap<InsnReference, &'static str>>,
|
|
pub parameters: Parameters,
|
|
pub result_columns: Vec<ResultSetColumn>,
|
|
pub table_references: Vec<TableReference>,
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub enum CursorType {
|
|
BTreeTable(Rc<BTreeTable>),
|
|
BTreeIndex(Arc<Index>),
|
|
Pseudo(Rc<PseudoTable>),
|
|
Sorter,
|
|
VirtualTable(Rc<VirtualTable>),
|
|
}
|
|
|
|
impl CursorType {
|
|
pub fn is_index(&self) -> bool {
|
|
matches!(self, CursorType::BTreeIndex(_))
|
|
}
|
|
}
|
|
|
|
/// Mode a statement is compiled in.
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
pub enum QueryMode {
    /// Regular compilation.
    Normal,
    /// Compilation for an explain-style command.
    Explain,
}
|
|
|
|
impl From<ast::Cmd> for QueryMode {
|
|
fn from(stmt: ast::Cmd) -> Self {
|
|
match stmt {
|
|
ast::Cmd::ExplainQueryPlan(_) | ast::Cmd::Explain(_) => QueryMode::Explain,
|
|
_ => QueryMode::Normal,
|
|
}
|
|
}
|
|
}
|
|
|
|
pub struct ProgramBuilderOpts {
|
|
pub query_mode: QueryMode,
|
|
pub num_cursors: usize,
|
|
pub approx_num_insns: usize,
|
|
pub approx_num_labels: usize,
|
|
}
|
|
|
|
impl ProgramBuilder {
|
|
pub fn new(opts: ProgramBuilderOpts) -> Self {
|
|
Self {
|
|
next_free_register: 1,
|
|
next_free_cursor_id: 0,
|
|
insns: Vec::with_capacity(opts.approx_num_insns),
|
|
next_insn_labels: Vec::with_capacity(2),
|
|
cursor_ref: Vec::with_capacity(opts.num_cursors),
|
|
constant_insns: Vec::new(),
|
|
label_to_resolved_offset: Vec::with_capacity(opts.approx_num_labels),
|
|
seekrowid_emitted_bitmask: 0,
|
|
comments: if opts.query_mode == QueryMode::Explain {
|
|
Some(HashMap::new())
|
|
} else {
|
|
None
|
|
},
|
|
parameters: Parameters::new(),
|
|
result_columns: Vec::new(),
|
|
table_references: Vec::new(),
|
|
}
|
|
}
|
|
|
|
pub fn alloc_register(&mut self) -> usize {
|
|
let reg = self.next_free_register;
|
|
self.next_free_register += 1;
|
|
reg
|
|
}
|
|
|
|
pub fn alloc_registers(&mut self, amount: usize) -> usize {
|
|
let reg = self.next_free_register;
|
|
self.next_free_register += amount;
|
|
reg
|
|
}
|
|
|
|
pub fn alloc_cursor_id(
|
|
&mut self,
|
|
table_identifier: Option<String>,
|
|
cursor_type: CursorType,
|
|
) -> usize {
|
|
let cursor = self.next_free_cursor_id;
|
|
self.next_free_cursor_id += 1;
|
|
self.cursor_ref.push((table_identifier, cursor_type));
|
|
assert_eq!(self.cursor_ref.len(), self.next_free_cursor_id);
|
|
cursor
|
|
}
|
|
|
|
pub fn emit_insn(&mut self, insn: Insn) {
|
|
for label in self.next_insn_labels.drain(..) {
|
|
self.label_to_resolved_offset[label.to_label_value() as usize] =
|
|
Some(self.insns.len() as InsnReference);
|
|
}
|
|
let function = insn.to_function();
|
|
self.insns.push((insn, function));
|
|
}
|
|
|
|
pub fn close_cursors(&mut self, cursors: &[CursorID]) {
|
|
for cursor in cursors {
|
|
self.emit_insn(Insn::Close { cursor_id: *cursor });
|
|
}
|
|
}
|
|
|
|
/// Emits a `String8` instruction with `value` and destination register `dest`.
pub fn emit_string8(&mut self, value: String, dest: usize) {
    let insn = Insn::String8 { value, dest };
    self.emit_insn(insn);
}
|
|
|
|
pub fn emit_string8_new_reg(&mut self, value: String) -> usize {
|
|
let dest = self.alloc_register();
|
|
self.emit_insn(Insn::String8 { value, dest });
|
|
dest
|
|
}
|
|
|
|
/// Emits an `Integer` instruction with `value` and destination register `dest`.
pub fn emit_int(&mut self, value: i64, dest: usize) {
    let insn = Insn::Integer { value, dest };
    self.emit_insn(insn);
}
|
|
|
|
/// Emits an `Integer` instruction carrying 1 for `true` and 0 for `false`,
/// with destination register `dest`.
pub fn emit_bool(&mut self, value: bool, dest: usize) {
    let as_int = i64::from(value);
    self.emit_insn(Insn::Integer {
        value: as_int,
        dest,
    });
}
|
|
|
|
/// Emits a `Null` instruction with the given `dest` and optional `dest_end`.
pub fn emit_null(&mut self, dest: usize, dest_end: Option<usize>) {
    let insn = Insn::Null { dest, dest_end };
    self.emit_insn(insn);
}
|
|
|
|
/// Emits a `ResultRow` instruction with the given `start_reg` and `count`.
pub fn emit_result_row(&mut self, start_reg: usize, count: usize) {
    let insn = Insn::ResultRow { start_reg, count };
    self.emit_insn(insn);
}
|
|
|
|
/// Emits a `Halt` instruction with error code 0 and an empty description.
pub fn emit_halt(&mut self) {
    let halt = Insn::Halt {
        err_code: 0,
        description: String::new(),
    };
    self.emit_insn(halt);
}
|
|
|
|
// no users yet, but I want to avoid someone else in the future
|
|
// just adding parameters to emit_halt! If you use this, remove the
|
|
// clippy warning please.
|
|
#[allow(dead_code)]
|
|
pub fn emit_halt_err(&mut self, err_code: usize, description: String) {
|
|
self.emit_insn(Insn::Halt {
|
|
err_code,
|
|
description,
|
|
});
|
|
}
|
|
|
|
pub fn emit_init(&mut self) -> BranchOffset {
|
|
let target_pc = self.allocate_label();
|
|
self.emit_insn(Insn::Init { target_pc });
|
|
target_pc
|
|
}
|
|
|
|
/// Emits a `Transaction` instruction with the given `write` flag.
pub fn emit_transaction(&mut self, write: bool) {
    let insn = Insn::Transaction { write };
    self.emit_insn(insn);
}
|
|
|
|
/// Emits a `Goto` instruction that jumps to `target_pc`.
pub fn emit_goto(&mut self, target_pc: BranchOffset) {
    let insn = Insn::Goto { target_pc };
    self.emit_insn(insn);
}
|
|
|
|
/// Attaches `comment` to the instruction at `insn_index`.
///
/// Does nothing when the builder has no comment map (it is only created in
/// explain mode). In that case `insn_index` is not even converted.
pub fn add_comment(&mut self, insn_index: BranchOffset, comment: &'static str) {
    let Some(comment_map) = self.comments.as_mut() else {
        return;
    };
    comment_map.insert(insn_index.to_offset_int(), comment);
}
|
|
|
|
// Emit an instruction that will be put at the end of the program (after Transaction statement).
|
|
// This is useful for instructions that otherwise will be unnecessarily repeated in a loop.
|
|
// Example: In `SELECT * from users where name='John'`, it is unnecessary to set r[1]='John' as we SCAN users table.
|
|
// We could simply set it once before the SCAN started.
|
|
pub fn mark_last_insn_constant(&mut self) {
|
|
self.constant_insns.push(self.insns.pop().unwrap());
|
|
}
|
|
|
|
pub fn emit_constant_insns(&mut self) {
|
|
self.insns.append(&mut self.constant_insns);
|
|
}
|
|
|
|
pub fn offset(&self) -> BranchOffset {
|
|
BranchOffset::Offset(self.insns.len() as InsnReference)
|
|
}
|
|
|
|
pub fn allocate_label(&mut self) -> BranchOffset {
|
|
let label_n = self.label_to_resolved_offset.len();
|
|
self.label_to_resolved_offset.push(None);
|
|
BranchOffset::Label(label_n as u32)
|
|
}
|
|
|
|
// Effectively a GOTO <next insn> without the need to emit an explicit GOTO instruction.
|
|
// Useful when you know you need to jump to "the next part", but the exact offset is unknowable
|
|
// at the time of emitting the instruction.
|
|
pub fn preassign_label_to_next_insn(&mut self, label: BranchOffset) {
|
|
self.next_insn_labels.push(label);
|
|
}
|
|
|
|
/// Binds `label` to the concrete instruction offset `to_offset`.
///
/// # Panics
/// Panics if `label` is not a `BranchOffset::Label` or `to_offset` is not a
/// `BranchOffset::Offset`, or if the label's slot does not exist.
pub fn resolve_label(&mut self, label: BranchOffset, to_offset: BranchOffset) {
    assert!(matches!(label, BranchOffset::Label(_)));
    assert!(matches!(to_offset, BranchOffset::Offset(_)));

    let slot = label.to_label_value() as usize;
    let resolved = to_offset.to_offset_int();
    self.label_to_resolved_offset[slot] = Some(resolved);
}
|
|
|
|
/// Resolve unresolved labels to a specific offset in the instruction list.
|
|
///
|
|
/// This function scans all instructions and resolves any labels to their corresponding offsets.
|
|
/// It ensures that all labels are resolved correctly and updates the target program counter (PC)
|
|
/// of each instruction that references a label.
|
|
pub fn resolve_labels(&mut self) {
|
|
let resolve = |pc: &mut BranchOffset, insn_name: &str| {
|
|
if let BranchOffset::Label(label) = pc {
|
|
let to_offset = self
|
|
.label_to_resolved_offset
|
|
.get(*label as usize)
|
|
.unwrap_or_else(|| {
|
|
panic!("Reference to undefined label in {}: {}", insn_name, label)
|
|
});
|
|
*pc = BranchOffset::Offset(
|
|
to_offset
|
|
.unwrap_or_else(|| panic!("Unresolved label in {}: {}", insn_name, label)),
|
|
);
|
|
}
|
|
};
|
|
for (insn, _) in self.insns.iter_mut() {
|
|
match insn {
|
|
Insn::Init { target_pc } => {
|
|
resolve(target_pc, "Init");
|
|
}
|
|
Insn::Eq {
|
|
lhs: _lhs,
|
|
rhs: _rhs,
|
|
target_pc,
|
|
..
|
|
} => {
|
|
resolve(target_pc, "Eq");
|
|
}
|
|
Insn::Ne {
|
|
lhs: _lhs,
|
|
rhs: _rhs,
|
|
target_pc,
|
|
..
|
|
} => {
|
|
resolve(target_pc, "Ne");
|
|
}
|
|
Insn::Lt {
|
|
lhs: _lhs,
|
|
rhs: _rhs,
|
|
target_pc,
|
|
..
|
|
} => {
|
|
resolve(target_pc, "Lt");
|
|
}
|
|
Insn::Le {
|
|
lhs: _lhs,
|
|
rhs: _rhs,
|
|
target_pc,
|
|
..
|
|
} => {
|
|
resolve(target_pc, "Le");
|
|
}
|
|
Insn::Gt {
|
|
lhs: _lhs,
|
|
rhs: _rhs,
|
|
target_pc,
|
|
..
|
|
} => {
|
|
resolve(target_pc, "Gt");
|
|
}
|
|
Insn::Ge {
|
|
lhs: _lhs,
|
|
rhs: _rhs,
|
|
target_pc,
|
|
..
|
|
} => {
|
|
resolve(target_pc, "Ge");
|
|
}
|
|
Insn::If {
|
|
reg: _reg,
|
|
target_pc,
|
|
jump_if_null: _,
|
|
} => {
|
|
resolve(target_pc, "If");
|
|
}
|
|
Insn::IfNot {
|
|
reg: _reg,
|
|
target_pc,
|
|
jump_if_null: _,
|
|
} => {
|
|
resolve(target_pc, "IfNot");
|
|
}
|
|
Insn::Rewind { pc_if_empty, .. } => {
|
|
resolve(pc_if_empty, "Rewind");
|
|
}
|
|
Insn::Last { pc_if_empty, .. } => {
|
|
resolve(pc_if_empty, "Last");
|
|
}
|
|
Insn::Goto { target_pc } => {
|
|
resolve(target_pc, "Goto");
|
|
}
|
|
Insn::DecrJumpZero {
|
|
reg: _reg,
|
|
target_pc,
|
|
} => {
|
|
resolve(target_pc, "DecrJumpZero");
|
|
}
|
|
Insn::SorterNext {
|
|
cursor_id: _cursor_id,
|
|
pc_if_next,
|
|
} => {
|
|
resolve(pc_if_next, "SorterNext");
|
|
}
|
|
Insn::SorterSort { pc_if_empty, .. } => {
|
|
resolve(pc_if_empty, "SorterSort");
|
|
}
|
|
Insn::NotNull {
|
|
reg: _reg,
|
|
target_pc,
|
|
} => {
|
|
resolve(target_pc, "NotNull");
|
|
}
|
|
Insn::IfPos { target_pc, .. } => {
|
|
resolve(target_pc, "IfPos");
|
|
}
|
|
Insn::Next { pc_if_next, .. } => {
|
|
resolve(pc_if_next, "Next");
|
|
}
|
|
Insn::Prev { pc_if_prev, .. } => {
|
|
resolve(pc_if_prev, "Prev");
|
|
}
|
|
Insn::InitCoroutine {
|
|
yield_reg: _,
|
|
jump_on_definition,
|
|
start_offset: _,
|
|
} => {
|
|
resolve(jump_on_definition, "InitCoroutine");
|
|
}
|
|
Insn::NotExists {
|
|
cursor: _,
|
|
rowid_reg: _,
|
|
target_pc,
|
|
} => {
|
|
resolve(target_pc, "NotExists");
|
|
}
|
|
Insn::Yield {
|
|
yield_reg: _,
|
|
end_offset,
|
|
} => {
|
|
resolve(end_offset, "Yield");
|
|
}
|
|
Insn::SeekRowid { target_pc, .. } => {
|
|
resolve(target_pc, "SeekRowid");
|
|
}
|
|
Insn::Gosub { target_pc, .. } => {
|
|
resolve(target_pc, "Gosub");
|
|
}
|
|
Insn::Jump {
|
|
target_pc_eq,
|
|
target_pc_lt,
|
|
target_pc_gt,
|
|
} => {
|
|
resolve(target_pc_eq, "Jump");
|
|
resolve(target_pc_lt, "Jump");
|
|
resolve(target_pc_gt, "Jump");
|
|
}
|
|
Insn::SeekGE { target_pc, .. } => {
|
|
resolve(target_pc, "SeekGE");
|
|
}
|
|
Insn::SeekGT { target_pc, .. } => {
|
|
resolve(target_pc, "SeekGT");
|
|
}
|
|
Insn::SeekLE { target_pc, .. } => {
|
|
resolve(target_pc, "SeekLE");
|
|
}
|
|
Insn::SeekLT { target_pc, .. } => {
|
|
resolve(target_pc, "SeekLT");
|
|
}
|
|
Insn::IdxGE { target_pc, .. } => {
|
|
resolve(target_pc, "IdxGE");
|
|
}
|
|
Insn::IdxLE { target_pc, .. } => {
|
|
resolve(target_pc, "IdxLE");
|
|
}
|
|
Insn::IdxGT { target_pc, .. } => {
|
|
resolve(target_pc, "IdxGT");
|
|
}
|
|
Insn::IdxLT { target_pc, .. } => {
|
|
resolve(target_pc, "IdxLT");
|
|
}
|
|
Insn::IsNull { reg: _, target_pc } => {
|
|
resolve(target_pc, "IsNull");
|
|
}
|
|
Insn::VNext { pc_if_next, .. } => {
|
|
resolve(pc_if_next, "VNext");
|
|
}
|
|
Insn::VFilter { pc_if_empty, .. } => {
|
|
resolve(pc_if_empty, "VFilter");
|
|
}
|
|
_ => {}
|
|
}
|
|
}
|
|
self.label_to_resolved_offset.clear();
|
|
}
|
|
|
|
// translate table to cursor id
|
|
pub fn resolve_cursor_id(&self, table_identifier: &str) -> CursorID {
|
|
self.cursor_ref
|
|
.iter()
|
|
.position(|(t_ident, _)| {
|
|
t_ident
|
|
.as_ref()
|
|
.is_some_and(|ident| ident == table_identifier)
|
|
})
|
|
.unwrap()
|
|
}
|
|
|
|
pub fn build(
|
|
mut self,
|
|
database_header: Arc<SpinLock<DatabaseHeader>>,
|
|
connection: Weak<Connection>,
|
|
change_cnt_on: bool,
|
|
) -> Program {
|
|
self.resolve_labels();
|
|
assert!(
|
|
self.constant_insns.is_empty(),
|
|
"constant_insns is not empty when build() is called, did you forget to call emit_constant_insns()?"
|
|
);
|
|
|
|
self.parameters.list.dedup();
|
|
Program {
|
|
max_registers: self.next_free_register,
|
|
insns: self.insns,
|
|
cursor_ref: self.cursor_ref,
|
|
database_header,
|
|
comments: self.comments,
|
|
connection,
|
|
parameters: self.parameters,
|
|
n_change: Cell::new(0),
|
|
change_cnt_on,
|
|
result_columns: self.result_columns,
|
|
table_references: self.table_references,
|
|
}
|
|
}
|
|
}
|