From 96987db6caa4ef3e65dc20b93add9097beee0d37 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Fri, 31 Jan 2025 22:04:03 -0500 Subject: [PATCH] implement is and is not where constraints The main difference between = and != is how null values are handled. SQLite passes a flag "NULLEQ" to Eq and Ne to disambiguate that. In the presence of that flag, NULL = NULL. Some prep work is done to make sure we can pass a flag instead of a boolean to Eq and Ne. I looked into the bitflags crate but got a bit scared with the list of dependencies. --- COMPAT.md | 2 +- core/translate/expr.rs | 224 +++++++++++++++++++++++++++-------- core/translate/main_loop.rs | 6 +- core/vdbe/insn.rs | 58 +++++++-- core/vdbe/mod.rs | 34 ++++-- testing/tester.tcl | 11 ++ testing/testing_small.db | Bin 0 -> 8192 bytes testing/testing_small.db-wal | Bin 0 -> 512 bytes testing/where.test | 32 +++++ 9 files changed, 291 insertions(+), 76 deletions(-) create mode 100644 testing/testing_small.db create mode 100644 testing/testing_small.db-wal diff --git a/COMPAT.md b/COMPAT.md index 97982562f..055fdae89 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -186,7 +186,7 @@ Feature support of [sqlite expr syntax](https://www.sqlite.org/lang_expr.html). | (NOT) GLOB | Yes | | | (NOT) REGEXP | No | | | (NOT) MATCH | No | | -| IS (NOT) | No | | +| IS (NOT) | Yes | | | IS (NOT) DISTINCT FROM | No | | | (NOT) BETWEEN ... AND ... | No | | | (NOT) IN (subquery) | No | | diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 9f1943bb5..cc2e5b4dd 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -5,7 +5,11 @@ use crate::function::JsonFunc; use crate::function::{Func, FuncCtx, MathFuncArity, ScalarFunc}; use crate::schema::Type; use crate::util::normalize_ident; -use crate::vdbe::{builder::ProgramBuilder, insn::Insn, BranchOffset}; +use crate::vdbe::{ + builder::ProgramBuilder, + insn::{Flags, Insn}, + BranchOffset, +}; use crate::Result; use super::emitter::Resolver; @@ -40,26 +44,76 @@ macro_rules! emit_cmp_insn { $op_true:ident, $op_false:ident, $lhs:expr, - $rhs:expr + $rhs:expr, + $tables:expr, + $resolver:expr ) => {{ + let lhs_reg = translate_and_mark($program, Some($tables), $lhs, $resolver)?; + let rhs_reg = translate_and_mark($program, Some($tables), $rhs, $resolver)?; if $cond.jump_if_condition_is_true { $program.emit_insn(Insn::$op_true { - lhs: $lhs, - rhs: $rhs, + lhs: lhs_reg, + rhs: rhs_reg, target_pc: $cond.jump_target_when_true, - jump_if_null: false, + flags: Flags::default(), }); } else { $program.emit_insn(Insn::$op_false { - lhs: $lhs, - rhs: $rhs, + lhs: lhs_reg, + rhs: rhs_reg, target_pc: $cond.jump_target_when_false, - jump_if_null: true, + flags: Flags::default().jump_if_null(), }); } }}; } +macro_rules! emit_cmp_null_insn { + ( + $program:expr, + $cond:expr, + $op_true:ident, + $op_false:ident, + $op_null:ident, + $lhs:expr, + $rhs:expr, + $tables:expr, + $resolver:expr + ) => {{ + if **$rhs == ast::Expr::Literal(ast::Literal::Null) { + let lhs_reg = translate_and_mark($program, Some($tables), $lhs, $resolver)?; + $program.emit_insn(Insn::$op_null { + reg: lhs_reg, + target_pc: $cond.jump_target_when_false, + }); + } else if **$lhs == ast::Expr::Literal(ast::Literal::Null) { + let rhs_reg = translate_and_mark($program, Some($tables), $rhs, $resolver)?; + $program.emit_insn(Insn::$op_null { + reg: rhs_reg, + target_pc: $cond.jump_target_when_false, + }); + } else { + let lhs_reg = translate_and_mark($program, Some($tables), $lhs, $resolver)?; + let rhs_reg = translate_and_mark($program, Some($tables), $rhs, $resolver)?; + if $cond.jump_if_condition_is_true { + $program.emit_insn(Insn::$op_true { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: $cond.jump_target_when_true, + flags: Flags::default().null_eq(), + }); + } else { + $program.emit_insn(Insn::$op_false { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: $cond.jump_target_when_false, + flags: Flags::default().jump_if_null().null_eq(), + }); + } + } + }}; +} + macro_rules! expect_arguments_exact { ( $args:expr, @@ -204,35 +258,109 @@ pub fn translate_condition_expr( resolver, )?; } - ast::Expr::Binary(lhs, op, rhs) => { - let lhs_reg = translate_and_mark(program, Some(referenced_tables), lhs, resolver)?; - let rhs_reg = translate_and_mark(program, Some(referenced_tables), rhs, resolver)?; - match op { - ast::Operator::Greater => { - emit_cmp_insn!(program, condition_metadata, Gt, Le, lhs_reg, rhs_reg) - } - ast::Operator::GreaterEquals => { - emit_cmp_insn!(program, condition_metadata, Ge, Lt, lhs_reg, rhs_reg) - } - ast::Operator::Less => { - emit_cmp_insn!(program, condition_metadata, Lt, Ge, lhs_reg, rhs_reg) - } - ast::Operator::LessEquals => { - emit_cmp_insn!(program, condition_metadata, Le, Gt, lhs_reg, rhs_reg) - } - ast::Operator::Equals => { - emit_cmp_insn!(program, condition_metadata, Eq, Ne, lhs_reg, rhs_reg) - } - ast::Operator::NotEquals => { - emit_cmp_insn!(program, condition_metadata, Ne, Eq, lhs_reg, rhs_reg) - } - ast::Operator::Is => todo!(), - ast::Operator::IsNot => todo!(), - _ => { - todo!("op {:?} not implemented", op); - } + ast::Expr::Binary(lhs, op, rhs) => match op { + ast::Operator::Greater => { + emit_cmp_insn!( + program, + condition_metadata, + Gt, + Le, + lhs, + rhs, + referenced_tables, + resolver + ) } - } + ast::Operator::GreaterEquals => { + emit_cmp_insn!( + program, + condition_metadata, + Ge, + Lt, + lhs, + rhs, + referenced_tables, + resolver + ) + } + ast::Operator::Less => { + emit_cmp_insn!( + program, + condition_metadata, + Lt, + Ge, + lhs, + rhs, + referenced_tables, + resolver + ) + } + ast::Operator::LessEquals => { + emit_cmp_insn!( + program, + condition_metadata, + Le, + Gt, + lhs, + rhs, + referenced_tables, + resolver + ) + } + ast::Operator::Equals => { + emit_cmp_insn!( + program, + condition_metadata, + Eq, + Ne, + lhs, + rhs, + referenced_tables, + resolver + ) + } + ast::Operator::NotEquals => { + emit_cmp_insn!( + program, + condition_metadata, + Ne, + Eq, + lhs, + rhs, + referenced_tables, + resolver + ) + } + ast::Operator::Is => { + emit_cmp_null_insn!( + program, + condition_metadata, + Eq, + Ne, + NotNull, + lhs, + rhs, + referenced_tables, + resolver + ) + } + ast::Operator::IsNot => { + emit_cmp_null_insn!( + program, + condition_metadata, + Ne, + Eq, + IsNull, + lhs, + rhs, + referenced_tables, + resolver + ) + } + _ => { + todo!("op {:?} not implemented", op); + } + }, ast::Expr::Literal(lit) => match lit { ast::Literal::Numeric(val) => { let maybe_int = val.parse::(); @@ -326,7 +454,7 @@ pub fn translate_condition_expr( lhs: lhs_reg, rhs: rhs_reg, target_pc: jump_target_when_true, - jump_if_null: false, + flags: Flags::default(), }); } else { // If this is the last condition, we need to jump to the 'jump_target_when_false' label if there is no match. @@ -334,7 +462,7 @@ pub fn translate_condition_expr( lhs: lhs_reg, rhs: rhs_reg, target_pc: condition_metadata.jump_target_when_false, - jump_if_null: true, + flags: Flags::default().jump_if_null(), }); } } @@ -355,7 +483,7 @@ pub fn translate_condition_expr( lhs: lhs_reg, rhs: rhs_reg, target_pc: condition_metadata.jump_target_when_false, - jump_if_null: true, + flags: Flags::default().jump_if_null(), }); } // If we got here, then none of the conditions were a match, so we jump to the 'jump_target_when_true' label if 'jump_if_condition_is_true'. @@ -498,7 +626,7 @@ pub fn translate_expr( lhs: e1_reg, rhs: e2_reg, target_pc: if_true_label, - jump_if_null: false, + flags: Flags::default(), }, target_register, if_true_label, @@ -514,7 +642,7 @@ pub fn translate_expr( lhs: e1_reg, rhs: e2_reg, target_pc: if_true_label, - jump_if_null: false, + flags: Flags::default(), }, target_register, if_true_label, @@ -530,7 +658,7 @@ pub fn translate_expr( lhs: e1_reg, rhs: e2_reg, target_pc: if_true_label, - jump_if_null: false, + flags: Flags::default(), }, target_register, if_true_label, @@ -546,7 +674,7 @@ pub fn translate_expr( lhs: e1_reg, rhs: e2_reg, target_pc: if_true_label, - jump_if_null: false, + flags: Flags::default(), }, target_register, if_true_label, @@ -562,7 +690,7 @@ pub fn translate_expr( lhs: e1_reg, rhs: e2_reg, target_pc: if_true_label, - jump_if_null: false, + flags: Flags::default(), }, target_register, if_true_label, @@ -578,7 +706,7 @@ pub fn translate_expr( lhs: e1_reg, rhs: e2_reg, target_pc: if_true_label, - jump_if_null: false, + flags: Flags::default(), }, target_register, if_true_label, @@ -671,7 +799,7 @@ pub fn translate_expr( lhs: e1_reg, rhs: e2_reg, target_pc: if_true_label, - jump_if_null: false, + flags: Flags::default(), }, target_register, if_true_label, @@ -685,7 +813,7 @@ pub fn translate_expr( lhs: e1_reg, rhs: e2_reg, target_pc: if_true_label, - jump_if_null: false, + flags: Flags::default(), }, target_register, if_true_label, @@ -756,7 +884,7 @@ pub fn translate_expr( lhs: base_reg, rhs: expr_reg, target_pc: next_case_label, - jump_if_null: false, + flags: Flags::default(), }), // CASE WHEN 0 THEN 0 ELSE 1 becomes ifnot 0 branch to next clause None => program.emit_insn(Insn::IfNot { diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 4396842e5..da812d9e7 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -4,7 +4,7 @@ use crate::{ translate::result_row::emit_select_result, vdbe::{ builder::{CursorType, ProgramBuilder}, - insn::Insn, + insn::{Flags, Insn}, BranchOffset, }, Result, @@ -477,7 +477,7 @@ pub fn open_loop( lhs: rowid_reg, rhs: cmp_reg, target_pc: loop_end, - jump_if_null: false, + flags: Flags::default(), }); } } @@ -499,7 +499,7 @@ pub fn open_loop( lhs: rowid_reg, rhs: cmp_reg, target_pc: loop_end, - jump_if_null: false, + flags: Flags::default(), }); } } diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index f4c02d564..f8a40fa65 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -6,6 +6,36 @@ use crate::storage::wal::CheckpointMode; use crate::types::{OwnedRecord, OwnedValue}; use limbo_macros::Description; +#[derive(Clone, Copy, Debug, Default)] +pub struct Flags(usize); + +impl Flags { + const NULL_EQ: usize = 0x80; + const JUMP_IF_NULL: usize = 0x10; + + fn has(&self, flag: usize) -> bool { + (self.0 & flag) != 0 + } + + pub fn null_eq(mut self) -> Self { + self.0 |= Flags::NULL_EQ; + self + } + + pub fn jump_if_null(mut self) -> Self { + self.0 |= Flags::JUMP_IF_NULL; + self + } + + pub fn has_jump_if_null(&self) -> bool { + self.has(Flags::JUMP_IF_NULL) + } + + pub fn has_nulleq(&self) -> bool { + self.has(Flags::NULL_EQ) + } +} + #[derive(Description, Debug)] pub enum Insn { // Initialize the program state and jump to the given PC. @@ -108,52 +138,56 @@ pub enum Insn { lhs: usize, rhs: usize, target_pc: BranchOffset, - /// Jump if either of the operands is null. Used for "jump when false" logic. + /// Flags are nulleq (null = null) or jump_if_null. + /// + /// jump_if_null jumps if either of the operands is null. Used for "jump when false" logic. /// Eg. "SELECT * FROM users WHERE id = NULL" becomes: /// /// Without the jump_if_null flag it would not jump because the logical comparison "id != NULL" is never true. /// This flag indicates that if either is null we should still jump. - jump_if_null: bool, + flags: Flags, }, // Compare two registers and jump to the given PC if they are not equal. Ne { lhs: usize, rhs: usize, target_pc: BranchOffset, - /// Jump if either of the operands is null. Used for "jump when false" logic. - jump_if_null: bool, + /// Flags are nulleq (null = null) or jump_if_null. + /// + /// jump_if_null jumps if either of the operands is null. Used for "jump when false" logic. + flags: Flags, }, // Compare two registers and jump to the given PC if the left-hand side is less than the right-hand side. Lt { lhs: usize, rhs: usize, target_pc: BranchOffset, - /// Jump if either of the operands is null. Used for "jump when false" logic. - jump_if_null: bool, + /// jump_if_null: Jump if either of the operands is null. Used for "jump when false" logic. + flags: Flags, }, // Compare two registers and jump to the given PC if the left-hand side is less than or equal to the right-hand side. Le { lhs: usize, rhs: usize, target_pc: BranchOffset, - /// Jump if either of the operands is null. Used for "jump when false" logic. - jump_if_null: bool, + /// jump_if_null: Jump if either of the operands is null. Used for "jump when false" logic. + flags: Flags, }, // Compare two registers and jump to the given PC if the left-hand side is greater than the right-hand side. Gt { lhs: usize, rhs: usize, target_pc: BranchOffset, - /// Jump if either of the operands is null. Used for "jump when false" logic. - jump_if_null: bool, + /// jump_if_null: Jump if either of the operands is null. Used for "jump when false" logic. + flags: Flags, }, // Compare two registers and jump to the given PC if the left-hand side is greater than or equal to the right-hand side. Ge { lhs: usize, rhs: usize, target_pc: BranchOffset, - /// Jump if either of the operands is null. Used for "jump when false" logic. - jump_if_null: bool, + /// jump_if_null: Jump if either of the operands is null. Used for "jump when false" logic. + flags: Flags, }, /// Jump to target_pc if r\[reg\] != 0 or (r\[reg\] == NULL && r\[jump_if_null\] != 0) If { diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 9c3410ba3..9586b159b 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -604,15 +604,18 @@ impl Program { lhs, rhs, target_pc, - jump_if_null, + flags, } => { assert!(target_pc.is_offset()); let lhs = *lhs; let rhs = *rhs; let target_pc = *target_pc; + let cond = state.registers[lhs] == state.registers[rhs]; + let nulleq = flags.has_nulleq(); + let jump_if_null = flags.has_jump_if_null(); match (&state.registers[lhs], &state.registers[rhs]) { (_, OwnedValue::Null) | (OwnedValue::Null, _) => { - if *jump_if_null { + if (nulleq && cond) || (!nulleq && jump_if_null) { state.pc = target_pc.to_offset_int(); } else { state.pc += 1; @@ -631,15 +634,18 @@ impl Program { lhs, rhs, target_pc, - jump_if_null, + flags, } => { assert!(target_pc.is_offset()); let lhs = *lhs; let rhs = *rhs; let target_pc = *target_pc; + let cond = state.registers[lhs] != state.registers[rhs]; + let nulleq = flags.has_nulleq(); + let jump_if_null = flags.has_jump_if_null(); match (&state.registers[lhs], &state.registers[rhs]) { (_, OwnedValue::Null) | (OwnedValue::Null, _) => { - if *jump_if_null { + if (nulleq && cond) || (!nulleq && jump_if_null) { state.pc = target_pc.to_offset_int(); } else { state.pc += 1; @@ -658,15 +664,16 @@ impl Program { lhs, rhs, target_pc, - jump_if_null, + flags, } => { assert!(target_pc.is_offset()); let lhs = *lhs; let rhs = *rhs; let target_pc = *target_pc; + let jump_if_null = flags.has_jump_if_null(); match (&state.registers[lhs], &state.registers[rhs]) { (_, OwnedValue::Null) | (OwnedValue::Null, _) => { - if *jump_if_null { + if jump_if_null { state.pc = target_pc.to_offset_int(); } else { state.pc += 1; @@ -685,15 +692,16 @@ impl Program { lhs, rhs, target_pc, - jump_if_null, + flags, } => { assert!(target_pc.is_offset()); let lhs = *lhs; let rhs = *rhs; let target_pc = *target_pc; + let jump_if_null = flags.has_jump_if_null(); match (&state.registers[lhs], &state.registers[rhs]) { (_, OwnedValue::Null) | (OwnedValue::Null, _) => { - if *jump_if_null { + if jump_if_null { state.pc = target_pc.to_offset_int(); } else { state.pc += 1; @@ -712,15 +720,16 @@ impl Program { lhs, rhs, target_pc, - jump_if_null, + flags, } => { assert!(target_pc.is_offset()); let lhs = *lhs; let rhs = *rhs; let target_pc = *target_pc; + let jump_if_null = flags.has_jump_if_null(); match (&state.registers[lhs], &state.registers[rhs]) { (_, OwnedValue::Null) | (OwnedValue::Null, _) => { - if *jump_if_null { + if jump_if_null { state.pc = target_pc.to_offset_int(); } else { state.pc += 1; @@ -739,15 +748,16 @@ impl Program { lhs, rhs, target_pc, - jump_if_null, + flags, } => { assert!(target_pc.is_offset()); let lhs = *lhs; let rhs = *rhs; let target_pc = *target_pc; + let jump_if_null = flags.has_jump_if_null(); match (&state.registers[lhs], &state.registers[rhs]) { (_, OwnedValue::Null) | (OwnedValue::Null, _) => { - if *jump_if_null { + if jump_if_null { state.pc = target_pc.to_offset_int(); } else { state.pc += 1; diff --git a/testing/tester.tcl b/testing/tester.tcl index b8cbff17f..735c91aae 100644 --- a/testing/tester.tcl +++ b/testing/tester.tcl @@ -1,5 +1,6 @@ set sqlite_exec [expr {[info exists env(SQLITE_EXEC)] ? $env(SQLITE_EXEC) : "sqlite3"}] set test_dbs [list "testing/testing.db" "testing/testing_norowidalias.db"] +set test_small_dbs [list "testing/testing_small.db" ] proc evaluate_sql {sqlite_exec db_name sql} { set command [list $sqlite_exec $db_name $sql] @@ -26,6 +27,16 @@ proc do_execsql_test {test_name sql_statements expected_outputs} { } } +proc do_execsql_test_small {test_name sql_statements expected_outputs} { + foreach db $::test_small_dbs { + puts [format "(%s) %s Running test: %s" $db [string repeat " " [expr {40 - [string length $db]}]] $test_name] + set combined_sql [string trim $sql_statements] + set combined_expected_output [join $expected_outputs "\n"] + run_test $::sqlite_exec $db $combined_sql $combined_expected_output + } +} + + proc do_execsql_test_regex {test_name sql_statements expected_regex} { foreach db $::test_dbs { puts [format "(%s) %s Running test: %s" $db [string repeat " " [expr {40 - [string length $db]}]] $test_name] diff --git a/testing/testing_small.db b/testing/testing_small.db new file mode 100644 index 0000000000000000000000000000000000000000..766b14f2a1dcbe613a45491f6b7883aa8765bd0c GIT binary patch literal 8192 zcmeI#ziR?96bJD4E=i%#k}l%!S&=$IPeF^*}tq){|+}NH>ds$ zeK!bhx(eS%9?3TZLOxrjUs>H$ve;}_MI!@xA?Bx{2oV|WMEhK<3wsVaxA|HbdRzZK z>nZj>qyq&32tWV=5P$##AOHafKmY;|fWVCc@BGm1bhv1W`LZgj)u!!aoQE_9iRmZ{ z((XvFF6DC)!$-*b@?9)CoA|wC+ literal 0 HcmV?d00001 diff --git a/testing/testing_small.db-wal b/testing/testing_small.db-wal new file mode 100644 index 0000000000000000000000000000000000000000..43e809234f59aab1ad821efec4522b4d6a5439a7 GIT binary patch literal 512 kcmXr7XKP~6eI&uaAi#hOt~_TJ?m2gB2eQ~Gd-#O_0NMoyGXMYp literal 0 HcmV?d00001 diff --git a/testing/where.test b/testing/where.test index 9f4b86a5c..d34cc1bd9 100755 --- a/testing/where.test +++ b/testing/where.test @@ -3,6 +3,38 @@ set testdir [file dirname $argv0] source $testdir/tester.tcl +do_execsql_test_small where-is-null { + select count(*) from demo where value is null; +} {2} + +do_execsql_test_small where-equals-null { + select count(*) from demo where value = null; +} {0} + +do_execsql_test_small where-is-not-null { + select count(*) from demo where value is not null; +} {3} + +do_execsql_test_small where-not-equal-null { + select count(*) from demo where value != null; +} {0} + +do_execsql_test_small where-is-a-with-nulls { + select count(*) from demo where value is 'A'; +} {1} + +do_execsql_test_small where-equals-a-with-nulls { + select count(*) from demo where value == 'A'; +} {1} + +do_execsql_test_small where-is-not-a-with-nulls { + select count(*) from demo where value is not 'A'; +} {4} + +do_execsql_test_small where-not-equals-a-with-nulls { + select count(*) from demo where value != 'A'; +} {2} + do_execsql_test where-clause-eq { select last_name from users where id = 2000; } {Rodriguez}