From bf6038f0ba43ce67f768c8df4c22c16a079da5f2 Mon Sep 17 00:00:00 2001
From: PThorpe92
Date: Thu, 4 Dec 2025 16:16:29 -0500
Subject: [PATCH 1/3] Add join fuzzer for non-indexed columns

---
Reviewer note (not part of the commit message): adds a differential fuzz
test. It creates identical t1..t4 tables in SQLite and Limbo, inserts 200
rows per table with a 25% chance of NULL per non-key column, then runs
randomly generated 2-4 table JOIN / LEFT JOIN queries against both and
panics with the seed and query on the first result mismatch.

 tests/fuzz/mod.rs | 162 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 162 insertions(+)

diff --git a/tests/fuzz/mod.rs b/tests/fuzz/mod.rs
index df416442a..8760d921b 100644
--- a/tests/fuzz/mod.rs
+++ b/tests/fuzz/mod.rs
@@ -502,6 +502,168 @@ mod fuzz_tests {
         }
     }
 
+    #[turso_macros::test()]
+    pub fn join_fuzz_unindexed_keys(db: TempDatabase) {
+        let _ = env_logger::try_init();
+        let (mut rng, seed) = rng_from_time_or_env();
+        println!("join_fuzz_unindexed_keys seed: {seed}");
+
+        let opts = db.db_opts;
+        let flags = db.db_flags;
+        let builder = TempDatabase::builder().with_flags(flags).with_opts(opts);
+
+        let limbo_db = builder.clone().build();
+        let sqlite_db = builder.clone().build();
+        let limbo_conn = limbo_db.connect_limbo();
+        let sqlite_conn = rusqlite::Connection::open(sqlite_db.path.clone()).unwrap();
+
+        let schema = r#"
+            CREATE TABLE t1(id INTEGER PRIMARY KEY, a INT, b INT, c INT, d INT);
+            CREATE TABLE t2(id INTEGER PRIMARY KEY, a INT, b INT, c INT, d INT);
+            CREATE TABLE t3(id INTEGER PRIMARY KEY, a INT, b INT, c INT, d INT);
+            CREATE TABLE t4(id INTEGER PRIMARY KEY, a INT, b INT, c INT, d INT);
+        "#;
+
+        sqlite_conn.execute_batch(schema).unwrap();
+        limbo_conn.prepare_execute_batch(schema).unwrap();
+
+        const ROWS_PER_TABLE: i64 = 200;
+        let tables = ["t1", "t2", "t3", "t4"];
+        for (t_idx, tname) in tables.iter().enumerate() {
+            for i in 0..ROWS_PER_TABLE {
+                // 25% chance of NULL per column.
+                let id = i + 1 + (t_idx as i64) * 10_000;
+                let gen_val = |rng: &mut ChaCha8Rng| {
+                    if rng.random_range(0..4) == 0 {
+                        None
+                    } else {
+                        Some(rng.random_range(-10..=20))
+                    }
+                };
+                let a = gen_val(&mut rng);
+                let b = gen_val(&mut rng);
+                let c = gen_val(&mut rng);
+                let d = gen_val(&mut rng);
+
+                let fmt_val = |v: Option<i32>| match v {
+                    Some(x) => x.to_string(),
+                    None => "NULL".to_string(),
+                };
+
+                let stmt = format!(
+                    "INSERT INTO {tname}(id,a,b,c,d) VALUES ({id}, {a}, {b}, {c}, {d})",
+                    a = fmt_val(a),
+                    b = fmt_val(b),
+                    c = fmt_val(c),
+                    d = fmt_val(d),
+                );
+
+                sqlite_conn.execute(&stmt, params![]).unwrap();
+                limbo_conn.execute(&stmt).unwrap();
+            }
+        }
+
+        let non_pk_cols = ["a", "b", "c", "d"];
+
+        const ITERS: usize = 5000;
+        for iter in 0..ITERS {
+            if iter % (ITERS / 100).max(1) == 0 {
+                println!("join_fuzz_unindexed_keys: iteration {}/{}", iter + 1, ITERS);
+            }
+
+            // Random number of tables: 2..4
+            let num_tables = rng.random_range(2..=4);
+            let used_tables = &tables[..num_tables];
+
+            let mut select_cols = Vec::new();
+            for t in used_tables.iter() {
+                select_cols.push(format!("{t}.id"));
+            }
+            let select_clause = select_cols.join(", ");
+
+            // FROM + JOIN clause: chain t1 JOIN t2 JOIN ... with ON predicates on a/b/c/d.
+            let mut from_clause = format!("FROM {}", used_tables[0]);
+            for i in 1..num_tables {
+                let left = used_tables[i - 1];
+                let right = used_tables[i];
+
+                let join_type = if rng.random_bool(0.5) {
+                    "JOIN" // INNER
+                } else {
+                    "LEFT JOIN"
+                };
+
+                let num_preds = rng.random_range(1..=3);
+                let mut preds = Vec::new();
+                for _ in 0..num_preds {
+                    let col = non_pk_cols[rng.random_range(0..non_pk_cols.len())];
+                    // Join on same-named, non-indexed column on both sides.
+                    preds.push(format!("{left}.{col} = {right}.{col}"));
+                }
+                // Avoid duplicate ON terms to keep queries simple.
+                preds.sort();
+                preds.dedup();
+
+                let on_clause = preds.join(" AND ");
+                from_clause = format!("{from_clause} {join_type} {right} ON {on_clause}");
+            }
+
+            // WHERE clause: 0..2 random predicates, mostly equality / IS NULL / IS NOT NULL.
+            let mut where_parts = Vec::new();
+            let num_where = rng.random_range(0..=2);
+            for _ in 0..num_where {
+                let t = used_tables[rng.random_range(0..num_tables)];
+                let col = non_pk_cols[rng.random_range(0..non_pk_cols.len())];
+                let kind = rng.random_range(0..4);
+                let cond = match kind {
+                    0 => {
+                        // equality
+                        let val = rng.random_range(-10..=20);
+                        format!("{t}.{col} = {val}")
+                    }
+                    1 => {
+                        // inequality
+                        let val = rng.random_range(-10..=20);
+                        format!("{t}.{col} <> {val}")
+                    }
+                    2 => format!("{t}.{col} IS NULL"),
+                    3 => format!("{t}.{col} IS NOT NULL"),
+                    _ => unreachable!(),
+                };
+                where_parts.push(cond);
+            }
+            let where_clause = if where_parts.is_empty() {
+                String::new()
+            } else {
+                format!("WHERE {}", where_parts.join(" AND "))
+            };
+            let order_clause = format!("ORDER BY {}", select_cols.join(", "));
+
+            let limit = 50;
+            let query = format!(
+                "SELECT {select_clause} {from_clause} {where_clause} {order_clause} LIMIT {limit}",
+            );
+            log::debug!("join_fuzz query: {query}");
+
+            let sqlite_rows = sqlite_exec_rows(&sqlite_conn, &query);
+            let limbo_rows = limbo_exec_rows(&limbo_db, &limbo_conn, &query);
+
+            if sqlite_rows != limbo_rows {
+                panic!(
+                    "JOIN FUZZ MISMATCH!\nseed: {seed}\niteration: {iter}\nquery: {query}\n\
+                    sqlite: {:?}\nlimbo: {:?}\n limbo count: {}, sqlite count: {}\n
+                    sqlite path: {:?}, limbo path: {:?}",
+                    sqlite_rows,
+                    limbo_rows,
+                    limbo_rows.len(),
+                    sqlite_rows.len(),
+                    sqlite_db.path,
+                    limbo_db.path,
+                );
+            }
+        }
+    }
+
     // TODO: Mvcc indexes
     #[turso_macros::test()]
     pub fn collation_fuzz(db: TempDatabase) {
From 16a1940d3ab2983b28fed14e62b5560df3708fda Mon Sep 17 00:00:00 2001
From: PThorpe92
Date: Thu, 4 Dec 2025 16:23:46 -0500
Subject: [PATCH 2/3] Reduce iteration count in join fuzzer to 2000

---
 tests/fuzz/mod.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/fuzz/mod.rs b/tests/fuzz/mod.rs
index 8760d921b..f3f1a35d2 100644
--- a/tests/fuzz/mod.rs
+++ b/tests/fuzz/mod.rs
@@ -565,7 +565,7 @@ mod fuzz_tests {
 
         let non_pk_cols = ["a", "b", "c", "d"];
 
-        const ITERS: usize = 5000;
+        const ITERS: usize = 2000;
         for iter in 0..ITERS {
             if iter % (ITERS / 100).max(1) == 0 {
                 println!("join_fuzz_unindexed_keys: iteration {}/{}", iter + 1, ITERS);
From c9a682701107309c0a4f6b6db351b293167d5889 Mon Sep 17 00:00:00 2001
From: PThorpe92
Date: Fri, 5 Dec 2025 12:03:23 -0500
Subject: [PATCH 3/3] Extract out join fuzzer to an additional test on indexed
 columns

---
Reviewer note (not part of the commit message): turns the test body into
join_fuzz_inner(db, add_indexes, iterations, rows). When add_indexes is
true, a single-column index is created on a/b/c/d of every table in both
databases before rows are inserted. Both public tests call the helper with
2000 iterations and 200 rows; they differ only in the add_indexes flag.
The log::debug! of each generated query is dropped by this patch.

 tests/fuzz/mod.rs | 103 +++++++++++++++++++++++++++++-----------------
 1 file changed, 66 insertions(+), 37 deletions(-)

diff --git a/tests/fuzz/mod.rs b/tests/fuzz/mod.rs
index f3f1a35d2..03f13bc90 100644
--- a/tests/fuzz/mod.rs
+++ b/tests/fuzz/mod.rs
@@ -502,16 +502,14 @@ mod fuzz_tests {
         }
     }
 
-    #[turso_macros::test()]
-    pub fn join_fuzz_unindexed_keys(db: TempDatabase) {
+    fn join_fuzz_inner(db: TempDatabase, add_indexes: bool, iterations: usize, rows: i64) {
         let _ = env_logger::try_init();
         let (mut rng, seed) = rng_from_time_or_env();
-        println!("join_fuzz_unindexed_keys seed: {seed}");
+        println!("join_fuzz_inner (add_indexes={add_indexes}) seed: {seed}",);
 
         let opts = db.db_opts;
         let flags = db.db_flags;
         let builder = TempDatabase::builder().with_flags(flags).with_opts(opts);
-
         let limbo_db = builder.clone().build();
         let sqlite_db = builder.clone().build();
         let limbo_conn = limbo_db.connect_limbo();
@@ -521,18 +519,43 @@ mod fuzz_tests {
             CREATE TABLE t1(id INTEGER PRIMARY KEY, a INT, b INT, c INT, d INT);
             CREATE TABLE t2(id INTEGER PRIMARY KEY, a INT, b INT, c INT, d INT);
             CREATE TABLE t3(id INTEGER PRIMARY KEY, a INT, b INT, c INT, d INT);
-            CREATE TABLE t4(id INTEGER PRIMARY KEY, a INT, b INT, c INT, d INT);
-        "#;
+            CREATE TABLE t4(id INTEGER PRIMARY KEY, a INT, b INT, c INT, d INT);"#;
 
         sqlite_conn.execute_batch(schema).unwrap();
         limbo_conn.prepare_execute_batch(schema).unwrap();
 
-        const ROWS_PER_TABLE: i64 = 200;
+        if add_indexes {
+            let index_ddl = r#"
+                CREATE INDEX t1_a_idx ON t1(a);
+                CREATE INDEX t1_b_idx ON t1(b);
+                CREATE INDEX t1_c_idx ON t1(c);
+                CREATE INDEX t1_d_idx ON t1(d);
+
+                CREATE INDEX t2_a_idx ON t2(a);
+                CREATE INDEX t2_b_idx ON t2(b);
+                CREATE INDEX t2_c_idx ON t2(c);
+                CREATE INDEX t2_d_idx ON t2(d);
+
+                CREATE INDEX t3_a_idx ON t3(a);
+                CREATE INDEX t3_b_idx ON t3(b);
+                CREATE INDEX t3_c_idx ON t3(c);
+                CREATE INDEX t3_d_idx ON t3(d);
+
+                CREATE INDEX t4_a_idx ON t4(a);
+                CREATE INDEX t4_b_idx ON t4(b);
+                CREATE INDEX t4_c_idx ON t4(c);
+                CREATE INDEX t4_d_idx ON t4(d);
+            "#;
+            sqlite_conn.execute_batch(index_ddl).unwrap();
+            limbo_conn.prepare_execute_batch(index_ddl).unwrap();
+        }
+
         let tables = ["t1", "t2", "t3", "t4"];
         for (t_idx, tname) in tables.iter().enumerate() {
-            for i in 0..ROWS_PER_TABLE {
-                // 25% chance of NULL per column.
+            for i in 0..rows {
                 let id = i + 1 + (t_idx as i64) * 10_000;
+
+                // 25% chance of NULL per column.
                 let gen_val = |rng: &mut ChaCha8Rng| {
                     if rng.random_range(0..4) == 0 {
                         None
@@ -565,30 +588,31 @@ mod fuzz_tests {
 
         let non_pk_cols = ["a", "b", "c", "d"];
 
-        const ITERS: usize = 2000;
-        for iter in 0..ITERS {
-            if iter % (ITERS / 100).max(1) == 0 {
-                println!("join_fuzz_unindexed_keys: iteration {}/{}", iter + 1, ITERS);
+        for iter in 0..iterations {
+            if iter % (iterations / 100).max(1) == 0 {
+                println!(
+                    "join_fuzz_inner(add_indexes={}) iter {}/{}",
+                    add_indexes,
+                    iter + 1,
+                    iterations
+                );
             }
 
-            // Random number of tables: 2..4
             let num_tables = rng.random_range(2..=4);
             let used_tables = &tables[..num_tables];
 
-            let mut select_cols = Vec::new();
+            let mut select_cols: Vec<String> = Vec::new();
             for t in used_tables.iter() {
                 select_cols.push(format!("{t}.id"));
             }
             let select_clause = select_cols.join(", ");
-
-            // FROM + JOIN clause: chain t1 JOIN t2 JOIN ... with ON predicates on a/b/c/d.
             let mut from_clause = format!("FROM {}", used_tables[0]);
             for i in 1..num_tables {
                 let left = used_tables[i - 1];
                 let right = used_tables[i];
 
                 let join_type = if rng.random_bool(0.5) {
-                    "JOIN" // INNER
+                    "JOIN"
                 } else {
                     "LEFT JOIN"
                 };
@@ -597,10 +621,8 @@ mod fuzz_tests {
                 let mut preds = Vec::new();
                 for _ in 0..num_preds {
                     let col = non_pk_cols[rng.random_range(0..non_pk_cols.len())];
-                    // Join on same-named, non-indexed column on both sides.
                     preds.push(format!("{left}.{col} = {right}.{col}"));
                 }
-                // Avoid duplicate ON terms to keep queries simple.
                 preds.sort();
                 preds.dedup();
 
@@ -608,7 +630,7 @@ mod fuzz_tests {
                 from_clause = format!("{from_clause} {join_type} {right} ON {on_clause}");
             }
 
-            // WHERE clause: 0..2 random predicates, mostly equality / IS NULL / IS NOT NULL.
+            // WHERE clause: 0..2 predicates on non-pk cols
             let mut where_parts = Vec::new();
             let num_where = rng.random_range(0..=2);
             for _ in 0..num_where {
@@ -617,12 +639,10 @@ mod fuzz_tests {
                 let kind = rng.random_range(0..4);
                 let cond = match kind {
                     0 => {
-                        // equality
                         let val = rng.random_range(-10..=20);
                         format!("{t}.{col} = {val}")
                     }
                     1 => {
-                        // inequality
                         let val = rng.random_range(-10..=20);
                         format!("{t}.{col} <> {val}")
                     }
@@ -638,32 +658,41 @@ mod fuzz_tests {
                 format!("WHERE {}", where_parts.join(" AND "))
             };
             let order_clause = format!("ORDER BY {}", select_cols.join(", "));
-
             let limit = 50;
             let query = format!(
                 "SELECT {select_clause} {from_clause} {where_clause} {order_clause} LIMIT {limit}",
             );
-            log::debug!("join_fuzz query: {query}");
-
             let sqlite_rows = sqlite_exec_rows(&sqlite_conn, &query);
             let limbo_rows = limbo_exec_rows(&limbo_db, &limbo_conn, &query);
-
             if sqlite_rows != limbo_rows {
                 panic!(
-                    "JOIN FUZZ MISMATCH!\nseed: {seed}\niteration: {iter}\nquery: {query}\n\
-                    sqlite: {:?}\nlimbo: {:?}\n limbo count: {}, sqlite count: {}\n
-                    sqlite path: {:?}, limbo path: {:?}",
-                    sqlite_rows,
-                    limbo_rows,
-                    limbo_rows.len(),
-                    sqlite_rows.len(),
-                    sqlite_db.path,
-                    limbo_db.path,
-                );
+                "JOIN FUZZ MISMATCH (add_indexes={})\nseed: {}\niteration: {}\nquery: {}\n\
+                sqlite ({} rows): {:?}\nlimbo ({} rows): {:?}\nsqlite path: {:?}\nlimbo path: {:?}",
+                add_indexes,
+                seed,
+                iter,
+                query,
+                sqlite_rows.len(),
+                sqlite_rows,
+                limbo_rows.len(),
+                limbo_rows,
+                sqlite_db.path,
+                limbo_db.path,
+            );
             }
         }
     }
 
+    #[turso_macros::test()]
+    pub fn join_fuzz_unindexed_keys(db: TempDatabase) {
+        join_fuzz_inner(db, false, 2000, 200);
+    }
+
+    #[turso_macros::test()]
+    pub fn join_fuzz_indexed_keys(db: TempDatabase) {
+        join_fuzz_inner(db, true, 2000, 200);
+    }
+
     // TODO: Mvcc indexes
     #[turso_macros::test()]
     pub fn collation_fuzz(db: TempDatabase) {