test/fuzz: modify compound index scan fuzz to utilize both pk columns in where clause

2025-08-04 01:58:16 +00:00 · 2025-04-10 15:06:18 +03:00 · 2025-04-10 15:06:18 +03:00 · 3d1b4c5292
commit 3d1b4c5292
parent 579d04f521
1 changed files with 83 additions and 42 deletions
--- a/tests/integration/fuzz/mod.rs
+++ b/tests/integration/fuzz/mod.rs
@ -232,12 +232,8 @@ mod tests {

        const COMPARISONS: [&str; 5] = ["=", "<", "<=", ">", ">="];

-        const ORDER_BY: [Option<&str>; 4] = [
-            None,
-            Some("ORDER BY x"),
-            Some("ORDER BY x DESC"),
-            Some("ORDER BY x ASC"),
-        ];
+        const ORDER_BY: [Option<&str>; 3] = [None, Some("ORDER BY x DESC"), Some("ORDER BY x ASC")];
+        const SECONDARY_ORDER_BY: [Option<&str>; 3] = [None, Some(", y DESC"), Some(", y ASC")];

        let print_dump_on_fail = |insert: &str, seed: u64| {
            let comment = format!("-- seed: {}; dump for manual debugging:", seed);
@ -252,56 +248,101 @@ mod tests {

        for comp in COMPARISONS.iter() {
            for order_by in ORDER_BY.iter() {
-                for max in 0..=3000 {
-                    // see comment below about ordering and the '=' comparison operator; omitting LIMIT for that reason
-                    // we mainly have LIMIT here for performance reasons but for = we want to get all the rows to ensure
-                    // correctness in the = case
-                    let limit = if *comp == "=" { "" } else { "LIMIT 5" };
+                // make it more likely that the full 2-column index is utilized for seeking
+                let iter_count_per_permutation = if *comp == "=" { 2000 } else { 500 };
+                println!(
+                    "fuzzing {} iterations with comp: {:?}, order_by: {:?}",
+                    iter_count_per_permutation, comp, order_by
+                );
+                for _ in 0..iter_count_per_permutation {
+                    let first_col_val = rng.random_range(0..=3000);
+                    let mut limit = "LIMIT 5";
+                    let mut second_idx_col_cond = "".to_string();
+                    let mut second_idx_col_comp = "".to_string();
+
+                    // somtetimes include the second index column in the where clause.
+                    // make it more probable when first column has '=' constraint since those queries are usually faster to run
+                    let second_col_prob = if *comp == "=" { 0.7 } else { 0.02 };
+                    if rng.random_bool(second_col_prob) {
+                        let second_idx_col = rng.random_range(0..3000);
+
+                        second_idx_col_comp =
+                            COMPARISONS[rng.random_range(0..COMPARISONS.len())].to_string();
+                        second_idx_col_cond =
+                            format!(" AND y {} {}", second_idx_col_comp, second_idx_col);
+                    }
+
+                    // if the first constraint is =, then half the time, use the second index column in the ORDER BY too
+                    let mut secondary_order_by = None;
+                    let use_secondary_order_by = order_by.is_some()
+                        && *comp == "="
+                        && second_idx_col_comp != ""
+                        && rng.random_bool(0.5);
+                    let full_order_by = if use_secondary_order_by {
+                        secondary_order_by =
+                            SECONDARY_ORDER_BY[rng.random_range(0..SECONDARY_ORDER_BY.len())];
+                        if let Some(secondary) = secondary_order_by {
+                            format!("{}{}", order_by.unwrap_or(""), secondary,)
+                        } else {
+                            order_by.unwrap_or("").to_string()
+                        }
+                    } else {
+                        order_by.unwrap_or("").to_string()
+                    };
+
+                    // There are certain cases where SQLite does not bother iterating in reverse order despite the ORDER BY.
+                    // These cases include e.g.
+                    // SELECT * FROM t WHERE x = 3 ORDER BY x DESC
+                    // SELECT * FROM t WHERE x = 3 and y < 100 ORDER BY x DESC
+                    //
+                    // The common thread being that the ORDER BY column is also constrained by an equality predicate, meaning
+                    // that it doesn't semantically matter what the ordering is.
+                    //
+                    // We do not currently replicate this "lazy" behavior, so in these cases we want the full result set and ensure
+                    // that if the result is not exactly equal, then the ordering must be the exact reverse.
+                    let allow_reverse_ordering = {
+                        if *comp != "=" {
+                            false
+                        } else if secondary_order_by.is_some() {
+                            second_idx_col_comp == "="
+                        } else {
+                            true
+                        }
+                    };
+                    if allow_reverse_ordering {
+                        // see comment above about ordering and the '=' comparison operator; omitting LIMIT for that reason
+                        // we mainly have LIMIT here for performance reasons but for = we want to get all the rows to ensure
+                        // correctness in the = case
+                        limit = "";
+                    }
                    let query = format!(
-                        "SELECT * FROM t WHERE x {} {} {} {}",
-                        comp,
-                        max,
-                        order_by.unwrap_or(""),
-                        limit
+                        // e.g. SELECT * FROM t WHERE x = 1 AND y > 2 ORDER BY x DESC LIMIT 5
+                        "SELECT * FROM t WHERE x {} {} {} {} {}",
+                        comp, first_col_val, second_idx_col_cond, full_order_by, limit,
                    );
-                    log::trace!("query: {}", query);
+                    log::debug!("query: {}", query);
                    let limbo = limbo_exec_rows(&db, &limbo_conn, &query);
                    let sqlite = sqlite_exec_rows(&sqlite_conn, &query);
                    let is_equal = limbo == sqlite;
                    if !is_equal {
-                        // if the condition is = and the same rows are present but in different order, then we accept that
-                        // e.g. sqlite doesn't bother iterating in reverse order if "WHERE X = 3 ORDER BY X DESC", but we currently do.
-                        if *comp == "=" {
+                        if allow_reverse_ordering {
                            let limbo_row_count = limbo.len();
                            let sqlite_row_count = sqlite.len();
                            if limbo_row_count == sqlite_row_count {
-                                for limbo_row in limbo.iter() {
-                                    if !sqlite.contains(limbo_row) {
-                                        // save insert to file and print the filename for debugging
-                                        let error_msg = format!("row not found in sqlite: query: {}, limbo: {:?}, sqlite: {:?}, seed: {}", query, limbo, sqlite, seed);
-                                        print_dump_on_fail(&insert, seed);
-                                        panic!("{}", error_msg);
-                                    }
-                                }
-                                for sqlite_row in sqlite.iter() {
-                                    if !limbo.contains(sqlite_row) {
-                                        let error_msg = format!("row not found in limbo: query: {}, limbo: {:?}, sqlite: {:?}, seed: {}", query, limbo, sqlite, seed);
-                                        print_dump_on_fail(&insert, seed);
-                                        panic!("{}", error_msg);
-                                    }
-                                }
-                                continue;
+                                let limbo_rev = limbo.iter().cloned().rev().collect::<Vec<_>>();
+                                assert_eq!(limbo_rev, sqlite, "query: {}, limbo: {:?}, sqlite: {:?}, seed: {}, allow_reverse_ordering: {}", query, limbo, sqlite, seed, allow_reverse_ordering);
                            } else {
                                print_dump_on_fail(&insert, seed);
-                                let error_msg = format!("row count mismatch (limbo: {}, sqlite: {}): query: {}, limbo: {:?}, sqlite: {:?}, seed: {}", limbo_row_count, sqlite_row_count, query, limbo, sqlite, seed);
+                                let error_msg = format!("row count mismatch (limbo row count: {}, sqlite row count: {}): query: {}, limbo: {:?}, sqlite: {:?}, seed: {}, allow_reverse_ordering: {}", limbo_row_count, sqlite_row_count, query, limbo, sqlite, seed, allow_reverse_ordering);
                                panic!("{}", error_msg);
                            }
+                        } else {
+                            print_dump_on_fail(&insert, seed);
+                            panic!(
+                                "query: {}, limbo row count: {}, limbo: {:?}, sqlite row count: {}, sqlite: {:?}, seed: {}, allow_reverse_ordering: {}",
+                                query, limbo.len(), limbo, sqlite.len(), sqlite, seed, allow_reverse_ordering
+                            );
                        }
-                        print_dump_on_fail(&insert, seed);
-                        panic!(
-                            "query: {}, limbo: {:?}, sqlite: {:?}, seed: {}",
-                            query, limbo, sqlite, seed
-                        );
                    }
                }
            }