Cleaned up the benchmark script: each benchmark now uses 200 runs, and the benchmarks are repeated up to 3 times to confirm that a detected regression is real

2025-10-01 15:51:12 +00:00 · 2021-07-20 14:10:35 +02:00 · 2021-07-20 14:10:35 +02:00 · a3f7c6accf
commit a3f7c6accf
parent e7223fe176
5 changed files with 74 additions and 67 deletions
--- a/.gitignore
+++ b/.gitignore
@ -30,3 +30,6 @@ sccache_dir
 # self-contained benchmark folder
 bench-folder*
 # earthly
 earthly_log.txt
--- a/Cargo.toml
+++ b/Cargo.toml
@ -32,6 +32,7 @@ members = [
    "roc_std",
    "docs",
 ]
 exclude = ["ci/bench-runner"]
 # Needed to be able to run `cargo run -p roc_cli --no-default-features` -
 # see www/build.sh for more.
 #
--- a/ci/bench-runner.sh
+++ b/ci/bench-runner.sh
@ -1,10 +1,25 @@
 #!/usr/bin/env bash
 # script to return exit code 1 if benchmarks have regressed
 # script assumes we are in repo root
 # benchmark trunk
 ulimit -s unlimited
 LOG_FILE="bench_log.txt"
 NR_REPEATS=3
 for ctr in `seq 1 $NR_REPEATS`;
    do
        #
        # <run benchmarks>
        #
        # delete criterion folder to remove old benchmark data (ignore error if folder does not exist)
        rm -rf bench-folder-trunk/target/criterion
        rm -rf bench-folder-branch/target/criterion
        cd bench-folder-trunk
        # benchmark trunk
        ./target/release/deps/time_bench --bench
        cd ..
@ -13,68 +28,53 @@ cp -r bench-folder-trunk/target/criterion bench-folder-branch/target/
        cd bench-folder-branch
-LOG_FILE="bench_log.txt"
+        # ignore error if file does not exist
        rm -f $LOG_FILE 2>&1
        touch $LOG_FILE
        FULL_CMD=" ./target/release/deps/time_bench --bench"
        echo $FULL_CMD
        script -efq $LOG_FILE -c "$FULL_CMD"
        EXIT_CODE=$?
        #
        # </run benchmarks>
        #
-if cat $LOG_FILE | grep -q "regressed"; then
+        #
        # <save which tests regressed>
        #
        REGRESSED_TESTS_FILE_NAME="regressed_$ctr.txt"
-    grep -B3 "regressed" $LOG_FILE | sed 's/\x1B\[[0-9;]\{1,\}[A-Za-z]//g' | grep -o "\".*\"" | rev | cut -d' ' -f1 | rev > slow_benches_1.txt
+        grep -B3 "regressed" $LOG_FILE | sed 's/\x1B\[[0-9;]\{1,\}[A-Za-z]//g' | grep -o "\".*\"" | rev | cut -d' ' -f1 | rev > $REGRESSED_TESTS_FILE_NAME
-    echo "regression(s) detected in:"
+
-    cat slow_benches_1.txt
+        #
        # </save which tests regressed>
        #
        if [ $(cat $REGRESSED_TESTS_FILE_NAME | wc -l) -gt 0 ]; then 
            if [ $ctr -ne $NR_REPEATS ]; then
                echo ""
                echo ""
                echo "------<<<<<<>>>>>>------"
-    echo "Benchmark detected regression. Running benchmark again to confirm..."
+                echo "Benchmark regression detected for:"
                cat $REGRESSED_TESTS_FILE_NAME
                echo "Running benchmarks again to confirm regression is real..."
                echo "------<<<<<<>>>>>>------"
                echo ""
                echo ""
-
+            else
    # delete criterion folder to remove old benchmark data
    rm -rf ./target/criterion
    # benchmark trunk again
    cd ../bench-folder-trunk
    rm -rf target/criterion
    ./target/release/deps/time_bench --bench
    cd ../bench-folder-branch
    cp -r ../bench-folder-trunk/target/criterion ./target
    rm $LOG_FILE
    touch $LOG_FILE
    script -efq $LOG_FILE -c "$FULL_CMD"
    EXIT_CODE=$?
    if cat $LOG_FILE | grep -q "regressed"; then
        grep -B3 "regressed" $LOG_FILE | sed 's/\x1B\[[0-9;]\{1,\}[A-Za-z]//g' | grep -o "\".*\"" | rev | cut -d' ' -f1 | rev > slow_benches_2.txt
        echo "regression(s) detected in:"
        cat slow_benches_2.txt
        if [[ $(grep -Fxf slow_benches_1.txt slow_benches_2.txt | wc -l) -gt 0 ]]; then
                echo ""
                echo ""
                echo "------<<<<<<!!!!!!>>>>>>------"
-            echo "Benchmarks were run twice and a regression was detected both times for the following benchmarks:"
+                echo "Benchmarks were run $NR_REPEATS times and a regression was detected every time for the following benchmarks:"
-            grep -Fxf slow_benches_1.txt slow_benches_2.txt
+                cat regressed_*.txt > regressed.txt
                sort regressed.txt | uniq -d 
                echo "------<<<<<<!!!!!!>>>>>>------"
                echo ""
                echo ""
                exit 1
        else
            echo "Benchmarks were run twice and a regression was detected on one run. We assume this was a fluke."
            exit 0
        fi
    else
        echo "Benchmarks were run twice and a regression was detected on one run. We assume this was a fluke."
        exit 0
            fi
        else
            echo ""
@ -82,3 +82,6 @@ else
            echo ""
            exit $EXIT_CODE
        fi
        cd ..
    done  
--- a/cli/benches/time_bench.rs
+++ b/cli/benches/time_bench.rs
@ -7,9 +7,9 @@ use criterion::{
 fn bench_group_wall_time(c: &mut Criterion) {
    let mut group = c.benchmark_group("bench-group_wall-time");
-    // calculate statistics based on a fixed(flat) 300 runs
+    // calculate statistics based on a fixed(flat) 200 runs
    group.sampling_mode(SamplingMode::Flat);
-    group.sample_size(300);
+    group.sample_size(200);
    let bench_funcs: Vec<fn(Option<&mut BenchmarkGroup<WallTime>>) -> ()> = vec![
        bench_nqueens,       // queens 11
--- a/cli/cli_utils/src/bench_utils.rs
+++ b/cli/cli_utils/src/bench_utils.rs
@ -79,7 +79,7 @@ pub fn bench_nqueens<T: Measurement>(bench_group_opt: Option<&mut BenchmarkGroup
        &example_file("benchmarks", "NQueens.roc"),
        "11",
        "nqueens",
-        "2680\n",
+        "2680\n",//2680-14200
        bench_group_opt,
    );
 }