Merge pull request #6311 from roc-lang/fix_benchmarks

Fix benchmarks failing silently
This commit is contained in:
Richard Feldman 2024-01-15 14:00:57 -05:00 committed by GitHub
commit ae0e3593a4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
16 changed files with 85 additions and 64 deletions

View file

@ -3,7 +3,6 @@ use data_encoding::HEXUPPER;
use is_executable::IsExecutable;
use regex::Regex;
use ring::digest::{Context, Digest, SHA256};
use std::fs::File;
use std::io::Read;
use std::{
collections::{HashMap, HashSet, VecDeque},
@ -22,7 +21,7 @@ fn main() {
delete_old_bench_results();
if optional_args.check_executables_changed {
println!("Doing a test run to verify benchmarks are working correctly and generate executables.");
println!("\nDoing a test run to verify benchmarks are working correctly and generate executables.\n");
std::env::set_var("BENCH_DRY_RUN", "1");
@ -79,6 +78,7 @@ fn finish(all_regressed_benches: HashSet<String>, nr_repeat_benchmarks: usize) {
// returns all benchmarks that have regressed
fn do_all_benches(nr_repeat_benchmarks: usize) -> HashSet<String> {
delete_old_bench_results();
do_benchmark("main");
let mut all_regressed_benches = do_benchmark("branch");
@ -91,6 +91,7 @@ fn do_all_benches(nr_repeat_benchmarks: usize) -> HashSet<String> {
for _ in 1..nr_repeat_benchmarks {
delete_old_bench_results();
do_benchmark("main");
let regressed_benches = do_benchmark("branch");
@ -110,17 +111,25 @@ fn do_all_benches(nr_repeat_benchmarks: usize) -> HashSet<String> {
// returns Vec with names of regressed benchmarks
fn do_benchmark(branch_name: &'static str) -> HashSet<String> {
let mut cmd_child = Command::new(format!(
"./bench-folder-{}/target/release/deps/time_bench",
branch_name
))
.args(&["--bench", "--noplot"])
.stdout(Stdio::piped())
.stderr(Stdio::inherit())
.spawn()
.unwrap_or_else(|_| panic!("Failed to benchmark {}.", branch_name));
let mut bench_cmd =
Command::new(format!(
"./bench-folder-{}/target/release/deps/time_bench",
branch_name
));
let stdout = cmd_child.stdout.as_mut().unwrap();
let bench_cmd_w_args =
bench_cmd.args(&["--bench", "--noplot"]);
let bench_cmd_as_str = format!("{bench_cmd_w_args:?}");
let mut bench_cmd_child =
bench_cmd_w_args
.stdout(Stdio::piped())
.stderr(Stdio::inherit())
.spawn()
.unwrap_or_else(|_| panic!("Failed to benchmark {}.", branch_name));
let stdout = bench_cmd_child.stdout.as_mut().unwrap();
let stdout_reader = BufReader::new(stdout);
let stdout_lines = stdout_reader.lines();
@ -147,6 +156,18 @@ fn do_benchmark(branch_name: &'static str) -> HashSet<String> {
println!(">>bench {:?}: {:?}", branch_name, line_str);
}
let exit_status = bench_cmd_child.wait().expect("Failed to wait on cmd_child");
if !exit_status.success() {
panic!(
"Error: time-bench execution failed with exit code {}.\n\
See output above for error info.\n\
Command was:\n\t{}",
exit_status,
bench_cmd_as_str
);
}
regressed_benches
}
@ -190,20 +211,21 @@ fn sha256_digest<R: Read>(mut reader: R) -> Result<Digest, io::Error> {
}
fn sha_file(file_path: &Path) -> Result<String, io::Error> {
// Debug info is dependent on the dir in which executable was created,
// so we need to strip that to be able to compare binaries.
let no_debug_info_file_path = file_path.to_str().unwrap().to_string() + ("_no_debug_info");
std::fs::copy(file_path, &no_debug_info_file_path)?;
// only checking disassembly because of #6386
let disassembly_output = Command::new("objdump")
.args(["-d", file_path.to_str().unwrap()])
.output()
.expect("failed to execute objdump");
let strip_output = Command::new("strip")
.args(["--strip-debug", &no_debug_info_file_path])
.output()
.expect("failed to execute process");
assert!(disassembly_output.status.success());
assert!(strip_output.status.success());
let mut reader = BufReader::new(disassembly_output.stdout.as_slice());
// the first line contains the path, we want to skip it
let mut _discard_lines = String::new();
reader.read_line(&mut _discard_lines)?;
reader.read_line(&mut _discard_lines)?;
let no_debug_info_file = File::open(no_debug_info_file_path)?;
let reader = BufReader::new(no_debug_info_file);
let digest = sha256_digest(reader)?;
Ok(HEXUPPER.encode(digest.as_ref()))
@ -250,6 +272,7 @@ fn check_if_bench_executables_changed() -> bool {
let main_bench_hashes = calc_hashes_for_folder(&main_benches_path_str);
let branch_benches_path_str = [BENCH_FOLDER_BRANCH, bench_folder_str].join("");
let branch_bench_hashes = calc_hashes_for_folder(&branch_benches_path_str);
if main_bench_hashes.keys().len() == branch_bench_hashes.keys().len() {

View file

@ -1,19 +1,12 @@
# Running the benchmarks
Install cargo criterion:
If you're not using nix, install cargo criterion:
```sh
cargo install cargo-criterion
```
To prevent stack overflow on the `CFold` benchmark:
```sh
ulimit -s unlimited
```
In the `cli` folder execute:
In the `crates/cli` folder execute:
```sh
cargo criterion

View file

@ -976,13 +976,13 @@ mod cli_run {
// TODO fix QuicksortApp and then remove this!
match roc_filename {
"QuicksortApp.roc" => {
"quicksortApp.roc" => {
eprintln!(
"WARNING: skipping testing benchmark {roc_filename} because the test is broken right now!"
);
return;
}
"TestAStar.roc" => {
"testAStar.roc" => {
if cfg!(feature = "wasm32-cli-run") {
eprintln!(
"WARNING: skipping testing benchmark {roc_filename} because it currently does not work on wasm32 due to dictionaries."
@ -1137,20 +1137,20 @@ mod cli_run {
#[test]
#[cfg_attr(windows, ignore)]
fn nqueens() {
test_benchmark("NQueens.roc", &["6"], "4\n", UseValgrind::Yes)
test_benchmark("nQueens.roc", &["6"], "4\n", UseValgrind::Yes)
}
#[test]
#[cfg_attr(windows, ignore)]
fn cfold() {
test_benchmark("CFold.roc", &["3"], "11 & 11\n", UseValgrind::Yes)
test_benchmark("cFold.roc", &["3"], "11 & 11\n", UseValgrind::Yes)
}
#[test]
#[cfg_attr(windows, ignore)]
fn deriv() {
test_benchmark(
"Deriv.roc",
"deriv.roc",
&["2"],
"1 count: 6\n2 count: 22\n",
UseValgrind::Yes,
@ -1160,14 +1160,14 @@ mod cli_run {
#[test]
#[cfg_attr(windows, ignore)]
fn rbtree_ck() {
test_benchmark("RBTreeCk.roc", &["100"], "10\n", UseValgrind::Yes)
test_benchmark("rBTreeCk.roc", &["100"], "10\n", UseValgrind::Yes)
}
#[test]
#[cfg_attr(windows, ignore)]
fn rbtree_insert() {
test_benchmark(
"RBTreeInsert.roc",
"rBTreeInsert.roc",
&[],
"Node Black 0 {} Empty Empty\n",
UseValgrind::Yes,
@ -1179,25 +1179,25 @@ mod cli_run {
#[test]
fn rbtree_del() {
test_benchmark(
"RBTreeDel.roc",
"rBTreeDel.roc",
&["420"],
&[],
"30\n",
true
UseValgrind::Yes,
)
}*/
}
*/
#[test]
#[cfg_attr(windows, ignore)]
fn astar() {
test_benchmark("TestAStar.roc", &[], "True\n", UseValgrind::No)
test_benchmark("testAStar.roc", &[], "True\n", UseValgrind::No)
}
#[test]
#[cfg_attr(windows, ignore)]
fn base64() {
test_benchmark(
"TestBase64.roc",
"testBase64.roc",
&[],
"encoded: SGVsbG8gV29ybGQ=\ndecoded: Hello World\n",
UseValgrind::Yes,
@ -1207,19 +1207,19 @@ mod cli_run {
#[test]
#[cfg_attr(windows, ignore)]
fn closure() {
test_benchmark("Closure.roc", &[], "", UseValgrind::No)
test_benchmark("closure.roc", &[], "", UseValgrind::No)
}
#[test]
#[cfg_attr(windows, ignore)]
fn issue2279() {
test_benchmark("Issue2279.roc", &[], "Hello, world!\n", UseValgrind::Yes)
test_benchmark("issue2279.roc", &[], "Hello, world!\n", UseValgrind::Yes)
}
#[test]
fn quicksort_app() {
test_benchmark(
"QuicksortApp.roc",
"quicksortApp.roc",
&[],
"todo put the correct quicksort answer here",
UseValgrind::Yes,

View file

@ -248,4 +248,4 @@ del = \t, k ->
rebalanceLeft cx lx ky vy ry
Delmin (Del ry Bool.false) ky vy ->
Del (Node cx lx ky vy ry) Bool.false
Del (Node cx lx ky vy ry) Bool.false

File diff suppressed because one or more lines are too long

View file

@ -255,7 +255,12 @@ pub fn run_cmd<'a, I: IntoIterator<Item = &'a str>, E: IntoIterator<Item = (&'a
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.spawn()
.unwrap_or_else(|_| panic!("failed to execute cmd `{cmd_name}` in CLI test"));
.unwrap_or_else(|err| {
panic!(
"Encountered error:\n\t{:?}\nWhile executing cmd:\n\t{:?}",
err, cmd_str
)
});
{
let stdin = child.stdin.as_mut().expect("Failed to open stdin");
@ -269,7 +274,7 @@ pub fn run_cmd<'a, I: IntoIterator<Item = &'a str>, E: IntoIterator<Item = (&'a
let output = child
.wait_with_output()
.unwrap_or_else(|_| panic!("failed to execute cmd `{cmd_name}` in CLI test"));
.unwrap_or_else(|_| panic!("Failed to execute cmd:\n\t`{:?}`", cmd_str));
Out {
cmd_str,