Merge pull request #6311 from roc-lang/fix_benchmarks

Fix benchmarks failing silently
This commit is contained in:
Richard Feldman 2024-01-15 14:00:57 -05:00 committed by GitHub
commit ae0e3593a4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
16 changed files with 85 additions and 64 deletions

View file

@ -3,7 +3,6 @@ use data_encoding::HEXUPPER;
use is_executable::IsExecutable; use is_executable::IsExecutable;
use regex::Regex; use regex::Regex;
use ring::digest::{Context, Digest, SHA256}; use ring::digest::{Context, Digest, SHA256};
use std::fs::File;
use std::io::Read; use std::io::Read;
use std::{ use std::{
collections::{HashMap, HashSet, VecDeque}, collections::{HashMap, HashSet, VecDeque},
@ -22,7 +21,7 @@ fn main() {
delete_old_bench_results(); delete_old_bench_results();
if optional_args.check_executables_changed { if optional_args.check_executables_changed {
println!("Doing a test run to verify benchmarks are working correctly and generate executables."); println!("\nDoing a test run to verify benchmarks are working correctly and generate executables.\n");
std::env::set_var("BENCH_DRY_RUN", "1"); std::env::set_var("BENCH_DRY_RUN", "1");
@ -79,6 +78,7 @@ fn finish(all_regressed_benches: HashSet<String>, nr_repeat_benchmarks: usize) {
// returns all benchmarks that have regressed // returns all benchmarks that have regressed
fn do_all_benches(nr_repeat_benchmarks: usize) -> HashSet<String> { fn do_all_benches(nr_repeat_benchmarks: usize) -> HashSet<String> {
delete_old_bench_results(); delete_old_bench_results();
do_benchmark("main"); do_benchmark("main");
let mut all_regressed_benches = do_benchmark("branch"); let mut all_regressed_benches = do_benchmark("branch");
@ -91,6 +91,7 @@ fn do_all_benches(nr_repeat_benchmarks: usize) -> HashSet<String> {
for _ in 1..nr_repeat_benchmarks { for _ in 1..nr_repeat_benchmarks {
delete_old_bench_results(); delete_old_bench_results();
do_benchmark("main"); do_benchmark("main");
let regressed_benches = do_benchmark("branch"); let regressed_benches = do_benchmark("branch");
@ -110,17 +111,25 @@ fn do_all_benches(nr_repeat_benchmarks: usize) -> HashSet<String> {
// returns Vec with names of regressed benchmarks // returns Vec with names of regressed benchmarks
fn do_benchmark(branch_name: &'static str) -> HashSet<String> { fn do_benchmark(branch_name: &'static str) -> HashSet<String> {
let mut cmd_child = Command::new(format!( let mut bench_cmd =
"./bench-folder-{}/target/release/deps/time_bench", Command::new(format!(
branch_name "./bench-folder-{}/target/release/deps/time_bench",
)) branch_name
.args(&["--bench", "--noplot"]) ));
.stdout(Stdio::piped())
.stderr(Stdio::inherit())
.spawn()
.unwrap_or_else(|_| panic!("Failed to benchmark {}.", branch_name));
let stdout = cmd_child.stdout.as_mut().unwrap(); let bench_cmd_w_args =
bench_cmd.args(&["--bench", "--noplot"]);
let bench_cmd_as_str = format!("{bench_cmd_w_args:?}");
let mut bench_cmd_child =
bench_cmd_w_args
.stdout(Stdio::piped())
.stderr(Stdio::inherit())
.spawn()
.unwrap_or_else(|_| panic!("Failed to benchmark {}.", branch_name));
let stdout = bench_cmd_child.stdout.as_mut().unwrap();
let stdout_reader = BufReader::new(stdout); let stdout_reader = BufReader::new(stdout);
let stdout_lines = stdout_reader.lines(); let stdout_lines = stdout_reader.lines();
@ -147,6 +156,18 @@ fn do_benchmark(branch_name: &'static str) -> HashSet<String> {
println!(">>bench {:?}: {:?}", branch_name, line_str); println!(">>bench {:?}: {:?}", branch_name, line_str);
} }
let exit_status = bench_cmd_child.wait().expect("Failed to wait on cmd_child");
if !exit_status.success() {
panic!(
"Error: time-bench execution failed with exit code {}.\n\
See output above for error info.\n\
Command was:\n\t{}",
exit_status,
bench_cmd_as_str
);
}
regressed_benches regressed_benches
} }
@ -190,20 +211,21 @@ fn sha256_digest<R: Read>(mut reader: R) -> Result<Digest, io::Error> {
} }
fn sha_file(file_path: &Path) -> Result<String, io::Error> { fn sha_file(file_path: &Path) -> Result<String, io::Error> {
// Debug info is dependent on the dir in which executable was created, // only checking disassembly because of #6386
// so we need to strip that to be able to compare binaries. let disassembly_output = Command::new("objdump")
let no_debug_info_file_path = file_path.to_str().unwrap().to_string() + ("_no_debug_info"); .args(["-d", file_path.to_str().unwrap()])
std::fs::copy(file_path, &no_debug_info_file_path)?; .output()
.expect("failed to execute objdump");
let strip_output = Command::new("strip") assert!(disassembly_output.status.success());
.args(["--strip-debug", &no_debug_info_file_path])
.output()
.expect("failed to execute process");
assert!(strip_output.status.success()); let mut reader = BufReader::new(disassembly_output.stdout.as_slice());
// the first line contains the path, we want to skip it
let mut _discard_lines = String::new();
reader.read_line(&mut _discard_lines)?;
reader.read_line(&mut _discard_lines)?;
let no_debug_info_file = File::open(no_debug_info_file_path)?;
let reader = BufReader::new(no_debug_info_file);
let digest = sha256_digest(reader)?; let digest = sha256_digest(reader)?;
Ok(HEXUPPER.encode(digest.as_ref())) Ok(HEXUPPER.encode(digest.as_ref()))
@ -250,6 +272,7 @@ fn check_if_bench_executables_changed() -> bool {
let main_bench_hashes = calc_hashes_for_folder(&main_benches_path_str); let main_bench_hashes = calc_hashes_for_folder(&main_benches_path_str);
let branch_benches_path_str = [BENCH_FOLDER_BRANCH, bench_folder_str].join(""); let branch_benches_path_str = [BENCH_FOLDER_BRANCH, bench_folder_str].join("");
let branch_bench_hashes = calc_hashes_for_folder(&branch_benches_path_str); let branch_bench_hashes = calc_hashes_for_folder(&branch_benches_path_str);
if main_bench_hashes.keys().len() == branch_bench_hashes.keys().len() { if main_bench_hashes.keys().len() == branch_bench_hashes.keys().len() {

View file

@ -1,19 +1,12 @@
# Running the benchmarks # Running the benchmarks
Install cargo criterion: If you're not using nix, install cargo criterion:
```sh ```sh
cargo install cargo-criterion cargo install cargo-criterion
``` ```
To prevent stack overflow on the `CFold` benchmark: In the `crates/cli` folder execute:
```sh
ulimit -s unlimited
```
In the `cli` folder execute:
```sh ```sh
cargo criterion cargo criterion

View file

@ -976,13 +976,13 @@ mod cli_run {
// TODO fix QuicksortApp and then remove this! // TODO fix QuicksortApp and then remove this!
match roc_filename { match roc_filename {
"QuicksortApp.roc" => { "quicksortApp.roc" => {
eprintln!( eprintln!(
"WARNING: skipping testing benchmark {roc_filename} because the test is broken right now!" "WARNING: skipping testing benchmark {roc_filename} because the test is broken right now!"
); );
return; return;
} }
"TestAStar.roc" => { "testAStar.roc" => {
if cfg!(feature = "wasm32-cli-run") { if cfg!(feature = "wasm32-cli-run") {
eprintln!( eprintln!(
"WARNING: skipping testing benchmark {roc_filename} because it currently does not work on wasm32 due to dictionaries." "WARNING: skipping testing benchmark {roc_filename} because it currently does not work on wasm32 due to dictionaries."
@ -1137,20 +1137,20 @@ mod cli_run {
#[test] #[test]
#[cfg_attr(windows, ignore)] #[cfg_attr(windows, ignore)]
fn nqueens() { fn nqueens() {
test_benchmark("NQueens.roc", &["6"], "4\n", UseValgrind::Yes) test_benchmark("nQueens.roc", &["6"], "4\n", UseValgrind::Yes)
} }
#[test] #[test]
#[cfg_attr(windows, ignore)] #[cfg_attr(windows, ignore)]
fn cfold() { fn cfold() {
test_benchmark("CFold.roc", &["3"], "11 & 11\n", UseValgrind::Yes) test_benchmark("cFold.roc", &["3"], "11 & 11\n", UseValgrind::Yes)
} }
#[test] #[test]
#[cfg_attr(windows, ignore)] #[cfg_attr(windows, ignore)]
fn deriv() { fn deriv() {
test_benchmark( test_benchmark(
"Deriv.roc", "deriv.roc",
&["2"], &["2"],
"1 count: 6\n2 count: 22\n", "1 count: 6\n2 count: 22\n",
UseValgrind::Yes, UseValgrind::Yes,
@ -1160,14 +1160,14 @@ mod cli_run {
#[test] #[test]
#[cfg_attr(windows, ignore)] #[cfg_attr(windows, ignore)]
fn rbtree_ck() { fn rbtree_ck() {
test_benchmark("RBTreeCk.roc", &["100"], "10\n", UseValgrind::Yes) test_benchmark("rBTreeCk.roc", &["100"], "10\n", UseValgrind::Yes)
} }
#[test] #[test]
#[cfg_attr(windows, ignore)] #[cfg_attr(windows, ignore)]
fn rbtree_insert() { fn rbtree_insert() {
test_benchmark( test_benchmark(
"RBTreeInsert.roc", "rBTreeInsert.roc",
&[], &[],
"Node Black 0 {} Empty Empty\n", "Node Black 0 {} Empty Empty\n",
UseValgrind::Yes, UseValgrind::Yes,
@ -1179,25 +1179,25 @@ mod cli_run {
#[test] #[test]
fn rbtree_del() { fn rbtree_del() {
test_benchmark( test_benchmark(
"RBTreeDel.roc", "rBTreeDel.roc",
&["420"], &["420"],
&[],
"30\n", "30\n",
true UseValgrind::Yes,
) )
}*/ }
*/
#[test] #[test]
#[cfg_attr(windows, ignore)] #[cfg_attr(windows, ignore)]
fn astar() { fn astar() {
test_benchmark("TestAStar.roc", &[], "True\n", UseValgrind::No) test_benchmark("testAStar.roc", &[], "True\n", UseValgrind::No)
} }
#[test] #[test]
#[cfg_attr(windows, ignore)] #[cfg_attr(windows, ignore)]
fn base64() { fn base64() {
test_benchmark( test_benchmark(
"TestBase64.roc", "testBase64.roc",
&[], &[],
"encoded: SGVsbG8gV29ybGQ=\ndecoded: Hello World\n", "encoded: SGVsbG8gV29ybGQ=\ndecoded: Hello World\n",
UseValgrind::Yes, UseValgrind::Yes,
@ -1207,19 +1207,19 @@ mod cli_run {
#[test] #[test]
#[cfg_attr(windows, ignore)] #[cfg_attr(windows, ignore)]
fn closure() { fn closure() {
test_benchmark("Closure.roc", &[], "", UseValgrind::No) test_benchmark("closure.roc", &[], "", UseValgrind::No)
} }
#[test] #[test]
#[cfg_attr(windows, ignore)] #[cfg_attr(windows, ignore)]
fn issue2279() { fn issue2279() {
test_benchmark("Issue2279.roc", &[], "Hello, world!\n", UseValgrind::Yes) test_benchmark("issue2279.roc", &[], "Hello, world!\n", UseValgrind::Yes)
} }
#[test] #[test]
fn quicksort_app() { fn quicksort_app() {
test_benchmark( test_benchmark(
"QuicksortApp.roc", "quicksortApp.roc",
&[], &[],
"todo put the correct quicksort answer here", "todo put the correct quicksort answer here",
UseValgrind::Yes, UseValgrind::Yes,

View file

@ -248,4 +248,4 @@ del = \t, k ->
rebalanceLeft cx lx ky vy ry rebalanceLeft cx lx ky vy ry
Delmin (Del ry Bool.false) ky vy -> Delmin (Del ry Bool.false) ky vy ->
Del (Node cx lx ky vy ry) Bool.false Del (Node cx lx ky vy ry) Bool.false

File diff suppressed because one or more lines are too long

View file

@ -255,7 +255,12 @@ pub fn run_cmd<'a, I: IntoIterator<Item = &'a str>, E: IntoIterator<Item = (&'a
.stdout(Stdio::piped()) .stdout(Stdio::piped())
.stderr(Stdio::piped()) .stderr(Stdio::piped())
.spawn() .spawn()
.unwrap_or_else(|_| panic!("failed to execute cmd `{cmd_name}` in CLI test")); .unwrap_or_else(|err| {
panic!(
"Encountered error:\n\t{:?}\nWhile executing cmd:\n\t{:?}",
err, cmd_str
)
});
{ {
let stdin = child.stdin.as_mut().expect("Failed to open stdin"); let stdin = child.stdin.as_mut().expect("Failed to open stdin");
@ -269,7 +274,7 @@ pub fn run_cmd<'a, I: IntoIterator<Item = &'a str>, E: IntoIterator<Item = (&'a
let output = child let output = child
.wait_with_output() .wait_with_output()
.unwrap_or_else(|_| panic!("failed to execute cmd `{cmd_name}` in CLI test")); .unwrap_or_else(|_| panic!("Failed to execute cmd:\n\t`{:?}`", cmd_str));
Out { Out {
cmd_str, cmd_str,