From d1cd9998be65b17ee467ff9d7b28d5417c026187 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Mon, 29 Sep 2025 21:09:56 +0200 Subject: [PATCH] fix some benchmark (some of them were failing) --- src/uu/numfmt/benches/numfmt_bench.rs | 98 +++++++++++------------- src/uu/sort/benches/sort_bench.rs | 55 ++++++++++--- src/uu/sort/benches/sort_locale_bench.rs | 50 ++++++++++-- src/uucore/src/lib/features/benchmark.rs | 7 +- 4 files changed, 139 insertions(+), 71 deletions(-) diff --git a/src/uu/numfmt/benches/numfmt_bench.rs b/src/uu/numfmt/benches/numfmt_bench.rs index f136b6f6e..d75bf4ad1 100644 --- a/src/uu/numfmt/benches/numfmt_bench.rs +++ b/src/uu/numfmt/benches/numfmt_bench.rs @@ -5,108 +5,100 @@ use divan::{Bencher, black_box}; use uu_numfmt::uumain; -use uucore::benchmark::{run_util_function, setup_test_file, text_data}; +use uucore::benchmark::run_util_function; -/// Benchmark SI formatting with different number counts -#[divan::bench(args = [1_000_000])] +/// Benchmark SI formatting by passing numbers as command-line arguments +#[divan::bench(args = [10_000])] fn numfmt_to_si(bencher: Bencher, count: usize) { - let data = text_data::generate_numbers(count); - let file_path = setup_test_file(data.as_bytes()); - let file_path_str = file_path.to_str().unwrap(); + let numbers: Vec<String> = (1..=count).map(|n| n.to_string()).collect(); + let mut args = vec!["--to=si"]; + let number_refs: Vec<&str> = numbers.iter().map(|s| s.as_str()).collect(); + args.extend(number_refs); bencher.bench(|| { - black_box(run_util_function(uumain, &["--to=si", file_path_str])); + black_box(run_util_function(uumain, &args)); }); } /// Benchmark SI formatting with precision format -#[divan::bench(args = [1_000_000])] +#[divan::bench(args = [10_000])] fn numfmt_to_si_precision(bencher: Bencher, count: usize) { - let data = text_data::generate_numbers(count); - let file_path = setup_test_file(data.as_bytes()); - let file_path_str = file_path.to_str().unwrap(); + let numbers: Vec<String> = 
(1..=count).map(|n| n.to_string()).collect(); + let mut args = vec!["--to=si", "--format=%.6f"]; + let number_refs: Vec<&str> = numbers.iter().map(|s| s.as_str()).collect(); + args.extend(number_refs); bencher.bench(|| { - black_box(run_util_function( - uumain, - &["--to=si", "--format=%.6f", file_path_str], - )); + black_box(run_util_function(uumain, &args)); }); } /// Benchmark IEC (binary) formatting -#[divan::bench(args = [1_000_000])] +#[divan::bench(args = [10_000])] fn numfmt_to_iec(bencher: Bencher, count: usize) { - let data = text_data::generate_numbers(count); - let file_path = setup_test_file(data.as_bytes()); - let file_path_str = file_path.to_str().unwrap(); + let numbers: Vec<String> = (1..=count).map(|n| n.to_string()).collect(); + let mut args = vec!["--to=iec"]; + let number_refs: Vec<&str> = numbers.iter().map(|s| s.as_str()).collect(); + args.extend(number_refs); bencher.bench(|| { - black_box(run_util_function(uumain, &["--to=iec", file_path_str])); + black_box(run_util_function(uumain, &args)); }); } /// Benchmark parsing from SI format back to raw numbers -#[divan::bench(args = [1_000_000])] +#[divan::bench(args = [10_000])] fn numfmt_from_si(bencher: Bencher, count: usize) { - // Generate SI formatted data (e.g., "1.0K", "2.0K", etc.) - let data = (1..=count) - .map(|n| format!("{:.1}K", n as f64 / 1000.0)) - .collect::<Vec<_>>() - .join("\n"); - let file_path = setup_test_file(data.as_bytes()); - let file_path_str = file_path.to_str().unwrap(); + // Generate SI formatted data (e.g., "1K", "2K", etc.) 
+ let numbers: Vec<String> = (1..=count).map(|n| format!("{n}K")).collect(); + let mut args = vec!["--from=si"]; + let number_refs: Vec<&str> = numbers.iter().map(|s| s.as_str()).collect(); + args.extend(number_refs); bencher.bench(|| { - black_box(run_util_function(uumain, &["--from=si", file_path_str])); + black_box(run_util_function(uumain, &args)); }); } /// Benchmark large numbers with SI formatting -#[divan::bench(args = [1_000_000])] +#[divan::bench(args = [10_000])] fn numfmt_large_numbers_si(bencher: Bencher, count: usize) { // Generate larger numbers (millions to billions range) - let data = (1..=count) - .map(|n| (n * 1_000_000).to_string()) - .collect::<Vec<_>>() - .join("\n"); - let file_path = setup_test_file(data.as_bytes()); - let file_path_str = file_path.to_str().unwrap(); + let numbers: Vec<String> = (1..=count).map(|n| (n * 1_000_000).to_string()).collect(); + let mut args = vec!["--to=si"]; + let number_refs: Vec<&str> = numbers.iter().map(|s| s.as_str()).collect(); + args.extend(number_refs); bencher.bench(|| { - black_box(run_util_function(uumain, &["--to=si", file_path_str])); + black_box(run_util_function(uumain, &args)); }); } /// Benchmark different padding widths -#[divan::bench(args = [(1_000_000, 50)])] +#[divan::bench(args = [(10_000, 50)])] fn numfmt_padding(bencher: Bencher, (count, padding): (usize, usize)) { - let data = text_data::generate_numbers(count); - let file_path = setup_test_file(data.as_bytes()); - let file_path_str = file_path.to_str().unwrap(); + let numbers: Vec<String> = (1..=count).map(|n| n.to_string()).collect(); let padding_arg = format!("--padding={padding}"); + let mut args = vec!["--to=si", &padding_arg]; + let number_refs: Vec<&str> = numbers.iter().map(|s| s.as_str()).collect(); + args.extend(number_refs); bencher.bench(|| { - black_box(run_util_function( - uumain, - &["--to=si", &padding_arg, file_path_str], - )); + black_box(run_util_function(uumain, &args)); }); } /// Benchmark round modes with SI formatting -#[divan::bench(args = 
[("up", 1_000_000), ("down", 1_000_000), ("towards-zero", 1_000_000)])] +#[divan::bench(args = [("up", 10_000), ("down", 10_000), ("towards-zero", 10_000)])] fn numfmt_round_modes(bencher: Bencher, (round_mode, count): (&str, usize)) { - let data = text_data::generate_numbers(count); - let file_path = setup_test_file(data.as_bytes()); - let file_path_str = file_path.to_str().unwrap(); + let numbers: Vec<String> = (1..=count).map(|n| n.to_string()).collect(); let round_arg = format!("--round={round_mode}"); + let mut args = vec!["--to=si", &round_arg]; + let number_refs: Vec<&str> = numbers.iter().map(|s| s.as_str()).collect(); + args.extend(number_refs); bencher.bench(|| { - black_box(run_util_function( - uumain, - &["--to=si", &round_arg, file_path_str], - )); + black_box(run_util_function(uumain, &args)); }); } diff --git a/src/uu/sort/benches/sort_bench.rs b/src/uu/sort/benches/sort_bench.rs index 93e970516..43461b2fb 100644 --- a/src/uu/sort/benches/sort_bench.rs +++ b/src/uu/sort/benches/sort_bench.rs @@ -4,6 +4,7 @@ // file that was distributed with this source code. 
use divan::{Bencher, black_box}; +use tempfile::NamedTempFile; use uu_sort::uumain; use uucore::benchmark::{run_util_function, setup_test_file, text_data}; @@ -12,9 +13,14 @@ use uucore::benchmark::{run_util_function, setup_test_file, text_data}; fn sort_ascii_only(bencher: Bencher, num_lines: usize) { let data = text_data::generate_ascii_data(num_lines); let file_path = setup_test_file(&data); + let output_file = NamedTempFile::new().unwrap(); + let output_path = output_file.path().to_str().unwrap(); bencher.bench(|| { - black_box(run_util_function(uumain, &[file_path.to_str().unwrap()])); + black_box(run_util_function( + uumain, + &["-o", output_path, file_path.to_str().unwrap()], + )); }); } @@ -23,9 +29,14 @@ fn sort_ascii_only(bencher: Bencher, num_lines: usize) { fn sort_accented_data(bencher: Bencher, num_lines: usize) { let data = text_data::generate_accented_data(num_lines); let file_path = setup_test_file(&data); + let output_file = NamedTempFile::new().unwrap(); + let output_path = output_file.path().to_str().unwrap(); bencher.bench(|| { - black_box(run_util_function(uumain, &[file_path.to_str().unwrap()])); + black_box(run_util_function( + uumain, + &["-o", output_path, file_path.to_str().unwrap()], + )); }); } @@ -34,9 +45,14 @@ fn sort_accented_data(bencher: Bencher, num_lines: usize) { fn sort_mixed_data(bencher: Bencher, num_lines: usize) { let data = text_data::generate_mixed_data(num_lines); let file_path = setup_test_file(&data); + let output_file = NamedTempFile::new().unwrap(); + let output_path = output_file.path().to_str().unwrap(); bencher.bench(|| { - black_box(run_util_function(uumain, &[file_path.to_str().unwrap()])); + black_box(run_util_function( + uumain, + &["-o", output_path, file_path.to_str().unwrap()], + )); }); } @@ -45,9 +61,14 @@ fn sort_mixed_data(bencher: Bencher, num_lines: usize) { fn sort_case_sensitive(bencher: Bencher, num_lines: usize) { let data = text_data::generate_case_sensitive_data(num_lines); let file_path = 
setup_test_file(&data); + let output_file = NamedTempFile::new().unwrap(); + let output_path = output_file.path().to_str().unwrap(); bencher.bench(|| { - black_box(run_util_function(uumain, &[file_path.to_str().unwrap()])); + black_box(run_util_function( + uumain, + &["-o", output_path, file_path.to_str().unwrap()], + )); }); } @@ -56,11 +77,13 @@ fn sort_case_sensitive(bencher: Bencher, num_lines: usize) { fn sort_case_insensitive(bencher: Bencher, num_lines: usize) { let data = text_data::generate_case_sensitive_data(num_lines); let file_path = setup_test_file(&data); + let output_file = NamedTempFile::new().unwrap(); + let output_path = output_file.path().to_str().unwrap(); bencher.bench(|| { black_box(run_util_function( uumain, - &["-f", file_path.to_str().unwrap()], + &["-f", "-o", output_path, file_path.to_str().unwrap()], )); }); } @@ -70,11 +93,13 @@ fn sort_case_insensitive(bencher: Bencher, num_lines: usize) { fn sort_dictionary_order(bencher: Bencher, num_lines: usize) { let data = text_data::generate_mixed_data(num_lines); let file_path = setup_test_file(&data); + let output_file = NamedTempFile::new().unwrap(); + let output_path = output_file.path().to_str().unwrap(); bencher.bench(|| { black_box(run_util_function( uumain, - &["-d", file_path.to_str().unwrap()], + &["-d", "-o", output_path, file_path.to_str().unwrap()], )); }); } @@ -92,10 +117,13 @@ fn sort_numeric(bencher: Bencher, num_lines: usize) { let file_path = setup_test_file(&data); + let output_file = NamedTempFile::new().unwrap(); + let output_path = output_file.path().to_str().unwrap(); + bencher.bench(|| { black_box(run_util_function( uumain, - &["-n", file_path.to_str().unwrap()], + &["-n", "-o", output_path, file_path.to_str().unwrap()], )); }); } @@ -105,11 +133,13 @@ fn sort_numeric(bencher: Bencher, num_lines: usize) { fn sort_reverse_locale(bencher: Bencher, num_lines: usize) { let data = text_data::generate_accented_data(num_lines); let file_path = setup_test_file(&data); + let 
output_file = NamedTempFile::new().unwrap(); + let output_path = output_file.path().to_str().unwrap(); bencher.bench(|| { black_box(run_util_function( uumain, - &["-r", file_path.to_str().unwrap()], + &["-r", "-o", output_path, file_path.to_str().unwrap()], )); }); } @@ -130,11 +160,14 @@ fn sort_key_field(bencher: Bencher, num_lines: usize) { let file_path = setup_test_file(&data); + let output_file = NamedTempFile::new().unwrap(); + let output_path = output_file.path().to_str().unwrap(); + bencher.bench(|| { // Sort by second field black_box(run_util_function( uumain, - &["-k", "2", file_path.to_str().unwrap()], + &["-k", "2", "-o", output_path, file_path.to_str().unwrap()], )); }); } @@ -144,11 +177,13 @@ fn sort_key_field(bencher: Bencher, num_lines: usize) { fn sort_unique_locale(bencher: Bencher, num_lines: usize) { let data = text_data::generate_accented_data(num_lines); let file_path = setup_test_file(&data); + let output_file = NamedTempFile::new().unwrap(); + let output_path = output_file.path().to_str().unwrap(); bencher.bench(|| { black_box(run_util_function( uumain, - &["-u", file_path.to_str().unwrap()], + &["-u", "-o", output_path, file_path.to_str().unwrap()], )); }); } diff --git a/src/uu/sort/benches/sort_locale_bench.rs b/src/uu/sort/benches/sort_locale_bench.rs index f95af20f2..e38283560 100644 --- a/src/uu/sort/benches/sort_locale_bench.rs +++ b/src/uu/sort/benches/sort_locale_bench.rs @@ -5,6 +5,7 @@ use divan::{Bencher, black_box}; use std::env; +use tempfile::NamedTempFile; use uu_sort::uumain; use uucore::benchmark::{run_util_function, setup_test_file, text_data}; @@ -18,7 +19,12 @@ fn sort_ascii_c_locale(bencher: Bencher) { unsafe { env::set_var("LC_ALL", "C"); } - black_box(run_util_function(uumain, &[file_path.to_str().unwrap()])); + let output_file = NamedTempFile::new().unwrap(); + let output_path = output_file.path().to_str().unwrap(); + black_box(run_util_function( + uumain, + &["-o", output_path, file_path.to_str().unwrap()], + )); 
}); } @@ -32,7 +38,12 @@ fn sort_ascii_utf8_locale(bencher: Bencher) { unsafe { env::set_var("LC_ALL", "en_US.UTF-8"); } - black_box(run_util_function(uumain, &[file_path.to_str().unwrap()])); + let output_file = NamedTempFile::new().unwrap(); + let output_path = output_file.path().to_str().unwrap(); + black_box(run_util_function( + uumain, + &["-o", output_path, file_path.to_str().unwrap()], + )); }); } @@ -46,7 +57,12 @@ fn sort_mixed_c_locale(bencher: Bencher) { unsafe { env::set_var("LC_ALL", "C"); } - black_box(run_util_function(uumain, &[file_path.to_str().unwrap()])); + let output_file = NamedTempFile::new().unwrap(); + let output_path = output_file.path().to_str().unwrap(); + black_box(run_util_function( + uumain, + &["-o", output_path, file_path.to_str().unwrap()], + )); }); } @@ -60,7 +76,12 @@ fn sort_mixed_utf8_locale(bencher: Bencher) { unsafe { env::set_var("LC_ALL", "en_US.UTF-8"); } - black_box(run_util_function(uumain, &[file_path.to_str().unwrap()])); + let output_file = NamedTempFile::new().unwrap(); + let output_path = output_file.path().to_str().unwrap(); + black_box(run_util_function( + uumain, + &["-o", output_path, file_path.to_str().unwrap()], + )); }); } @@ -74,7 +95,12 @@ fn sort_german_c_locale(bencher: Bencher) { unsafe { env::set_var("LC_ALL", "C"); } - black_box(run_util_function(uumain, &[file_path.to_str().unwrap()])); + let output_file = NamedTempFile::new().unwrap(); + let output_path = output_file.path().to_str().unwrap(); + black_box(run_util_function( + uumain, + &["-o", output_path, file_path.to_str().unwrap()], + )); }); } @@ -88,7 +114,12 @@ fn sort_german_locale(bencher: Bencher) { unsafe { env::set_var("LC_ALL", "de_DE.UTF-8"); } - black_box(run_util_function(uumain, &[file_path.to_str().unwrap()])); + let output_file = NamedTempFile::new().unwrap(); + let output_path = output_file.path().to_str().unwrap(); + black_box(run_util_function( + uumain, + &["-o", output_path, file_path.to_str().unwrap()], + )); }); } @@ -102,7 
+133,12 @@ fn sort_random_strings(bencher: Bencher) { unsafe { env::set_var("LC_ALL", "en_US.UTF-8"); } - black_box(run_util_function(uumain, &[file_path.to_str().unwrap()])); + let output_file = NamedTempFile::new().unwrap(); + let output_path = output_file.path().to_str().unwrap(); + black_box(run_util_function( + uumain, + &["-o", output_path, file_path.to_str().unwrap()], + )); }); } diff --git a/src/uucore/src/lib/features/benchmark.rs b/src/uucore/src/lib/features/benchmark.rs index bc6949384..5f87540ae 100644 --- a/src/uucore/src/lib/features/benchmark.rs +++ b/src/uucore/src/lib/features/benchmark.rs @@ -19,6 +19,9 @@ pub fn create_test_file(data: &[u8], temp_dir: &Path) -> PathBuf { let mut writer = BufWriter::new(file); writer.write_all(data).unwrap(); writer.flush().unwrap(); + // Ensure data is fully written to disk before returning + std::mem::drop(writer); + File::open(&file_path).unwrap().sync_all().unwrap(); file_path } @@ -28,7 +31,9 @@ pub fn run_util_function<F>(util_func: F, args: &[&str]) -> i32 where F: FnOnce(std::vec::IntoIter<std::ffi::OsString>) -> i32, { - let os_args: Vec<std::ffi::OsString> = args.iter().map(|s| (*s).into()).collect(); + // Prepend a dummy program name as argv[0] since clap expects it + let mut os_args: Vec<std::ffi::OsString> = vec!["benchmark".into()]; + os_args.extend(args.iter().map(|s| (*s).into())); util_func(os_args.into_iter()) }