fix some benchmarks (some of them were failing)

This commit is contained in:
Sylvestre Ledru 2025-09-29 21:09:56 +02:00
parent 5c15d7939b
commit d1cd9998be
4 changed files with 139 additions and 71 deletions

View file

@ -5,108 +5,100 @@
use divan::{Bencher, black_box};
use uu_numfmt::uumain;
use uucore::benchmark::{run_util_function, setup_test_file, text_data};
use uucore::benchmark::run_util_function;
/// Benchmark SI formatting with different number counts
#[divan::bench(args = [1_000_000])]
/// Benchmark SI formatting by passing numbers as command-line arguments
#[divan::bench(args = [10_000])]
fn numfmt_to_si(bencher: Bencher, count: usize) {
let data = text_data::generate_numbers(count);
let file_path = setup_test_file(data.as_bytes());
let file_path_str = file_path.to_str().unwrap();
let numbers: Vec<String> = (1..=count).map(|n| n.to_string()).collect();
let mut args = vec!["--to=si"];
let number_refs: Vec<&str> = numbers.iter().map(|s| s.as_str()).collect();
args.extend(number_refs);
bencher.bench(|| {
black_box(run_util_function(uumain, &["--to=si", file_path_str]));
black_box(run_util_function(uumain, &args));
});
}
/// Benchmark SI formatting with precision format
#[divan::bench(args = [1_000_000])]
#[divan::bench(args = [10_000])]
fn numfmt_to_si_precision(bencher: Bencher, count: usize) {
let data = text_data::generate_numbers(count);
let file_path = setup_test_file(data.as_bytes());
let file_path_str = file_path.to_str().unwrap();
let numbers: Vec<String> = (1..=count).map(|n| n.to_string()).collect();
let mut args = vec!["--to=si", "--format=%.6f"];
let number_refs: Vec<&str> = numbers.iter().map(|s| s.as_str()).collect();
args.extend(number_refs);
bencher.bench(|| {
black_box(run_util_function(
uumain,
&["--to=si", "--format=%.6f", file_path_str],
));
black_box(run_util_function(uumain, &args));
});
}
/// Benchmark IEC (binary) formatting
#[divan::bench(args = [1_000_000])]
#[divan::bench(args = [10_000])]
fn numfmt_to_iec(bencher: Bencher, count: usize) {
let data = text_data::generate_numbers(count);
let file_path = setup_test_file(data.as_bytes());
let file_path_str = file_path.to_str().unwrap();
let numbers: Vec<String> = (1..=count).map(|n| n.to_string()).collect();
let mut args = vec!["--to=iec"];
let number_refs: Vec<&str> = numbers.iter().map(|s| s.as_str()).collect();
args.extend(number_refs);
bencher.bench(|| {
black_box(run_util_function(uumain, &["--to=iec", file_path_str]));
black_box(run_util_function(uumain, &args));
});
}
/// Benchmark parsing from SI format back to raw numbers
#[divan::bench(args = [1_000_000])]
#[divan::bench(args = [10_000])]
fn numfmt_from_si(bencher: Bencher, count: usize) {
// Generate SI formatted data (e.g., "1.0K", "2.0K", etc.)
let data = (1..=count)
.map(|n| format!("{:.1}K", n as f64 / 1000.0))
.collect::<Vec<_>>()
.join("\n");
let file_path = setup_test_file(data.as_bytes());
let file_path_str = file_path.to_str().unwrap();
// Generate SI formatted data (e.g., "1K", "2K", etc.)
let numbers: Vec<String> = (1..=count).map(|n| format!("{n}K")).collect();
let mut args = vec!["--from=si"];
let number_refs: Vec<&str> = numbers.iter().map(|s| s.as_str()).collect();
args.extend(number_refs);
bencher.bench(|| {
black_box(run_util_function(uumain, &["--from=si", file_path_str]));
black_box(run_util_function(uumain, &args));
});
}
/// Benchmark large numbers with SI formatting
#[divan::bench(args = [1_000_000])]
#[divan::bench(args = [10_000])]
fn numfmt_large_numbers_si(bencher: Bencher, count: usize) {
// Generate larger numbers (millions to billions range)
let data = (1..=count)
.map(|n| (n * 1_000_000).to_string())
.collect::<Vec<_>>()
.join("\n");
let file_path = setup_test_file(data.as_bytes());
let file_path_str = file_path.to_str().unwrap();
let numbers: Vec<String> = (1..=count).map(|n| (n * 1_000_000).to_string()).collect();
let mut args = vec!["--to=si"];
let number_refs: Vec<&str> = numbers.iter().map(|s| s.as_str()).collect();
args.extend(number_refs);
bencher.bench(|| {
black_box(run_util_function(uumain, &["--to=si", file_path_str]));
black_box(run_util_function(uumain, &args));
});
}
/// Benchmark different padding widths
#[divan::bench(args = [(1_000_000, 50)])]
#[divan::bench(args = [(10_000, 50)])]
fn numfmt_padding(bencher: Bencher, (count, padding): (usize, usize)) {
let data = text_data::generate_numbers(count);
let file_path = setup_test_file(data.as_bytes());
let file_path_str = file_path.to_str().unwrap();
let numbers: Vec<String> = (1..=count).map(|n| n.to_string()).collect();
let padding_arg = format!("--padding={padding}");
let mut args = vec!["--to=si", &padding_arg];
let number_refs: Vec<&str> = numbers.iter().map(|s| s.as_str()).collect();
args.extend(number_refs);
bencher.bench(|| {
black_box(run_util_function(
uumain,
&["--to=si", &padding_arg, file_path_str],
));
black_box(run_util_function(uumain, &args));
});
}
/// Benchmark round modes with SI formatting
#[divan::bench(args = [("up", 1_000_000), ("down", 1_000_000), ("towards-zero", 1_000_000)])]
#[divan::bench(args = [("up", 10_000), ("down", 10_000), ("towards-zero", 10_000)])]
fn numfmt_round_modes(bencher: Bencher, (round_mode, count): (&str, usize)) {
let data = text_data::generate_numbers(count);
let file_path = setup_test_file(data.as_bytes());
let file_path_str = file_path.to_str().unwrap();
let numbers: Vec<String> = (1..=count).map(|n| n.to_string()).collect();
let round_arg = format!("--round={round_mode}");
let mut args = vec!["--to=si", &round_arg];
let number_refs: Vec<&str> = numbers.iter().map(|s| s.as_str()).collect();
args.extend(number_refs);
bencher.bench(|| {
black_box(run_util_function(
uumain,
&["--to=si", &round_arg, file_path_str],
));
black_box(run_util_function(uumain, &args));
});
}

View file

@ -4,6 +4,7 @@
// file that was distributed with this source code.
use divan::{Bencher, black_box};
use tempfile::NamedTempFile;
use uu_sort::uumain;
use uucore::benchmark::{run_util_function, setup_test_file, text_data};
@ -12,9 +13,14 @@ use uucore::benchmark::{run_util_function, setup_test_file, text_data};
fn sort_ascii_only(bencher: Bencher, num_lines: usize) {
let data = text_data::generate_ascii_data(num_lines);
let file_path = setup_test_file(&data);
let output_file = NamedTempFile::new().unwrap();
let output_path = output_file.path().to_str().unwrap();
bencher.bench(|| {
black_box(run_util_function(uumain, &[file_path.to_str().unwrap()]));
black_box(run_util_function(
uumain,
&["-o", output_path, file_path.to_str().unwrap()],
));
});
}
@ -23,9 +29,14 @@ fn sort_ascii_only(bencher: Bencher, num_lines: usize) {
fn sort_accented_data(bencher: Bencher, num_lines: usize) {
let data = text_data::generate_accented_data(num_lines);
let file_path = setup_test_file(&data);
let output_file = NamedTempFile::new().unwrap();
let output_path = output_file.path().to_str().unwrap();
bencher.bench(|| {
black_box(run_util_function(uumain, &[file_path.to_str().unwrap()]));
black_box(run_util_function(
uumain,
&["-o", output_path, file_path.to_str().unwrap()],
));
});
}
@ -34,9 +45,14 @@ fn sort_accented_data(bencher: Bencher, num_lines: usize) {
fn sort_mixed_data(bencher: Bencher, num_lines: usize) {
let data = text_data::generate_mixed_data(num_lines);
let file_path = setup_test_file(&data);
let output_file = NamedTempFile::new().unwrap();
let output_path = output_file.path().to_str().unwrap();
bencher.bench(|| {
black_box(run_util_function(uumain, &[file_path.to_str().unwrap()]));
black_box(run_util_function(
uumain,
&["-o", output_path, file_path.to_str().unwrap()],
));
});
}
@ -45,9 +61,14 @@ fn sort_mixed_data(bencher: Bencher, num_lines: usize) {
fn sort_case_sensitive(bencher: Bencher, num_lines: usize) {
let data = text_data::generate_case_sensitive_data(num_lines);
let file_path = setup_test_file(&data);
let output_file = NamedTempFile::new().unwrap();
let output_path = output_file.path().to_str().unwrap();
bencher.bench(|| {
black_box(run_util_function(uumain, &[file_path.to_str().unwrap()]));
black_box(run_util_function(
uumain,
&["-o", output_path, file_path.to_str().unwrap()],
));
});
}
@ -56,11 +77,13 @@ fn sort_case_sensitive(bencher: Bencher, num_lines: usize) {
fn sort_case_insensitive(bencher: Bencher, num_lines: usize) {
let data = text_data::generate_case_sensitive_data(num_lines);
let file_path = setup_test_file(&data);
let output_file = NamedTempFile::new().unwrap();
let output_path = output_file.path().to_str().unwrap();
bencher.bench(|| {
black_box(run_util_function(
uumain,
&["-f", file_path.to_str().unwrap()],
&["-f", "-o", output_path, file_path.to_str().unwrap()],
));
});
}
@ -70,11 +93,13 @@ fn sort_case_insensitive(bencher: Bencher, num_lines: usize) {
fn sort_dictionary_order(bencher: Bencher, num_lines: usize) {
let data = text_data::generate_mixed_data(num_lines);
let file_path = setup_test_file(&data);
let output_file = NamedTempFile::new().unwrap();
let output_path = output_file.path().to_str().unwrap();
bencher.bench(|| {
black_box(run_util_function(
uumain,
&["-d", file_path.to_str().unwrap()],
&["-d", "-o", output_path, file_path.to_str().unwrap()],
));
});
}
@ -92,10 +117,13 @@ fn sort_numeric(bencher: Bencher, num_lines: usize) {
let file_path = setup_test_file(&data);
let output_file = NamedTempFile::new().unwrap();
let output_path = output_file.path().to_str().unwrap();
bencher.bench(|| {
black_box(run_util_function(
uumain,
&["-n", file_path.to_str().unwrap()],
&["-n", "-o", output_path, file_path.to_str().unwrap()],
));
});
}
@ -105,11 +133,13 @@ fn sort_numeric(bencher: Bencher, num_lines: usize) {
fn sort_reverse_locale(bencher: Bencher, num_lines: usize) {
let data = text_data::generate_accented_data(num_lines);
let file_path = setup_test_file(&data);
let output_file = NamedTempFile::new().unwrap();
let output_path = output_file.path().to_str().unwrap();
bencher.bench(|| {
black_box(run_util_function(
uumain,
&["-r", file_path.to_str().unwrap()],
&["-r", "-o", output_path, file_path.to_str().unwrap()],
));
});
}
@ -130,11 +160,14 @@ fn sort_key_field(bencher: Bencher, num_lines: usize) {
let file_path = setup_test_file(&data);
let output_file = NamedTempFile::new().unwrap();
let output_path = output_file.path().to_str().unwrap();
bencher.bench(|| {
// Sort by second field
black_box(run_util_function(
uumain,
&["-k", "2", file_path.to_str().unwrap()],
&["-k", "2", "-o", output_path, file_path.to_str().unwrap()],
));
});
}
@ -144,11 +177,13 @@ fn sort_key_field(bencher: Bencher, num_lines: usize) {
fn sort_unique_locale(bencher: Bencher, num_lines: usize) {
let data = text_data::generate_accented_data(num_lines);
let file_path = setup_test_file(&data);
let output_file = NamedTempFile::new().unwrap();
let output_path = output_file.path().to_str().unwrap();
bencher.bench(|| {
black_box(run_util_function(
uumain,
&["-u", file_path.to_str().unwrap()],
&["-u", "-o", output_path, file_path.to_str().unwrap()],
));
});
}

View file

@ -5,6 +5,7 @@
use divan::{Bencher, black_box};
use std::env;
use tempfile::NamedTempFile;
use uu_sort::uumain;
use uucore::benchmark::{run_util_function, setup_test_file, text_data};
@ -18,7 +19,12 @@ fn sort_ascii_c_locale(bencher: Bencher) {
unsafe {
env::set_var("LC_ALL", "C");
}
black_box(run_util_function(uumain, &[file_path.to_str().unwrap()]));
let output_file = NamedTempFile::new().unwrap();
let output_path = output_file.path().to_str().unwrap();
black_box(run_util_function(
uumain,
&["-o", output_path, file_path.to_str().unwrap()],
));
});
}
@ -32,7 +38,12 @@ fn sort_ascii_utf8_locale(bencher: Bencher) {
unsafe {
env::set_var("LC_ALL", "en_US.UTF-8");
}
black_box(run_util_function(uumain, &[file_path.to_str().unwrap()]));
let output_file = NamedTempFile::new().unwrap();
let output_path = output_file.path().to_str().unwrap();
black_box(run_util_function(
uumain,
&["-o", output_path, file_path.to_str().unwrap()],
));
});
}
@ -46,7 +57,12 @@ fn sort_mixed_c_locale(bencher: Bencher) {
unsafe {
env::set_var("LC_ALL", "C");
}
black_box(run_util_function(uumain, &[file_path.to_str().unwrap()]));
let output_file = NamedTempFile::new().unwrap();
let output_path = output_file.path().to_str().unwrap();
black_box(run_util_function(
uumain,
&["-o", output_path, file_path.to_str().unwrap()],
));
});
}
@ -60,7 +76,12 @@ fn sort_mixed_utf8_locale(bencher: Bencher) {
unsafe {
env::set_var("LC_ALL", "en_US.UTF-8");
}
black_box(run_util_function(uumain, &[file_path.to_str().unwrap()]));
let output_file = NamedTempFile::new().unwrap();
let output_path = output_file.path().to_str().unwrap();
black_box(run_util_function(
uumain,
&["-o", output_path, file_path.to_str().unwrap()],
));
});
}
@ -74,7 +95,12 @@ fn sort_german_c_locale(bencher: Bencher) {
unsafe {
env::set_var("LC_ALL", "C");
}
black_box(run_util_function(uumain, &[file_path.to_str().unwrap()]));
let output_file = NamedTempFile::new().unwrap();
let output_path = output_file.path().to_str().unwrap();
black_box(run_util_function(
uumain,
&["-o", output_path, file_path.to_str().unwrap()],
));
});
}
@ -88,7 +114,12 @@ fn sort_german_locale(bencher: Bencher) {
unsafe {
env::set_var("LC_ALL", "de_DE.UTF-8");
}
black_box(run_util_function(uumain, &[file_path.to_str().unwrap()]));
let output_file = NamedTempFile::new().unwrap();
let output_path = output_file.path().to_str().unwrap();
black_box(run_util_function(
uumain,
&["-o", output_path, file_path.to_str().unwrap()],
));
});
}
@ -102,7 +133,12 @@ fn sort_random_strings(bencher: Bencher) {
unsafe {
env::set_var("LC_ALL", "en_US.UTF-8");
}
black_box(run_util_function(uumain, &[file_path.to_str().unwrap()]));
let output_file = NamedTempFile::new().unwrap();
let output_path = output_file.path().to_str().unwrap();
black_box(run_util_function(
uumain,
&["-o", output_path, file_path.to_str().unwrap()],
));
});
}

View file

@ -19,6 +19,9 @@ pub fn create_test_file(data: &[u8], temp_dir: &Path) -> PathBuf {
let mut writer = BufWriter::new(file);
writer.write_all(data).unwrap();
writer.flush().unwrap();
// Ensure data is fully written to disk before returning
std::mem::drop(writer);
File::open(&file_path).unwrap().sync_all().unwrap();
file_path
}
@ -28,7 +31,9 @@ pub fn run_util_function<F>(util_func: F, args: &[&str]) -> i32
where
F: FnOnce(std::vec::IntoIter<std::ffi::OsString>) -> i32,
{
let os_args: Vec<std::ffi::OsString> = args.iter().map(|s| (*s).into()).collect();
// Prepend a dummy program name as argv[0] since clap expects it
let mut os_args: Vec<std::ffi::OsString> = vec!["benchmark".into()];
os_args.extend(args.iter().map(|s| (*s).into()));
util_func(os_args.into_iter())
}