mirror of
https://github.com/uutils/coreutils.git
synced 2025-12-23 08:47:37 +00:00
Merge pull request #8735 from sylvestre/sort-perf-2
Some checks are pending
CICD / Style/cargo-deny (push) Waiting to run
CICD / Build (push) Blocked by required conditions
CICD / Style/deps (push) Waiting to run
CICD / Documentation/warnings (push) Waiting to run
CICD / MinRustV (push) Waiting to run
CICD / Test all features separately (push) Blocked by required conditions
CICD / Dependencies (push) Waiting to run
CICD / Build/Makefile (push) Blocked by required conditions
CICD / Build/stable (push) Blocked by required conditions
CICD / Build/nightly (push) Blocked by required conditions
CICD / Binary sizes (push) Blocked by required conditions
CICD / Tests/BusyBox test suite (push) Blocked by required conditions
CICD / Tests/Toybox test suite (push) Blocked by required conditions
CICD / Code Coverage (push) Waiting to run
CICD / Separate Builds (push) Waiting to run
CICD / Build/SELinux (push) Blocked by required conditions
CICD / Run benchmarks (CodSpeed) (push) Blocked by required conditions
GnuTests / Aggregate GNU test results (push) Blocked by required conditions
GnuTests / Run GNU tests (native) (push) Waiting to run
GnuTests / Run GNU tests (SELinux) (push) Waiting to run
Android / Test builds (push) Waiting to run
Code Quality / Style/format (push) Waiting to run
Code Quality / Style/lint (push) Waiting to run
Code Quality / Style/spelling (push) Waiting to run
Code Quality / Style/toml (push) Waiting to run
Code Quality / Style/Python (push) Waiting to run
Code Quality / Pre-commit hooks (push) Waiting to run
Devcontainer / Verify devcontainer (push) Waiting to run
FreeBSD / Style and Lint (push) Waiting to run
FreeBSD / Tests (push) Waiting to run
WSL2 / Test (push) Waiting to run
Some checks are pending
CICD / Style/cargo-deny (push) Waiting to run
CICD / Build (push) Blocked by required conditions
CICD / Style/deps (push) Waiting to run
CICD / Documentation/warnings (push) Waiting to run
CICD / MinRustV (push) Waiting to run
CICD / Test all features separately (push) Blocked by required conditions
CICD / Dependencies (push) Waiting to run
CICD / Build/Makefile (push) Blocked by required conditions
CICD / Build/stable (push) Blocked by required conditions
CICD / Build/nightly (push) Blocked by required conditions
CICD / Binary sizes (push) Blocked by required conditions
CICD / Tests/BusyBox test suite (push) Blocked by required conditions
CICD / Tests/Toybox test suite (push) Blocked by required conditions
CICD / Code Coverage (push) Waiting to run
CICD / Separate Builds (push) Waiting to run
CICD / Build/SELinux (push) Blocked by required conditions
CICD / Run benchmarks (CodSpeed) (push) Blocked by required conditions
GnuTests / Aggregate GNU test results (push) Blocked by required conditions
GnuTests / Run GNU tests (native) (push) Waiting to run
GnuTests / Run GNU tests (SELinux) (push) Waiting to run
Android / Test builds (push) Waiting to run
Code Quality / Style/format (push) Waiting to run
Code Quality / Style/lint (push) Waiting to run
Code Quality / Style/spelling (push) Waiting to run
Code Quality / Style/toml (push) Waiting to run
Code Quality / Style/Python (push) Waiting to run
Code Quality / Pre-commit hooks (push) Waiting to run
Devcontainer / Verify devcontainer (push) Waiting to run
FreeBSD / Style and Lint (push) Waiting to run
FreeBSD / Tests (push) Waiting to run
WSL2 / Test (push) Waiting to run
sort: add benchmark
This commit is contained in:
commit
ea5c8158d4
9 changed files with 650 additions and 95 deletions
2
.vscode/cSpell.json
vendored
2
.vscode/cSpell.json
vendored
|
|
@ -32,6 +32,8 @@
|
|||
".devcontainer/**",
|
||||
"util/gnu-patches/**",
|
||||
"docs/src/release-notes/**",
|
||||
"src/uu/*/benches/*.rs",
|
||||
"src/uucore/src/lib/features/benchmark.rs",
|
||||
],
|
||||
|
||||
"enableGlobDot": true,
|
||||
|
|
|
|||
1
Cargo.lock
generated
1
Cargo.lock
generated
|
|
@ -3930,6 +3930,7 @@ dependencies = [
|
|||
"bigdecimal",
|
||||
"binary-heap-plus",
|
||||
"clap",
|
||||
"codspeed-divan-compat",
|
||||
"compare",
|
||||
"ctrlc",
|
||||
"fluent",
|
||||
|
|
|
|||
|
|
@ -4,45 +4,32 @@
|
|||
// file that was distributed with this source code.
|
||||
|
||||
use divan::{Bencher, black_box};
|
||||
use tempfile::TempDir;
|
||||
use uu_numfmt::uumain;
|
||||
use uucore::benchmark::{create_test_file, run_util_function};
|
||||
|
||||
/// Generate numeric data for benchmarking
|
||||
fn generate_numbers(count: usize) -> String {
|
||||
(1..=count)
|
||||
.map(|n| n.to_string())
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n")
|
||||
}
|
||||
|
||||
/// Setup benchmark environment with test data
|
||||
fn setup_benchmark(data: String) -> (TempDir, String) {
|
||||
let temp_dir = tempfile::tempdir().unwrap();
|
||||
let file_path = create_test_file(data.as_bytes(), temp_dir.path());
|
||||
let file_path_str = file_path.to_str().unwrap().to_string();
|
||||
(temp_dir, file_path_str)
|
||||
}
|
||||
use uucore::benchmark::{run_util_function, setup_test_file, text_data};
|
||||
|
||||
/// Benchmark SI formatting with different number counts
|
||||
#[divan::bench(args = [1_000_000])]
|
||||
fn numfmt_to_si(bencher: Bencher, count: usize) {
|
||||
let (_temp_dir, file_path_str) = setup_benchmark(generate_numbers(count));
|
||||
let data = text_data::generate_numbers(count);
|
||||
let file_path = setup_test_file(data.as_bytes());
|
||||
let file_path_str = file_path.to_str().unwrap();
|
||||
|
||||
bencher.bench(|| {
|
||||
black_box(run_util_function(uumain, &["--to=si", &file_path_str]));
|
||||
black_box(run_util_function(uumain, &["--to=si", file_path_str]));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark SI formatting with precision format
|
||||
#[divan::bench(args = [1_000_000])]
|
||||
fn numfmt_to_si_precision(bencher: Bencher, count: usize) {
|
||||
let (_temp_dir, file_path_str) = setup_benchmark(generate_numbers(count));
|
||||
let data = text_data::generate_numbers(count);
|
||||
let file_path = setup_test_file(data.as_bytes());
|
||||
let file_path_str = file_path.to_str().unwrap();
|
||||
|
||||
bencher.bench(|| {
|
||||
black_box(run_util_function(
|
||||
uumain,
|
||||
&["--to=si", "--format=%.6f", &file_path_str],
|
||||
&["--to=si", "--format=%.6f", file_path_str],
|
||||
));
|
||||
});
|
||||
}
|
||||
|
|
@ -50,10 +37,12 @@ fn numfmt_to_si_precision(bencher: Bencher, count: usize) {
|
|||
/// Benchmark IEC (binary) formatting
|
||||
#[divan::bench(args = [1_000_000])]
|
||||
fn numfmt_to_iec(bencher: Bencher, count: usize) {
|
||||
let (_temp_dir, file_path_str) = setup_benchmark(generate_numbers(count));
|
||||
let data = text_data::generate_numbers(count);
|
||||
let file_path = setup_test_file(data.as_bytes());
|
||||
let file_path_str = file_path.to_str().unwrap();
|
||||
|
||||
bencher.bench(|| {
|
||||
black_box(run_util_function(uumain, &["--to=iec", &file_path_str]));
|
||||
black_box(run_util_function(uumain, &["--to=iec", file_path_str]));
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -65,10 +54,11 @@ fn numfmt_from_si(bencher: Bencher, count: usize) {
|
|||
.map(|n| format!("{:.1}K", n as f64 / 1000.0))
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
let (_temp_dir, file_path_str) = setup_benchmark(data);
|
||||
let file_path = setup_test_file(data.as_bytes());
|
||||
let file_path_str = file_path.to_str().unwrap();
|
||||
|
||||
bencher.bench(|| {
|
||||
black_box(run_util_function(uumain, &["--from=si", &file_path_str]));
|
||||
black_box(run_util_function(uumain, &["--from=si", file_path_str]));
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -80,23 +70,26 @@ fn numfmt_large_numbers_si(bencher: Bencher, count: usize) {
|
|||
.map(|n| (n * 1_000_000).to_string())
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
let (_temp_dir, file_path_str) = setup_benchmark(data);
|
||||
let file_path = setup_test_file(data.as_bytes());
|
||||
let file_path_str = file_path.to_str().unwrap();
|
||||
|
||||
bencher.bench(|| {
|
||||
black_box(run_util_function(uumain, &["--to=si", &file_path_str]));
|
||||
black_box(run_util_function(uumain, &["--to=si", file_path_str]));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark different padding widths
|
||||
#[divan::bench(args = [(1_000_000, 5), (1_000_000, 50)])]
|
||||
fn numfmt_padding(bencher: Bencher, (count, padding): (usize, usize)) {
|
||||
let (_temp_dir, file_path_str) = setup_benchmark(generate_numbers(count));
|
||||
let data = text_data::generate_numbers(count);
|
||||
let file_path = setup_test_file(data.as_bytes());
|
||||
let file_path_str = file_path.to_str().unwrap();
|
||||
let padding_arg = format!("--padding={padding}");
|
||||
|
||||
bencher.bench(|| {
|
||||
black_box(run_util_function(
|
||||
uumain,
|
||||
&["--to=si", &padding_arg, &file_path_str],
|
||||
&["--to=si", &padding_arg, file_path_str],
|
||||
));
|
||||
});
|
||||
}
|
||||
|
|
@ -104,13 +97,15 @@ fn numfmt_padding(bencher: Bencher, (count, padding): (usize, usize)) {
|
|||
/// Benchmark round modes with SI formatting
|
||||
#[divan::bench(args = [("up", 100_000), ("down", 1_000_000), ("towards-zero", 1_000_000)])]
|
||||
fn numfmt_round_modes(bencher: Bencher, (round_mode, count): (&str, usize)) {
|
||||
let (_temp_dir, file_path_str) = setup_benchmark(generate_numbers(count));
|
||||
let data = text_data::generate_numbers(count);
|
||||
let file_path = setup_test_file(data.as_bytes());
|
||||
let file_path_str = file_path.to_str().unwrap();
|
||||
let round_arg = format!("--round={round_mode}");
|
||||
|
||||
bencher.bench(|| {
|
||||
black_box(run_util_function(
|
||||
uumain,
|
||||
&["--to=si", &round_arg, &file_path_str],
|
||||
&["--to=si", &round_arg, file_path_str],
|
||||
));
|
||||
});
|
||||
}
|
||||
|
|
|
|||
|
|
@ -40,6 +40,21 @@ fluent = { workspace = true }
|
|||
[target.'cfg(target_os = "linux")'.dependencies]
|
||||
nix = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
divan = { workspace = true }
|
||||
tempfile = { workspace = true }
|
||||
uucore = { workspace = true, features = [
|
||||
"benchmark",
|
||||
"fs",
|
||||
"parser",
|
||||
"version-cmp",
|
||||
"i18n-collator",
|
||||
] }
|
||||
|
||||
[[bin]]
|
||||
name = "sort"
|
||||
path = "src/main.rs"
|
||||
|
||||
[[bench]]
|
||||
name = "sort_bench"
|
||||
harness = false
|
||||
|
|
|
|||
158
src/uu/sort/benches/sort_bench.rs
Normal file
158
src/uu/sort/benches/sort_bench.rs
Normal file
|
|
@ -0,0 +1,158 @@
|
|||
// This file is part of the uutils coreutils package.
|
||||
//
|
||||
// For the full copyright and license information, please view the LICENSE
|
||||
// file that was distributed with this source code.
|
||||
|
||||
use divan::{Bencher, black_box};
|
||||
use uu_sort::uumain;
|
||||
use uucore::benchmark::{run_util_function, setup_test_file, text_data};
|
||||
|
||||
/// Benchmark sorting ASCII-only data
|
||||
#[divan::bench(args = [100_000, 500_000])]
|
||||
fn sort_ascii_only(bencher: Bencher, num_lines: usize) {
|
||||
let data = text_data::generate_ascii_data(num_lines);
|
||||
let file_path = setup_test_file(&data);
|
||||
|
||||
bencher.bench(|| {
|
||||
black_box(run_util_function(uumain, &[file_path.to_str().unwrap()]));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark sorting accented/non-ASCII data
|
||||
#[divan::bench(args = [100_000, 500_000])]
|
||||
fn sort_accented_data(bencher: Bencher, num_lines: usize) {
|
||||
let data = text_data::generate_accented_data(num_lines);
|
||||
let file_path = setup_test_file(&data);
|
||||
|
||||
bencher.bench(|| {
|
||||
black_box(run_util_function(uumain, &[file_path.to_str().unwrap()]));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark sorting mixed ASCII/non-ASCII data
|
||||
#[divan::bench(args = [100_000, 500_000])]
|
||||
fn sort_mixed_data(bencher: Bencher, num_lines: usize) {
|
||||
let data = text_data::generate_mixed_data(num_lines);
|
||||
let file_path = setup_test_file(&data);
|
||||
|
||||
bencher.bench(|| {
|
||||
black_box(run_util_function(uumain, &[file_path.to_str().unwrap()]));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark case-sensitive sorting with mixed case data
|
||||
#[divan::bench(args = [100_000, 500_000])]
|
||||
fn sort_case_sensitive(bencher: Bencher, num_lines: usize) {
|
||||
let data = text_data::generate_case_sensitive_data(num_lines);
|
||||
let file_path = setup_test_file(&data);
|
||||
|
||||
bencher.bench(|| {
|
||||
black_box(run_util_function(uumain, &[file_path.to_str().unwrap()]));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark case-insensitive sorting (fold case)
|
||||
#[divan::bench(args = [100_000, 500_000])]
|
||||
fn sort_case_insensitive(bencher: Bencher, num_lines: usize) {
|
||||
let data = text_data::generate_case_sensitive_data(num_lines);
|
||||
let file_path = setup_test_file(&data);
|
||||
|
||||
bencher.bench(|| {
|
||||
black_box(run_util_function(
|
||||
uumain,
|
||||
&["-f", file_path.to_str().unwrap()],
|
||||
));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark dictionary order sorting (only blanks and alphanumeric)
|
||||
#[divan::bench(args = [100_000, 500_000])]
|
||||
fn sort_dictionary_order(bencher: Bencher, num_lines: usize) {
|
||||
let data = text_data::generate_mixed_data(num_lines);
|
||||
let file_path = setup_test_file(&data);
|
||||
|
||||
bencher.bench(|| {
|
||||
black_box(run_util_function(
|
||||
uumain,
|
||||
&["-d", file_path.to_str().unwrap()],
|
||||
));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark numeric sorting with mixed data
|
||||
#[divan::bench(args = [100_000, 500_000])]
|
||||
fn sort_numeric(bencher: Bencher, num_lines: usize) {
|
||||
let mut data = Vec::new();
|
||||
|
||||
// Generate numeric data with some text prefixes
|
||||
for i in 0..num_lines {
|
||||
let value = (i * 13) % 10000; // Pseudo-random numeric values
|
||||
data.extend_from_slice(format!("value_{value}\n").as_bytes());
|
||||
}
|
||||
|
||||
let file_path = setup_test_file(&data);
|
||||
|
||||
bencher.bench(|| {
|
||||
black_box(run_util_function(
|
||||
uumain,
|
||||
&["-n", file_path.to_str().unwrap()],
|
||||
));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark reverse sorting with locale-aware data
|
||||
#[divan::bench(args = [100_000, 500_000])]
|
||||
fn sort_reverse_locale(bencher: Bencher, num_lines: usize) {
|
||||
let data = text_data::generate_accented_data(num_lines);
|
||||
let file_path = setup_test_file(&data);
|
||||
|
||||
bencher.bench(|| {
|
||||
black_box(run_util_function(
|
||||
uumain,
|
||||
&["-r", file_path.to_str().unwrap()],
|
||||
));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark sorting with specific key field
|
||||
#[divan::bench(args = [100_000, 500_000])]
|
||||
fn sort_key_field(bencher: Bencher, num_lines: usize) {
|
||||
let mut data = Vec::new();
|
||||
|
||||
// Generate data with multiple fields
|
||||
let words = ["café", "naïve", "apple", "über", "banana"];
|
||||
for i in 0..num_lines {
|
||||
let word = words[i % words.len()];
|
||||
let num1 = i % 100;
|
||||
let num2 = (i * 7) % 100;
|
||||
data.extend_from_slice(format!("{num1}\t{word}\t{num2}\n").as_bytes());
|
||||
}
|
||||
|
||||
let file_path = setup_test_file(&data);
|
||||
|
||||
bencher.bench(|| {
|
||||
// Sort by second field
|
||||
black_box(run_util_function(
|
||||
uumain,
|
||||
&["-k", "2", file_path.to_str().unwrap()],
|
||||
));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark unique sorting with locale-aware data
|
||||
#[divan::bench(args = [100_000, 500_000])]
|
||||
fn sort_unique_locale(bencher: Bencher, num_lines: usize) {
|
||||
let data = text_data::generate_accented_data(num_lines);
|
||||
let file_path = setup_test_file(&data);
|
||||
|
||||
bencher.bench(|| {
|
||||
black_box(run_util_function(
|
||||
uumain,
|
||||
&["-u", file_path.to_str().unwrap()],
|
||||
));
|
||||
});
|
||||
}
|
||||
|
||||
fn main() {
|
||||
divan::main();
|
||||
}
|
||||
166
src/uu/sort/benches/sort_locale_bench.rs
Normal file
166
src/uu/sort/benches/sort_locale_bench.rs
Normal file
|
|
@ -0,0 +1,166 @@
|
|||
// This file is part of the uutils coreutils package.
|
||||
//
|
||||
// For the full copyright and license information, please view the LICENSE
|
||||
// file that was distributed with this source code.
|
||||
|
||||
use divan::{Bencher, black_box};
|
||||
use std::env;
|
||||
use uu_sort::uumain;
|
||||
use uucore::benchmark::{run_util_function, setup_test_file, text_data};
|
||||
|
||||
/// Benchmark ASCII-only data sorting with C locale (byte comparison)
|
||||
#[divan::bench]
|
||||
fn sort_ascii_c_locale(bencher: Bencher) {
|
||||
let data = text_data::generate_ascii_data_simple(100_000);
|
||||
let file_path = setup_test_file(&data);
|
||||
|
||||
bencher.bench(|| {
|
||||
unsafe {
|
||||
env::set_var("LC_ALL", "C");
|
||||
}
|
||||
black_box(run_util_function(uumain, &[file_path.to_str().unwrap()]));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark ASCII-only data sorting with UTF-8 locale
|
||||
#[divan::bench]
|
||||
fn sort_ascii_utf8_locale(bencher: Bencher) {
|
||||
let data = text_data::generate_ascii_data_simple(10_000);
|
||||
let file_path = setup_test_file(&data);
|
||||
|
||||
bencher.bench(|| {
|
||||
unsafe {
|
||||
env::set_var("LC_ALL", "en_US.UTF-8");
|
||||
}
|
||||
black_box(run_util_function(uumain, &[file_path.to_str().unwrap()]));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark mixed ASCII/Unicode data with C locale
|
||||
#[divan::bench]
|
||||
fn sort_mixed_c_locale(bencher: Bencher) {
|
||||
let data = text_data::generate_mixed_locale_data(10_000);
|
||||
let file_path = setup_test_file(&data);
|
||||
|
||||
bencher.bench(|| {
|
||||
unsafe {
|
||||
env::set_var("LC_ALL", "C");
|
||||
}
|
||||
black_box(run_util_function(uumain, &[file_path.to_str().unwrap()]));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark mixed ASCII/Unicode data with UTF-8 locale
|
||||
#[divan::bench]
|
||||
fn sort_mixed_utf8_locale(bencher: Bencher) {
|
||||
let data = text_data::generate_mixed_locale_data(10_000);
|
||||
let file_path = setup_test_file(&data);
|
||||
|
||||
bencher.bench(|| {
|
||||
unsafe {
|
||||
env::set_var("LC_ALL", "en_US.UTF-8");
|
||||
}
|
||||
black_box(run_util_function(uumain, &[file_path.to_str().unwrap()]));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark German locale-specific data with C locale
|
||||
#[divan::bench]
|
||||
fn sort_german_c_locale(bencher: Bencher) {
|
||||
let data = text_data::generate_german_locale_data(10_000);
|
||||
let file_path = setup_test_file(&data);
|
||||
|
||||
bencher.bench(|| {
|
||||
unsafe {
|
||||
env::set_var("LC_ALL", "C");
|
||||
}
|
||||
black_box(run_util_function(uumain, &[file_path.to_str().unwrap()]));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark German locale-specific data with German locale
|
||||
#[divan::bench]
|
||||
fn sort_german_locale(bencher: Bencher) {
|
||||
let data = text_data::generate_german_locale_data(10_000);
|
||||
let file_path = setup_test_file(&data);
|
||||
|
||||
bencher.bench(|| {
|
||||
unsafe {
|
||||
env::set_var("LC_ALL", "de_DE.UTF-8");
|
||||
}
|
||||
black_box(run_util_function(uumain, &[file_path.to_str().unwrap()]));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark random strings of different lengths
|
||||
#[divan::bench]
|
||||
fn sort_random_strings(bencher: Bencher) {
|
||||
let data = text_data::generate_random_strings(10_000, 50);
|
||||
let file_path = setup_test_file(&data);
|
||||
|
||||
bencher.bench(|| {
|
||||
unsafe {
|
||||
env::set_var("LC_ALL", "en_US.UTF-8");
|
||||
}
|
||||
black_box(run_util_function(uumain, &[file_path.to_str().unwrap()]));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark numeric sorting performance
|
||||
#[divan::bench]
|
||||
fn sort_numeric(bencher: Bencher) {
|
||||
let mut data = Vec::new();
|
||||
for i in 0..10_000 {
|
||||
let line = format!("{}\n", 10_000 - i);
|
||||
data.extend_from_slice(line.as_bytes());
|
||||
}
|
||||
let file_path = setup_test_file(&data);
|
||||
|
||||
bencher.bench(|| {
|
||||
unsafe {
|
||||
env::set_var("LC_ALL", "en_US.UTF-8");
|
||||
}
|
||||
black_box(run_util_function(
|
||||
uumain,
|
||||
&["-n", file_path.to_str().unwrap()],
|
||||
));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark reverse sorting
|
||||
#[divan::bench]
|
||||
fn sort_reverse_mixed(bencher: Bencher) {
|
||||
let data = text_data::generate_mixed_locale_data(10_000);
|
||||
let file_path = setup_test_file(&data);
|
||||
|
||||
bencher.bench(|| {
|
||||
unsafe {
|
||||
env::set_var("LC_ALL", "en_US.UTF-8");
|
||||
}
|
||||
black_box(run_util_function(
|
||||
uumain,
|
||||
&["-r", file_path.to_str().unwrap()],
|
||||
));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark unique sorting
|
||||
#[divan::bench]
|
||||
fn sort_unique_mixed(bencher: Bencher) {
|
||||
let data = text_data::generate_mixed_locale_data(10_000);
|
||||
let file_path = setup_test_file(&data);
|
||||
|
||||
bencher.bench(|| {
|
||||
unsafe {
|
||||
env::set_var("LC_ALL", "en_US.UTF-8");
|
||||
}
|
||||
black_box(run_util_function(
|
||||
uumain,
|
||||
&["-u", file_path.to_str().unwrap()],
|
||||
));
|
||||
});
|
||||
}
|
||||
|
||||
fn main() {
|
||||
divan::main();
|
||||
}
|
||||
|
|
@ -5,9 +5,9 @@
|
|||
|
||||
use divan::{Bencher, black_box};
|
||||
use uu_tsort::uumain;
|
||||
use uucore::benchmark::{create_test_file, run_util_function};
|
||||
use uucore::benchmark::{run_util_function, setup_test_file};
|
||||
|
||||
/// Generate topological sort test data with different characteristics
|
||||
/// Generate topological sort test data - linear chain
|
||||
fn generate_linear_chain(num_nodes: usize) -> Vec<u8> {
|
||||
let mut data = Vec::new();
|
||||
|
||||
|
|
@ -18,7 +18,7 @@ fn generate_linear_chain(num_nodes: usize) -> Vec<u8> {
|
|||
data
|
||||
}
|
||||
|
||||
/// Generate a DAG with more complex dependencies
|
||||
/// Generate a DAG with tree-like structure
|
||||
fn generate_tree_dag(depth: usize, branching_factor: usize) -> Vec<u8> {
|
||||
let mut data = Vec::new();
|
||||
let mut node_id = 0;
|
||||
|
|
@ -116,64 +116,8 @@ fn generate_wide_dag(num_nodes: usize) -> Vec<u8> {
|
|||
data
|
||||
}
|
||||
|
||||
/// Benchmark linear chain graphs of different sizes
|
||||
/// This tests the performance improvements mentioned in PR #8694
|
||||
#[divan::bench(args = [1_000, 10_000, 100_000, 1_000_000])]
|
||||
fn tsort_linear_chain(bencher: Bencher, num_nodes: usize) {
|
||||
let temp_dir = tempfile::tempdir().unwrap();
|
||||
let data = generate_linear_chain(num_nodes);
|
||||
let file_path = create_test_file(&data, temp_dir.path());
|
||||
let file_path_str = file_path.to_str().unwrap();
|
||||
|
||||
bencher.bench(|| {
|
||||
black_box(run_util_function(uumain, &[file_path_str]));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark tree-like DAG structures
|
||||
#[divan::bench(args = [(4, 3), (5, 3), (6, 2), (7, 2)])]
|
||||
fn tsort_tree_dag(bencher: Bencher, (depth, branching): (usize, usize)) {
|
||||
let temp_dir = tempfile::tempdir().unwrap();
|
||||
let data = generate_tree_dag(depth, branching);
|
||||
let file_path = create_test_file(&data, temp_dir.path());
|
||||
let file_path_str = file_path.to_str().unwrap();
|
||||
|
||||
bencher.bench(|| {
|
||||
black_box(run_util_function(uumain, &[file_path_str]));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark complex DAG with cross-dependencies
|
||||
#[divan::bench(args = [1_000, 5_000, 10_000, 50_000])]
|
||||
fn tsort_complex_dag(bencher: Bencher, num_nodes: usize) {
|
||||
let temp_dir = tempfile::tempdir().unwrap();
|
||||
let data = generate_complex_dag(num_nodes);
|
||||
let file_path = create_test_file(&data, temp_dir.path());
|
||||
let file_path_str = file_path.to_str().unwrap();
|
||||
|
||||
bencher.bench(|| {
|
||||
black_box(run_util_function(uumain, &[file_path_str]));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark wide DAG with many parallel chains
|
||||
/// This should stress the hashmap optimizations from PR #8694
|
||||
#[divan::bench(args = [10_000, 50_000, 100_000])]
|
||||
fn tsort_wide_dag(bencher: Bencher, num_nodes: usize) {
|
||||
let temp_dir = tempfile::tempdir().unwrap();
|
||||
let data = generate_wide_dag(num_nodes);
|
||||
let file_path = create_test_file(&data, temp_dir.path());
|
||||
let file_path_str = file_path.to_str().unwrap();
|
||||
|
||||
bencher.bench(|| {
|
||||
black_box(run_util_function(uumain, &[file_path_str]));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark input parsing vs computation by using files with different edge densities
|
||||
#[divan::bench(args = [10_000, 50_000])]
|
||||
fn tsort_input_parsing_heavy(bencher: Bencher, num_edges: usize) {
|
||||
let temp_dir = tempfile::tempdir().unwrap();
|
||||
/// Generate DAG data for input parsing stress tests
|
||||
fn generate_input_parsing_heavy(num_edges: usize) -> Vec<u8> {
|
||||
// Create a scenario with many edges but relatively few unique nodes
|
||||
// This stresses the input parsing and graph construction optimizations
|
||||
let num_unique_nodes = (num_edges as f64).sqrt() as usize;
|
||||
|
|
@ -187,7 +131,64 @@ fn tsort_input_parsing_heavy(bencher: Bencher, num_edges: usize) {
|
|||
}
|
||||
}
|
||||
|
||||
let file_path = create_test_file(&data, temp_dir.path());
|
||||
data
|
||||
}
|
||||
|
||||
/// Benchmark linear chain graphs of different sizes
|
||||
/// This tests the performance improvements mentioned in PR #8694
|
||||
#[divan::bench(args = [1_000, 10_000, 100_000, 1_000_000])]
|
||||
fn tsort_linear_chain(bencher: Bencher, num_nodes: usize) {
|
||||
let data = generate_linear_chain(num_nodes);
|
||||
let file_path = setup_test_file(&data);
|
||||
let file_path_str = file_path.to_str().unwrap();
|
||||
|
||||
bencher.bench(|| {
|
||||
black_box(run_util_function(uumain, &[file_path_str]));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark tree-like DAG structures
|
||||
#[divan::bench(args = [(4, 3), (5, 3), (6, 2), (7, 2)])]
|
||||
fn tsort_tree_dag(bencher: Bencher, (depth, branching): (usize, usize)) {
|
||||
let data = generate_tree_dag(depth, branching);
|
||||
let file_path = setup_test_file(&data);
|
||||
let file_path_str = file_path.to_str().unwrap();
|
||||
|
||||
bencher.bench(|| {
|
||||
black_box(run_util_function(uumain, &[file_path_str]));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark complex DAG with cross-dependencies
|
||||
#[divan::bench(args = [1_000, 5_000, 10_000, 50_000])]
|
||||
fn tsort_complex_dag(bencher: Bencher, num_nodes: usize) {
|
||||
let data = generate_complex_dag(num_nodes);
|
||||
let file_path = setup_test_file(&data);
|
||||
let file_path_str = file_path.to_str().unwrap();
|
||||
|
||||
bencher.bench(|| {
|
||||
black_box(run_util_function(uumain, &[file_path_str]));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark wide DAG with many parallel chains
|
||||
/// This should stress the hashmap optimizations from PR #8694
|
||||
#[divan::bench(args = [10_000, 50_000, 100_000])]
|
||||
fn tsort_wide_dag(bencher: Bencher, num_nodes: usize) {
|
||||
let data = generate_wide_dag(num_nodes);
|
||||
let file_path = setup_test_file(&data);
|
||||
let file_path_str = file_path.to_str().unwrap();
|
||||
|
||||
bencher.bench(|| {
|
||||
black_box(run_util_function(uumain, &[file_path_str]));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark input parsing vs computation by using files with different edge densities
|
||||
#[divan::bench(args = [10_000, 50_000])]
|
||||
fn tsort_input_parsing_heavy(bencher: Bencher, num_edges: usize) {
|
||||
let data = generate_input_parsing_heavy(num_edges);
|
||||
let file_path = setup_test_file(&data);
|
||||
let file_path_str = file_path.to_str().unwrap();
|
||||
|
||||
bencher.bench(|| {
|
||||
|
|
|
|||
|
|
@ -49,6 +49,7 @@ os_display = "0.1.3"
|
|||
|
||||
# Benchmark dependencies (optional)
|
||||
divan = { workspace = true, optional = true }
|
||||
tempfile = { workspace = true, optional = true }
|
||||
|
||||
digest = { workspace = true, optional = true }
|
||||
hex = { workspace = true, optional = true }
|
||||
|
|
@ -173,4 +174,4 @@ wide = []
|
|||
tty = []
|
||||
time = ["jiff"]
|
||||
uptime = ["chrono", "libc", "windows-sys", "utmpx", "utmp-classic"]
|
||||
benchmark = ["divan"]
|
||||
benchmark = ["divan", "tempfile"]
|
||||
|
|
|
|||
|
|
@ -32,6 +32,16 @@ where
|
|||
util_func(os_args.into_iter())
|
||||
}
|
||||
|
||||
/// Helper function to set up a temporary test file and leak the temporary directory
|
||||
/// so it persists for the duration of the benchmark
|
||||
pub fn setup_test_file(data: &[u8]) -> PathBuf {
|
||||
let temp_dir = tempfile::tempdir().unwrap();
|
||||
let file_path = create_test_file(data, temp_dir.path());
|
||||
// Keep temp_dir alive by leaking it - the OS will clean it up
|
||||
std::mem::forget(temp_dir);
|
||||
file_path
|
||||
}
|
||||
|
||||
/// Generate test data with different characteristics for text processing utilities
|
||||
pub mod text_data {
|
||||
/// Generate test data with a specific size in MB and average line length
|
||||
|
|
@ -85,4 +95,210 @@ pub mod text_data {
|
|||
|
||||
data
|
||||
}
|
||||
|
||||
/// Helper function to generate test data from a list of words
///
/// Produces `num_lines` lines of the form `<word>_<NNN>\n`, cycling through
/// `words` in order. `NNN` is the line index modulo 1000, zero-padded to three
/// digits.
///
/// Returns an empty buffer when `words` is empty (the previous indexing
/// `i % words.len()` panicked with a remainder-by-zero in that case).
pub fn generate_data_from_words(words: &[&str], num_lines: usize) -> Vec<u8> {
    let mut data = Vec::new();
    // `cycle()` over an empty slice yields nothing, so no special case is needed.
    for (i, word) in words.iter().cycle().take(num_lines).enumerate() {
        let number = i % 1000;
        data.extend_from_slice(format!("{word}_{number:03}\n").as_bytes());
    }
    data
}
|
||||
|
||||
/// Helper function to generate test data from a list of words without number suffix
///
/// Produces `num_lines` lines, each containing one word followed by `\n`,
/// cycling through `words` in order.
///
/// Returns an empty buffer when `words` is empty (the previous indexing
/// `i % words.len()` panicked with a remainder-by-zero in that case).
pub fn generate_data_from_words_simple(words: &[&str], num_lines: usize) -> Vec<u8> {
    let mut data = Vec::new();
    // `cycle()` over an empty slice yields nothing, so no special case is needed.
    for word in words.iter().cycle().take(num_lines) {
        data.extend_from_slice(word.as_bytes());
        data.push(b'\n');
    }
    data
}
|
||||
|
||||
/// Helper function to generate test data from a list of words with counter
///
/// Produces `num_lines` lines of the form `<word><NNNN>\n`, cycling through
/// `words` in order. `NNNN` is the line index zero-padded to at least four
/// digits.
///
/// Returns an empty buffer when `words` is empty (the previous indexing
/// `i % words.len()` panicked with a remainder-by-zero in that case).
pub fn generate_data_from_words_with_counter(words: &[&str], num_lines: usize) -> Vec<u8> {
    let mut data = Vec::new();
    // `cycle()` over an empty slice yields nothing, so no special case is needed.
    for (i, word) in words.iter().cycle().take(num_lines).enumerate() {
        data.extend_from_slice(format!("{word}{i:04}\n").as_bytes());
    }
    data
}
|
||||
|
||||
/// Generate test data with ASCII-only text
|
||||
pub fn generate_ascii_data(num_lines: usize) -> Vec<u8> {
|
||||
let words = [
|
||||
"apple",
|
||||
"banana",
|
||||
"cherry",
|
||||
"date",
|
||||
"elderberry",
|
||||
"fig",
|
||||
"grape",
|
||||
"honeydew",
|
||||
"kiwi",
|
||||
"lemon",
|
||||
"mango",
|
||||
"nectarine",
|
||||
"orange",
|
||||
"papaya",
|
||||
"quince",
|
||||
"raspberry",
|
||||
"strawberry",
|
||||
"tangerine",
|
||||
"ugli",
|
||||
"vanilla",
|
||||
"watermelon",
|
||||
"xigua",
|
||||
"yellow",
|
||||
"zucchini",
|
||||
"avocado",
|
||||
];
|
||||
|
||||
generate_data_from_words(&words, num_lines)
|
||||
}
|
||||
|
||||
/// Generate simple ASCII data with line numbers
///
/// Produces `num_lines` lines of the form `line_<NNNNNN>\n`, where the number
/// counts down from `num_lines - 1` to 0 and is zero-padded to at least six
/// digits.
pub fn generate_ascii_data_simple(num_lines: usize) -> Vec<u8> {
    // Every line is at least 12 bytes: "line_" + six digits + '\n'.
    let mut data = Vec::with_capacity(num_lines.saturating_mul(12));
    for n in (0..num_lines).rev() {
        data.extend_from_slice(format!("line_{n:06}\n").as_bytes());
    }
    data
}
|
||||
|
||||
/// Generate test data with accented characters that require locale-aware sorting
|
||||
pub fn generate_accented_data(num_lines: usize) -> Vec<u8> {
|
||||
let words = [
|
||||
// French words with accents
|
||||
"café",
|
||||
"naïve",
|
||||
"résumé",
|
||||
"fiancé",
|
||||
"crème",
|
||||
"déjà",
|
||||
"façade",
|
||||
"château",
|
||||
"élève",
|
||||
"côte",
|
||||
// German words with umlauts
|
||||
"über",
|
||||
"Müller",
|
||||
"schön",
|
||||
"Köln",
|
||||
"Düsseldorf",
|
||||
"Österreich",
|
||||
"Zürich",
|
||||
"Mädchen",
|
||||
"Bär",
|
||||
"größer",
|
||||
// Spanish words with tildes and accents
|
||||
"niño",
|
||||
"señor",
|
||||
"año",
|
||||
"mañana",
|
||||
"español",
|
||||
"corazón",
|
||||
"María",
|
||||
"José",
|
||||
"más",
|
||||
"también",
|
||||
];
|
||||
|
||||
generate_data_from_words(&words, num_lines)
|
||||
}
|
||||
|
||||
/// Generate test data with mixed ASCII and non-ASCII characters
|
||||
pub fn generate_mixed_data(num_lines: usize) -> Vec<u8> {
|
||||
let words = [
|
||||
// Mix of ASCII and accented words
|
||||
"apple",
|
||||
"café",
|
||||
"banana",
|
||||
"naïve",
|
||||
"cherry",
|
||||
"résumé",
|
||||
"date",
|
||||
"fiancé",
|
||||
"elderberry",
|
||||
"crème",
|
||||
"über",
|
||||
"grape",
|
||||
"Müller",
|
||||
"honeydew",
|
||||
"schön",
|
||||
"niño",
|
||||
"kiwi",
|
||||
"señor",
|
||||
"lemon",
|
||||
"año",
|
||||
"mango",
|
||||
"María",
|
||||
"orange",
|
||||
"José",
|
||||
"papaya",
|
||||
];
|
||||
|
||||
generate_data_from_words(&words, num_lines)
|
||||
}
|
||||
|
||||
/// Generate mixed locale data with counter
|
||||
pub fn generate_mixed_locale_data(num_lines: usize) -> Vec<u8> {
|
||||
let mixed_strings = [
|
||||
"zebra", "äpfel", "banana", "öl", "cat", "über", "dog", "zürich", "elephant", "café",
|
||||
"fish", "naïve", "grape", "résumé", "house", "piñata",
|
||||
];
|
||||
generate_data_from_words_with_counter(&mixed_strings, num_lines)
|
||||
}
|
||||
|
||||
/// Generate German locale-specific data
|
||||
pub fn generate_german_locale_data(num_lines: usize) -> Vec<u8> {
|
||||
let german_words = [
|
||||
"Ärger", "Öffnung", "Über", "Zucker", "Bär", "Föhn", "Größe", "Höhe", "Käse", "Löwe",
|
||||
"Mädchen", "Nüsse", "Röntgen", "Schäfer", "Tür", "Würfel", "ä", "ö", "ü", "ß", "a",
|
||||
"o", "u", "s",
|
||||
];
|
||||
generate_data_from_words_with_counter(&german_words, num_lines)
|
||||
}
|
||||
|
||||
/// Generate test data with uppercase/lowercase variations
|
||||
pub fn generate_case_sensitive_data(num_lines: usize) -> Vec<u8> {
|
||||
let base_words = [
|
||||
"apple", "Apple", "APPLE", "banana", "Banana", "BANANA", "café", "Café", "CAFÉ",
|
||||
"über", "Über", "ÜBER",
|
||||
];
|
||||
|
||||
generate_data_from_words_simple(&base_words, num_lines)
|
||||
}
|
||||
|
||||
/// Generate random strings with mixed charset including accented characters
///
/// Produces `num_lines` lines of exactly `length` characters each, followed
/// by `\n`. Characters are picked deterministically with the index formula
/// `((i * length + j) * 17 + 42) % charset_len`, so repeated calls with the
/// same arguments yield the same bytes.
///
/// The charset is indexed by character, not by byte. The accented letters are
/// multi-byte in UTF-8; indexing the raw bytes and casting each to `char`
/// (as the previous version did) splits them and emits characters such as
/// `Ã` that are not part of the charset.
pub fn generate_random_strings(num_lines: usize, length: usize) -> Vec<u8> {
    let charset: Vec<char> =
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789äöüÄÖÜßéèêëàâîïôûç"
            .chars()
            .collect();

    let mut data = Vec::new();
    // One line buffer reused across iterations.
    let mut line = String::new();

    for i in 0..num_lines {
        line.clear();
        for j in 0..length {
            let idx = ((i * length + j) * 17 + 42) % charset.len();
            line.push(charset[idx]);
        }
        line.push('\n');
        data.extend_from_slice(line.as_bytes());
    }
    data
}
|
||||
|
||||
/// Generate numeric data for benchmarking (simple sequential numbers)
///
/// Returns the integers `1..=count` in decimal, separated by `\n`, with no
/// trailing newline. Returns an empty string when `count` is 0.
///
/// The text is written into a single `String` rather than collecting one
/// `String` per number and joining them afterwards.
pub fn generate_numbers(count: usize) -> String {
    use std::fmt::Write as _;

    let mut out = String::new();
    for n in 1..=count {
        if n > 1 {
            out.push('\n');
        }
        write!(out, "{n}").expect("writing to a String cannot fail");
    }
    out
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue