diff --git a/.github/workflows/CICD.yml b/.github/workflows/CICD.yml index 40eed009a..ca6fbcd7c 100644 --- a/.github/workflows/CICD.yml +++ b/.github/workflows/CICD.yml @@ -387,7 +387,6 @@ jobs: test -h /tmp/usr/local/bin/sha512sum test -h /tmp/usr/local/bin/shake128sum test -h /tmp/usr/local/bin/shake256sum - build_rust_stable: name: Build/stable needs: [ min_version, deps ] diff --git a/Cargo.lock b/Cargo.lock index 88012dffb..071371ba8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4268,6 +4268,7 @@ dependencies = [ "bstr", "chrono", "clap", + "codspeed-divan-compat", "crc-fast", "crc32fast", "data-encoding", diff --git a/src/uu/tsort/Cargo.toml b/src/uu/tsort/Cargo.toml index 2bd858450..94b170223 100644 --- a/src/uu/tsort/Cargo.toml +++ b/src/uu/tsort/Cargo.toml @@ -30,6 +30,7 @@ path = "src/main.rs" [dev-dependencies] divan = { workspace = true } tempfile = { workspace = true } +uucore = { workspace = true, features = ["benchmark"] } [[bench]] name = "tsort_bench" diff --git a/src/uu/tsort/benches/tsort_bench.rs b/src/uu/tsort/benches/tsort_bench.rs index cc9c723f4..cfe78c4bf 100644 --- a/src/uu/tsort/benches/tsort_bench.rs +++ b/src/uu/tsort/benches/tsort_bench.rs @@ -4,9 +4,7 @@ // file that was distributed with this source code. 
use divan::{Bencher, black_box}; -use std::fs::File; -use std::io::{BufWriter, Write}; -use tempfile::TempDir; +use uucore::benchmark::{create_test_file, run_uutils_binary}; /// Generate topological sort test data with different characteristics fn generate_linear_chain(num_nodes: usize) -> Vec<u8> { @@ -117,42 +115,17 @@ fn generate_wide_dag(num_nodes: usize) -> Vec<u8> { data } -/// Create a temporary file with test data -fn create_test_file(data: &[u8], temp_dir: &TempDir) -> std::path::PathBuf { - let file_path = temp_dir.path().join("test_data.txt"); - let file = File::create(&file_path).unwrap(); - let mut writer = BufWriter::new(file); - writer.write_all(data).unwrap(); - writer.flush().unwrap(); - file_path -} - -/// Run uutils tsort with given arguments -fn run_uutils_tsort(args: &[&str]) -> i32 { - use std::process::{Command, Stdio}; - - // Use the binary instead of calling uumain directly to avoid stdout issues - let output = Command::new("../../../target/release/coreutils") - .args(["tsort"].iter().chain(args.iter())) - .stdout(Stdio::null()) - .stderr(Stdio::null()) - .status() - .expect("Failed to execute tsort command"); - - i32::from(!output.success()) -} - /// Benchmark linear chain graphs of different sizes /// This tests the performance improvements mentioned in PR #8694 #[divan::bench(args = [1_000, 10_000, 100_000, 1_000_000])] fn tsort_linear_chain(bencher: Bencher, num_nodes: usize) { let temp_dir = tempfile::tempdir().unwrap(); let data = generate_linear_chain(num_nodes); - let file_path = create_test_file(&data, &temp_dir); + let file_path = create_test_file(&data, temp_dir.path()); let file_path_str = file_path.to_str().unwrap(); bencher.bench(|| { - black_box(run_uutils_tsort(&[file_path_str])); + black_box(run_uutils_binary("tsort", &[file_path_str])); }); } @@ -161,11 +134,11 @@ fn tsort_linear_chain(bencher: Bencher, num_nodes: usize) { fn tsort_tree_dag(bencher: Bencher, (depth, branching): (usize, usize)) { let temp_dir = 
tempfile::tempdir().unwrap(); let data = generate_tree_dag(depth, branching); - let file_path = create_test_file(&data, &temp_dir); + let file_path = create_test_file(&data, temp_dir.path()); let file_path_str = file_path.to_str().unwrap(); bencher.bench(|| { - black_box(run_uutils_tsort(&[file_path_str])); + black_box(run_uutils_binary("tsort", &[file_path_str])); }); } @@ -174,11 +147,11 @@ fn tsort_tree_dag(bencher: Bencher, (depth, branching): (usize, usize)) { fn tsort_complex_dag(bencher: Bencher, num_nodes: usize) { let temp_dir = tempfile::tempdir().unwrap(); let data = generate_complex_dag(num_nodes); - let file_path = create_test_file(&data, &temp_dir); + let file_path = create_test_file(&data, temp_dir.path()); let file_path_str = file_path.to_str().unwrap(); bencher.bench(|| { - black_box(run_uutils_tsort(&[file_path_str])); + black_box(run_uutils_binary("tsort", &[file_path_str])); }); } @@ -188,11 +161,11 @@ fn tsort_complex_dag(bencher: Bencher, num_nodes: usize) { fn tsort_wide_dag(bencher: Bencher, num_nodes: usize) { let temp_dir = tempfile::tempdir().unwrap(); let data = generate_wide_dag(num_nodes); - let file_path = create_test_file(&data, &temp_dir); + let file_path = create_test_file(&data, temp_dir.path()); let file_path_str = file_path.to_str().unwrap(); bencher.bench(|| { - black_box(run_uutils_tsort(&[file_path_str])); + black_box(run_uutils_binary("tsort", &[file_path_str])); }); } @@ -213,11 +186,11 @@ fn tsort_input_parsing_heavy(bencher: Bencher, num_edges: usize) { } } - let file_path = create_test_file(&data, &temp_dir); + let file_path = create_test_file(&data, temp_dir.path()); let file_path_str = file_path.to_str().unwrap(); bencher.bench(|| { - black_box(run_uutils_tsort(&[file_path_str])); + black_box(run_uutils_binary("tsort", &[file_path_str])); }); } diff --git a/src/uu/wc/Cargo.toml b/src/uu/wc/Cargo.toml index aff31fac3..144fcd083 100644 --- a/src/uu/wc/Cargo.toml +++ b/src/uu/wc/Cargo.toml @@ -32,6 +32,7 @@ libc = { 
workspace = true } [dev-dependencies] divan = { workspace = true } tempfile = { workspace = true } +uucore = { workspace = true, features = ["benchmark"] } [[bin]] name = "wc" diff --git a/src/uu/wc/benches/wc_bench.rs b/src/uu/wc/benches/wc_bench.rs index 72aa85c16..3a764f5fd 100644 --- a/src/uu/wc/benches/wc_bench.rs +++ b/src/uu/wc/benches/wc_bench.rs @@ -4,109 +4,30 @@ // file that was distributed with this source code. use divan::{Bencher, black_box}; -use std::fs::File; -use std::io::{BufWriter, Write}; -use tempfile::TempDir; - -/// Generate test data with different characteristics -fn generate_test_data(size_mb: usize, avg_line_length: usize) -> Vec<u8> { - let total_size = size_mb * 1024 * 1024; - let mut data = Vec::with_capacity(total_size); - - let mut current_size = 0; - let mut line_chars = 0; - - while current_size < total_size { - if line_chars >= avg_line_length { - data.push(b'\n'); - line_chars = 0; - } else { - // Use various ASCII characters to make it realistic - data.push(b'a' + (current_size % 26) as u8); - line_chars += 1; - } - current_size += 1; - } - - // Ensure we end with a newline - if data.last() != Some(&b'\n') { - data.push(b'\n'); - } - - data -} - -/// Generate test data by line count instead of size -fn generate_test_data_by_lines(num_lines: usize, avg_line_length: usize) -> Vec<u8> { - let mut data = Vec::new(); - - for line_num in 0..num_lines { - // Vary line length slightly for realism - let line_length = avg_line_length + (line_num % 40).saturating_sub(20); - - for char_pos in 0..line_length { - // Create more realistic text with spaces - if char_pos > 0 && char_pos % 8 == 0 { - data.push(b' '); // Add spaces every 8 characters - } else { - // Cycle through letters with some variation - let char_offset = (line_num + char_pos) % 26; - data.push(b'a' + char_offset as u8); - } - } - data.push(b'\n'); - } - - data -} - -/// Create a temporary file with test data -fn create_test_file(data: &[u8], temp_dir: &TempDir) -> 
std::path::PathBuf { - let file_path = temp_dir.path().join("test_data.txt"); - let file = File::create(&file_path).unwrap(); - let mut writer = BufWriter::new(file); - writer.write_all(data).unwrap(); - writer.flush().unwrap(); - file_path -} - -/// Run uutils wc with given arguments -fn run_uutils_wc(args: &[&str]) -> i32 { - use std::process::{Command, Stdio}; - - // Use the binary instead of calling uumain directly to avoid stdout issues - let output = Command::new("../../../target/release/coreutils") - .args(["wc"].iter().chain(args.iter())) - .stdout(Stdio::null()) - .stderr(Stdio::null()) - .status() - .expect("Failed to execute wc command"); - - i32::from(!output.success()) -} +use uucore::benchmark::{create_test_file, run_uutils_binary, text_data}; /// Benchmark different file sizes for byte counting #[divan::bench(args = [10, 50, 100])] fn wc_bytes_synthetic(bencher: Bencher, size_mb: usize) { let temp_dir = tempfile::tempdir().unwrap(); - let data = generate_test_data(size_mb, 80); - let file_path = create_test_file(&data, &temp_dir); + let data = text_data::generate_by_size(size_mb, 80); + let file_path = create_test_file(&data, temp_dir.path()); let file_path_str = file_path.to_str().unwrap(); bencher.bench(|| { - black_box(run_uutils_wc(&["-c", file_path_str])); + black_box(run_uutils_binary("wc", &["-c", file_path_str])); }); } #[divan::bench(args = [10, 100, 1_000])] fn wc_words_synthetic(bencher: Bencher, size_mb: usize) { let temp_dir = tempfile::tempdir().unwrap(); - let data = generate_test_data(size_mb, 80); - let file_path = create_test_file(&data, &temp_dir); + let data = text_data::generate_by_size(size_mb, 80); + let file_path = create_test_file(&data, temp_dir.path()); let file_path_str = file_path.to_str().unwrap(); bencher.bench(|| { - black_box(run_uutils_wc(&["-w", file_path_str])); + black_box(run_uutils_binary("wc", &["-w", file_path_str])); }); } @@ -114,12 +35,12 @@ fn wc_words_synthetic(bencher: Bencher, size_mb: usize) { 
#[divan::bench(args = [10, 100, 1_000])] fn wc_bytes_lines_synthetic(bencher: Bencher, size_mb: usize) { let temp_dir = tempfile::tempdir().unwrap(); - let data = generate_test_data(size_mb, 80); - let file_path = create_test_file(&data, &temp_dir); + let data = text_data::generate_by_size(size_mb, 80); + let file_path = create_test_file(&data, temp_dir.path()); let file_path_str = file_path.to_str().unwrap(); bencher.bench(|| { - black_box(run_uutils_wc(&["-cl", file_path_str])); + black_box(run_uutils_binary("wc", &["-cl", file_path_str])); }); } @@ -127,12 +48,12 @@ fn wc_bytes_lines_synthetic(bencher: Bencher, size_mb: usize) { #[divan::bench(args = [(5, 50), (5, 100), (5, 200), (5, 500)])] fn wc_lines_variable_length(bencher: Bencher, (size_mb, avg_line_len): (usize, usize)) { let temp_dir = tempfile::tempdir().unwrap(); - let data = generate_test_data(size_mb, avg_line_len); - let file_path = create_test_file(&data, &temp_dir); + let data = text_data::generate_by_size(size_mb, avg_line_len); + let file_path = create_test_file(&data, temp_dir.path()); let file_path_str = file_path.to_str().unwrap(); bencher.bench(|| { - black_box(run_uutils_wc(&["-l", file_path_str])); + black_box(run_uutils_binary("wc", &["-l", file_path_str])); }); } @@ -140,12 +61,12 @@ fn wc_lines_variable_length(bencher: Bencher, (size_mb, avg_line_len): (usize, u #[divan::bench(args = [10_000, 50_000, 100_000, 500_000])] fn wc_lines_large_line_count(bencher: Bencher, num_lines: usize) { let temp_dir = tempfile::tempdir().unwrap(); - let data = generate_test_data_by_lines(num_lines, 80); - let file_path = create_test_file(&data, &temp_dir); + let data = text_data::generate_by_lines(num_lines, 80); + let file_path = create_test_file(&data, temp_dir.path()); let file_path_str = file_path.to_str().unwrap(); bencher.bench(|| { - black_box(run_uutils_wc(&["-l", file_path_str])); + black_box(run_uutils_binary("wc", &["-l", file_path_str])); }); } @@ -153,12 +74,12 @@ fn 
wc_lines_large_line_count(bencher: Bencher, num_lines: usize) { #[divan::bench(args = [10_000, 50_000, 100_000])] fn wc_chars_large_line_count(bencher: Bencher, num_lines: usize) { let temp_dir = tempfile::tempdir().unwrap(); - let data = generate_test_data_by_lines(num_lines, 80); - let file_path = create_test_file(&data, &temp_dir); + let data = text_data::generate_by_lines(num_lines, 80); + let file_path = create_test_file(&data, temp_dir.path()); let file_path_str = file_path.to_str().unwrap(); bencher.bench(|| { - black_box(run_uutils_wc(&["-m", file_path_str])); + black_box(run_uutils_binary("wc", &["-m", file_path_str])); }); } @@ -166,12 +87,12 @@ fn wc_chars_large_line_count(bencher: Bencher, num_lines: usize) { #[divan::bench(args = [10_000, 50_000, 100_000])] fn wc_words_large_line_count(bencher: Bencher, num_lines: usize) { let temp_dir = tempfile::tempdir().unwrap(); - let data = generate_test_data_by_lines(num_lines, 80); - let file_path = create_test_file(&data, &temp_dir); + let data = text_data::generate_by_lines(num_lines, 80); + let file_path = create_test_file(&data, temp_dir.path()); let file_path_str = file_path.to_str().unwrap(); bencher.bench(|| { - black_box(run_uutils_wc(&["-w", file_path_str])); + black_box(run_uutils_binary("wc", &["-w", file_path_str])); }); } @@ -179,12 +100,12 @@ fn wc_words_large_line_count(bencher: Bencher, num_lines: usize) { #[divan::bench(args = [10_000, 50_000, 100_000])] fn wc_default_large_line_count(bencher: Bencher, num_lines: usize) { let temp_dir = tempfile::tempdir().unwrap(); - let data = generate_test_data_by_lines(num_lines, 80); - let file_path = create_test_file(&data, &temp_dir); + let data = text_data::generate_by_lines(num_lines, 80); + let file_path = create_test_file(&data, temp_dir.path()); let file_path_str = file_path.to_str().unwrap(); bencher.bench(|| { - black_box(run_uutils_wc(&[file_path_str])); + black_box(run_uutils_binary("wc", &[file_path_str])); }); } @@ -192,12 +113,12 @@ fn 
wc_default_large_line_count(bencher: Bencher, num_lines: usize) { #[divan::bench(args = [(100_000, 10), (100_000, 200), (100_000, 1000)])] fn wc_lines_extreme_line_lengths(bencher: Bencher, (num_lines, line_len): (usize, usize)) { let temp_dir = tempfile::tempdir().unwrap(); - let data = generate_test_data_by_lines(num_lines, line_len); - let file_path = create_test_file(&data, &temp_dir); + let data = text_data::generate_by_lines(num_lines, line_len); + let file_path = create_test_file(&data, temp_dir.path()); let file_path_str = file_path.to_str().unwrap(); bencher.bench(|| { - black_box(run_uutils_wc(&["-l", file_path_str])); + black_box(run_uutils_binary("wc", &["-l", file_path_str])); }); } diff --git a/src/uucore/Cargo.toml b/src/uucore/Cargo.toml index 6ad56f86e..65e8e6089 100644 --- a/src/uucore/Cargo.toml +++ b/src/uucore/Cargo.toml @@ -47,6 +47,9 @@ base64-simd = { version = "0.8", optional = true } libc = { workspace = true, optional = true } os_display = "0.1.3" +# Benchmark dependencies (optional) +divan = { workspace = true, optional = true } + digest = { workspace = true, optional = true } hex = { workspace = true, optional = true } memchr = { workspace = true, optional = true } @@ -169,3 +172,4 @@ wide = [] tty = [] time = ["jiff"] uptime = ["chrono", "libc", "windows-sys", "utmpx", "utmp-classic"] +benchmark = ["divan"] diff --git a/src/uucore/src/lib/features.rs b/src/uucore/src/lib/features.rs index 5a076aacb..c7edd9a05 100644 --- a/src/uucore/src/lib/features.rs +++ b/src/uucore/src/lib/features.rs @@ -8,6 +8,8 @@ #[cfg(feature = "backup-control")] pub mod backup_control; +#[cfg(feature = "benchmark")] +pub mod benchmark; #[cfg(feature = "buf-copy")] pub mod buf_copy; #[cfg(feature = "checksum")] diff --git a/src/uucore/src/lib/features/benchmark.rs b/src/uucore/src/lib/features/benchmark.rs new file mode 100644 index 000000000..1b2fca9e5 --- /dev/null +++ b/src/uucore/src/lib/features/benchmark.rs @@ -0,0 +1,93 @@ +// This file is part of the 
uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +//! Common benchmark utilities for uutils coreutils +//! +//! This module provides shared functionality for benchmarking utilities, +//! including test data generation and binary execution helpers. + +use std::fs::File; +use std::io::{BufWriter, Write}; +use std::path::{Path, PathBuf}; + +/// Create a temporary file with test data +pub fn create_test_file(data: &[u8], temp_dir: &Path) -> PathBuf { + let file_path = temp_dir.join("test_data.txt"); + let file = File::create(&file_path).unwrap(); + let mut writer = BufWriter::new(file); + writer.write_all(data).unwrap(); + writer.flush().unwrap(); + file_path +} + +/// Run a uutils binary with given arguments using the coreutils multicall binary +pub fn run_uutils_binary(util_name: &str, args: &[&str]) -> i32 { + use std::process::{Command, Stdio}; + + // Use the multicall binary + let output = Command::new("../../../target/release/coreutils") + .args([util_name].iter().chain(args.iter())) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + .expect("Failed to execute command"); + + i32::from(!output.success()) +} + +/// Generate test data with different characteristics for text processing utilities +pub mod text_data { + /// Generate test data with a specific size in MB and average line length + pub fn generate_by_size(size_mb: usize, avg_line_length: usize) -> Vec<u8> { + let total_size = size_mb * 1024 * 1024; + let mut data = Vec::with_capacity(total_size); + + let mut current_size = 0; + let mut line_chars = 0; + + while current_size < total_size { + if line_chars >= avg_line_length { + data.push(b'\n'); + line_chars = 0; + } else { + // Use various ASCII characters to make it realistic + data.push(b'a' + (current_size % 26) as u8); + line_chars += 1; + } + current_size += 1; + } + + // Ensure we end with a newline + if data.last() != Some(&b'\n') 
{ + data.push(b'\n'); + } + + data + } + + /// Generate test data by line count instead of size + pub fn generate_by_lines(num_lines: usize, avg_line_length: usize) -> Vec<u8> { + let mut data = Vec::new(); + + for line_num in 0..num_lines { + // Vary line length slightly for realism + let line_length = avg_line_length + (line_num % 40).saturating_sub(20); + + for char_pos in 0..line_length { + // Create more realistic text with spaces + if char_pos > 0 && char_pos % 8 == 0 { + data.push(b' '); // Add spaces every 8 characters + } else { + // Cycle through letters with some variation + let char_offset = (line_num + char_pos) % 26; + data.push(b'a' + char_offset as u8); + } + } + data.push(b'\n'); + } + + data + } +} diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs index 1904195ef..91c9f001a 100644 --- a/src/uucore/src/lib/lib.rs +++ b/src/uucore/src/lib/lib.rs @@ -36,6 +36,8 @@ pub use crate::mods::posix; // * feature-gated modules #[cfg(feature = "backup-control")] pub use crate::features::backup_control; +#[cfg(feature = "benchmark")] +pub use crate::features::benchmark; #[cfg(feature = "buf-copy")] pub use crate::features::buf_copy; #[cfg(feature = "checksum")]