benchmarking: move the common functions into uucore

This commit is contained in:
Sylvestre Ledru 2025-09-23 10:05:08 +02:00
parent ad7aa24206
commit e3f5a23603
10 changed files with 143 additions and 146 deletions

View file

@ -387,7 +387,6 @@ jobs:
test -h /tmp/usr/local/bin/sha512sum
test -h /tmp/usr/local/bin/shake128sum
test -h /tmp/usr/local/bin/shake256sum
build_rust_stable:
name: Build/stable
needs: [ min_version, deps ]

1
Cargo.lock generated
View file

@ -4268,6 +4268,7 @@ dependencies = [
"bstr",
"chrono",
"clap",
"codspeed-divan-compat",
"crc-fast",
"crc32fast",
"data-encoding",

View file

@ -30,6 +30,7 @@ path = "src/main.rs"
[dev-dependencies]
divan = { workspace = true }
tempfile = { workspace = true }
uucore = { workspace = true, features = ["benchmark"] }
[[bench]]
name = "tsort_bench"

View file

@ -4,9 +4,7 @@
// file that was distributed with this source code.
use divan::{Bencher, black_box};
use std::fs::File;
use std::io::{BufWriter, Write};
use tempfile::TempDir;
use uucore::benchmark::{create_test_file, run_uutils_binary};
/// Generate topological sort test data with different characteristics
fn generate_linear_chain(num_nodes: usize) -> Vec<u8> {
@ -117,42 +115,17 @@ fn generate_wide_dag(num_nodes: usize) -> Vec<u8> {
data
}
/// Create a temporary file with test data
fn create_test_file(data: &[u8], temp_dir: &TempDir) -> std::path::PathBuf {
let file_path = temp_dir.path().join("test_data.txt");
let file = File::create(&file_path).unwrap();
let mut writer = BufWriter::new(file);
writer.write_all(data).unwrap();
writer.flush().unwrap();
file_path
}
/// Run uutils tsort with given arguments
fn run_uutils_tsort(args: &[&str]) -> i32 {
use std::process::{Command, Stdio};
// Use the binary instead of calling uumain directly to avoid stdout issues
let output = Command::new("../../../target/release/coreutils")
.args(["tsort"].iter().chain(args.iter()))
.stdout(Stdio::null())
.stderr(Stdio::null())
.status()
.expect("Failed to execute tsort command");
i32::from(!output.success())
}
/// Benchmark linear chain graphs of different sizes
/// This tests the performance improvements mentioned in PR #8694
#[divan::bench(args = [1_000, 10_000, 100_000, 1_000_000])]
fn tsort_linear_chain(bencher: Bencher, num_nodes: usize) {
let temp_dir = tempfile::tempdir().unwrap();
let data = generate_linear_chain(num_nodes);
let file_path = create_test_file(&data, &temp_dir);
let file_path = create_test_file(&data, temp_dir.path());
let file_path_str = file_path.to_str().unwrap();
bencher.bench(|| {
black_box(run_uutils_tsort(&[file_path_str]));
black_box(run_uutils_binary("tsort", &[file_path_str]));
});
}
@ -161,11 +134,11 @@ fn tsort_linear_chain(bencher: Bencher, num_nodes: usize) {
fn tsort_tree_dag(bencher: Bencher, (depth, branching): (usize, usize)) {
let temp_dir = tempfile::tempdir().unwrap();
let data = generate_tree_dag(depth, branching);
let file_path = create_test_file(&data, &temp_dir);
let file_path = create_test_file(&data, temp_dir.path());
let file_path_str = file_path.to_str().unwrap();
bencher.bench(|| {
black_box(run_uutils_tsort(&[file_path_str]));
black_box(run_uutils_binary("tsort", &[file_path_str]));
});
}
@ -174,11 +147,11 @@ fn tsort_tree_dag(bencher: Bencher, (depth, branching): (usize, usize)) {
fn tsort_complex_dag(bencher: Bencher, num_nodes: usize) {
let temp_dir = tempfile::tempdir().unwrap();
let data = generate_complex_dag(num_nodes);
let file_path = create_test_file(&data, &temp_dir);
let file_path = create_test_file(&data, temp_dir.path());
let file_path_str = file_path.to_str().unwrap();
bencher.bench(|| {
black_box(run_uutils_tsort(&[file_path_str]));
black_box(run_uutils_binary("tsort", &[file_path_str]));
});
}
@ -188,11 +161,11 @@ fn tsort_complex_dag(bencher: Bencher, num_nodes: usize) {
fn tsort_wide_dag(bencher: Bencher, num_nodes: usize) {
let temp_dir = tempfile::tempdir().unwrap();
let data = generate_wide_dag(num_nodes);
let file_path = create_test_file(&data, &temp_dir);
let file_path = create_test_file(&data, temp_dir.path());
let file_path_str = file_path.to_str().unwrap();
bencher.bench(|| {
black_box(run_uutils_tsort(&[file_path_str]));
black_box(run_uutils_binary("tsort", &[file_path_str]));
});
}
@ -213,11 +186,11 @@ fn tsort_input_parsing_heavy(bencher: Bencher, num_edges: usize) {
}
}
let file_path = create_test_file(&data, &temp_dir);
let file_path = create_test_file(&data, temp_dir.path());
let file_path_str = file_path.to_str().unwrap();
bencher.bench(|| {
black_box(run_uutils_tsort(&[file_path_str]));
black_box(run_uutils_binary("tsort", &[file_path_str]));
});
}

View file

@ -32,6 +32,7 @@ libc = { workspace = true }
[dev-dependencies]
divan = { workspace = true }
tempfile = { workspace = true }
uucore = { workspace = true, features = ["benchmark"] }
[[bin]]
name = "wc"

View file

@ -4,109 +4,30 @@
// file that was distributed with this source code.
use divan::{Bencher, black_box};
use std::fs::File;
use std::io::{BufWriter, Write};
use tempfile::TempDir;
/// Generate test data with different characteristics
fn generate_test_data(size_mb: usize, avg_line_length: usize) -> Vec<u8> {
let total_size = size_mb * 1024 * 1024;
let mut data = Vec::with_capacity(total_size);
let mut current_size = 0;
let mut line_chars = 0;
while current_size < total_size {
if line_chars >= avg_line_length {
data.push(b'\n');
line_chars = 0;
} else {
// Use various ASCII characters to make it realistic
data.push(b'a' + (current_size % 26) as u8);
line_chars += 1;
}
current_size += 1;
}
// Ensure we end with a newline
if data.last() != Some(&b'\n') {
data.push(b'\n');
}
data
}
/// Generate test data by line count instead of size
fn generate_test_data_by_lines(num_lines: usize, avg_line_length: usize) -> Vec<u8> {
let mut data = Vec::new();
for line_num in 0..num_lines {
// Vary line length slightly for realism
let line_length = avg_line_length + (line_num % 40).saturating_sub(20);
for char_pos in 0..line_length {
// Create more realistic text with spaces
if char_pos > 0 && char_pos % 8 == 0 {
data.push(b' '); // Add spaces every 8 characters
} else {
// Cycle through letters with some variation
let char_offset = (line_num + char_pos) % 26;
data.push(b'a' + char_offset as u8);
}
}
data.push(b'\n');
}
data
}
/// Create a temporary file with test data
fn create_test_file(data: &[u8], temp_dir: &TempDir) -> std::path::PathBuf {
let file_path = temp_dir.path().join("test_data.txt");
let file = File::create(&file_path).unwrap();
let mut writer = BufWriter::new(file);
writer.write_all(data).unwrap();
writer.flush().unwrap();
file_path
}
/// Run uutils wc with given arguments
fn run_uutils_wc(args: &[&str]) -> i32 {
use std::process::{Command, Stdio};
// Use the binary instead of calling uumain directly to avoid stdout issues
let output = Command::new("../../../target/release/coreutils")
.args(["wc"].iter().chain(args.iter()))
.stdout(Stdio::null())
.stderr(Stdio::null())
.status()
.expect("Failed to execute wc command");
i32::from(!output.success())
}
use uucore::benchmark::{create_test_file, run_uutils_binary, text_data};
/// Benchmark different file sizes for byte counting
#[divan::bench(args = [10, 50, 100])]
fn wc_bytes_synthetic(bencher: Bencher, size_mb: usize) {
let temp_dir = tempfile::tempdir().unwrap();
let data = generate_test_data(size_mb, 80);
let file_path = create_test_file(&data, &temp_dir);
let data = text_data::generate_by_size(size_mb, 80);
let file_path = create_test_file(&data, temp_dir.path());
let file_path_str = file_path.to_str().unwrap();
bencher.bench(|| {
black_box(run_uutils_wc(&["-c", file_path_str]));
black_box(run_uutils_binary("wc", &["-c", file_path_str]));
});
}
#[divan::bench(args = [10, 100, 1_000])]
fn wc_words_synthetic(bencher: Bencher, size_mb: usize) {
let temp_dir = tempfile::tempdir().unwrap();
let data = generate_test_data(size_mb, 80);
let file_path = create_test_file(&data, &temp_dir);
let data = text_data::generate_by_size(size_mb, 80);
let file_path = create_test_file(&data, temp_dir.path());
let file_path_str = file_path.to_str().unwrap();
bencher.bench(|| {
black_box(run_uutils_wc(&["-w", file_path_str]));
black_box(run_uutils_binary("wc", &["-w", file_path_str]));
});
}
@ -114,12 +35,12 @@ fn wc_words_synthetic(bencher: Bencher, size_mb: usize) {
#[divan::bench(args = [10, 100, 1_000])]
fn wc_bytes_lines_synthetic(bencher: Bencher, size_mb: usize) {
let temp_dir = tempfile::tempdir().unwrap();
let data = generate_test_data(size_mb, 80);
let file_path = create_test_file(&data, &temp_dir);
let data = text_data::generate_by_size(size_mb, 80);
let file_path = create_test_file(&data, temp_dir.path());
let file_path_str = file_path.to_str().unwrap();
bencher.bench(|| {
black_box(run_uutils_wc(&["-cl", file_path_str]));
black_box(run_uutils_binary("wc", &["-cl", file_path_str]));
});
}
@ -127,12 +48,12 @@ fn wc_bytes_lines_synthetic(bencher: Bencher, size_mb: usize) {
#[divan::bench(args = [(5, 50), (5, 100), (5, 200), (5, 500)])]
fn wc_lines_variable_length(bencher: Bencher, (size_mb, avg_line_len): (usize, usize)) {
let temp_dir = tempfile::tempdir().unwrap();
let data = generate_test_data(size_mb, avg_line_len);
let file_path = create_test_file(&data, &temp_dir);
let data = text_data::generate_by_size(size_mb, avg_line_len);
let file_path = create_test_file(&data, temp_dir.path());
let file_path_str = file_path.to_str().unwrap();
bencher.bench(|| {
black_box(run_uutils_wc(&["-l", file_path_str]));
black_box(run_uutils_binary("wc", &["-l", file_path_str]));
});
}
@ -140,12 +61,12 @@ fn wc_lines_variable_length(bencher: Bencher, (size_mb, avg_line_len): (usize, u
#[divan::bench(args = [10_000, 50_000, 100_000, 500_000])]
fn wc_lines_large_line_count(bencher: Bencher, num_lines: usize) {
let temp_dir = tempfile::tempdir().unwrap();
let data = generate_test_data_by_lines(num_lines, 80);
let file_path = create_test_file(&data, &temp_dir);
let data = text_data::generate_by_lines(num_lines, 80);
let file_path = create_test_file(&data, temp_dir.path());
let file_path_str = file_path.to_str().unwrap();
bencher.bench(|| {
black_box(run_uutils_wc(&["-l", file_path_str]));
black_box(run_uutils_binary("wc", &["-l", file_path_str]));
});
}
@ -153,12 +74,12 @@ fn wc_lines_large_line_count(bencher: Bencher, num_lines: usize) {
#[divan::bench(args = [10_000, 50_000, 100_000])]
fn wc_chars_large_line_count(bencher: Bencher, num_lines: usize) {
let temp_dir = tempfile::tempdir().unwrap();
let data = generate_test_data_by_lines(num_lines, 80);
let file_path = create_test_file(&data, &temp_dir);
let data = text_data::generate_by_lines(num_lines, 80);
let file_path = create_test_file(&data, temp_dir.path());
let file_path_str = file_path.to_str().unwrap();
bencher.bench(|| {
black_box(run_uutils_wc(&["-m", file_path_str]));
black_box(run_uutils_binary("wc", &["-m", file_path_str]));
});
}
@ -166,12 +87,12 @@ fn wc_chars_large_line_count(bencher: Bencher, num_lines: usize) {
#[divan::bench(args = [10_000, 50_000, 100_000])]
fn wc_words_large_line_count(bencher: Bencher, num_lines: usize) {
let temp_dir = tempfile::tempdir().unwrap();
let data = generate_test_data_by_lines(num_lines, 80);
let file_path = create_test_file(&data, &temp_dir);
let data = text_data::generate_by_lines(num_lines, 80);
let file_path = create_test_file(&data, temp_dir.path());
let file_path_str = file_path.to_str().unwrap();
bencher.bench(|| {
black_box(run_uutils_wc(&["-w", file_path_str]));
black_box(run_uutils_binary("wc", &["-w", file_path_str]));
});
}
@ -179,12 +100,12 @@ fn wc_words_large_line_count(bencher: Bencher, num_lines: usize) {
#[divan::bench(args = [10_000, 50_000, 100_000])]
fn wc_default_large_line_count(bencher: Bencher, num_lines: usize) {
let temp_dir = tempfile::tempdir().unwrap();
let data = generate_test_data_by_lines(num_lines, 80);
let file_path = create_test_file(&data, &temp_dir);
let data = text_data::generate_by_lines(num_lines, 80);
let file_path = create_test_file(&data, temp_dir.path());
let file_path_str = file_path.to_str().unwrap();
bencher.bench(|| {
black_box(run_uutils_wc(&[file_path_str]));
black_box(run_uutils_binary("wc", &[file_path_str]));
});
}
@ -192,12 +113,12 @@ fn wc_default_large_line_count(bencher: Bencher, num_lines: usize) {
#[divan::bench(args = [(100_000, 10), (100_000, 200), (100_000, 1000)])]
fn wc_lines_extreme_line_lengths(bencher: Bencher, (num_lines, line_len): (usize, usize)) {
let temp_dir = tempfile::tempdir().unwrap();
let data = generate_test_data_by_lines(num_lines, line_len);
let file_path = create_test_file(&data, &temp_dir);
let data = text_data::generate_by_lines(num_lines, line_len);
let file_path = create_test_file(&data, temp_dir.path());
let file_path_str = file_path.to_str().unwrap();
bencher.bench(|| {
black_box(run_uutils_wc(&["-l", file_path_str]));
black_box(run_uutils_binary("wc", &["-l", file_path_str]));
});
}

View file

@ -47,6 +47,9 @@ base64-simd = { version = "0.8", optional = true }
libc = { workspace = true, optional = true }
os_display = "0.1.3"
# Benchmark dependencies (optional)
divan = { workspace = true, optional = true }
digest = { workspace = true, optional = true }
hex = { workspace = true, optional = true }
memchr = { workspace = true, optional = true }
@ -169,3 +172,4 @@ wide = []
tty = []
time = ["jiff"]
uptime = ["chrono", "libc", "windows-sys", "utmpx", "utmp-classic"]
benchmark = ["divan"]

View file

@ -8,6 +8,8 @@
#[cfg(feature = "backup-control")]
pub mod backup_control;
#[cfg(feature = "benchmark")]
pub mod benchmark;
#[cfg(feature = "buf-copy")]
pub mod buf_copy;
#[cfg(feature = "checksum")]

View file

@ -0,0 +1,93 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
//! Common benchmark utilities for uutils coreutils
//!
//! This module provides shared functionality for benchmarking utilities,
//! including test data generation and binary execution helpers.
use std::fs::File;
use std::io::{BufWriter, Write};
use std::path::{Path, PathBuf};
/// Write `data` to a file named `test_data.txt` inside `temp_dir` and return its path.
///
/// The file name is fixed, so calling this twice with the same directory
/// overwrites the previous contents. The function does not remove the file
/// itself; cleanup is left to whoever owns `temp_dir`.
///
/// # Panics
///
/// Panics if the file cannot be created, written, or flushed.
pub fn create_test_file(data: &[u8], temp_dir: &Path) -> PathBuf {
    let target = temp_dir.join("test_data.txt");

    let mut sink = BufWriter::new(File::create(&target).unwrap());
    sink.write_all(data).unwrap();
    // Flush explicitly so that a write error surfaces here instead of being
    // silently dropped when the writer goes out of scope.
    sink.flush().unwrap();

    target
}
/// Run a uutils utility through the coreutils multicall binary.
///
/// Spawns `../../../target/release/coreutils <util_name> <args...>` with
/// stdout and stderr discarded, waits for the process to exit, and returns
/// `0` when it reported success and `1` otherwise.
///
/// The binary path is relative, so it is resolved against the current
/// working directory of the calling process.
///
/// # Panics
///
/// Panics if the process cannot be spawned (for example when the release
/// binary has not been built). The panic message names the utility, the
/// binary path that was tried, and the underlying I/O error.
pub fn run_uutils_binary(util_name: &str, args: &[&str]) -> i32 {
    use std::process::{Command, Stdio};

    // Use the multicall binary
    const MULTICALL_BINARY: &str = "../../../target/release/coreutils";

    let status = Command::new(MULTICALL_BINARY)
        .arg(util_name)
        .args(args)
        .stdout(Stdio::null())
        .stderr(Stdio::null())
        .status()
        .unwrap_or_else(|err| {
            panic!("Failed to execute command `{util_name}` via {MULTICALL_BINARY}: {err}")
        });

    // Collapse the exit status to 0 (success) or 1 (any failure).
    i32::from(!status.success())
}
/// Generate test data with different characteristics for text processing utilities
pub mod text_data {
    /// Generate `size_mb` MiB of lowercase ASCII text broken into lines.
    ///
    /// Every line holds `avg_line_length` letters followed by `\n`; only the
    /// final line may be shorter. Each letter is chosen from its absolute byte
    /// offset modulo 26, so the output is fully deterministic.
    ///
    /// The result is `size_mb * 1024 * 1024` bytes long, plus one extra byte
    /// when a trailing newline has to be appended (this includes
    /// `size_mb == 0`, which yields a single `\n`).
    pub fn generate_by_size(size_mb: usize, avg_line_length: usize) -> Vec<u8> {
        let total_size = size_mb * 1024 * 1024;
        // Reserve one byte beyond the payload: the trailing newline appended
        // below would otherwise overflow the capacity and make the vector
        // reallocate and copy the entire buffer.
        let mut data = Vec::with_capacity(total_size + 1);
        let mut line_chars = 0;

        for offset in 0..total_size {
            if line_chars >= avg_line_length {
                data.push(b'\n');
                line_chars = 0;
            } else {
                // Use various ASCII characters to make it realistic
                data.push(b'a' + (offset % 26) as u8);
                line_chars += 1;
            }
        }

        // Ensure we end with a newline
        if data.last() != Some(&b'\n') {
            data.push(b'\n');
        }

        data
    }

    /// Generate `num_lines` newline-terminated lines of lowercase ASCII text.
    ///
    /// Line `n` contains `avg_line_length + (n % 40).saturating_sub(20)`
    /// bytes before its newline, i.e. between `avg_line_length` and
    /// `avg_line_length + 19`. Because the subtraction saturates at zero,
    /// lines are never shorter than `avg_line_length`.
    ///
    /// Within a line, every position that is a non-zero multiple of 8 is a
    /// space; every other position is the letter
    /// `'a' + (line index + position) % 26`.
    pub fn generate_by_lines(num_lines: usize, avg_line_length: usize) -> Vec<u8> {
        let mut data = Vec::new();

        for line_num in 0..num_lines {
            // Vary line length slightly for realism
            let line_length = avg_line_length + (line_num % 40).saturating_sub(20);

            for char_pos in 0..line_length {
                // Create more realistic text with spaces
                if char_pos > 0 && char_pos % 8 == 0 {
                    data.push(b' '); // Add spaces every 8 characters
                } else {
                    // Cycle through letters with some variation
                    let char_offset = (line_num + char_pos) % 26;
                    data.push(b'a' + char_offset as u8);
                }
            }
            data.push(b'\n');
        }

        data
    }
}

View file

@ -36,6 +36,8 @@ pub use crate::mods::posix;
// * feature-gated modules
#[cfg(feature = "backup-control")]
pub use crate::features::backup_control;
#[cfg(feature = "benchmark")]
pub use crate::features::benchmark;
#[cfg(feature = "buf-copy")]
pub use crate::features::buf_copy;
#[cfg(feature = "checksum")]