Merge pull request #8680 from sylvestre/wc-perf
Some checks are pending
FreeBSD / Tests (push) Waiting to run
CICD / Style/cargo-deny (push) Waiting to run
CICD / Build (push) Blocked by required conditions
CICD / Style/deps (push) Waiting to run
CICD / Documentation/warnings (push) Waiting to run
CICD / MinRustV (push) Waiting to run
CICD / Test all features separately (push) Blocked by required conditions
CICD / Dependencies (push) Waiting to run
CICD / Build/Makefile (push) Blocked by required conditions
CICD / Build/stable (push) Blocked by required conditions
CICD / Build/nightly (push) Blocked by required conditions
CICD / Binary sizes (push) Blocked by required conditions
CICD / Tests/Toybox test suite (push) Blocked by required conditions
CICD / Build/SELinux (push) Blocked by required conditions
CICD / Tests/BusyBox test suite (push) Blocked by required conditions
CICD / Code Coverage (push) Waiting to run
CICD / Separate Builds (push) Waiting to run
GnuTests / Run GNU tests (native) (push) Waiting to run
GnuTests / Run GNU tests (SELinux) (push) Waiting to run
Android / Test builds (push) Waiting to run
Code Quality / Style/lint (push) Waiting to run
Code Quality / Style/spelling (push) Waiting to run
Code Quality / Style/Python (push) Waiting to run
Code Quality / Pre-commit hooks (push) Waiting to run
CodSpeed Benchmarks / Run benchmarks (push) Waiting to run
GnuTests / Aggregate GNU test results (push) Blocked by required conditions
Code Quality / Style/format (push) Waiting to run
Code Quality / Style/toml (push) Waiting to run
Devcontainer / Verify devcontainer (push) Waiting to run
WSL2 / Test (push) Waiting to run
Check uudoc Documentation Generation / Verify uudoc generates correct documentation (push) Waiting to run
FreeBSD / Style and Lint (push) Waiting to run

evaluate codspeed for perfs
This commit is contained in:
Sylvestre Ledru 2025-09-20 19:00:15 +02:00 committed by GitHub
commit cb594452d9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 347 additions and 0 deletions

53
.github/workflows/codspeed.yml vendored Normal file
View file

@ -0,0 +1,53 @@
name: CodSpeed Benchmarks
# spell-checker: disable
on:
  push:
    branches:
      - "main"
  pull_request:
permissions:
  contents: read
jobs:
  benchmarks:
    name: Run benchmarks
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5
        with:
          persist-credentials: false
      - name: Install system dependencies
        shell: bash
        run: |
          sudo apt-get -y update
          sudo apt-get -y install libselinux1-dev
      - uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2
      - name: Run sccache-cache
        uses: mozilla-actions/sccache-action@v0.0.9
      - name: Install cargo-codspeed
        shell: bash
        run: cargo install cargo-codspeed --locked
      - name: Run benchmarks
        uses: CodSpeedHQ/action@v4
        with:
          mode: instrumentation
          run: |
            # Find every utility crate that ships benchmarks, then build and run each one once.
            # - `read -r` keeps backslashes in paths intact.
            # - `sort -u` collapses crates that contain several bench files.
            # - `|| exit 1` leaves the loop's subshell with a failure status, so a crate whose
            #   benchmarks fail to build or run fails the step instead of being masked by the
            #   crates processed after it.
            find src/uu/*/benches/ -name "*.rs" 2>/dev/null \
              | while read -r bench_file; do dirname "$(dirname "$bench_file")"; done \
              | sort -u \
              | while read -r crate_dir; do
                  echo "Building benchmarks in $crate_dir"
                  (cd "$crate_dir" && cargo codspeed build) || exit 1
                  echo "Running benchmarks in $crate_dir"
                  (cd "$crate_dir" && cargo codspeed run) || exit 1
                done
          token: ${{ secrets.CODSPEED_TOKEN }}

39
Cargo.lock generated
View file

@ -432,6 +432,12 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "120133d4db2ec47efe2e26502ee984747630c67f51974fca0b6c1340cf2368d3"
[[package]]
name = "condtype"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf0a07a401f374238ab8e2f11a104d2851bf9ce711ec69804834de8af45c7af"
[[package]]
name = "console"
version = "0.16.0"
@ -896,6 +902,31 @@ dependencies = [
"syn",
]
[[package]]
name = "divan"
version = "0.1.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a405457ec78b8fe08b0e32b4a3570ab5dff6dd16eb9e76a5ee0a9d9cbd898933"
dependencies = [
"cfg-if",
"clap",
"condtype",
"divan-macros",
"libc",
"regex-lite",
]
[[package]]
name = "divan-macros"
version = "0.1.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9556bc800956545d6420a640173e5ba7dfa82f38d3ea5a167eb555bc69ac3323"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "dlv-list"
version = "0.5.2"
@ -2335,6 +2366,12 @@ dependencies = [
"regex-syntax",
]
[[package]]
name = "regex-lite"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "943f41321c63ef1c92fd763bfe054d2668f7f225a5c29f0105903dc2fc04ba30"
[[package]]
name = "regex-syntax"
version = "0.8.5"
@ -4072,9 +4109,11 @@ version = "0.2.2"
dependencies = [
"bytecount",
"clap",
"divan",
"fluent",
"libc",
"nix",
"tempfile",
"thiserror 2.0.16",
"unicode-width 0.2.1",
"uucore",

View file

@ -316,6 +316,7 @@ compare = "0.1.0"
crossterm = "0.29.0"
ctor = "0.5.0"
ctrlc = { version = "3.4.7", features = ["termination"] }
divan = "0.1"
dns-lookup = { version = "3.0.0" }
exacl = "0.12.0"
file_diff = "1.0.0"

View file

@ -29,6 +29,14 @@ fluent = { workspace = true }
nix = { workspace = true }
libc = { workspace = true }
[dev-dependencies]
divan = { workspace = true }
tempfile = { workspace = true }
[[bin]]
name = "wc"
path = "src/main.rs"
[[bench]]
name = "wc_bench"
harness = false

View file

@ -0,0 +1,246 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
use divan::{Bencher, black_box};
use std::fs::File;
use std::io::{BufWriter, Write};
use tempfile::TempDir;
/// Build roughly `size_mb` MiB of newline-terminated lowercase ASCII text.
///
/// Every line holds exactly `avg_line_length` letters followed by `\n`. The letter written at
/// byte offset `i` is `b'a' + (i % 26)`, so the content is fully deterministic. When the
/// requested size does not land on a line boundary one extra `\n` is appended, which means the
/// result may be a single byte longer than `size_mb * 1024 * 1024`.
fn generate_test_data(size_mb: usize, avg_line_length: usize) -> Vec<u8> {
    let target_len = size_mb * 1024 * 1024;
    let mut bytes = Vec::with_capacity(target_len);
    // Number of letters emitted since the most recent newline.
    let mut run_length = 0;

    for offset in 0..target_len {
        let next = if run_length < avg_line_length {
            run_length += 1;
            // Rotate through the alphabet based on the absolute offset.
            b'a' + (offset % 26) as u8
        } else {
            run_length = 0;
            b'\n'
        };
        bytes.push(next);
    }

    // Guarantee a trailing newline so the final line is complete.
    if !bytes.ends_with(b"\n") {
        bytes.push(b'\n');
    }
    bytes
}
/// Generate newline-terminated text containing exactly `num_lines` lines.
///
/// Line lengths cycle with a period of 40 lines through
/// `avg_line_length - 20 ..= avg_line_length + 19`, so their mean stays close to
/// `avg_line_length`. Lengths that would fall below zero are clamped to an empty line, which
/// only happens when `avg_line_length` is smaller than 20.
///
/// Within a line, every position that is a non-zero multiple of 8 holds a space; every other
/// position holds the letter `b'a' + ((line_num + char_pos) % 26)`.
fn generate_test_data_by_lines(num_lines: usize, avg_line_length: usize) -> Vec<u8> {
    // Each line needs about `avg_line_length` bytes plus its newline; reserving up front avoids
    // repeated reallocation while building multi-megabyte inputs.
    let estimated_len = num_lines.saturating_mul(avg_line_length.saturating_add(1));
    let mut data = Vec::with_capacity(estimated_len);

    for line_num in 0..num_lines {
        // Add the 0..40 offset *before* subtracting 20. Subtracting first on `usize` clamps the
        // whole negative half of the range to zero, so lines could only ever get longer than
        // `avg_line_length`, never shorter.
        let line_length = (avg_line_length + line_num % 40).saturating_sub(20);

        data.extend((0..line_length).map(|char_pos| {
            if char_pos > 0 && char_pos % 8 == 0 {
                // A space every 8 characters makes the text look like words.
                b' '
            } else {
                // Shift the alphabet by the line number so consecutive lines differ.
                b'a' + ((line_num + char_pos) % 26) as u8
            }
        }));
        data.push(b'\n');
    }
    data
}
/// Write `data` to `test_data.txt` inside `temp_dir` and return the path of the new file.
///
/// Panics if the file cannot be created, written or flushed.
fn create_test_file(data: &[u8], temp_dir: &TempDir) -> std::path::PathBuf {
    let destination = temp_dir.path().join("test_data.txt");
    let mut sink = File::create(&destination).map(BufWriter::new).unwrap();
    sink.write_all(data).unwrap();
    // Flush explicitly so a write error surfaces here rather than being dropped silently.
    sink.flush().unwrap();
    destination
}
/// Invoke the multicall `coreutils` binary as `wc` with `args`.
///
/// Returns `0` when the child exits successfully and `1` otherwise. The child's stdout and
/// stderr are discarded. Panics if the process cannot be started.
fn run_uutils_wc(args: &[&str]) -> i32 {
    use std::process::{Command, Stdio};

    // Use the binary instead of calling uumain directly to avoid stdout issues
    let mut wc = Command::new("../../../target/release/coreutils");
    wc.arg("wc")
        .args(args)
        .stdout(Stdio::null())
        .stderr(Stdio::null());

    let exit_status = wc.status().expect("Failed to execute wc command");
    if exit_status.success() { 0 } else { 1 }
}
/// Benchmark different file sizes for line counting
#[divan::bench(args = [1, 5, 10, 25, 50])]
fn wc_lines_synthetic(bencher: Bencher, size_mb: usize) {
let temp_dir = tempfile::tempdir().unwrap();
let data = generate_test_data(size_mb, 80);
let file_path = create_test_file(&data, &temp_dir);
let file_path_str = file_path.to_str().unwrap();
bencher.bench(|| {
black_box(run_uutils_wc(&["-l", file_path_str]));
});
}
/// Benchmark different file sizes for character counting
#[divan::bench(args = [1, 5, 10, 25])]
fn wc_chars_synthetic(bencher: Bencher, size_mb: usize) {
let temp_dir = tempfile::tempdir().unwrap();
let data = generate_test_data(size_mb, 80);
let file_path = create_test_file(&data, &temp_dir);
let file_path_str = file_path.to_str().unwrap();
bencher.bench(|| {
black_box(run_uutils_wc(&["-m", file_path_str]));
});
}
/// Benchmark different file sizes for byte counting
#[divan::bench(args = [1, 5, 10, 50, 100])]
fn wc_bytes_synthetic(bencher: Bencher, size_mb: usize) {
let temp_dir = tempfile::tempdir().unwrap();
let data = generate_test_data(size_mb, 80);
let file_path = create_test_file(&data, &temp_dir);
let file_path_str = file_path.to_str().unwrap();
bencher.bench(|| {
black_box(run_uutils_wc(&["-c", file_path_str]));
});
}
/// Benchmark word counting (should use traditional read path)
#[divan::bench(args = [1, 5, 10, 25])]
fn wc_words_synthetic(bencher: Bencher, size_mb: usize) {
let temp_dir = tempfile::tempdir().unwrap();
let data = generate_test_data(size_mb, 80);
let file_path = create_test_file(&data, &temp_dir);
let file_path_str = file_path.to_str().unwrap();
bencher.bench(|| {
black_box(run_uutils_wc(&["-w", file_path_str]));
});
}
/// Benchmark combined byte+line counting
#[divan::bench(args = [1, 5, 10, 50])]
fn wc_bytes_lines_synthetic(bencher: Bencher, size_mb: usize) {
let temp_dir = tempfile::tempdir().unwrap();
let data = generate_test_data(size_mb, 80);
let file_path = create_test_file(&data, &temp_dir);
let file_path_str = file_path.to_str().unwrap();
bencher.bench(|| {
black_box(run_uutils_wc(&["-cl", file_path_str]));
});
}
/// Benchmark default wc behavior (bytes, lines, words)
#[divan::bench(args = [1, 5, 10])]
fn wc_default_synthetic(bencher: Bencher, size_mb: usize) {
let temp_dir = tempfile::tempdir().unwrap();
let data = generate_test_data(size_mb, 80);
let file_path = create_test_file(&data, &temp_dir);
let file_path_str = file_path.to_str().unwrap();
bencher.bench(|| {
black_box(run_uutils_wc(&[file_path_str]));
});
}
/// Test different line lengths impact on performance
#[divan::bench(args = [(5, 50), (5, 100), (5, 200), (5, 500)])]
fn wc_lines_variable_length(bencher: Bencher, (size_mb, avg_line_len): (usize, usize)) {
let temp_dir = tempfile::tempdir().unwrap();
let data = generate_test_data(size_mb, avg_line_len);
let file_path = create_test_file(&data, &temp_dir);
let file_path_str = file_path.to_str().unwrap();
bencher.bench(|| {
black_box(run_uutils_wc(&["-l", file_path_str]));
});
}
/// Benchmark large files by line count - up to 500K lines!
#[divan::bench(args = [10_000, 50_000, 100_000, 500_000])]
fn wc_lines_large_line_count(bencher: Bencher, num_lines: usize) {
let temp_dir = tempfile::tempdir().unwrap();
let data = generate_test_data_by_lines(num_lines, 80);
let file_path = create_test_file(&data, &temp_dir);
let file_path_str = file_path.to_str().unwrap();
bencher.bench(|| {
black_box(run_uutils_wc(&["-l", file_path_str]));
});
}
/// Benchmark character counting on large line counts
#[divan::bench(args = [10_000, 50_000, 100_000])]
fn wc_chars_large_line_count(bencher: Bencher, num_lines: usize) {
let temp_dir = tempfile::tempdir().unwrap();
let data = generate_test_data_by_lines(num_lines, 80);
let file_path = create_test_file(&data, &temp_dir);
let file_path_str = file_path.to_str().unwrap();
bencher.bench(|| {
black_box(run_uutils_wc(&["-m", file_path_str]));
});
}
/// Benchmark word counting on large line counts
#[divan::bench(args = [10_000, 50_000, 100_000])]
fn wc_words_large_line_count(bencher: Bencher, num_lines: usize) {
let temp_dir = tempfile::tempdir().unwrap();
let data = generate_test_data_by_lines(num_lines, 80);
let file_path = create_test_file(&data, &temp_dir);
let file_path_str = file_path.to_str().unwrap();
bencher.bench(|| {
black_box(run_uutils_wc(&["-w", file_path_str]));
});
}
/// Benchmark default wc (lines, words, bytes) on large line counts
#[divan::bench(args = [10_000, 50_000, 100_000])]
fn wc_default_large_line_count(bencher: Bencher, num_lines: usize) {
let temp_dir = tempfile::tempdir().unwrap();
let data = generate_test_data_by_lines(num_lines, 80);
let file_path = create_test_file(&data, &temp_dir);
let file_path_str = file_path.to_str().unwrap();
bencher.bench(|| {
black_box(run_uutils_wc(&[file_path_str]));
});
}
/// Benchmark very short vs very long lines with 100K lines
#[divan::bench(args = [(100_000, 10), (100_000, 200), (100_000, 1000)])]
fn wc_lines_extreme_line_lengths(bencher: Bencher, (num_lines, line_len): (usize, usize)) {
let temp_dir = tempfile::tempdir().unwrap();
let data = generate_test_data_by_lines(num_lines, line_len);
let file_path = create_test_file(&data, &temp_dir);
let file_path_str = file_path.to_str().unwrap();
bencher.bench(|| {
black_box(run_uutils_wc(&["-l", file_path_str]));
});
}
/// Entry point of the benchmark binary; it only delegates to `divan::main()`.
fn main() {
divan::main();
}