mirror of
https://github.com/uutils/coreutils.git
synced 2025-12-23 08:47:37 +00:00
Merge pull request #8680 from sylvestre/wc-perf
Some checks are pending
FreeBSD / Tests (push) Waiting to run
CICD / Style/cargo-deny (push) Waiting to run
CICD / Build (push) Blocked by required conditions
CICD / Style/deps (push) Waiting to run
CICD / Documentation/warnings (push) Waiting to run
CICD / MinRustV (push) Waiting to run
CICD / Test all features separately (push) Blocked by required conditions
CICD / Dependencies (push) Waiting to run
CICD / Build/Makefile (push) Blocked by required conditions
CICD / Build/stable (push) Blocked by required conditions
CICD / Build/nightly (push) Blocked by required conditions
CICD / Binary sizes (push) Blocked by required conditions
CICD / Tests/Toybox test suite (push) Blocked by required conditions
CICD / Build/SELinux (push) Blocked by required conditions
CICD / Tests/BusyBox test suite (push) Blocked by required conditions
CICD / Code Coverage (push) Waiting to run
CICD / Separate Builds (push) Waiting to run
GnuTests / Run GNU tests (native) (push) Waiting to run
GnuTests / Run GNU tests (SELinux) (push) Waiting to run
Android / Test builds (push) Waiting to run
Code Quality / Style/lint (push) Waiting to run
Code Quality / Style/spelling (push) Waiting to run
Code Quality / Style/Python (push) Waiting to run
Code Quality / Pre-commit hooks (push) Waiting to run
CodSpeed Benchmarks / Run benchmarks (push) Waiting to run
GnuTests / Aggregate GNU test results (push) Blocked by required conditions
Code Quality / Style/format (push) Waiting to run
Code Quality / Style/toml (push) Waiting to run
Devcontainer / Verify devcontainer (push) Waiting to run
WSL2 / Test (push) Waiting to run
Check uudoc Documentation Generation / Verify uudoc generates correct documentation (push) Waiting to run
FreeBSD / Style and Lint (push) Waiting to run
Some checks are pending
FreeBSD / Tests (push) Waiting to run
CICD / Style/cargo-deny (push) Waiting to run
CICD / Build (push) Blocked by required conditions
CICD / Style/deps (push) Waiting to run
CICD / Documentation/warnings (push) Waiting to run
CICD / MinRustV (push) Waiting to run
CICD / Test all features separately (push) Blocked by required conditions
CICD / Dependencies (push) Waiting to run
CICD / Build/Makefile (push) Blocked by required conditions
CICD / Build/stable (push) Blocked by required conditions
CICD / Build/nightly (push) Blocked by required conditions
CICD / Binary sizes (push) Blocked by required conditions
CICD / Tests/Toybox test suite (push) Blocked by required conditions
CICD / Build/SELinux (push) Blocked by required conditions
CICD / Tests/BusyBox test suite (push) Blocked by required conditions
CICD / Code Coverage (push) Waiting to run
CICD / Separate Builds (push) Waiting to run
GnuTests / Run GNU tests (native) (push) Waiting to run
GnuTests / Run GNU tests (SELinux) (push) Waiting to run
Android / Test builds (push) Waiting to run
Code Quality / Style/lint (push) Waiting to run
Code Quality / Style/spelling (push) Waiting to run
Code Quality / Style/Python (push) Waiting to run
Code Quality / Pre-commit hooks (push) Waiting to run
CodSpeed Benchmarks / Run benchmarks (push) Waiting to run
GnuTests / Aggregate GNU test results (push) Blocked by required conditions
Code Quality / Style/format (push) Waiting to run
Code Quality / Style/toml (push) Waiting to run
Devcontainer / Verify devcontainer (push) Waiting to run
WSL2 / Test (push) Waiting to run
Check uudoc Documentation Generation / Verify uudoc generates correct documentation (push) Waiting to run
FreeBSD / Style and Lint (push) Waiting to run
evaluate codspeed for perfs
This commit is contained in:
commit
cb594452d9
5 changed files with 347 additions and 0 deletions
53
.github/workflows/codspeed.yml
vendored
Normal file
53
.github/workflows/codspeed.yml
vendored
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
name: CodSpeed Benchmarks

# spell-checker: disable

# Runs on pushes to main and on pull requests.
on:
  push:
    branches:
      - "main"
  pull_request:

permissions:
  contents: read

jobs:
  benchmarks:
    name: Run benchmarks
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5
        with:
          persist-credentials: false

      - name: Install system dependencies
        shell: bash
        run: |
          sudo apt-get -y update
          sudo apt-get -y install libselinux1-dev

      - uses: dtolnay/rust-toolchain@stable

      - uses: Swatinem/rust-cache@v2

      - name: Run sccache-cache
        uses: mozilla-actions/sccache-action@v0.0.9

      - name: Install cargo-codspeed
        shell: bash
        run: cargo install cargo-codspeed --locked

      - name: Run benchmarks
        uses: CodSpeedHQ/action@v4
        with:
          mode: instrumentation
          run: |
            # Find all utilities with benchmarks and run them.
            # NOTE(review): src/uu/wc/benches/wc_bench.rs spawns target/release/coreutils,
            # and no step in this workflow builds that binary explicitly - confirm it exists
            # before the benchmarks run.
            # Stop at the first failing build/run; otherwise only the last crate's exit
            # status would decide whether this step succeeds.
            set -e
            find src/uu/*/benches/ -name "*.rs" 2>/dev/null | while IFS= read -r bench_file; do
              crate_dir=$(dirname "$(dirname "$bench_file")")
              echo "Building benchmarks in $crate_dir"
              (cd "$crate_dir" && cargo codspeed build)
              echo "Running benchmarks in $crate_dir"
              (cd "$crate_dir" && cargo codspeed run)
            done
          token: ${{ secrets.CODSPEED_TOKEN }}
|
||||
39
Cargo.lock
generated
39
Cargo.lock
generated
|
|
@ -432,6 +432,12 @@ version = "0.1.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "120133d4db2ec47efe2e26502ee984747630c67f51974fca0b6c1340cf2368d3"
|
||||
|
||||
[[package]]
|
||||
name = "condtype"
|
||||
version = "1.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "baf0a07a401f374238ab8e2f11a104d2851bf9ce711ec69804834de8af45c7af"
|
||||
|
||||
[[package]]
|
||||
name = "console"
|
||||
version = "0.16.0"
|
||||
|
|
@ -896,6 +902,31 @@ dependencies = [
|
|||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "divan"
|
||||
version = "0.1.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a405457ec78b8fe08b0e32b4a3570ab5dff6dd16eb9e76a5ee0a9d9cbd898933"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"clap",
|
||||
"condtype",
|
||||
"divan-macros",
|
||||
"libc",
|
||||
"regex-lite",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "divan-macros"
|
||||
version = "0.1.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9556bc800956545d6420a640173e5ba7dfa82f38d3ea5a167eb555bc69ac3323"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dlv-list"
|
||||
version = "0.5.2"
|
||||
|
|
@ -2335,6 +2366,12 @@ dependencies = [
|
|||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-lite"
|
||||
version = "0.1.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "943f41321c63ef1c92fd763bfe054d2668f7f225a5c29f0105903dc2fc04ba30"
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.8.5"
|
||||
|
|
@ -4072,9 +4109,11 @@ version = "0.2.2"
|
|||
dependencies = [
|
||||
"bytecount",
|
||||
"clap",
|
||||
"divan",
|
||||
"fluent",
|
||||
"libc",
|
||||
"nix",
|
||||
"tempfile",
|
||||
"thiserror 2.0.16",
|
||||
"unicode-width 0.2.1",
|
||||
"uucore",
|
||||
|
|
|
|||
|
|
@ -316,6 +316,7 @@ compare = "0.1.0"
|
|||
crossterm = "0.29.0"
|
||||
ctor = "0.5.0"
|
||||
ctrlc = { version = "3.4.7", features = ["termination"] }
|
||||
divan = "0.1"
|
||||
dns-lookup = { version = "3.0.0" }
|
||||
exacl = "0.12.0"
|
||||
file_diff = "1.0.0"
|
||||
|
|
|
|||
|
|
@ -29,6 +29,14 @@ fluent = { workspace = true }
|
|||
nix = { workspace = true }
|
||||
libc = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
divan = { workspace = true }
|
||||
tempfile = { workspace = true }
|
||||
|
||||
[[bin]]
|
||||
name = "wc"
|
||||
path = "src/main.rs"
|
||||
|
||||
[[bench]]
|
||||
name = "wc_bench"
|
||||
harness = false
|
||||
|
|
|
|||
246
src/uu/wc/benches/wc_bench.rs
Normal file
246
src/uu/wc/benches/wc_bench.rs
Normal file
|
|
@ -0,0 +1,246 @@
|
|||
// This file is part of the uutils coreutils package.
|
||||
//
|
||||
// For the full copyright and license information, please view the LICENSE
|
||||
// file that was distributed with this source code.
|
||||
|
||||
use divan::{Bencher, black_box};
|
||||
use std::fs::File;
|
||||
use std::io::{BufWriter, Write};
|
||||
use tempfile::TempDir;
|
||||
|
||||
/// Generate `size_mb` MiB of synthetic ASCII text split into fixed-width lines.
///
/// Every line holds exactly `avg_line_length` lowercase letters followed by `\n`;
/// the letter written at byte offset `i` is `b'a' + (i % 26)`. If the last of the
/// `size_mb * 1024 * 1024` bytes is not a newline, one extra `\n` is appended, so the
/// result always ends with a newline and is at most one byte longer than requested.
fn generate_test_data(size_mb: usize, avg_line_length: usize) -> Vec<u8> {
    let total_size = size_mb * 1024 * 1024;
    // Reserve one spare byte for the possible trailing newline: with a capacity of
    // exactly `total_size`, that final push forces a reallocation and a copy of the
    // whole buffer.
    let mut data = Vec::with_capacity(total_size + 1);

    let mut line_chars = 0;
    for offset in 0..total_size {
        if line_chars >= avg_line_length {
            data.push(b'\n');
            line_chars = 0;
        } else {
            // Cycle through the alphabet based on the absolute byte offset.
            data.push(b'a' + (offset % 26) as u8);
            line_chars += 1;
        }
    }

    // Ensure we end with a newline
    if data.last() != Some(&b'\n') {
        data.push(b'\n');
    }

    data
}
|
||||
|
||||
/// Build `num_lines` newline-terminated lines of space-separated lowercase text.
///
/// Line `n` holds `avg_line_length + (n % 40).saturating_sub(20)` bytes before its
/// `\n`, so line lengths run from `avg_line_length` up to `avg_line_length + 19`
/// (the variation is never negative). Within a line, every position that is a
/// non-zero multiple of 8 is a space; any other position `p` holds the letter
/// `b'a' + ((n + p) % 26)`.
fn generate_test_data_by_lines(num_lines: usize, avg_line_length: usize) -> Vec<u8> {
    let mut out = Vec::new();

    for n in 0..num_lines {
        let extra = (n % 40).saturating_sub(20);
        let width = avg_line_length + extra;

        out.extend((0..width).map(|p| {
            if p > 0 && p % 8 == 0 {
                b' '
            } else {
                b'a' + ((n + p) % 26) as u8
            }
        }));
        out.push(b'\n');
    }

    out
}
|
||||
|
||||
/// Create a temporary file with test data
|
||||
fn create_test_file(data: &[u8], temp_dir: &TempDir) -> std::path::PathBuf {
|
||||
let file_path = temp_dir.path().join("test_data.txt");
|
||||
let file = File::create(&file_path).unwrap();
|
||||
let mut writer = BufWriter::new(file);
|
||||
writer.write_all(data).unwrap();
|
||||
writer.flush().unwrap();
|
||||
file_path
|
||||
}
|
||||
|
||||
/// Spawn the multicall `coreutils` binary as `coreutils wc <args...>` and wait for it.
///
/// The binary path is relative to the current working directory. Both stdout and
/// stderr of the child are discarded. Returns `0` when the child exits successfully
/// and `1` otherwise; panics if the process cannot be started.
fn run_uutils_wc(args: &[&str]) -> i32 {
    use std::process::{Command, Stdio};

    // Use the binary instead of calling uumain directly to avoid stdout issues
    let mut command = Command::new("../../../target/release/coreutils");
    command
        .arg("wc")
        .args(args)
        .stdout(Stdio::null())
        .stderr(Stdio::null());

    let status = command.status().expect("Failed to execute wc command");

    if status.success() { 0 } else { 1 }
}
|
||||
|
||||
/// Benchmark different file sizes for line counting
|
||||
#[divan::bench(args = [1, 5, 10, 25, 50])]
|
||||
fn wc_lines_synthetic(bencher: Bencher, size_mb: usize) {
|
||||
let temp_dir = tempfile::tempdir().unwrap();
|
||||
let data = generate_test_data(size_mb, 80);
|
||||
let file_path = create_test_file(&data, &temp_dir);
|
||||
let file_path_str = file_path.to_str().unwrap();
|
||||
|
||||
bencher.bench(|| {
|
||||
black_box(run_uutils_wc(&["-l", file_path_str]));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark different file sizes for character counting
|
||||
#[divan::bench(args = [1, 5, 10, 25])]
|
||||
fn wc_chars_synthetic(bencher: Bencher, size_mb: usize) {
|
||||
let temp_dir = tempfile::tempdir().unwrap();
|
||||
let data = generate_test_data(size_mb, 80);
|
||||
let file_path = create_test_file(&data, &temp_dir);
|
||||
let file_path_str = file_path.to_str().unwrap();
|
||||
|
||||
bencher.bench(|| {
|
||||
black_box(run_uutils_wc(&["-m", file_path_str]));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark different file sizes for byte counting
|
||||
#[divan::bench(args = [1, 5, 10, 50, 100])]
|
||||
fn wc_bytes_synthetic(bencher: Bencher, size_mb: usize) {
|
||||
let temp_dir = tempfile::tempdir().unwrap();
|
||||
let data = generate_test_data(size_mb, 80);
|
||||
let file_path = create_test_file(&data, &temp_dir);
|
||||
let file_path_str = file_path.to_str().unwrap();
|
||||
|
||||
bencher.bench(|| {
|
||||
black_box(run_uutils_wc(&["-c", file_path_str]));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark word counting (should use traditional read path)
|
||||
#[divan::bench(args = [1, 5, 10, 25])]
|
||||
fn wc_words_synthetic(bencher: Bencher, size_mb: usize) {
|
||||
let temp_dir = tempfile::tempdir().unwrap();
|
||||
let data = generate_test_data(size_mb, 80);
|
||||
let file_path = create_test_file(&data, &temp_dir);
|
||||
let file_path_str = file_path.to_str().unwrap();
|
||||
|
||||
bencher.bench(|| {
|
||||
black_box(run_uutils_wc(&["-w", file_path_str]));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark combined byte+line counting
|
||||
#[divan::bench(args = [1, 5, 10, 50])]
|
||||
fn wc_bytes_lines_synthetic(bencher: Bencher, size_mb: usize) {
|
||||
let temp_dir = tempfile::tempdir().unwrap();
|
||||
let data = generate_test_data(size_mb, 80);
|
||||
let file_path = create_test_file(&data, &temp_dir);
|
||||
let file_path_str = file_path.to_str().unwrap();
|
||||
|
||||
bencher.bench(|| {
|
||||
black_box(run_uutils_wc(&["-cl", file_path_str]));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark default wc behavior (bytes, lines, words)
|
||||
#[divan::bench(args = [1, 5, 10])]
|
||||
fn wc_default_synthetic(bencher: Bencher, size_mb: usize) {
|
||||
let temp_dir = tempfile::tempdir().unwrap();
|
||||
let data = generate_test_data(size_mb, 80);
|
||||
let file_path = create_test_file(&data, &temp_dir);
|
||||
let file_path_str = file_path.to_str().unwrap();
|
||||
|
||||
bencher.bench(|| {
|
||||
black_box(run_uutils_wc(&[file_path_str]));
|
||||
});
|
||||
}
|
||||
|
||||
/// Test different line lengths impact on performance
|
||||
#[divan::bench(args = [(5, 50), (5, 100), (5, 200), (5, 500)])]
|
||||
fn wc_lines_variable_length(bencher: Bencher, (size_mb, avg_line_len): (usize, usize)) {
|
||||
let temp_dir = tempfile::tempdir().unwrap();
|
||||
let data = generate_test_data(size_mb, avg_line_len);
|
||||
let file_path = create_test_file(&data, &temp_dir);
|
||||
let file_path_str = file_path.to_str().unwrap();
|
||||
|
||||
bencher.bench(|| {
|
||||
black_box(run_uutils_wc(&["-l", file_path_str]));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark large files by line count - up to 500K lines!
|
||||
#[divan::bench(args = [10_000, 50_000, 100_000, 500_000])]
|
||||
fn wc_lines_large_line_count(bencher: Bencher, num_lines: usize) {
|
||||
let temp_dir = tempfile::tempdir().unwrap();
|
||||
let data = generate_test_data_by_lines(num_lines, 80);
|
||||
let file_path = create_test_file(&data, &temp_dir);
|
||||
let file_path_str = file_path.to_str().unwrap();
|
||||
|
||||
bencher.bench(|| {
|
||||
black_box(run_uutils_wc(&["-l", file_path_str]));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark character counting on large line counts
|
||||
#[divan::bench(args = [10_000, 50_000, 100_000])]
|
||||
fn wc_chars_large_line_count(bencher: Bencher, num_lines: usize) {
|
||||
let temp_dir = tempfile::tempdir().unwrap();
|
||||
let data = generate_test_data_by_lines(num_lines, 80);
|
||||
let file_path = create_test_file(&data, &temp_dir);
|
||||
let file_path_str = file_path.to_str().unwrap();
|
||||
|
||||
bencher.bench(|| {
|
||||
black_box(run_uutils_wc(&["-m", file_path_str]));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark word counting on large line counts
|
||||
#[divan::bench(args = [10_000, 50_000, 100_000])]
|
||||
fn wc_words_large_line_count(bencher: Bencher, num_lines: usize) {
|
||||
let temp_dir = tempfile::tempdir().unwrap();
|
||||
let data = generate_test_data_by_lines(num_lines, 80);
|
||||
let file_path = create_test_file(&data, &temp_dir);
|
||||
let file_path_str = file_path.to_str().unwrap();
|
||||
|
||||
bencher.bench(|| {
|
||||
black_box(run_uutils_wc(&["-w", file_path_str]));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark default wc (lines, words, bytes) on large line counts
|
||||
#[divan::bench(args = [10_000, 50_000, 100_000])]
|
||||
fn wc_default_large_line_count(bencher: Bencher, num_lines: usize) {
|
||||
let temp_dir = tempfile::tempdir().unwrap();
|
||||
let data = generate_test_data_by_lines(num_lines, 80);
|
||||
let file_path = create_test_file(&data, &temp_dir);
|
||||
let file_path_str = file_path.to_str().unwrap();
|
||||
|
||||
bencher.bench(|| {
|
||||
black_box(run_uutils_wc(&[file_path_str]));
|
||||
});
|
||||
}
|
||||
|
||||
/// Benchmark very short vs very long lines with 100K lines
|
||||
#[divan::bench(args = [(100_000, 10), (100_000, 200), (100_000, 1000)])]
|
||||
fn wc_lines_extreme_line_lengths(bencher: Bencher, (num_lines, line_len): (usize, usize)) {
|
||||
let temp_dir = tempfile::tempdir().unwrap();
|
||||
let data = generate_test_data_by_lines(num_lines, line_len);
|
||||
let file_path = create_test_file(&data, &temp_dir);
|
||||
let file_path_str = file_path.to_str().unwrap();
|
||||
|
||||
bencher.bench(|| {
|
||||
black_box(run_uutils_wc(&["-l", file_path_str]));
|
||||
});
|
||||
}
|
||||
|
||||
fn main() {
|
||||
divan::main();
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue