From 23261a38a08dd3afb231979e3e664eeec56fdefe Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Wed, 18 Jun 2025 13:41:38 +0200 Subject: [PATCH] [ty] Add more benchmarks (#18714) --- .github/actionlint.yaml | 1 + .github/workflows/ci.yaml | 38 +- Cargo.lock | 77 ++++ Cargo.toml | 1 + crates/ruff_benchmark/Cargo.toml | 46 +- crates/ruff_benchmark/benches/ty.rs | 140 ++++++- crates/ruff_benchmark/benches/ty_walltime.rs | 253 +++++++++++ crates/ruff_benchmark/src/lib.rs | 2 + .../ruff_benchmark/src/real_world_projects.rs | 392 ++++++++++++++++++ crates/ruff_db/src/lib.rs | 6 + crates/ruff_db/src/system/test.rs | 7 + crates/ty_project/src/lib.rs | 13 +- 12 files changed, 959 insertions(+), 17 deletions(-) create mode 100644 crates/ruff_benchmark/benches/ty_walltime.rs create mode 100644 crates/ruff_benchmark/src/real_world_projects.rs diff --git a/.github/actionlint.yaml b/.github/actionlint.yaml index c3464e3992..81969ccb17 100644 --- a/.github/actionlint.yaml +++ b/.github/actionlint.yaml @@ -9,3 +9,4 @@ self-hosted-runner: - depot-ubuntu-22.04-32 - github-windows-2025-x86_64-8 - github-windows-2025-x86_64-16 + - codspeed-macro diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index a18c5e3939..a00c1ba721 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -893,7 +893,7 @@ jobs: run: npm run fmt:check working-directory: playground - benchmarks: + benchmarks-instrumented: runs-on: ubuntu-24.04 needs: determine_changes if: ${{ github.repository == 'astral-sh/ruff' && !contains(github.event.pull_request.labels.*.name, 'no-test') && (needs.determine_changes.outputs.code == 'true' || github.ref == 'refs/heads/main') }} @@ -905,6 +905,7 @@ jobs: persist-credentials: false - uses: Swatinem/rust-cache@9d47c6ad4b02e050fd481d890b2ea34778fd09d6 # v2.7.8 + - uses: astral-sh/setup-uv@f0ec1fc3b38f5e7cd731bb6ce540c5af426746bb # v6.1.0 - name: "Install Rust toolchain" run: rustup show @@ -915,7 +916,40 @@ jobs: tool: cargo-codspeed - name: 
"Build benchmarks" - run: cargo codspeed build --features codspeed -p ruff_benchmark + run: cargo codspeed build --features "codspeed,instrumented" --no-default-features -p ruff_benchmark + + - name: "Run benchmarks" + uses: CodSpeedHQ/action@0010eb0ca6e89b80c88e8edaaa07cfe5f3e6664d # v3.5.0 + with: + run: cargo codspeed run + token: ${{ secrets.CODSPEED_TOKEN }} + + benchmarks-walltime: + runs-on: codspeed-macro + needs: determine_changes + if: ${{ github.repository == 'astral-sh/ruff' && !contains(github.event.pull_request.labels.*.name, 'no-test') && (needs.determine_changes.outputs.ty == 'true' || github.ref == 'refs/heads/main') }} + timeout-minutes: 20 + env: + TY_LOG: ruff_benchmark=debug + steps: + - name: "Checkout Branch" + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + persist-credentials: false + + - uses: Swatinem/rust-cache@9d47c6ad4b02e050fd481d890b2ea34778fd09d6 # v2.7.8 + - uses: astral-sh/setup-uv@f0ec1fc3b38f5e7cd731bb6ce540c5af426746bb # v6.1.0 + + - name: "Install Rust toolchain" + run: rustup show + + - name: "Install codspeed" + uses: taiki-e/install-action@735e5933943122c5ac182670a935f54a949265c1 # v2.52.4 + with: + tool: cargo-codspeed + + - name: "Build benchmarks" + run: cargo codspeed build --features "codspeed,walltime" --no-default-features -p ruff_benchmark - name: "Run benchmarks" uses: CodSpeedHQ/action@0010eb0ca6e89b80c88e8edaaa07cfe5f3e6664d # v3.5.0 diff --git a/Cargo.lock b/Cargo.lock index 2e523ecd84..f753997842 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -479,6 +479,46 @@ dependencies = [ "walkdir", ] +[[package]] +name = "codspeed-divan-compat" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8620a09dfaf37b3c45f982c4b65bd8f9b0203944da3ffa705c0fcae6b84655ff" +dependencies = [ + "codspeed", + "codspeed-divan-compat-macros", + "codspeed-divan-compat-walltime", +] + +[[package]] +name = "codspeed-divan-compat-macros" +version = "2.10.1" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30fe872bc4214626b35d3a1706a905d0243503bb6ba3bb7be2fc59083d5d680c" +dependencies = [ + "divan-macros", + "itertools 0.14.0", + "proc-macro-crate", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "codspeed-divan-compat-walltime" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "104caa97b36d4092d89e24e4b103b40ede1edab03c0372d19e14a33f9393132b" +dependencies = [ + "cfg-if", + "clap", + "codspeed", + "condtype", + "divan-macros", + "libc", + "regex-lite", +] + [[package]] name = "colorchoice" version = "1.0.3" @@ -519,6 +559,12 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "condtype" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf0a07a401f374238ab8e2f11a104d2851bf9ce711ec69804834de8af45c7af" + [[package]] name = "console" version = "0.15.11" @@ -837,6 +883,17 @@ dependencies = [ "syn", ] +[[package]] +name = "divan-macros" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8dc51d98e636f5e3b0759a39257458b22619cac7e96d932da6eeb052891bb67c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "doc-comment" version = "0.3.3" @@ -2272,6 +2329,15 @@ dependencies = [ "yansi", ] +[[package]] +name = "proc-macro-crate" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edce586971a4dfaa28950c6f18ed55e0406c1ab88bbce2c6f6293a7aaba73d35" +dependencies = [ + "toml_edit", +] + [[package]] name = "proc-macro2" version = "1.0.95" @@ -2485,6 +2551,12 @@ dependencies = [ "regex-syntax 0.8.5", ] +[[package]] +name = "regex-lite" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53a49587ad06b26609c52e423de037e7f57f20d53535d66e08c695f347df952a" + [[package]] name = "regex-syntax" version = "0.6.29" @@ -2586,7 +2658,9 @@ 
dependencies = [ name = "ruff_benchmark" version = "0.0.0" dependencies = [ + "anyhow", "codspeed-criterion-compat", + "codspeed-divan-compat", "criterion", "mimalloc", "rayon", @@ -2597,7 +2671,10 @@ dependencies = [ "ruff_python_parser", "ruff_python_trivia", "rustc-hash 2.1.1", + "serde", + "serde_json", "tikv-jemallocator", + "tracing", "ty_project", ] diff --git a/Cargo.toml b/Cargo.toml index 1da4d9bdff..32804e36ba 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -62,6 +62,7 @@ camino = { version = "1.1.7" } clap = { version = "4.5.3", features = ["derive"] } clap_complete_command = { version = "0.6.0" } clearscreen = { version = "4.0.0" } +divan = { package = "codspeed-divan-compat", version = "2.10.1" } codspeed-criterion-compat = { version = "2.6.0", default-features = false } colored = { version = "3.0.0" } console_error_panic_hook = { version = "0.1.7" } diff --git a/crates/ruff_benchmark/Cargo.toml b/crates/ruff_benchmark/Cargo.toml index b5c2a50a13..b6b4b40de2 100644 --- a/crates/ruff_benchmark/Cargo.toml +++ b/crates/ruff_benchmark/Cargo.toml @@ -19,43 +19,69 @@ doctest = false [[bench]] name = "linter" harness = false +required-features = ["instrumented"] [[bench]] name = "lexer" harness = false +required-features = ["instrumented"] [[bench]] name = "parser" harness = false +required-features = ["instrumented"] [[bench]] name = "formatter" harness = false +required-features = ["instrumented"] [[bench]] name = "ty" harness = false +required-features = ["instrumented"] + +[[bench]] +name = "ty_walltime" +harness = false +required-features = ["walltime"] [dependencies] +ruff_db = { workspace = true, features = ["testing"] } +ruff_python_ast = { workspace = true } +ruff_linter = { workspace = true, optional = true } +ruff_python_formatter = { workspace = true, optional = true } +ruff_python_parser = { workspace = true, optional = true } +ruff_python_trivia = { workspace = true, optional = true } +ty_project = { workspace = true, optional = true } + +divan = { 
workspace = true, optional = true } +anyhow = { workspace = true } codspeed-criterion-compat = { workspace = true, default-features = false, optional = true } -criterion = { workspace = true, default-features = false } +criterion = { workspace = true, default-features = false, optional = true } rayon = { workspace = true } rustc-hash = { workspace = true } - -[dev-dependencies] -ruff_db = { workspace = true } -ruff_linter = { workspace = true } -ruff_python_ast = { workspace = true } -ruff_python_formatter = { workspace = true } -ruff_python_parser = { workspace = true } -ruff_python_trivia = { workspace = true } -ty_project = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +tracing = { workspace = true } [lints] workspace = true [features] +default = ["instrumented", "walltime"] +# Enables the benchmark that should only run with codspeed's instrumented runner +instrumented = [ + "criterion", + "ruff_linter", + "ruff_python_formatter", + "ruff_python_parser", + "ruff_python_trivia", + "ty_project", +] codspeed = ["codspeed-criterion-compat"] +# Enables benchmark that should only run with codspeed's walltime runner. 
+walltime = ["ruff_db/os", "ty_project", "divan"] [target.'cfg(target_os = "windows")'.dev-dependencies] mimalloc = { workspace = true } diff --git a/crates/ruff_benchmark/benches/ty.rs b/crates/ruff_benchmark/benches/ty.rs index fe974de25b..bdef551be7 100644 --- a/crates/ruff_benchmark/benches/ty.rs +++ b/crates/ruff_benchmark/benches/ty.rs @@ -1,5 +1,6 @@ #![allow(clippy::disallowed_names)] use ruff_benchmark::criterion; +use ruff_benchmark::real_world_projects::{InstalledProject, RealWorldProject}; use std::ops::Range; @@ -11,10 +12,10 @@ use ruff_benchmark::TestFile; use ruff_db::diagnostic::{Diagnostic, DiagnosticId, Severity}; use ruff_db::files::{File, system_path_to_file}; use ruff_db::source::source_text; -use ruff_db::system::{MemoryFileSystem, SystemPath, SystemPathBuf, TestSystem}; +use ruff_db::system::{InMemorySystem, MemoryFileSystem, SystemPath, SystemPathBuf, TestSystem}; use ruff_python_ast::PythonVersion; use ty_project::metadata::options::{EnvironmentOptions, Options}; -use ty_project::metadata::value::RangedValue; +use ty_project::metadata::value::{RangedValue, RelativePathBuf}; use ty_project::watch::{ChangeEvent, ChangedKind}; use ty_project::{Db, ProjectDatabase, ProjectMetadata}; @@ -347,10 +348,141 @@ fn benchmark_many_tuple_assignments(criterion: &mut Criterion) { }); } +struct ProjectBenchmark<'a> { + project: InstalledProject<'a>, + fs: MemoryFileSystem, + max_diagnostics: usize, +} + +impl<'a> ProjectBenchmark<'a> { + fn new(project: RealWorldProject<'a>, max_diagnostics: usize) -> Self { + let setup_project = project.setup().expect("Failed to setup project"); + let fs = setup_project + .copy_to_memory_fs() + .expect("Failed to copy project to memory fs"); + + Self { + project: setup_project, + fs, + max_diagnostics, + } + } + + fn setup_iteration(&self) -> ProjectDatabase { + let system = TestSystem::new(InMemorySystem::from_memory_fs(self.fs.clone())); + + let src_root = SystemPath::new("/"); + let mut metadata = 
ProjectMetadata::discover(src_root, &system).unwrap(); + + metadata.apply_options(Options { + environment: Some(EnvironmentOptions { + python_version: Some(RangedValue::cli(self.project.config.python_version)), + python: (!self.project.config().dependencies.is_empty()) + .then_some(RelativePathBuf::cli(SystemPath::new(".venv"))), + ..EnvironmentOptions::default() + }), + ..Options::default() + }); + + let mut db = ProjectDatabase::new(metadata, system).unwrap(); + + db.project().set_included_paths( + &mut db, + self.project + .check_paths() + .iter() + .map(|path| path.to_path_buf()) + .collect(), + ); + + db + } +} + +#[track_caller] +fn bench_project(benchmark: &ProjectBenchmark, criterion: &mut Criterion) { + fn check_project(db: &mut ProjectDatabase, max_diagnostics: usize) { + let result = db.check(); + let diagnostics = result.len(); + + assert!( + diagnostics > 1 && diagnostics <= max_diagnostics, + "Expected between {} and {} diagnostics but got {}", + 1, + max_diagnostics, + diagnostics + ); + } + + setup_rayon(); + + let mut group = criterion.benchmark_group("project"); + group.sampling_mode(criterion::SamplingMode::Flat); + group.bench_function(benchmark.project.config.name, |b| { + b.iter_batched_ref( + || benchmark.setup_iteration(), + |db| check_project(db, benchmark.max_diagnostics), + BatchSize::SmallInput, + ); + }); +} + +fn hydra(criterion: &mut Criterion) { + let benchmark = ProjectBenchmark::new( + RealWorldProject { + name: "hydra-zen", + repository: "https://github.com/mit-ll-responsible-ai/hydra-zen", + commit: "dd2b50a9614c6f8c46c5866f283c8f7e7a960aa8", + paths: vec![SystemPath::new("src")], + dependencies: vec!["pydantic", "beartype", "hydra-core"], + max_dep_date: "2025-06-17", + python_version: PythonVersion::PY313, + }, + 100, + ); + + bench_project(&benchmark, criterion); +} + +fn attrs(criterion: &mut Criterion) { + let benchmark = ProjectBenchmark::new( + RealWorldProject { + name: "attrs", + repository: 
"https://github.com/python-attrs/attrs", + commit: "a6ae894aad9bc09edc7cdad8c416898784ceec9b", + paths: vec![SystemPath::new("src")], + dependencies: vec![], + max_dep_date: "2025-06-17", + python_version: PythonVersion::PY313, + }, + 100, + ); + + bench_project(&benchmark, criterion); +} + +fn anyio(criterion: &mut Criterion) { + let benchmark = ProjectBenchmark::new( + RealWorldProject { + name: "anyio", + repository: "https://github.com/agronholm/anyio", + commit: "561d81270a12f7c6bbafb5bc5fad99a2a13f96be", + paths: vec![SystemPath::new("src")], + dependencies: vec![], + max_dep_date: "2025-06-17", + python_version: PythonVersion::PY313, + }, + 100, + ); + + bench_project(&benchmark, criterion); +} + criterion_group!(check_file, benchmark_cold, benchmark_incremental); criterion_group!( micro, benchmark_many_string_assignments, - benchmark_many_tuple_assignments + benchmark_many_tuple_assignments, ); -criterion_main!(check_file, micro); +criterion_group!(project, anyio, attrs, hydra); +criterion_main!(check_file, micro, project); diff --git a/crates/ruff_benchmark/benches/ty_walltime.rs b/crates/ruff_benchmark/benches/ty_walltime.rs new file mode 100644 index 0000000000..39029f4505 --- /dev/null +++ b/crates/ruff_benchmark/benches/ty_walltime.rs @@ -0,0 +1,253 @@ +use std::fmt::{Display, Formatter}; + +use divan::{Bencher, bench}; + +use rayon::ThreadPoolBuilder; +use ruff_benchmark::real_world_projects::{InstalledProject, RealWorldProject}; +use ruff_db::system::{OsSystem, SystemPath, SystemPathBuf}; + +use ruff_db::testing::setup_logging_with_filter; +use ruff_python_ast::PythonVersion; +use ty_project::metadata::options::{EnvironmentOptions, Options}; +use ty_project::metadata::value::{RangedValue, RelativePathBuf}; +use ty_project::{Db, ProjectDatabase, ProjectMetadata}; + +struct Benchmark<'a> { + project: InstalledProject<'a>, + max_diagnostics: usize, +} + +impl<'a> Benchmark<'a> { + fn new(project: RealWorldProject<'a>, max_diagnostics: usize) -> Self { + 
let setup_project = project.setup().expect("Failed to setup project"); + + Self { + project: setup_project, + max_diagnostics, + } + } + + fn setup_iteration(&self) -> ProjectDatabase { + let root = SystemPathBuf::from_path_buf(self.project.path.clone()).unwrap(); + let system = OsSystem::new(&root); + + let mut metadata = ProjectMetadata::discover(&root, &system).unwrap(); + + metadata.apply_options(Options { + environment: Some(EnvironmentOptions { + python_version: Some(RangedValue::cli(self.project.config.python_version)), + python: (!self.project.config().dependencies.is_empty()) + .then_some(RelativePathBuf::cli(SystemPath::new(".venv"))), + ..EnvironmentOptions::default() + }), + ..Options::default() + }); + + let mut db = ProjectDatabase::new(metadata, system).unwrap(); + + db.project().set_included_paths( + &mut db, + self.project + .check_paths() + .iter() + .map(|path| SystemPath::absolute(path, &root)) + .collect(), + ); + db + } +} + +impl Display for Benchmark<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.write_str(self.project.config.name) + } +} + +fn check_project(db: &ProjectDatabase, max_diagnostics: usize) { + let result = db.check(); + let diagnostics = result.len(); + + assert!( + diagnostics > 1 && diagnostics <= max_diagnostics, + "Expected between {} and {} diagnostics but got {}", + 1, + max_diagnostics, + diagnostics + ); +} + +static ALTAIR: std::sync::LazyLock> = std::sync::LazyLock::new(|| { + Benchmark::new( + RealWorldProject { + name: "altair", + repository: "https://github.com/vega/altair", + commit: "d1f4a1ef89006e5f6752ef1f6df4b7a509336fba", + paths: vec![SystemPath::new("altair")], + dependencies: vec![ + "jinja2", + "narwhals", + "numpy", + "packaging", + "pandas-stubs", + "pyarrow-stubs", + "pytest", + "scipy-stubs", + "types-jsonschema", + ], + max_dep_date: "2025-06-17", + python_version: PythonVersion::PY312, + }, + 1000, + ) +}); + +static COLOUR_SCIENCE: std::sync::LazyLock> = 
std::sync::LazyLock::new(|| { + Benchmark::new( + RealWorldProject { + name: "colour-science", + repository: "https://github.com/colour-science/colour", + commit: "a17e2335c29e7b6f08080aa4c93cfa9b61f84757", + paths: vec![SystemPath::new("colour")], + dependencies: vec![ + "matplotlib", + "numpy", + "pandas-stubs", + "pytest", + "scipy-stubs", + ], + max_dep_date: "2025-06-17", + python_version: PythonVersion::PY310, + }, + 477, + ) +}); + +static FREQTRADE: std::sync::LazyLock> = std::sync::LazyLock::new(|| { + Benchmark::new( + RealWorldProject { + name: "freqtrade", + repository: "https://github.com/freqtrade/freqtrade", + commit: "2d842ea129e56575852ee0c45383c8c3f706be19", + paths: vec![SystemPath::new("freqtrade")], + dependencies: vec![ + "numpy", + "pandas-stubs", + "pydantic", + "sqlalchemy", + "types-cachetools", + "types-filelock", + "types-python-dateutil", + "types-requests", + "types-tabulate", + ], + max_dep_date: "2025-06-17", + python_version: PythonVersion::PY312, + }, + 400, + ) +}); + +static PANDAS: std::sync::LazyLock> = std::sync::LazyLock::new(|| { + Benchmark::new( + RealWorldProject { + name: "pandas", + repository: "https://github.com/pandas-dev/pandas", + commit: "5909621e2267eb67943a95ef5e895e8484c53432", + paths: vec![SystemPath::new("pandas")], + dependencies: vec![ + "numpy", + "types-python-dateutil", + "types-pytz", + "types-PyMySQL", + "types-setuptools", + "pytest", + ], + max_dep_date: "2025-06-17", + python_version: PythonVersion::PY312, + }, + 3000, + ) +}); + +static PYDANTIC: std::sync::LazyLock> = std::sync::LazyLock::new(|| { + Benchmark::new( + RealWorldProject { + name: "pydantic", + repository: "https://github.com/pydantic/pydantic", + commit: "0c4a22b64b23dfad27387750cf07487efc45eb05", + paths: vec![SystemPath::new("pydantic")], + dependencies: vec![ + "annotated-types", + "pydantic-core", + "typing-extensions", + "typing-inspection", + ], + max_dep_date: "2025-06-17", + python_version: PythonVersion::PY39, + }, + 1000, 
+ ) +}); + +static SYMPY: std::sync::LazyLock> = std::sync::LazyLock::new(|| { + Benchmark::new( + RealWorldProject { + name: "sympy", + repository: "https://github.com/sympy/sympy", + commit: "22fc107a94eaabc4f6eb31470b39db65abb7a394", + paths: vec![SystemPath::new("sympy")], + dependencies: vec!["mpmath"], + max_dep_date: "2025-06-17", + python_version: PythonVersion::PY312, + }, + 13000, + ) +}); + +#[bench(args=[&*ALTAIR, &*FREQTRADE, &*PYDANTIC], sample_size=2, sample_count=3)] +fn small(bencher: Bencher, benchmark: &Benchmark) { + bencher + .with_inputs(|| benchmark.setup_iteration()) + .bench_local_refs(|db| { + check_project(db, benchmark.max_diagnostics); + }); +} + +#[bench(args=[&*COLOUR_SCIENCE, &*PANDAS], sample_size=1, sample_count=3)] +fn medium(bencher: Bencher, benchmark: &Benchmark) { + bencher + .with_inputs(|| benchmark.setup_iteration()) + .bench_local_refs(|db| { + check_project(db, benchmark.max_diagnostics); + }); +} + +#[bench(args=[&*SYMPY], sample_size=1, sample_count=2)] +fn large(bencher: Bencher, benchmark: &Benchmark) { + bencher + .with_inputs(|| benchmark.setup_iteration()) + .bench_local_refs(|db| { + check_project(db, benchmark.max_diagnostics); + }); +} + +fn main() { + let filter = + std::env::var("TY_LOG").unwrap_or("ty_walltime=info,ruff_benchmark=info".to_string()); + + let _logging = setup_logging_with_filter(&filter).expect("Filter to be valid"); + + // Disable multithreading for now due to + // https://github.com/salsa-rs/salsa/issues/918. + // + // Salsa has a fast-path for the first db when looking up ingredients. + // It seems that this fast-path becomes extremely slow for all db's other + // than the first one, especially when using multithreading (10x slower than the first run). 
+ ThreadPoolBuilder::new() + .num_threads(1) + .use_current_thread() + .build_global() + .unwrap(); + + divan::main(); +} diff --git a/crates/ruff_benchmark/src/lib.rs b/crates/ruff_benchmark/src/lib.rs index 3ecde5e8f8..34ba0d6364 100644 --- a/crates/ruff_benchmark/src/lib.rs +++ b/crates/ruff_benchmark/src/lib.rs @@ -1,6 +1,8 @@ use std::path::PathBuf; +#[cfg(feature = "instrumented")] pub mod criterion; +pub mod real_world_projects; pub static NUMPY_GLOBALS: TestFile = TestFile::new( "numpy/globals.py", diff --git a/crates/ruff_benchmark/src/real_world_projects.rs b/crates/ruff_benchmark/src/real_world_projects.rs new file mode 100644 index 0000000000..fd8536889a --- /dev/null +++ b/crates/ruff_benchmark/src/real_world_projects.rs @@ -0,0 +1,392 @@ +#![allow(clippy::print_stderr)] + +//! Infrastructure for benchmarking real-world Python projects. +//! +//! The module uses a setup similar to mypy primer's, which should make it easy +//! to add new benchmarks for projects in [mypy primer's project list](https://github.com/hauntsaninja/mypy_primer/blob/ebaa9fd27b51a278873b63676fd25490cec6823b/mypy_primer/projects.py#L74). +//! +//! The basic steps for a project are: +//! 1. Clone or update the project into a directory inside `./target`. The commits are pinned to prevent flaky benchmark results due to new commits. +//! 2. For projects with dependencies, run uv to create a virtual environment and install the dependencies. +//! 3. (optionally) Copy the entire project structure into a memory file system to reduce the IO noise in benchmarks. +//! 4. (not in this module) Create a `ProjectDatabase` and run the benchmark. 
+ +use std::ffi::OsStr; +use std::path::{Path, PathBuf}; +use std::process::Command; +use std::time::Instant; + +use anyhow::{Context, Result}; +use ruff_db::system::{MemoryFileSystem, SystemPath, SystemPathBuf}; +use ruff_python_ast::PythonVersion; + +/// Configuration for a real-world project to benchmark +#[derive(Debug, Clone)] +pub struct RealWorldProject<'a> { + /// The name of the project. + pub name: &'a str, + /// The project's Git repository. Must be publicly accessible. + pub repository: &'a str, + /// Specific commit hash to check out + pub commit: &'a str, + /// List of paths within the project to check (`ty check `) + pub paths: Vec<&'a SystemPath>, + /// Dependencies to install via uv + pub dependencies: Vec<&'a str>, + /// Limit candidate packages to those that were uploaded prior to a given point in time (ISO 8601 format). + /// Maps to uv's `exclude-newer`. + pub max_dep_date: &'a str, + /// Python version to use + pub python_version: PythonVersion, +} + +impl<'a> RealWorldProject<'a> { + /// Setup a real-world project for benchmarking + pub fn setup(self) -> Result> { + let start = Instant::now(); + tracing::debug!("Setting up project {}", self.name); + + // Create project directory in cargo target + let project_root = get_project_cache_dir(self.name)?; + + // Clone the repository if it doesn't exist, or update if it does + if project_root.exists() { + tracing::debug!("Updating repository for project '{}'...", self.name); + let start = std::time::Instant::now(); + update_repository(&project_root, self.commit)?; + tracing::debug!( + "Repository update completed in {:.2}s", + start.elapsed().as_secs_f64() + ); + } else { + tracing::debug!("Cloning repository for project '{}'...", self.name); + let start = std::time::Instant::now(); + clone_repository(self.repository, &project_root, self.commit)?; + tracing::debug!( + "Repository clone completed in {:.2}s", + start.elapsed().as_secs_f64() + ); + } + + let checkout = Checkout { + path: project_root, 
+ project: self, + }; + + // Install dependencies if specified + if !checkout.project().dependencies.is_empty() { + tracing::debug!( + "Installing {} dependencies for project '{}'...", + checkout.project().dependencies.len(), + checkout.project().name + ); + let start = std::time::Instant::now(); + install_dependencies(&checkout)?; + tracing::debug!( + "Dependency installation completed in {:.2}s", + start.elapsed().as_secs_f64() + ); + } + + tracing::debug!("Project setup took: {:.2}s", start.elapsed().as_secs_f64()); + + Ok(InstalledProject { + path: checkout.path, + config: checkout.project, + }) + } +} + +struct Checkout<'a> { + project: RealWorldProject<'a>, + path: PathBuf, +} + +impl<'a> Checkout<'a> { + /// Get the virtual environment path + fn venv_path(&self) -> PathBuf { + self.path.join(".venv") + } + + fn project(&self) -> &RealWorldProject<'a> { + &self.project + } +} + +/// Checked out project with its dependencies installed. +pub struct InstalledProject<'a> { + /// Path to the cloned project + pub path: PathBuf, + /// Project configuration + pub config: RealWorldProject<'a>, +} + +impl<'a> InstalledProject<'a> { + /// Get the project configuration + pub fn config(&self) -> &RealWorldProject<'a> { + &self.config + } + + /// Get the configured paths within the project to check, as borrowed `SystemPath`s + pub fn check_paths(&self) -> &[&SystemPath] { + &self.config.paths + } + + /// Get the virtual environment path + pub fn venv_path(&self) -> PathBuf { + self.path.join(".venv") + } + + /// Copies the entire project to a memory file system. 
+ pub fn copy_to_memory_fs(&self) -> anyhow::Result { + let fs = MemoryFileSystem::new(); + + copy_directory_recursive(&fs, &self.path, &SystemPathBuf::from("/"))?; + + Ok(fs) + } +} + +/// Get the cache directory for a project in the cargo target directory +fn get_project_cache_dir(project_name: &str) -> Result { + let target_dir = cargo_target_directory() + .cloned() + .unwrap_or_else(|| PathBuf::from("target")); + let target_dir = + std::path::absolute(target_dir).context("Failed to construct an absolute path")?; + let cache_dir = target_dir.join("benchmark_cache").join(project_name); + + if let Some(parent) = cache_dir.parent() { + std::fs::create_dir_all(parent).context("Failed to create cache directory")?; + } + + Ok(cache_dir) +} + +/// Update an existing repository +fn update_repository(project_root: &Path, commit: &str) -> Result<()> { + let output = Command::new("git") + .args(["fetch", "origin", commit]) + .current_dir(project_root) + .output() + .context("Failed to execute git fetch command")?; + + if !output.status.success() { + anyhow::bail!( + "Git fetch of commit {} failed: {}", + commit, + String::from_utf8_lossy(&output.stderr) + ); + } + + // Checkout specific commit + let output = Command::new("git") + .args(["checkout", commit]) + .current_dir(project_root) + .output() + .context("Failed to execute git checkout command")?; + + anyhow::ensure!( + output.status.success(), + "Git checkout of commit {} failed: {}", + commit, + String::from_utf8_lossy(&output.stderr) + ); + + Ok(()) +} + +/// Clone a git repository to the specified directory +fn clone_repository(repo_url: &str, target_dir: &Path, commit: &str) -> Result<()> { + // Create parent directory if it doesn't exist + if let Some(parent) = target_dir.parent() { + std::fs::create_dir_all(parent).context("Failed to create parent directory for clone")?; + } + + // Clone with minimal depth and fetch only the specific commit + let output = Command::new("git") + .args([ + "clone", + 
"--filter=blob:none", // Don't download large files initially + "--no-checkout", // Don't checkout files yet + repo_url, + target_dir.to_str().unwrap(), + ]) + .output() + .context("Failed to execute git clone command")?; + + anyhow::ensure!( + output.status.success(), + "Git clone failed: {}", + String::from_utf8_lossy(&output.stderr) + ); + + // Fetch the specific commit + let output = Command::new("git") + .args(["fetch", "origin", commit]) + .current_dir(target_dir) + .output() + .context("Failed to execute git fetch command")?; + + anyhow::ensure!( + output.status.success(), + "Git fetch of commit {} failed: {}", + commit, + String::from_utf8_lossy(&output.stderr) + ); + + // Checkout the specific commit + let output = Command::new("git") + .args(["checkout", commit]) + .current_dir(target_dir) + .output() + .context("Failed to execute git checkout command")?; + + anyhow::ensure!( + output.status.success(), + "Git checkout of commit {} failed: {}", + commit, + String::from_utf8_lossy(&output.stderr) + ); + + Ok(()) +} + +/// Install dependencies using uv with date constraints +fn install_dependencies(checkout: &Checkout) -> Result<()> { + // Check if uv is available + let uv_check = Command::new("uv") + .arg("--version") + .output() + .context("Failed to execute uv version check.")?; + + if !uv_check.status.success() { + anyhow::bail!( + "uv is not installed or not found in PATH. 
If you need to install it, follow the instructions at https://docs.astral.sh/uv/getting-started/installation/" + ); + } + + let venv_path = checkout.venv_path(); + let python_version_str = checkout.project().python_version.to_string(); + + let output = Command::new("uv") + .args(["venv", "--python", &python_version_str, "--allow-existing"]) + .arg(&venv_path) + .output() + .context("Failed to execute uv venv command")?; + + anyhow::ensure!( + output.status.success(), + "Failed to create virtual environment: {}", + String::from_utf8_lossy(&output.stderr) + ); + + // Install dependencies with date constraint in the isolated environment + let mut cmd = Command::new("uv"); + cmd.args([ + "pip", + "install", + "--python", + venv_path.to_str().unwrap(), + "--exclude-newer", + checkout.project().max_dep_date, + ]) + .args(&checkout.project().dependencies); + + let output = cmd + .output() + .context("Failed to execute uv pip install command")?; + + anyhow::ensure!( + output.status.success(), + "Dependency installation failed: {}", + String::from_utf8_lossy(&output.stderr) + ); + + Ok(()) +} + +/// Recursively load a directory into the memory filesystem +fn copy_directory_recursive( + fs: &MemoryFileSystem, + source_path: &Path, + dest_path: &SystemPath, +) -> Result<()> { + if source_path.is_file() { + if source_path.file_name().and_then(OsStr::to_str) == Some("pyvenv.cfg") { + // Skip pyvenv.cfg files because the Python path will be invalid. + return Ok(()); + } + + match std::fs::read_to_string(source_path) { + Ok(content) => { + fs.write_file_all(dest_path.to_path_buf(), content) + .with_context(|| { + format!("Failed to write file to memory filesystem: {dest_path}") + })?; + } + Err(error) => { + if error.kind() == std::io::ErrorKind::InvalidData { + // Skip binary files. 
+ return Ok(()); + } + return Err(error) + .with_context(|| format!("Failed to read file: {}", source_path.display())); + } + } + } else if source_path.is_dir() { + // Create directory in memory fs + fs.create_directory_all(dest_path.to_path_buf()) + .with_context(|| { + format!("Failed to create directory in memory filesystem: {dest_path}") + })?; + + // Read directory contents + let entries = std::fs::read_dir(source_path) + .with_context(|| format!("Failed to read directory: {}", source_path.display()))?; + + for entry in entries { + let entry = entry.with_context(|| { + format!("Failed to read directory entry: {}", source_path.display()) + })?; + + let file_name = entry.file_name(); + let file_name = file_name.to_str().context("Expected UTF8 path")?; + let source_child = source_path.join(file_name); + let dest_child = dest_path.join(file_name); + + // Skip hidden files and common non-Python directories + if file_name != ".venv" && (file_name.starts_with('.') || matches!(file_name, ".git")) { + continue; + } + + copy_directory_recursive(fs, &source_child, &dest_child)?; + } + } + + Ok(()) +} + +static CARGO_TARGET_DIR: std::sync::OnceLock> = std::sync::OnceLock::new(); + +fn cargo_target_directory() -> Option<&'static PathBuf> { + CARGO_TARGET_DIR + .get_or_init(|| { + #[derive(serde::Deserialize)] + struct Metadata { + target_directory: PathBuf, + } + + std::env::var_os("CARGO_TARGET_DIR") + .map(PathBuf::from) + .or_else(|| { + let output = Command::new(std::env::var_os("CARGO")?) 
+ .args(["metadata", "--format-version", "1"]) + .output() + .ok()?; + let metadata: Metadata = serde_json::from_slice(&output.stdout).ok()?; + Some(metadata.target_directory) + }) + }) + .as_ref() +} diff --git a/crates/ruff_db/src/lib.rs b/crates/ruff_db/src/lib.rs index dec4500c5d..6172e1f484 100644 --- a/crates/ruff_db/src/lib.rs +++ b/crates/ruff_db/src/lib.rs @@ -18,6 +18,12 @@ pub mod system; pub mod testing; pub mod vendored; +#[cfg(not(target_arch = "wasm32"))] +pub use std::time::{Instant, SystemTime, SystemTimeError}; + +#[cfg(target_arch = "wasm32")] +pub use web_time::{Instant, SystemTime, SystemTimeError}; + pub type FxDashMap = dashmap::DashMap>; pub type FxDashSet = dashmap::DashSet>; diff --git a/crates/ruff_db/src/system/test.rs b/crates/ruff_db/src/system/test.rs index 943469127c..cfdf204bb0 100644 --- a/crates/ruff_db/src/system/test.rs +++ b/crates/ruff_db/src/system/test.rs @@ -280,6 +280,13 @@ impl InMemorySystem { } } + pub fn from_memory_fs(memory_fs: MemoryFileSystem) -> Self { + Self { + user_config_directory: Mutex::new(None), + memory_fs, + } + } + pub fn fs(&self) -> &MemoryFileSystem { &self.memory_fs } diff --git a/crates/ty_project/src/lib.rs b/crates/ty_project/src/lib.rs index 924785ca2d..f869b06bab 100644 --- a/crates/ty_project/src/lib.rs +++ b/crates/ty_project/src/lib.rs @@ -237,6 +237,7 @@ impl Project { .map(IOErrorDiagnostic::to_diagnostic), ); + let check_start = ruff_db::Instant::now(); let file_diagnostics = std::sync::Mutex::new(vec![]); { @@ -262,6 +263,11 @@ impl Project { }); } + tracing::debug!( + "Checking all files took {:.3}s", + check_start.elapsed().as_secs_f64(), + ); + let mut file_diagnostics = file_diagnostics.into_inner().unwrap(); file_diagnostics.sort_by(|left, right| { left.rendering_sort_key(db) @@ -442,11 +448,16 @@ impl Project { let _entered = tracing::debug_span!("Project::index_files", project = %self.name(db)) .entered(); + let start = ruff_db::Instant::now(); let walker = 
ProjectFilesWalker::new(db); let (files, diagnostics) = walker.collect_set(db); - tracing::info!("Indexed {} file(s)", files.len()); + tracing::info!( + "Indexed {} file(s) in {:.3}s", + files.len(), + start.elapsed().as_secs_f64() + ); vacant.set(files, diagnostics) } Index::Indexed(indexed) => indexed,