[ty] Add more benchmarks (#18714)

2025-07-07 21:25:08 +00:00 · 2025-06-18 13:41:38 +02:00 · 2025-06-18 13:41:38 +02:00 · 23261a38a0
commit 23261a38a0
parent 37fdece72f
12 changed files with 959 additions and 17 deletions
--- a/.github/actionlint.yaml
+++ b/.github/actionlint.yaml
@ -9,3 +9,4 @@ self-hosted-runner:
    - depot-ubuntu-22.04-32
    - github-windows-2025-x86_64-8
    - github-windows-2025-x86_64-16
+    - codspeed-macro
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@ -893,7 +893,7 @@ jobs:
        run: npm run fmt:check
        working-directory: playground

-  benchmarks:
+  benchmarks-instrumented:
    runs-on: ubuntu-24.04
    needs: determine_changes
    if: ${{ github.repository == 'astral-sh/ruff' && !contains(github.event.pull_request.labels.*.name, 'no-test') && (needs.determine_changes.outputs.code == 'true' || github.ref == 'refs/heads/main') }}
@ -905,6 +905,7 @@ jobs:
          persist-credentials: false

      - uses: Swatinem/rust-cache@9d47c6ad4b02e050fd481d890b2ea34778fd09d6 # v2.7.8
+      - uses: astral-sh/setup-uv@f0ec1fc3b38f5e7cd731bb6ce540c5af426746bb # v6.1.0

      - name: "Install Rust toolchain"
        run: rustup show
@ -915,7 +916,40 @@ jobs:
          tool: cargo-codspeed

      - name: "Build benchmarks"
-        run: cargo codspeed build --features codspeed -p ruff_benchmark
+        run: cargo codspeed build --features "codspeed,instrumented" --no-default-features -p ruff_benchmark
+
+      - name: "Run benchmarks"
+        uses: CodSpeedHQ/action@0010eb0ca6e89b80c88e8edaaa07cfe5f3e6664d # v3.5.0
+        with:
+          run: cargo codspeed run
+          token: ${{ secrets.CODSPEED_TOKEN }}
+
+  benchmarks-walltime:
+    runs-on: codspeed-macro
+    needs: determine_changes
+    if: ${{ github.repository == 'astral-sh/ruff' && !contains(github.event.pull_request.labels.*.name, 'no-test') && (needs.determine_changes.outputs.ty == 'true' || github.ref == 'refs/heads/main') }}
+    timeout-minutes: 20
+    env:
+      TY_LOG: ruff_benchmark=debug
+    steps:
+      - name: "Checkout Branch"
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          persist-credentials: false
+
+      - uses: Swatinem/rust-cache@9d47c6ad4b02e050fd481d890b2ea34778fd09d6 # v2.7.8
+      - uses: astral-sh/setup-uv@f0ec1fc3b38f5e7cd731bb6ce540c5af426746bb # v6.1.0
+
+      - name: "Install Rust toolchain"
+        run: rustup show
+
+      - name: "Install codspeed"
+        uses: taiki-e/install-action@735e5933943122c5ac182670a935f54a949265c1 # v2.52.4
+        with:
+          tool: cargo-codspeed
+
+      - name: "Build benchmarks"
+        run: cargo codspeed build --features "codspeed,walltime" --no-default-features -p ruff_benchmark

      - name: "Run benchmarks"
        uses: CodSpeedHQ/action@0010eb0ca6e89b80c88e8edaaa07cfe5f3e6664d # v3.5.0
--- a/Cargo.lock
+++ b/Cargo.lock
@ -479,6 +479,46 @@ dependencies = [
 "walkdir",
 ]

+[[package]]
+name = "codspeed-divan-compat"
+version = "2.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8620a09dfaf37b3c45f982c4b65bd8f9b0203944da3ffa705c0fcae6b84655ff"
+dependencies = [
+ "codspeed",
+ "codspeed-divan-compat-macros",
+ "codspeed-divan-compat-walltime",
+]
+
+[[package]]
+name = "codspeed-divan-compat-macros"
+version = "2.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "30fe872bc4214626b35d3a1706a905d0243503bb6ba3bb7be2fc59083d5d680c"
+dependencies = [
+ "divan-macros",
+ "itertools 0.14.0",
+ "proc-macro-crate",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "codspeed-divan-compat-walltime"
+version = "2.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "104caa97b36d4092d89e24e4b103b40ede1edab03c0372d19e14a33f9393132b"
+dependencies = [
+ "cfg-if",
+ "clap",
+ "codspeed",
+ "condtype",
+ "divan-macros",
+ "libc",
+ "regex-lite",
+]
+
 [[package]]
 name = "colorchoice"
 version = "1.0.3"
@ -519,6 +559,12 @@ dependencies = [
 "static_assertions",
 ]

+[[package]]
+name = "condtype"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf0a07a401f374238ab8e2f11a104d2851bf9ce711ec69804834de8af45c7af"
+
 [[package]]
 name = "console"
 version = "0.15.11"
@ -837,6 +883,17 @@ dependencies = [
 "syn",
 ]

+[[package]]
+name = "divan-macros"
+version = "0.1.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8dc51d98e636f5e3b0759a39257458b22619cac7e96d932da6eeb052891bb67c"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "doc-comment"
 version = "0.3.3"
@ -2272,6 +2329,15 @@ dependencies = [
 "yansi",
 ]

+[[package]]
+name = "proc-macro-crate"
+version = "3.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "edce586971a4dfaa28950c6f18ed55e0406c1ab88bbce2c6f6293a7aaba73d35"
+dependencies = [
+ "toml_edit",
+]
+
 [[package]]
 name = "proc-macro2"
 version = "1.0.95"
@ -2485,6 +2551,12 @@ dependencies = [
 "regex-syntax 0.8.5",
 ]

+[[package]]
+name = "regex-lite"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "53a49587ad06b26609c52e423de037e7f57f20d53535d66e08c695f347df952a"
+
 [[package]]
 name = "regex-syntax"
 version = "0.6.29"
@ -2586,7 +2658,9 @@ dependencies = [
 name = "ruff_benchmark"
 version = "0.0.0"
 dependencies = [
+ "anyhow",
 "codspeed-criterion-compat",
+ "codspeed-divan-compat",
 "criterion",
 "mimalloc",
 "rayon",
@ -2597,7 +2671,10 @@ dependencies = [
 "ruff_python_parser",
 "ruff_python_trivia",
 "rustc-hash 2.1.1",
+ "serde",
+ "serde_json",
 "tikv-jemallocator",
+ "tracing",
 "ty_project",
 ]

--- a/Cargo.toml
+++ b/Cargo.toml
@ -62,6 +62,7 @@ camino = { version = "1.1.7" }
 clap = { version = "4.5.3", features = ["derive"] }
 clap_complete_command = { version = "0.6.0" }
 clearscreen = { version = "4.0.0" }
+divan = { package = "codspeed-divan-compat", version = "2.10.1" }
 codspeed-criterion-compat = { version = "2.6.0", default-features = false }
 colored = { version = "3.0.0" }
 console_error_panic_hook = { version = "0.1.7" }
--- a/crates/ruff_benchmark/Cargo.toml
+++ b/crates/ruff_benchmark/Cargo.toml
@ -19,43 +19,69 @@ doctest = false
 [[bench]]
 name = "linter"
 harness = false
+required-features = ["instrumented"]

 [[bench]]
 name = "lexer"
 harness = false
+required-features = ["instrumented"]

 [[bench]]
 name = "parser"
 harness = false
+required-features = ["instrumented"]

 [[bench]]
 name = "formatter"
 harness = false
+required-features = ["instrumented"]

 [[bench]]
 name = "ty"
 harness = false
+required-features = ["instrumented"]
+
+[[bench]]
+name = "ty_walltime"
+harness = false
+required-features = ["walltime"]

 [dependencies]
+ruff_db = { workspace = true, features = ["testing"] }
+ruff_python_ast = { workspace = true }
+ruff_linter = { workspace = true, optional = true }
+ruff_python_formatter = { workspace = true, optional = true }
+ruff_python_parser = { workspace = true, optional = true }
+ruff_python_trivia = { workspace = true, optional = true }
+ty_project = { workspace = true, optional = true }
+
+divan = { workspace = true, optional = true }
+anyhow = { workspace = true }
 codspeed-criterion-compat = { workspace = true, default-features = false, optional = true }
-criterion = { workspace = true, default-features = false }
+criterion = { workspace = true, default-features = false, optional = true }
 rayon = { workspace = true }
 rustc-hash = { workspace = true }
-
-[dev-dependencies]
-ruff_db = { workspace = true }
-ruff_linter = { workspace = true }
-ruff_python_ast = { workspace = true }
-ruff_python_formatter = { workspace = true }
-ruff_python_parser = { workspace = true }
-ruff_python_trivia = { workspace = true }
-ty_project = { workspace = true }
+serde = { workspace = true }
+serde_json = { workspace = true }
+tracing = { workspace = true }

 [lints]
 workspace = true

 [features]
+default = ["instrumented", "walltime"]
+# Enables the benchmarks that should only run with codspeed's instrumented runner.
+instrumented = [
+    "criterion",
+    "ruff_linter",
+    "ruff_python_formatter",
+    "ruff_python_parser",
+    "ruff_python_trivia",
+    "ty_project",
+]
 codspeed = ["codspeed-criterion-compat"]
+# Enables the benchmarks that should only run with codspeed's walltime runner.
+walltime = ["ruff_db/os", "ty_project", "divan"]

 [target.'cfg(target_os = "windows")'.dev-dependencies]
 mimalloc = { workspace = true }
--- a/crates/ruff_benchmark/benches/ty.rs
+++ b/crates/ruff_benchmark/benches/ty.rs
@ -1,5 +1,6 @@
 #![allow(clippy::disallowed_names)]
 use ruff_benchmark::criterion;
+use ruff_benchmark::real_world_projects::{InstalledProject, RealWorldProject};

 use std::ops::Range;

@ -11,10 +12,10 @@ use ruff_benchmark::TestFile;
 use ruff_db::diagnostic::{Diagnostic, DiagnosticId, Severity};
 use ruff_db::files::{File, system_path_to_file};
 use ruff_db::source::source_text;
-use ruff_db::system::{MemoryFileSystem, SystemPath, SystemPathBuf, TestSystem};
+use ruff_db::system::{InMemorySystem, MemoryFileSystem, SystemPath, SystemPathBuf, TestSystem};
 use ruff_python_ast::PythonVersion;
 use ty_project::metadata::options::{EnvironmentOptions, Options};
-use ty_project::metadata::value::RangedValue;
+use ty_project::metadata::value::{RangedValue, RelativePathBuf};
 use ty_project::watch::{ChangeEvent, ChangedKind};
 use ty_project::{Db, ProjectDatabase, ProjectMetadata};

@ -347,10 +348,141 @@ fn benchmark_many_tuple_assignments(criterion: &mut Criterion) {
    });
 }

+struct ProjectBenchmark<'a> {
+    project: InstalledProject<'a>,
+    fs: MemoryFileSystem,
+    max_diagnostics: usize,
+}
+
+impl<'a> ProjectBenchmark<'a> {
+    fn new(project: RealWorldProject<'a>, max_diagnostics: usize) -> Self {
+        let setup_project = project.setup().expect("Failed to setup project");
+        let fs = setup_project
+            .copy_to_memory_fs()
+            .expect("Failed to copy project to memory fs");
+
+        Self {
+            project: setup_project,
+            fs,
+            max_diagnostics,
+        }
+    }
+
+    fn setup_iteration(&self) -> ProjectDatabase {
+        let system = TestSystem::new(InMemorySystem::from_memory_fs(self.fs.clone()));
+
+        let src_root = SystemPath::new("/");
+        let mut metadata = ProjectMetadata::discover(src_root, &system).unwrap();
+
+        metadata.apply_options(Options {
+            environment: Some(EnvironmentOptions {
+                python_version: Some(RangedValue::cli(self.project.config.python_version)),
+                python: (!self.project.config().dependencies.is_empty())
+                    .then_some(RelativePathBuf::cli(SystemPath::new(".venv"))),
+                ..EnvironmentOptions::default()
+            }),
+            ..Options::default()
+        });
+
+        let mut db = ProjectDatabase::new(metadata, system).unwrap();
+
+        db.project().set_included_paths(
+            &mut db,
+            self.project
+                .check_paths()
+                .iter()
+                .map(|path| path.to_path_buf())
+                .collect(),
+        );
+
+        db
+    }
+}
+
+/// Registers a criterion benchmark in the `project` group that checks `benchmark`'s project.
+///
+/// Every iteration gets a fresh database from [`ProjectBenchmark::setup_iteration`]; only the
+/// call to `check_project` is inside the measured routine.
+#[track_caller]
+fn bench_project(benchmark: &ProjectBenchmark, criterion: &mut Criterion) {
+    /// Checks the project and asserts that the number of diagnostics is within
+    /// `1..=max_diagnostics`.
+    fn check_project(db: &mut ProjectDatabase, max_diagnostics: usize) {
+        let result = db.check();
+        let diagnostics = result.len();
+
+        // The range is inclusive on both ends to match the wording of the failure message
+        // (the previous `> 1` check rejected exactly one diagnostic).
+        assert!(
+            (1..=max_diagnostics).contains(&diagnostics),
+            "Expected between {} and {} diagnostics but got {}",
+            1,
+            max_diagnostics,
+            diagnostics
+        );
+    }
+
+    setup_rayon();
+
+    let mut group = criterion.benchmark_group("project");
+    group.sampling_mode(criterion::SamplingMode::Flat);
+    group.bench_function(benchmark.project.config.name, |b| {
+        b.iter_batched_ref(
+            || benchmark.setup_iteration(),
+            |db| check_project(db, benchmark.max_diagnostics),
+            BatchSize::SmallInput,
+        );
+    });
+}
+
+fn hydra(criterion: &mut Criterion) {
+    let benchmark = ProjectBenchmark::new(
+        RealWorldProject {
+            name: "hydra-zen",
+            repository: "https://github.com/mit-ll-responsible-ai/hydra-zen",
+            commit: "dd2b50a9614c6f8c46c5866f283c8f7e7a960aa8",
+            paths: vec![SystemPath::new("src")],
+            dependencies: vec!["pydantic", "beartype", "hydra-core"],
+            max_dep_date: "2025-06-17",
+            python_version: PythonVersion::PY313,
+        },
+        100,
+    );
+
+    bench_project(&benchmark, criterion);
+}
+
+fn attrs(criterion: &mut Criterion) {
+    let benchmark = ProjectBenchmark::new(
+        RealWorldProject {
+            name: "attrs",
+            repository: "https://github.com/python-attrs/attrs",
+            commit: "a6ae894aad9bc09edc7cdad8c416898784ceec9b",
+            paths: vec![SystemPath::new("src")],
+            dependencies: vec![],
+            max_dep_date: "2025-06-17",
+            python_version: PythonVersion::PY313,
+        },
+        100,
+    );
+
+    bench_project(&benchmark, criterion);
+}
+
+fn anyio(criterion: &mut Criterion) {
+    let benchmark = ProjectBenchmark::new(
+        RealWorldProject {
+            name: "anyio",
+            repository: "https://github.com/agronholm/anyio",
+            commit: "561d81270a12f7c6bbafb5bc5fad99a2a13f96be",
+            paths: vec![SystemPath::new("src")],
+            dependencies: vec![],
+            max_dep_date: "2025-06-17",
+            python_version: PythonVersion::PY313,
+        },
+        100,
+    );
+
+    bench_project(&benchmark, criterion);
+}
+
 criterion_group!(check_file, benchmark_cold, benchmark_incremental);
 criterion_group!(
    micro,
    benchmark_many_string_assignments,
-    benchmark_many_tuple_assignments
+    benchmark_many_tuple_assignments,
 );
-criterion_main!(check_file, micro);
+criterion_group!(project, anyio, attrs, hydra);
+criterion_main!(check_file, micro, project);
--- a/crates/ruff_benchmark/benches/ty_walltime.rs
+++ b/crates/ruff_benchmark/benches/ty_walltime.rs
@ -0,0 +1,253 @@
+use std::fmt::{Display, Formatter};
+
+use divan::{Bencher, bench};
+
+use rayon::ThreadPoolBuilder;
+use ruff_benchmark::real_world_projects::{InstalledProject, RealWorldProject};
+use ruff_db::system::{OsSystem, SystemPath, SystemPathBuf};
+
+use ruff_db::testing::setup_logging_with_filter;
+use ruff_python_ast::PythonVersion;
+use ty_project::metadata::options::{EnvironmentOptions, Options};
+use ty_project::metadata::value::{RangedValue, RelativePathBuf};
+use ty_project::{Db, ProjectDatabase, ProjectMetadata};
+
+struct Benchmark<'a> {
+    project: InstalledProject<'a>,
+    max_diagnostics: usize,
+}
+
+impl<'a> Benchmark<'a> {
+    fn new(project: RealWorldProject<'a>, max_diagnostics: usize) -> Self {
+        let setup_project = project.setup().expect("Failed to setup project");
+
+        Self {
+            project: setup_project,
+            max_diagnostics,
+        }
+    }
+
+    fn setup_iteration(&self) -> ProjectDatabase {
+        let root = SystemPathBuf::from_path_buf(self.project.path.clone()).unwrap();
+        let system = OsSystem::new(&root);
+
+        let mut metadata = ProjectMetadata::discover(&root, &system).unwrap();
+
+        metadata.apply_options(Options {
+            environment: Some(EnvironmentOptions {
+                python_version: Some(RangedValue::cli(self.project.config.python_version)),
+                python: (!self.project.config().dependencies.is_empty())
+                    .then_some(RelativePathBuf::cli(SystemPath::new(".venv"))),
+                ..EnvironmentOptions::default()
+            }),
+            ..Options::default()
+        });
+
+        let mut db = ProjectDatabase::new(metadata, system).unwrap();
+
+        db.project().set_included_paths(
+            &mut db,
+            self.project
+                .check_paths()
+                .iter()
+                .map(|path| SystemPath::absolute(path, &root))
+                .collect(),
+        );
+        db
+    }
+}
+
+impl Display for Benchmark<'_> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        f.write_str(self.project.config.name)
+    }
+}
+
+/// Checks the project in `db` and asserts that the number of diagnostics is within
+/// `1..=max_diagnostics`.
+///
+/// # Panics
+///
+/// Panics if the check reports no diagnostics or more than `max_diagnostics`.
+fn check_project(db: &ProjectDatabase, max_diagnostics: usize) {
+    let result = db.check();
+    let diagnostics = result.len();
+
+    // The range is inclusive on both ends to match the wording of the failure message
+    // (the previous `> 1` check rejected exactly one diagnostic).
+    assert!(
+        (1..=max_diagnostics).contains(&diagnostics),
+        "Expected between {} and {} diagnostics but got {}",
+        1,
+        max_diagnostics,
+        diagnostics
+    );
+}
+
+static ALTAIR: std::sync::LazyLock<Benchmark<'static>> = std::sync::LazyLock::new(|| {
+    Benchmark::new(
+        RealWorldProject {
+            name: "altair",
+            repository: "https://github.com/vega/altair",
+            commit: "d1f4a1ef89006e5f6752ef1f6df4b7a509336fba",
+            paths: vec![SystemPath::new("altair")],
+            dependencies: vec![
+                "jinja2",
+                "narwhals",
+                "numpy",
+                "packaging",
+                "pandas-stubs",
+                "pyarrow-stubs",
+                "pytest",
+                "scipy-stubs",
+                "types-jsonschema",
+            ],
+            max_dep_date: "2025-06-17",
+            python_version: PythonVersion::PY312,
+        },
+        1000,
+    )
+});
+
+static COLOUR_SCIENCE: std::sync::LazyLock<Benchmark<'static>> = std::sync::LazyLock::new(|| {
+    Benchmark::new(
+        RealWorldProject {
+            name: "colour-science",
+            repository: "https://github.com/colour-science/colour",
+            commit: "a17e2335c29e7b6f08080aa4c93cfa9b61f84757",
+            paths: vec![SystemPath::new("colour")],
+            dependencies: vec![
+                "matplotlib",
+                "numpy",
+                "pandas-stubs",
+                "pytest",
+                "scipy-stubs",
+            ],
+            max_dep_date: "2025-06-17",
+            python_version: PythonVersion::PY310,
+        },
+        477,
+    )
+});
+
+static FREQTRADE: std::sync::LazyLock<Benchmark<'static>> = std::sync::LazyLock::new(|| {
+    Benchmark::new(
+        RealWorldProject {
+            name: "freqtrade",
+            repository: "https://github.com/freqtrade/freqtrade",
+            commit: "2d842ea129e56575852ee0c45383c8c3f706be19",
+            paths: vec![SystemPath::new("freqtrade")],
+            dependencies: vec![
+                "numpy",
+                "pandas-stubs",
+                "pydantic",
+                "sqlalchemy",
+                "types-cachetools",
+                "types-filelock",
+                "types-python-dateutil",
+                "types-requests",
+                "types-tabulate",
+            ],
+            max_dep_date: "2025-06-17",
+            python_version: PythonVersion::PY312,
+        },
+        400,
+    )
+});
+
+static PANDAS: std::sync::LazyLock<Benchmark<'static>> = std::sync::LazyLock::new(|| {
+    Benchmark::new(
+        RealWorldProject {
+            name: "pandas",
+            repository: "https://github.com/pandas-dev/pandas",
+            commit: "5909621e2267eb67943a95ef5e895e8484c53432",
+            paths: vec![SystemPath::new("pandas")],
+            dependencies: vec![
+                "numpy",
+                "types-python-dateutil",
+                "types-pytz",
+                "types-PyMySQL",
+                "types-setuptools",
+                "pytest",
+            ],
+            max_dep_date: "2025-06-17",
+            python_version: PythonVersion::PY312,
+        },
+        3000,
+    )
+});
+
+static PYDANTIC: std::sync::LazyLock<Benchmark<'static>> = std::sync::LazyLock::new(|| {
+    Benchmark::new(
+        RealWorldProject {
+            name: "pydantic",
+            repository: "https://github.com/pydantic/pydantic",
+            commit: "0c4a22b64b23dfad27387750cf07487efc45eb05",
+            paths: vec![SystemPath::new("pydantic")],
+            dependencies: vec![
+                "annotated-types",
+                "pydantic-core",
+                "typing-extensions",
+                "typing-inspection",
+            ],
+            max_dep_date: "2025-06-17",
+            python_version: PythonVersion::PY39,
+        },
+        1000,
+    )
+});
+
+static SYMPY: std::sync::LazyLock<Benchmark<'static>> = std::sync::LazyLock::new(|| {
+    Benchmark::new(
+        RealWorldProject {
+            name: "sympy",
+            repository: "https://github.com/sympy/sympy",
+            commit: "22fc107a94eaabc4f6eb31470b39db65abb7a394",
+            paths: vec![SystemPath::new("sympy")],
+            dependencies: vec!["mpmath"],
+            max_dep_date: "2025-06-17",
+            python_version: PythonVersion::PY312,
+        },
+        13000,
+    )
+});
+
+#[bench(args=[&*ALTAIR, &*FREQTRADE, &*PYDANTIC], sample_size=2, sample_count=3)]
+fn small(bencher: Bencher, benchmark: &Benchmark) {
+    bencher
+        .with_inputs(|| benchmark.setup_iteration())
+        .bench_local_refs(|db| {
+            check_project(db, benchmark.max_diagnostics);
+        });
+}
+
+#[bench(args=[&*COLOUR_SCIENCE, &*PANDAS], sample_size=1, sample_count=3)]
+fn medium(bencher: Bencher, benchmark: &Benchmark) {
+    bencher
+        .with_inputs(|| benchmark.setup_iteration())
+        .bench_local_refs(|db| {
+            check_project(db, benchmark.max_diagnostics);
+        });
+}
+
+#[bench(args=[&*SYMPY], sample_size=1, sample_count=2)]
+fn large(bencher: Bencher, benchmark: &Benchmark) {
+    bencher
+        .with_inputs(|| benchmark.setup_iteration())
+        .bench_local_refs(|db| {
+            check_project(db, benchmark.max_diagnostics);
+        });
+}
+
+/// Entry point of the walltime benchmarks: sets up logging and the global rayon pool,
+/// then hands control to divan.
+fn main() {
+    // Use the filter from `TY_LOG` when it can be read; the default filter string is only
+    // allocated when it is actually needed.
+    let filter = std::env::var("TY_LOG")
+        .unwrap_or_else(|_| "ty_walltime=info,ruff_benchmark=info".to_string());
+
+    let _logging = setup_logging_with_filter(&filter).expect("Filter to be valid");
+
+    // Disable multithreading for now due to
+    // https://github.com/salsa-rs/salsa/issues/918.
+    //
+    // Salsa has a fast-path for the first database when looking up ingredients.
+    // It seems that this fast-path becomes extremely slow for all databases other
+    // than the first one, especially when using multithreading (10x slower than the first run).
+    ThreadPoolBuilder::new()
+        .num_threads(1)
+        .use_current_thread()
+        .build_global()
+        .unwrap();
+
+    divan::main();
+}
--- a/crates/ruff_benchmark/src/lib.rs
+++ b/crates/ruff_benchmark/src/lib.rs
@ -1,6 +1,8 @@
 use std::path::PathBuf;

+#[cfg(feature = "instrumented")]
 pub mod criterion;
+pub mod real_world_projects;

 pub static NUMPY_GLOBALS: TestFile = TestFile::new(
    "numpy/globals.py",
--- a/crates/ruff_benchmark/src/real_world_projects.rs
+++ b/crates/ruff_benchmark/src/real_world_projects.rs
@ -0,0 +1,392 @@
+#![allow(clippy::print_stderr)]
+
+//! Infrastructure for benchmarking real-world Python projects.
+//!
+//! The module uses a setup similar to mypy primer's, which should make it easy
+//! to add new benchmarks for projects in [mypy primer's project list](https://github.com/hauntsaninja/mypy_primer/blob/ebaa9fd27b51a278873b63676fd25490cec6823b/mypy_primer/projects.py#L74).
+//!
+//! The basic steps for a project are:
+//! 1. Clone or update the project into a directory inside `./target`. The commits are pinned to prevent flaky benchmark results due to new commits.
+//! 2. For projects with dependencies, run uv to create a virtual environment and install the dependencies.
+//! 3. (optionally) Copy the entire project structure into a memory file system to reduce the IO noise in benchmarks.
+//! 4. (not in this module) Create a `ProjectDatabase` and run the benchmark.
+
+use std::ffi::OsStr;
+use std::path::{Path, PathBuf};
+use std::process::Command;
+use std::time::Instant;
+
+use anyhow::{Context, Result};
+use ruff_db::system::{MemoryFileSystem, SystemPath, SystemPathBuf};
+use ruff_python_ast::PythonVersion;
+
+/// Configuration for a real-world project to benchmark
+#[derive(Debug, Clone)]
+pub struct RealWorldProject<'a> {
+    /// The name of the project.
+    pub name: &'a str,
+    /// The project's GIT repository. Must be publicly accessible.
+    pub repository: &'a str,
+    /// Specific commit hash to checkout
+    pub commit: &'a str,
+    /// List of paths within the project to check (`ty check <paths>`)
+    pub paths: Vec<&'a SystemPath>,
+    /// Dependencies to install via uv
+    pub dependencies: Vec<&'a str>,
+    /// Limit candidate packages to those that were uploaded prior to a given point in time (ISO 8601 format).
+    /// Maps to uv's `exclude-newer`.
+    pub max_dep_date: &'a str,
+    /// Python version to use
+    pub python_version: PythonVersion,
+}
+
+impl<'a> RealWorldProject<'a> {
+    /// Setup a real-world project for benchmarking
+    pub fn setup(self) -> Result<InstalledProject<'a>> {
+        let start = Instant::now();
+        tracing::debug!("Setting up project {}", self.name);
+
+        // Create project directory in cargo target
+        let project_root = get_project_cache_dir(self.name)?;
+
+        // Clone the repository if it doesn't exist, or update if it does
+        if project_root.exists() {
+            tracing::debug!("Updating repository for project '{}'...", self.name);
+            let start = std::time::Instant::now();
+            update_repository(&project_root, self.commit)?;
+            tracing::debug!(
+                "Repository update completed in {:.2}s",
+                start.elapsed().as_secs_f64()
+            );
+        } else {
+            tracing::debug!("Cloning repository for project '{}'...", self.name);
+            let start = std::time::Instant::now();
+            clone_repository(self.repository, &project_root, self.commit)?;
+            tracing::debug!(
+                "Repository clone completed in {:.2}s",
+                start.elapsed().as_secs_f64()
+            );
+        }
+
+        let checkout = Checkout {
+            path: project_root,
+            project: self,
+        };
+
+        // Install dependencies if specified
+        if !checkout.project().dependencies.is_empty() {
+            tracing::debug!(
+                "Installing {} dependencies for project '{}'...",
+                checkout.project().dependencies.len(),
+                checkout.project().name
+            );
+            let start = std::time::Instant::now();
+            install_dependencies(&checkout)?;
+            tracing::debug!(
+                "Dependency installation completed in {:.2}s",
+                start.elapsed().as_secs_f64()
+            );
+        }
+
+        tracing::debug!("Project setup took: {:.2}s", start.elapsed().as_secs_f64());
+
+        Ok(InstalledProject {
+            path: checkout.path,
+            config: checkout.project,
+        })
+    }
+}
+
+struct Checkout<'a> {
+    project: RealWorldProject<'a>,
+    path: PathBuf,
+}
+
+impl<'a> Checkout<'a> {
+    /// Get the virtual environment path
+    fn venv_path(&self) -> PathBuf {
+        self.path.join(".venv")
+    }
+
+    fn project(&self) -> &RealWorldProject<'a> {
+        &self.project
+    }
+}
+
+/// Checked out project with its dependencies installed.
+pub struct InstalledProject<'a> {
+    /// Path to the cloned project
+    pub path: PathBuf,
+    /// Project configuration
+    pub config: RealWorldProject<'a>,
+}
+
+impl<'a> InstalledProject<'a> {
+    /// Get the project configuration
+    pub fn config(&self) -> &RealWorldProject<'a> {
+        &self.config
+    }
+
+    /// Get the paths within the project that should be checked
+    pub fn check_paths(&self) -> &[&SystemPath] {
+        &self.config.paths
+    }
+
+    /// Get the virtual environment path
+    pub fn venv_path(&self) -> PathBuf {
+        self.path.join(".venv")
+    }
+
+    /// Copies the entire project to a memory file system.
+    pub fn copy_to_memory_fs(&self) -> anyhow::Result<MemoryFileSystem> {
+        let fs = MemoryFileSystem::new();
+
+        copy_directory_recursive(&fs, &self.path, &SystemPathBuf::from("/"))?;
+
+        Ok(fs)
+    }
+}
+
+/// Get the cache directory for a project in the cargo target directory
+fn get_project_cache_dir(project_name: &str) -> Result<std::path::PathBuf> {
+    let target_dir = cargo_target_directory()
+        .cloned()
+        .unwrap_or_else(|| PathBuf::from("target"));
+    let target_dir =
+        std::path::absolute(target_dir).context("Failed to construct an absolute path")?;
+    let cache_dir = target_dir.join("benchmark_cache").join(project_name);
+
+    if let Some(parent) = cache_dir.parent() {
+        std::fs::create_dir_all(parent).context("Failed to create cache directory")?;
+    }
+
+    Ok(cache_dir)
+}
+
+/// Updates the existing checkout in `project_root` to `commit`.
+///
+/// Fetches `commit` from the `origin` remote and then checks it out.
+///
+/// # Errors
+///
+/// Returns an error if git can't be executed, or if the fetch or the checkout exits
+/// unsuccessfully (git's stderr is included in the error message).
+fn update_repository(project_root: &Path, commit: &str) -> Result<()> {
+    let output = Command::new("git")
+        .args(["fetch", "origin", commit])
+        .current_dir(project_root)
+        .output()
+        .context("Failed to execute git fetch command")?;
+
+    anyhow::ensure!(
+        output.status.success(),
+        "Git fetch of commit {} failed: {}",
+        commit,
+        String::from_utf8_lossy(&output.stderr)
+    );
+
+    // Checkout specific commit
+    let output = Command::new("git")
+        .args(["checkout", commit])
+        .current_dir(project_root)
+        .output()
+        .context("Failed to execute git checkout command")?;
+
+    anyhow::ensure!(
+        output.status.success(),
+        "Git checkout of commit {} failed: {}",
+        commit,
+        String::from_utf8_lossy(&output.stderr)
+    );
+
+    Ok(())
+}
+
+/// Clones the git repository at `repo_url` into `target_dir` and checks out `commit`.
+///
+/// The clone is created with `--filter=blob:none --no-checkout`; the pinned commit is then
+/// fetched and checked out by [`update_repository`].
+///
+/// # Errors
+///
+/// Returns an error if the parent directory can't be created, if git can't be executed,
+/// or if the clone, fetch, or checkout exits unsuccessfully.
+fn clone_repository(repo_url: &str, target_dir: &Path, commit: &str) -> Result<()> {
+    // Create parent directory if it doesn't exist
+    if let Some(parent) = target_dir.parent() {
+        std::fs::create_dir_all(parent).context("Failed to create parent directory for clone")?;
+    }
+
+    // Blob-filtered clone without a checkout; the pinned commit is fetched separately below.
+    let output = Command::new("git")
+        .args([
+            "clone",
+            "--filter=blob:none", // Don't download file contents up front
+            "--no-checkout",      // Don't checkout files yet
+            repo_url,
+        ])
+        // Pass the path as-is (not via `to_str().unwrap()`) so that a non-UTF-8 target
+        // directory doesn't panic.
+        .arg(target_dir)
+        .output()
+        .context("Failed to execute git clone command")?;
+
+    anyhow::ensure!(
+        output.status.success(),
+        "Git clone failed: {}",
+        String::from_utf8_lossy(&output.stderr)
+    );
+
+    // Fetch and check out the specific commit. This is exactly what updating an
+    // existing checkout does, so share the implementation.
+    update_repository(target_dir, commit)
+}
+
+/// Creates the checkout's virtual environment with uv and installs the project's
+/// dependencies into it.
+///
+/// The project's `max_dep_date` is passed to uv as `--exclude-newer`.
+///
+/// # Errors
+///
+/// Returns an error if uv isn't installed or can't be executed, if the virtual environment
+/// can't be created, or if the dependency installation exits unsuccessfully.
+fn install_dependencies(checkout: &Checkout) -> Result<()> {
+    const UV_NOT_FOUND: &str = "uv is not installed or not found in PATH. If you need to install it, follow the instructions at https://docs.astral.sh/uv/getting-started/installation/";
+
+    // Check if uv is available. A missing binary makes spawning the process fail (it never
+    // gets as far as returning an exit status), so the installation hint has to be attached
+    // to the `NotFound` spawn error too.
+    let uv_check = match Command::new("uv").arg("--version").output() {
+        Ok(output) => output,
+        Err(error) if error.kind() == std::io::ErrorKind::NotFound => {
+            anyhow::bail!(UV_NOT_FOUND)
+        }
+        Err(error) => return Err(error).context("Failed to execute uv version check."),
+    };
+
+    anyhow::ensure!(uv_check.status.success(), UV_NOT_FOUND);
+
+    let venv_path = checkout.venv_path();
+    let python_version_str = checkout.project().python_version.to_string();
+
+    let output = Command::new("uv")
+        .args(["venv", "--python", &python_version_str, "--allow-existing"])
+        .arg(&venv_path)
+        .output()
+        .context("Failed to execute uv venv command")?;
+
+    anyhow::ensure!(
+        output.status.success(),
+        "Failed to create virtual environment: {}",
+        String::from_utf8_lossy(&output.stderr)
+    );
+
+    // Install dependencies with date constraint in the isolated environment.
+    // The venv path is passed as-is (not via `to_str().unwrap()`) so that a non-UTF-8
+    // path doesn't panic.
+    let output = Command::new("uv")
+        .args(["pip", "install", "--python"])
+        .arg(&venv_path)
+        .args(["--exclude-newer", checkout.project().max_dep_date])
+        .args(&checkout.project().dependencies)
+        .output()
+        .context("Failed to execute uv pip install command")?;
+
+    anyhow::ensure!(
+        output.status.success(),
+        "Dependency installation failed: {}",
+        String::from_utf8_lossy(&output.stderr)
+    );
+
+    Ok(())
+}
+
+/// Recursively load a directory into the memory filesystem
+///
+/// Copies `source_path` (a file or directory on disk) to `dest_path` in `fs`.
+///
+/// The following are skipped without an error:
+/// * files named `pyvenv.cfg`,
+/// * files whose content isn't valid UTF-8,
+/// * directory entries whose name starts with `.`, except `.venv`,
+/// * paths that are neither a file nor a directory.
+///
+/// # Errors
+///
+/// Returns an error if reading from disk or writing to `fs` fails, or if a directory entry's
+/// name isn't valid UTF-8.
+fn copy_directory_recursive(
+    fs: &MemoryFileSystem,
+    source_path: &Path,
+    dest_path: &SystemPath,
+) -> Result<()> {
+    if source_path.is_file() {
+        if source_path.file_name().and_then(OsStr::to_str) == Some("pyvenv.cfg") {
+            // Skip pyvenv.cfg files because the Python path will be invalid.
+            return Ok(());
+        }
+
+        let content = match std::fs::read_to_string(source_path) {
+            Ok(content) => content,
+            // `read_to_string` reports non-UTF-8 content as `InvalidData`: skip binary files.
+            Err(error) if error.kind() == std::io::ErrorKind::InvalidData => return Ok(()),
+            Err(error) => {
+                return Err(error)
+                    .with_context(|| format!("Failed to read file: {}", source_path.display()));
+            }
+        };
+
+        fs.write_file_all(dest_path.to_path_buf(), content)
+            .with_context(|| format!("Failed to write file to memory filesystem: {dest_path}"))?;
+    } else if source_path.is_dir() {
+        // Create directory in memory fs
+        fs.create_directory_all(dest_path.to_path_buf())
+            .with_context(|| {
+                format!("Failed to create directory in memory filesystem: {dest_path}")
+            })?;
+
+        // Read directory contents
+        let entries = std::fs::read_dir(source_path)
+            .with_context(|| format!("Failed to read directory: {}", source_path.display()))?;
+
+        for entry in entries {
+            let entry = entry.with_context(|| {
+                format!("Failed to read directory entry: {}", source_path.display())
+            })?;
+
+            let file_name = entry.file_name();
+            let file_name = file_name.to_str().context("Expected UTF8 path")?;
+
+            // Skip hidden entries (this covers `.git`), but keep `.venv`.
+            if file_name != ".venv" && file_name.starts_with('.') {
+                continue;
+            }
+
+            copy_directory_recursive(fs, &source_path.join(file_name), &dest_path.join(file_name))?;
+        }
+    }
+
+    Ok(())
+}
+
+/// Process-wide cache for [`cargo_target_directory`]. Holds `None` if the target directory
+/// couldn't be determined.
+static CARGO_TARGET_DIR: std::sync::OnceLock<Option<PathBuf>> = std::sync::OnceLock::new();
+
+/// Returns cargo's target directory, resolving it at most once per process.
+///
+/// The `CARGO_TARGET_DIR` environment variable takes precedence. If it isn't set, the binary
+/// named by the `CARGO` environment variable is asked through `cargo metadata`.
+///
+/// Returns `None` if neither variable is set, if running cargo fails, or if its output
+/// can't be parsed.
+fn cargo_target_directory() -> Option<&'static PathBuf> {
+    /// The only field of the `cargo metadata` output that is needed here.
+    #[derive(serde::Deserialize)]
+    struct Metadata {
+        target_directory: PathBuf,
+    }
+
+    CARGO_TARGET_DIR
+        .get_or_init(|| {
+            if let Some(target_dir) = std::env::var_os("CARGO_TARGET_DIR") {
+                return Some(PathBuf::from(target_dir));
+            }
+
+            // `--no-deps` avoids resolving the dependency graph; `target_directory` is a
+            // workspace-level field and is part of the output either way.
+            let output = Command::new(std::env::var_os("CARGO")?)
+                .args(["metadata", "--format-version", "1", "--no-deps"])
+                .output()
+                .ok()?;
+
+            if !output.status.success() {
+                return None;
+            }
+
+            let metadata: Metadata = serde_json::from_slice(&output.stdout).ok()?;
+            Some(metadata.target_directory)
+        })
+        .as_ref()
+}
--- a/crates/ruff_db/src/lib.rs
+++ b/crates/ruff_db/src/lib.rs
@ -18,6 +18,12 @@ pub mod system;
 pub mod testing;
 pub mod vendored;

+#[cfg(not(target_arch = "wasm32"))]
+pub use std::time::{Instant, SystemTime, SystemTimeError};
+
+#[cfg(target_arch = "wasm32")]
+pub use web_time::{Instant, SystemTime, SystemTimeError};
+
 pub type FxDashMap<K, V> = dashmap::DashMap<K, V, BuildHasherDefault<FxHasher>>;
 pub type FxDashSet<K> = dashmap::DashSet<K, BuildHasherDefault<FxHasher>>;

--- a/crates/ruff_db/src/system/test.rs
+++ b/crates/ruff_db/src/system/test.rs
@ -280,6 +280,13 @@ impl InMemorySystem {
        }
    }

+    /// Creates a system that uses `memory_fs` as its file system and has no user
+    /// configuration directory set.
+    pub fn from_memory_fs(memory_fs: MemoryFileSystem) -> Self {
+        Self {
+            user_config_directory: Mutex::new(None),
+            memory_fs,
+        }
+    }
+
    pub fn fs(&self) -> &MemoryFileSystem {
        &self.memory_fs
    }
--- a/crates/ty_project/src/lib.rs
+++ b/crates/ty_project/src/lib.rs
@ -237,6 +237,7 @@ impl Project {
                .map(IOErrorDiagnostic::to_diagnostic),
        );

+        let check_start = ruff_db::Instant::now();
        let file_diagnostics = std::sync::Mutex::new(vec![]);

        {
@ -262,6 +263,11 @@ impl Project {
            });
        }

+        tracing::debug!(
+            "Checking all files took {:.3}s",
+            check_start.elapsed().as_secs_f64(),
+        );
+
        let mut file_diagnostics = file_diagnostics.into_inner().unwrap();
        file_diagnostics.sort_by(|left, right| {
            left.rendering_sort_key(db)
@ -442,11 +448,16 @@ impl Project {
                let _entered =
                    tracing::debug_span!("Project::index_files", project = %self.name(db))
                        .entered();
+                let start = ruff_db::Instant::now();

                let walker = ProjectFilesWalker::new(db);
                let (files, diagnostics) = walker.collect_set(db);

-                tracing::info!("Indexed {} file(s)", files.len());
+                tracing::info!(
+                    "Indexed {} file(s) in {:.3}s",
+                    files.len(),
+                    start.elapsed().as_secs_f64()
+                );
                vacant.set(files, diagnostics)
            }
            Index::Indexed(indexed) => indexed,