Add airflow benchmark (#3643)

## Summary

This seems to be one of the most consistent benchmark cases we have in
terms of standard deviation:
```
➜  hyperfine "target/profiling/main pip compile scripts/requirements/airflow.in" --runs 200
Benchmark 1: target/profiling/main pip compile scripts/requirements/airflow.in
  Time (mean ± σ):     292.6 ms ±   6.6 ms    [User: 414.1 ms, System: 194.2 ms]
  Range (min … max):   282.7 ms … 320.1 ms    200 runs
```

For smaller benchmarks, scispacy and dtlssocket seem to be a bit more
consistent than our current jupyter benchmark, but it hasn't given us
any problems so I'll leave it for now.
This commit is contained in:
Ibraheem Ahmed 2024-05-23 13:25:54 -04:00 committed by GitHub
parent 8f0f4d2e0c
commit 0d2f3fc4e4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 83 additions and 102 deletions

View file

@ -1057,6 +1057,12 @@ jobs:
- uses: Swatinem/rust-cache@v2
- name: "Install requirements and prime cache"
run: |
sudo apt-get install -y libsasl2-dev libldap2-dev libkrb5-dev
cargo run --bin uv -- venv --cache-dir .cache
cargo run --bin uv -- pip compile scripts/requirements/airflow.in --cache-dir .cache
- name: "Build benchmarks"
run: cargo codspeed build --features codspeed -p bench

2
Cargo.lock generated
View file

@ -395,6 +395,7 @@ dependencies = [
"criterion",
"distribution-filename",
"distribution-types",
"install-wheel-rs",
"once_cell",
"pep508_rs",
"platform-tags",
@ -402,6 +403,7 @@ dependencies = [
"uv-cache",
"uv-client",
"uv-configuration",
"uv-dispatch",
"uv-distribution",
"uv-interpreter",
"uv-resolver",

View file

@ -34,11 +34,13 @@ pep508_rs = { workspace = true }
platform-tags = { workspace = true }
uv-cache = { workspace = true }
uv-client = { workspace = true }
uv-dispatch = { workspace = true }
uv-configuration = { workspace = true }
uv-distribution = { workspace = true }
uv-interpreter = { workspace = true }
uv-resolver = { workspace = true }
uv-types = { workspace = true }
install-wheel-rs = { workspace = true }
anyhow = { workspace = true }
codspeed-criterion-compat = { version = "2.6.0", default-features = false, optional = true }

View file

@ -1,10 +1,9 @@
use std::str::FromStr;
use bench::criterion::black_box;
use bench::criterion::{criterion_group, criterion_main, measurement::WallTime, Criterion};
use distribution_types::Requirement;
use uv_cache::Cache;
use uv_client::RegistryClientBuilder;
use uv_interpreter::PythonEnvironment;
use uv_resolver::Manifest;
fn resolve_warm_jupyter(c: &mut Criterion<WallTime>) {
@ -13,12 +12,12 @@ fn resolve_warm_jupyter(c: &mut Criterion<WallTime>) {
.build()
.unwrap();
let cache = &Cache::from_path(".cache").unwrap().init().unwrap();
let manifest = &Manifest::simple(vec![Requirement::from_pep508(
pep508_rs::Requirement::from_str("jupyter").unwrap(),
)
.unwrap()]);
let cache = &Cache::from_path("../../.cache").unwrap().init().unwrap();
let venv = PythonEnvironment::from_virtualenv(cache).unwrap();
let client = &RegistryClientBuilder::new(cache.clone()).build();
let manifest = &Manifest::simple(vec![
Requirement::from_pep508("jupyter".parse().unwrap()).unwrap()
]);
let run = || {
runtime
@ -26,6 +25,7 @@ fn resolve_warm_jupyter(c: &mut Criterion<WallTime>) {
black_box(manifest.clone()),
black_box(cache.clone()),
black_box(client),
&venv,
))
.unwrap();
};
@ -33,29 +33,62 @@ fn resolve_warm_jupyter(c: &mut Criterion<WallTime>) {
c.bench_function("resolve_warm_jupyter", |b| b.iter(run));
}
criterion_group!(uv, resolve_warm_jupyter);
fn resolve_warm_airflow(c: &mut Criterion<WallTime>) {
let runtime = &tokio::runtime::Builder::new_multi_thread()
// CodSpeed limits the total number of threads to 500
.max_blocking_threads(256)
.enable_all()
.build()
.unwrap();
let cache = &Cache::from_path("../../.cache").unwrap().init().unwrap();
let venv = PythonEnvironment::from_virtualenv(cache).unwrap();
let client = &RegistryClientBuilder::new(cache.clone()).build();
let manifest = &Manifest::simple(vec![
Requirement::from_pep508("apache-airflow[all]".parse().unwrap()).unwrap(),
Requirement::from_pep508(
"apache-airflow-providers-apache-beam>3.0.0"
.parse()
.unwrap(),
)
.unwrap(),
]);
let run = || {
runtime
.block_on(resolver::resolve(
black_box(manifest.clone()),
black_box(cache.clone()),
black_box(client),
&venv,
))
.unwrap();
};
c.bench_function("resolve_warm_airflow", |b| b.iter(run));
}
criterion_group!(uv, resolve_warm_airflow, resolve_warm_jupyter);
criterion_main!(uv);
mod resolver {
use std::path::{Path, PathBuf};
use anyhow::Result;
use install_wheel_rs::linker::LinkMode;
use once_cell::sync::Lazy;
use distribution_types::{CachedDist, IndexLocations, Requirement, Resolution, SourceDist};
use distribution_types::IndexLocations;
use pep508_rs::{MarkerEnvironment, MarkerEnvironmentBuilder};
use platform_tags::{Arch, Os, Platform, Tags};
use uv_cache::Cache;
use uv_client::RegistryClient;
use uv_configuration::{BuildKind, Concurrency, NoBinary, NoBuild, SetupPyStrategy};
use uv_configuration::{Concurrency, ConfigSettings, NoBinary, NoBuild, SetupPyStrategy};
use uv_dispatch::BuildDispatch;
use uv_distribution::DistributionDatabase;
use uv_interpreter::{Interpreter, PythonEnvironment};
use uv_interpreter::PythonEnvironment;
use uv_resolver::{
FlatIndex, InMemoryIndex, Manifest, Options, PythonRequirement, ResolutionGraph, Resolver,
};
use uv_types::{
BuildContext, BuildIsolation, EmptyInstalledPackages, HashStrategy, SourceBuildTrait,
};
use uv_types::{BuildIsolation, EmptyInstalledPackages, HashStrategy, InFlight};
static MARKERS: Lazy<MarkerEnvironment> = Lazy::new(|| {
MarkerEnvironment::try_from(MarkerEnvironmentBuilder {
@ -88,15 +121,36 @@ mod resolver {
manifest: Manifest,
cache: Cache,
client: &RegistryClient,
venv: &PythonEnvironment,
) -> Result<ResolutionGraph> {
let flat_index = FlatIndex::default();
let index = InMemoryIndex::default();
let interpreter = Interpreter::artificial(PLATFORM.clone(), MARKERS.clone());
let build_context = Context::new(cache, interpreter.clone());
let hashes = HashStrategy::None;
let index_locations = IndexLocations::default();
let in_flight = InFlight::default();
let installed_packages = EmptyInstalledPackages;
let interpreter = venv.interpreter().clone();
let python_requirement = PythonRequirement::from_marker_environment(&interpreter, &MARKERS);
let concurrency = Concurrency::default();
let config_settings = ConfigSettings::default();
let build_isolation = BuildIsolation::Isolated;
let build_context = BuildDispatch::new(
client,
&cache,
&interpreter,
&index_locations,
&flat_index,
&index,
&in_flight,
SetupPyStrategy::default(),
&config_settings,
build_isolation,
LinkMode::default(),
&NoBuild::None,
&NoBinary::None,
concurrency,
);
let resolver = Resolver::new(
manifest,
@ -114,87 +168,4 @@ mod resolver {
Ok(resolver.resolve().await?)
}
struct Context {
cache: Cache,
interpreter: Interpreter,
index_locations: IndexLocations,
}
impl Context {
fn new(cache: Cache, interpreter: Interpreter) -> Self {
Self {
cache,
interpreter,
index_locations: IndexLocations::default(),
}
}
}
impl BuildContext for Context {
type SourceDistBuilder = DummyBuilder;
fn cache(&self) -> &Cache {
&self.cache
}
fn interpreter(&self) -> &Interpreter {
&self.interpreter
}
fn build_isolation(&self) -> BuildIsolation {
BuildIsolation::Isolated
}
fn no_build(&self) -> &NoBuild {
&NoBuild::None
}
fn no_binary(&self) -> &NoBinary {
&NoBinary::None
}
fn setup_py_strategy(&self) -> SetupPyStrategy {
SetupPyStrategy::default()
}
fn index_locations(&self) -> &IndexLocations {
&self.index_locations
}
async fn resolve<'a>(&'a self, _: &'a [Requirement]) -> Result<Resolution> {
panic!("benchmarks should not build source distributions")
}
async fn install<'a>(
&'a self,
_: &'a Resolution,
_: &'a PythonEnvironment,
) -> Result<Vec<CachedDist>> {
panic!("benchmarks should not build source distributions")
}
async fn setup_build<'a>(
&'a self,
_: &'a Path,
_: Option<&'a Path>,
_: &'a str,
_: Option<&'a SourceDist>,
_: BuildKind,
) -> Result<Self::SourceDistBuilder> {
Ok(DummyBuilder)
}
}
struct DummyBuilder;
impl SourceBuildTrait for DummyBuilder {
async fn metadata(&mut self) -> Result<Option<PathBuf>> {
panic!("benchmarks should not build source distributions")
}
async fn wheel<'a>(&'a self, _: &'a Path) -> Result<String> {
panic!("benchmarks should not build source distributions")
}
}
}

View file

@ -1 +1 @@
DTLSSocket==0.1.16
DTLSSocket==0.1.16