Remove spawn_blocking from version map (#1966)

I previously added `spawn_blocking` to the version map construction as it
had become a bottleneck
(https://github.com/astral-sh/uv/pull/1163/files#diff-704ceeaedada99f90369eac535713ec82e19550bff166cd44745d7277ecae527R116).
With the zero copy deserialization, this has become so fast we don't
need to move it to the thread pool anymore. I've also checked
`DataWithCachePolicy` but it seems to still take a significant amount of
time. Span visualization:

Resolving jupyter warm:

![image](692b03da-61c5-4f96-b413-199c14aa47c4)

Resolving jupyter cold:

![image](a6893155-d327-40c9-a83a-7c537b7c99c4)

![image](213556a3-a331-42db-aaf5-bdef5e0205dd)

I've also updated the instrumentation a little.

We don't seem to be CPU-bound for the cold cache (top) and refresh case
(bottom) from jupyter:

![image](cb976add-3d30-465a-a470-8490b7b6caea)

![image](d7ecb745-dd2d-4f91-939c-2e46b7c812dd)
This commit is contained in:
konsti 2024-02-26 10:44:24 +01:00 committed by GitHub
parent c80d5c6ffb
commit 70dad51cd9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 62 additions and 57 deletions

View file

@ -336,12 +336,9 @@ impl CachedClient {
.await .await
} }
#[instrument(name="read_and_parse_cache", skip_all, fields(file = %cache_entry.path().display()))]
async fn read_cache(cache_entry: &CacheEntry) -> Option<DataWithCachePolicy> { async fn read_cache(cache_entry: &CacheEntry) -> Option<DataWithCachePolicy> {
let span = info_span!("read_and_parse_cache", file = %cache_entry.path().display()); match DataWithCachePolicy::from_path_async(cache_entry.path()).await {
match span
.in_scope(|| DataWithCachePolicy::from_path_async(cache_entry.path()))
.await
{
Ok(data) => Some(data), Ok(data) => Some(data),
Err(err) => { Err(err) => {
// When we know the cache entry doesn't exist, then things are // When we know the cache entry doesn't exist, then things are
@ -577,6 +574,7 @@ impl DataWithCachePolicy {
/// ///
/// If the given byte buffer is not in a valid format or if reading the /// If the given byte buffer is not in a valid format or if reading the
/// file given fails, then this returns an error. /// file given fails, then this returns an error.
#[instrument]
fn from_path_sync(path: &Path) -> Result<Self, Error> { fn from_path_sync(path: &Path) -> Result<Self, Error> {
let file = fs_err::File::open(path).map_err(ErrorKind::Io)?; let file = fs_err::File::open(path).map_err(ErrorKind::Io)?;
// Note that we don't wrap our file in a buffer because it will just // Note that we don't wrap our file in a buffer because it will just

View file

@ -1,5 +1,7 @@
use std::path::Path; use std::path::Path;
use tracing::instrument;
use uv_extract::Error; use uv_extract::Error;
use crate::download::BuiltWheel; use crate::download::BuiltWheel;
@ -23,6 +25,7 @@ impl Unzip for BuiltWheel {
} }
impl Unzip for LocalWheel { impl Unzip for LocalWheel {
#[instrument(skip_all, fields(filename=self.filename().to_string()))]
fn unzip(&self, target: &Path) -> Result<(), Error> { fn unzip(&self, target: &Path) -> Result<(), Error> {
match self { match self {
Self::Unzipped(_) => Ok(()), Self::Unzipped(_) => Ok(()),

View file

@ -5,7 +5,7 @@ use std::path::{Path, PathBuf};
use anyhow::Result; use anyhow::Result;
use reqwest::Client; use reqwest::Client;
use tracing::debug; use tracing::{debug, instrument};
use url::Url; use url::Url;
use cache_key::{digest, RepositoryUrl}; use cache_key::{digest, RepositoryUrl};
@ -49,6 +49,7 @@ impl GitSource {
} }
/// Fetch the underlying Git repository at the given revision. /// Fetch the underlying Git repository at the given revision.
#[instrument(skip(self))]
pub fn fetch(self) -> Result<Fetch> { pub fn fetch(self) -> Result<Fetch> {
// The path to the repo, within the Git database. // The path to the repo, within the Git database.
let ident = digest(&RepositoryUrl::new(&self.git.repository)); let ident = digest(&RepositoryUrl::new(&self.git.repository));

View file

@ -1,6 +1,4 @@
use std::future::Future; use std::future::Future;
use std::ops::Deref;
use std::sync::Arc;
use anyhow::Result; use anyhow::Result;
use chrono::{DateTime, Utc}; use chrono::{DateTime, Utc};
@ -62,11 +60,6 @@ pub trait ResolverProvider: Send + Sync {
pub struct DefaultResolverProvider<'a, Context: BuildContext + Send + Sync> { pub struct DefaultResolverProvider<'a, Context: BuildContext + Send + Sync> {
/// The [`DistributionDatabase`] used to build source distributions. /// The [`DistributionDatabase`] used to build source distributions.
fetcher: DistributionDatabase<'a, Context>, fetcher: DistributionDatabase<'a, Context>,
/// Allow moving the parameters to `VersionMap::from_metadata` to a different thread.
inner: Arc<DefaultResolverProviderInner>,
}
pub struct DefaultResolverProviderInner {
/// The [`RegistryClient`] used to query the index. /// The [`RegistryClient`] used to query the index.
client: RegistryClient, client: RegistryClient,
/// These are the entries from `--find-links` that act as overrides for index responses. /// These are the entries from `--find-links` that act as overrides for index responses.
@ -77,14 +70,6 @@ pub struct DefaultResolverProviderInner {
no_binary: NoBinary, no_binary: NoBinary,
} }
impl<'a, Context: BuildContext + Send + Sync> Deref for DefaultResolverProvider<'a, Context> {
type Target = DefaultResolverProviderInner;
fn deref(&self) -> &Self::Target {
self.inner.as_ref()
}
}
impl<'a, Context: BuildContext + Send + Sync> DefaultResolverProvider<'a, Context> { impl<'a, Context: BuildContext + Send + Sync> DefaultResolverProvider<'a, Context> {
/// Reads the flat index entries and builds the provider. /// Reads the flat index entries and builds the provider.
#[allow(clippy::too_many_arguments)] #[allow(clippy::too_many_arguments)]
@ -99,14 +84,12 @@ impl<'a, Context: BuildContext + Send + Sync> DefaultResolverProvider<'a, Contex
) -> Self { ) -> Self {
Self { Self {
fetcher, fetcher,
inner: Arc::new(DefaultResolverProviderInner { client: client.clone(),
client: client.clone(), flat_index: flat_index.clone(),
flat_index: flat_index.clone(), tags: tags.clone(),
tags: tags.clone(), python_requirement,
python_requirement, exclude_newer,
exclude_newer, no_binary: no_binary.clone(),
no_binary: no_binary.clone(),
}),
} }
} }
} }
@ -124,24 +107,16 @@ impl<'a, Context: BuildContext + Send + Sync> ResolverProvider
// If the "Simple API" request was successful, convert to `VersionMap` on the Tokio // If the "Simple API" request was successful, convert to `VersionMap` on the Tokio
// threadpool, since it can be slow. // threadpool, since it can be slow.
match result { match result {
Ok((index, metadata)) => { Ok((index, metadata)) => Ok(VersionsResponse::Found(VersionMap::from_metadata(
let self_send = self.inner.clone(); metadata,
let package_name_owned = package_name.clone(); package_name,
Ok(tokio::task::spawn_blocking(move || { &index,
VersionsResponse::Found(VersionMap::from_metadata( &self.tags,
metadata, &self.python_requirement,
&package_name_owned, self.exclude_newer.as_ref(),
&index, self.flat_index.get(package_name).cloned(),
&self_send.tags, &self.no_binary,
&self_send.python_requirement, ))),
self_send.exclude_newer.as_ref(),
self_send.flat_index.get(&package_name_owned).cloned(),
&self_send.no_binary,
))
})
.await
.expect("Tokio executor failed, was there a panic?"))
}
Err(err) => match err.into_kind() { Err(err) => match err.into_kind() {
uv_client::ErrorKind::PackageNotFound(_) => { uv_client::ErrorKind::PackageNotFound(_) => {
if let Some(flat_index) = self.flat_index.get(package_name).cloned() { if let Some(flat_index) = self.flat_index.get(package_name).cloned() {

View file

@ -808,8 +808,15 @@ fn install_no_index_version() {
fn install_git_public_https() { fn install_git_public_https() {
let context = TestContext::new("3.8"); let context = TestContext::new("3.8");
uv_snapshot!(command(&context) let mut command = command(&context);
.arg("uv-public-pypackage @ git+https://github.com/astral-test/uv-public-pypackage") command.arg("uv-public-pypackage @ git+https://github.com/astral-test/uv-public-pypackage");
if cfg!(all(windows, debug_assertions)) {
// TODO(konstin): Reduce stack usage in debug mode enough that the tests pass with the
// default windows stack of 1MB
command.env("UV_STACK_SIZE", (2 * 1024 * 1024).to_string());
}
uv_snapshot!(command
, @r###" , @r###"
success: true success: true
exit_code: 0 exit_code: 0
@ -838,8 +845,7 @@ fn install_git_public_https_missing_branch_or_tag() {
uv_snapshot!(filters, command(&context) uv_snapshot!(filters, command(&context)
// 2.0.0 does not exist // 2.0.0 does not exist
.arg("uv-public-pypackage @ git+https://github.com/astral-test/uv-public-pypackage@2.0.0") .arg("uv-public-pypackage @ git+https://github.com/astral-test/uv-public-pypackage@2.0.0"), @r###"
, @r###"
success: false success: false
exit_code: 2 exit_code: 2
----- stdout ----- ----- stdout -----
@ -897,8 +903,17 @@ fn install_git_private_https_pat() {
let mut filters = INSTA_FILTERS.to_vec(); let mut filters = INSTA_FILTERS.to_vec();
filters.insert(0, (&token, "***")); filters.insert(0, (&token, "***"));
uv_snapshot!(filters, command(&context) let mut command = command(&context);
.arg(format!("uv-private-pypackage @ git+https://{token}@github.com/astral-test/uv-private-pypackage")) command.arg(format!(
"uv-private-pypackage @ git+https://{token}@github.com/astral-test/uv-private-pypackage"
));
if cfg!(all(windows, debug_assertions)) {
// TODO(konstin): Reduce stack usage in debug mode enough that the tests pass with the
// default windows stack of 1MB
command.env("UV_STACK_SIZE", (2 * 1024 * 1024).to_string());
}
uv_snapshot!(filters, command
, @r###" , @r###"
success: true success: true
exit_code: 0 exit_code: 0
@ -933,9 +948,15 @@ fn install_git_private_https_pat_at_ref() {
"" ""
}; };
uv_snapshot!(filters, command(&context) let mut command = command(&context);
.arg(format!("uv-private-pypackage @ git+https://{user}{token}@github.com/astral-test/uv-private-pypackage@6c09ce9ae81f50670a60abd7d95f30dd416d00ac")) command.arg(format!("uv-private-pypackage @ git+https://{user}{token}@github.com/astral-test/uv-private-pypackage@6c09ce9ae81f50670a60abd7d95f30dd416d00ac"));
, @r###" if cfg!(all(windows, debug_assertions)) {
// TODO(konstin): Reduce stack usage in debug mode enough that the tests pass with the
// default windows stack of 1MB
command.env("UV_STACK_SIZE", (2 * 1024 * 1024).to_string());
}
uv_snapshot!(filters, command, @r###"
success: true success: true
exit_code: 0 exit_code: 0
----- stdout ----- ----- stdout -----
@ -1476,7 +1497,14 @@ fn direct_url_zip_file_bunk_permissions() -> Result<()> {
"opensafely-pipeline @ https://github.com/opensafely-core/pipeline/archive/refs/tags/v2023.11.06.145820.zip", "opensafely-pipeline @ https://github.com/opensafely-core/pipeline/archive/refs/tags/v2023.11.06.145820.zip",
)?; )?;
uv_snapshot!(command(&context) let mut command = command(&context);
if cfg!(all(windows, debug_assertions)) {
// TODO(konstin): Reduce stack usage in debug mode enough that the tests pass with the
// default windows stack of 1MB
command.env("UV_STACK_SIZE", (2 * 1024 * 1024).to_string());
}
uv_snapshot!(command
.arg("-r") .arg("-r")
.arg("requirements.txt") .arg("requirements.txt")
.arg("--strict"), @r###" .arg("--strict"), @r###"