mirror of
https://github.com/astral-sh/uv.git
synced 2025-08-02 18:12:17 +00:00
Switch to msgpack in the cached client (#662)
This gives a 1.23x speedup on transformers-extras. We could change to msgpack for the entire cache if we want. I only tried this format and postcard so far, where postcard was much slower (like 1.6s). I don't actually want to merge it like this; I wanted to figure out the ballpark of improvement for switching away from JSON.

```
hyperfine --warmup 3 --runs 10 "target/profiling/puffin pip-compile --cache-dir cache-msgpack scripts/requirements/transformers-extras.in" "target/profiling/branch pip-compile scripts/requirements/transformers-extras.in"
Benchmark 1: target/profiling/puffin pip-compile --cache-dir cache-msgpack scripts/requirements/transformers-extras.in
  Time (mean ± σ): 179.1 ms ± 4.8 ms [User: 157.5 ms, System: 48.1 ms]
  Range (min … max): 174.9 ms … 188.1 ms 10 runs
Benchmark 2: target/profiling/branch pip-compile scripts/requirements/transformers-extras.in
  Time (mean ± σ): 221.1 ms ± 6.7 ms [User: 208.1 ms, System: 46.5 ms]
  Range (min … max): 213.5 ms … 235.5 ms 10 runs
Summary
  target/profiling/puffin pip-compile --cache-dir cache-msgpack scripts/requirements/transformers-extras.in ran 1.23 ± 0.05 times faster than target/profiling/branch pip-compile scripts/requirements/transformers-extras.in
```

Disadvantage: We can't manually look into the cache anymore to debug things.

- [ ] Check more formats; I currently only tested JSON, msgpack and postcard, and there should be other formats, too
- [x] Switch over `CachedByTimestamp` serialization (for the interpreter caching)
- [x] Switch over error handling and make sure puffin is still resilient to cache failure
This commit is contained in:
parent
e4673a0c52
commit
71964ec7a8
12 changed files with 117 additions and 66 deletions
|
@ -56,8 +56,10 @@ pub enum SourceDistError {
|
|||
// Cache writing error
|
||||
#[error("Failed to write to source dist cache")]
|
||||
Io(#[from] std::io::Error),
|
||||
#[error("Cache (de)serialization failed")]
|
||||
Serde(#[from] serde_json::Error),
|
||||
#[error("Cache deserialization failed")]
|
||||
Decode(#[from] rmp_serde::decode::Error),
|
||||
#[error("Cache serialization failed")]
|
||||
Encode(#[from] rmp_serde::encode::Error),
|
||||
|
||||
// Build error
|
||||
#[error("Failed to build: {0}")]
|
||||
|
@ -179,7 +181,8 @@ pub struct SourceDistCachedBuilder<'a, T: BuildContext> {
|
|||
tags: &'a Tags,
|
||||
}
|
||||
|
||||
const METADATA_JSON: &str = "metadata.json";
|
||||
/// The name of the file that contains the cached metadata, encoded via `MsgPack`.
|
||||
const METADATA: &str = "metadata.msgpack";
|
||||
|
||||
impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
||||
/// Initialize a [`SourceDistCachedBuilder`] from a [`BuildContext`].
|
||||
|
@ -268,7 +271,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
cache_shard: &CacheShard,
|
||||
subdirectory: Option<&'data Path>,
|
||||
) -> Result<BuiltWheelMetadata, SourceDistError> {
|
||||
let cache_entry = cache_shard.entry(METADATA_JSON.to_string());
|
||||
let cache_entry = cache_shard.entry(METADATA.to_string());
|
||||
|
||||
let response_callback = |response| async {
|
||||
// At this point, we're seeing a new or updated source distribution; delete all
|
||||
|
@ -368,12 +371,12 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
if let Ok(cached) = fs::read(cache_entry.path()).await {
|
||||
// If the file exists and it was just read or written by `CachedClient`, we assume it must
|
||||
// be correct.
|
||||
let mut cached = serde_json::from_slice::<DataWithCachePolicy<Manifest>>(&cached)?;
|
||||
let mut cached = rmp_serde::from_slice::<DataWithCachePolicy<Manifest>>(&cached)?;
|
||||
|
||||
cached
|
||||
.data
|
||||
.insert(wheel_filename.clone(), cached_data.clone());
|
||||
write_atomic(cache_entry.path(), serde_json::to_vec(&cached)?).await?;
|
||||
write_atomic(cache_entry.path(), rmp_serde::to_vec(&cached)?).await?;
|
||||
};
|
||||
|
||||
Ok(BuiltWheelMetadata::from_cached(
|
||||
|
@ -393,7 +396,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
CacheBucket::BuiltWheels,
|
||||
WheelCache::Path(&path_source_dist.url)
|
||||
.remote_wheel_dir(path_source_dist.name().as_ref()),
|
||||
METADATA_JSON.to_string(),
|
||||
METADATA.to_string(),
|
||||
);
|
||||
|
||||
// Determine the last-modified time of the source distribution.
|
||||
|
@ -464,7 +467,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
timestamp: modified,
|
||||
data: manifest,
|
||||
};
|
||||
let data = serde_json::to_vec(&cached)?;
|
||||
let data = rmp_serde::to_vec(&cached)?;
|
||||
write_atomic(cache_entry.path(), data).await?;
|
||||
|
||||
if let Some(task) = task {
|
||||
|
@ -498,7 +501,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
CacheBucket::BuiltWheels,
|
||||
WheelCache::Git(&git_source_dist.url, &git_sha.to_short_string())
|
||||
.remote_wheel_dir(git_source_dist.name().as_ref()),
|
||||
METADATA_JSON.to_string(),
|
||||
METADATA.to_string(),
|
||||
);
|
||||
|
||||
// Read the existing metadata from the cache.
|
||||
|
@ -540,7 +543,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
metadata: metadata.clone(),
|
||||
},
|
||||
);
|
||||
let data = serde_json::to_vec(&manifest)?;
|
||||
let data = rmp_serde::to_vec(&manifest)?;
|
||||
write_atomic(cache_entry.path(), data).await?;
|
||||
|
||||
if let Some(task) = task {
|
||||
|
@ -707,7 +710,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
) -> Result<Option<Manifest>, SourceDistError> {
|
||||
match fs::read(&cache_entry.path()).await {
|
||||
Ok(cached) => {
|
||||
let cached = serde_json::from_slice::<CachedByTimestamp<Manifest>>(&cached)?;
|
||||
let cached = rmp_serde::from_slice::<CachedByTimestamp<Manifest>>(&cached)?;
|
||||
if cached.timestamp == modified {
|
||||
Ok(Some(cached.data))
|
||||
} else {
|
||||
|
@ -729,7 +732,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
/// Read an existing cache entry, if it exists.
|
||||
async fn read_metadata(cache_entry: &CacheEntry) -> Result<Option<Manifest>, SourceDistError> {
|
||||
match fs::read(&cache_entry.path()).await {
|
||||
Ok(cached) => Ok(Some(serde_json::from_slice::<Manifest>(&cached)?)),
|
||||
Ok(cached) => Ok(Some(rmp_serde::from_slice::<Manifest>(&cached)?)),
|
||||
Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
|
||||
Err(err) => Err(err.into()),
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue