mirror of
https://github.com/astral-sh/uv.git
synced 2025-08-03 18:38:21 +00:00
Switch to msgpack in the cached client (#662)
This gives a 1.23× speedup on transformers-extras. We could change to msgpack for the entire cache if we want. I only tried this format and postcard so far; postcard was much slower (around 1.6 s). I don't actually want to merge it like this — I wanted to figure out the ballpark of improvement for switching away from JSON. ``` hyperfine --warmup 3 --runs 10 "target/profiling/puffin pip-compile --cache-dir cache-msgpack scripts/requirements/transformers-extras.in" "target/profiling/branch pip-compile scripts/requirements/transformers-extras.in" Benchmark 1: target/profiling/puffin pip-compile --cache-dir cache-msgpack scripts/requirements/transformers-extras.in Time (mean ± σ): 179.1 ms ± 4.8 ms [User: 157.5 ms, System: 48.1 ms] Range (min … max): 174.9 ms … 188.1 ms 10 runs Benchmark 2: target/profiling/branch pip-compile scripts/requirements/transformers-extras.in Time (mean ± σ): 221.1 ms ± 6.7 ms [User: 208.1 ms, System: 46.5 ms] Range (min … max): 213.5 ms … 235.5 ms 10 runs Summary target/profiling/puffin pip-compile --cache-dir cache-msgpack scripts/requirements/transformers-extras.in ran 1.23 ± 0.05 times faster than target/profiling/branch pip-compile scripts/requirements/transformers-extras.in ``` Disadvantage: We can't manually look into the cache anymore to debug things - [ ] Check more formats; I currently only tested JSON, msgpack, and postcard — there should be other formats, too - [x] Switch over `CachedByTimestamp` serialization (for the interpreter caching) - [x] Switch over error handling and make sure puffin is still resilient to cache failure
This commit is contained in:
parent
e4673a0c52
commit
71964ec7a8
12 changed files with 117 additions and 66 deletions
32
Cargo.lock
generated
32
Cargo.lock
generated
|
@ -1971,6 +1971,12 @@ dependencies = [
|
|||
"windows-targets 0.48.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "paste"
|
||||
version = "1.0.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c"
|
||||
|
||||
[[package]]
|
||||
name = "pep440_rs"
|
||||
version = "0.3.12"
|
||||
|
@ -2373,6 +2379,7 @@ dependencies = [
|
|||
"reqwest",
|
||||
"reqwest-middleware",
|
||||
"reqwest-retry",
|
||||
"rmp-serde",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sha2",
|
||||
|
@ -2474,9 +2481,9 @@ dependencies = [
|
|||
"puffin-traits",
|
||||
"pypi-types",
|
||||
"reqwest",
|
||||
"rmp-serde",
|
||||
"rustc-hash",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tempfile",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
|
@ -2571,6 +2578,7 @@ dependencies = [
|
|||
"platform-host",
|
||||
"puffin-cache",
|
||||
"puffin-fs",
|
||||
"rmp-serde",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tempfile",
|
||||
|
@ -3091,6 +3099,28 @@ dependencies = [
|
|||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rmp"
|
||||
version = "0.8.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7f9860a6cc38ed1da53456442089b4dfa35e7cedaa326df63017af88385e6b20"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"num-traits",
|
||||
"paste",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rmp-serde"
|
||||
version = "1.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bffea85eea980d8a74453e5d02a8d93028f3c34725de143085a844ebe953258a"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"rmp",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustc-demangle"
|
||||
version = "0.1.23"
|
||||
|
|
|
@ -64,6 +64,7 @@ reqwest = { version = "0.11.22", default-features = false, features = ["json", "
|
|||
reqwest-middleware = { version = "0.2.4" }
|
||||
reqwest-retry = { version = "0.3.0" }
|
||||
rfc2047-decoder = { version = "1.0.1" }
|
||||
rmp-serde = { version = "1.1.2" }
|
||||
rustc-hash = { version = "1.1.0" }
|
||||
seahash = { version = "4.1.0" }
|
||||
serde = { version = "1.0.190" }
|
||||
|
|
|
@ -173,15 +173,15 @@ impl Cache {
|
|||
pub enum CacheBucket {
|
||||
/// Wheels (excluding built wheels), alongside their metadata and cache policy.
|
||||
///
|
||||
/// There are three kinds from cache entries: Wheel metadata and policy as JSON files, the
|
||||
/// There are three kinds from cache entries: Wheel metadata and policy as MsgPack files, the
|
||||
/// wheels themselves, and the unzipped wheel archives. If a wheel file is over an in-memory
|
||||
/// size threshold, we first download the zip file into the cache, then unzip it into a
|
||||
/// directory with the same name (exclusive of the `.whl` extension).
|
||||
///
|
||||
/// Cache structure:
|
||||
/// * `wheel-metadata-v0/pypi/foo/{foo-1.0.0-py3-none-any.json, foo-1.0.0-py3-none-any.whl}`
|
||||
/// * `wheel-metadata-v0/<digest(index-url)>/foo/{foo-1.0.0-py3-none-any.json, foo-1.0.0-py3-none-any.whl}`
|
||||
/// * `wheel-metadata-v0/url/<digest(url)>/foo/{foo-1.0.0-py3-none-any.json, foo-1.0.0-py3-none-any.whl}`
|
||||
/// * `wheel-metadata-v0/pypi/foo/{foo-1.0.0-py3-none-any.msgpack, foo-1.0.0-py3-none-any.whl}`
|
||||
/// * `wheel-metadata-v0/<digest(index-url)>/foo/{foo-1.0.0-py3-none-any.msgpack, foo-1.0.0-py3-none-any.whl}`
|
||||
/// * `wheel-metadata-v0/url/<digest(url)>/foo/{foo-1.0.0-py3-none-any.msgpack, foo-1.0.0-py3-none-any.whl}`
|
||||
///
|
||||
/// See `puffin_client::RegistryClient::wheel_metadata` for information on how wheel metadata
|
||||
/// is fetched.
|
||||
|
@ -203,12 +203,12 @@ pub enum CacheBucket {
|
|||
/// ├── pypi
|
||||
/// │ ...
|
||||
/// │ ├── pandas
|
||||
/// │ │ └── pandas-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.json
|
||||
/// │ │ └── pandas-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.msgpack
|
||||
/// │ ...
|
||||
/// └── url
|
||||
/// └── 4b8be67c801a7ecb
|
||||
/// └── flask
|
||||
/// └── flask-3.0.0-py3-none-any.json
|
||||
/// └── flask-3.0.0-py3-none-any.msgpack
|
||||
/// ```
|
||||
///
|
||||
/// We get the following `requirement.txt` from `pip-compile`:
|
||||
|
@ -250,7 +250,7 @@ pub enum CacheBucket {
|
|||
/// ├── pypi
|
||||
/// │ ├── ...
|
||||
/// │ ├── pandas
|
||||
/// │ │ ├── pandas-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.json
|
||||
/// │ │ ├── pandas-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.msgpack
|
||||
/// │ │ ├── pandas-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
|
||||
/// │ │ └── pandas-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64
|
||||
/// │ │ ├── pandas
|
||||
|
@ -262,8 +262,8 @@ pub enum CacheBucket {
|
|||
/// └── url
|
||||
/// └── 4b8be67c801a7ecb
|
||||
/// └── flask
|
||||
/// ├── flask-3.0.0-py3-none-any.json
|
||||
/// ├── flask-3.0.0-py3-none-any.json
|
||||
/// ├── flask-3.0.0-py3-none-any.msgpack
|
||||
/// ├── flask-3.0.0-py3-none-any.msgpack
|
||||
/// └── flask-3.0.0-py3-none-any
|
||||
/// ├── flask
|
||||
/// │ └── ...
|
||||
|
@ -287,15 +287,15 @@ pub enum CacheBucket {
|
|||
/// directories in the cache.
|
||||
///
|
||||
/// Cache structure:
|
||||
/// * `built-wheels-v0/pypi/foo/34a17436ed1e9669/{metadata.json, foo-1.0.0.zip, foo-1.0.0-py3-none-any.whl, ...other wheels}`
|
||||
/// * `built-wheels-v0/<digest(index-url)>/foo/foo-1.0.0.zip/{metadata.json, foo-1.0.0-py3-none-any.whl, ...other wheels}`
|
||||
/// * `built-wheels-v0/url/<digest(url)>/foo/foo-1.0.0.zip/{metadata.json, foo-1.0.0-py3-none-any.whl, ...other wheels}`
|
||||
/// * `built-wheels-v0/git/<digest(url)>/<git sha>/foo/foo-1.0.0.zip/{metadata.json, foo-1.0.0-py3-none-any.whl, ...other wheels}`
|
||||
/// * `built-wheels-v0/pypi/foo/34a17436ed1e9669/{metadata.msgpack, foo-1.0.0.zip, foo-1.0.0-py3-none-any.whl, ...other wheels}`
|
||||
/// * `built-wheels-v0/<digest(index-url)>/foo/foo-1.0.0.zip/{metadata.msgpack, foo-1.0.0-py3-none-any.whl, ...other wheels}`
|
||||
/// * `built-wheels-v0/url/<digest(url)>/foo/foo-1.0.0.zip/{metadata.msgpack, foo-1.0.0-py3-none-any.whl, ...other wheels}`
|
||||
/// * `built-wheels-v0/git/<digest(url)>/<git sha>/foo/foo-1.0.0.zip/{metadata.msgpack, foo-1.0.0-py3-none-any.whl, ...other wheels}`
|
||||
///
|
||||
/// But the url filename does not need to be a valid source dist filename
|
||||
/// (<https://github.com/search?q=path%3A**%2Frequirements.txt+master.zip&type=code>),
|
||||
/// so it could also be the following and we have to take any string as filename:
|
||||
/// * `built-wheels-v0/url/<sha256(url)>/master.zip/metadata.json`
|
||||
/// * `built-wheels-v0/url/<sha256(url)>/master.zip/metadata.msgpack`
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
|
@ -315,22 +315,22 @@ pub enum CacheBucket {
|
|||
/// ├── git
|
||||
/// │ └── a67db8ed076e3814
|
||||
/// │ └── 843b753e9e8cb74e83cac55598719b39a4d5ef1f
|
||||
/// │ ├── metadata.json
|
||||
/// │ ├── metadata.msgpack
|
||||
/// │ └── pydantic_extra_types-2.1.0-py3-none-any.whl
|
||||
/// ├── pypi
|
||||
/// │ └── django
|
||||
/// │ └── django-allauth-0.51.0.tar.gz
|
||||
/// │ ├── django_allauth-0.51.0-py3-none-any.whl
|
||||
/// │ └── metadata.json
|
||||
/// │ └── metadata.msgpack
|
||||
/// └── url
|
||||
/// └── 6781bd6440ae72c2
|
||||
/// └── werkzeug
|
||||
/// └── werkzeug-3.0.1.tar.gz
|
||||
/// ├── metadata.json
|
||||
/// ├── metadata.msgpack
|
||||
/// └── werkzeug-3.0.1-py3-none-any.whl
|
||||
/// ```
|
||||
///
|
||||
/// The inside of a `metadata.json`:
|
||||
/// Structurally, the inside of a `metadata.msgpack` looks like:
|
||||
/// ```json
|
||||
/// {
|
||||
/// "data": {
|
||||
|
@ -352,11 +352,11 @@ pub enum CacheBucket {
|
|||
/// without the shim itself changing, we only cache when the path equals `sys.executable`, i.e.
|
||||
/// the path we're running is the python executable itself and not a shim.
|
||||
///
|
||||
/// Cache structure: `interpreter-v0/<digest(path)>.json`
|
||||
/// Cache structure: `interpreter-v0/<digest(path)>.msgpack`
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// The contents of each of the json files has a timestamp field in unix time, the [PEP 508]
|
||||
/// The contents of each of the MsgPack files has a timestamp field in unix time, the [PEP 508]
|
||||
/// markers and some information from the `sys`/`sysconfig` modules.
|
||||
///
|
||||
/// ```json
|
||||
|
@ -388,8 +388,8 @@ pub enum CacheBucket {
|
|||
/// Index responses through the simple metadata API.
|
||||
///
|
||||
/// Cache structure:
|
||||
/// * `simple-v0/pypi/<package_name>.json`
|
||||
/// * `simple-v0/<digest(index_url)>/<package_name>.json`
|
||||
/// * `simple-v0/pypi/<package_name>.msgpack`
|
||||
/// * `simple-v0/<digest(index_url)>/<package_name>.msgpack`
|
||||
///
|
||||
/// The response is parsed into [`puffin_client::SimpleMetadata`] before storage.
|
||||
Simple,
|
||||
|
@ -492,17 +492,17 @@ impl CacheBucket {
|
|||
}
|
||||
}
|
||||
CacheBucket::Simple => {
|
||||
// For `pypi` wheels, we expect a JSON file per package, indexed by name.
|
||||
// For `pypi` wheels, we expect a MsgPack file per package, indexed by name.
|
||||
let root = cache.bucket(self).join(WheelCacheKind::Pypi);
|
||||
if remove(root.join(format!("{name}.json")))? {
|
||||
if remove(root.join(format!("{name}.msgpack")))? {
|
||||
count += 1;
|
||||
}
|
||||
|
||||
// For alternate indices, we expect a directory for every index, followed by a
|
||||
// JSON file per package, indexed by name.
|
||||
// MsgPack file per package, indexed by name.
|
||||
let root = cache.bucket(self).join(WheelCacheKind::Url);
|
||||
for directory in directories(root) {
|
||||
if remove(directory.join(format!("{name}.json")))? {
|
||||
if remove(directory.join(format!("{name}.msgpack")))? {
|
||||
count += 1;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -15,18 +15,19 @@ pypi-types = { path = "../pypi-types" }
|
|||
|
||||
async_http_range_reader = { workspace = true }
|
||||
async_zip = { workspace = true, features = ["tokio"] }
|
||||
futures = { workspace = true }
|
||||
fs-err = { workspace = true, features = ["tokio"] }
|
||||
futures = { workspace = true }
|
||||
http = { workspace = true }
|
||||
http-cache-semantics = { workspace = true }
|
||||
reqwest = { workspace = true }
|
||||
reqwest-middleware = { workspace = true }
|
||||
reqwest-retry = { workspace = true }
|
||||
rmp-serde = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
sha2 = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
tempfile = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
tokio = { workspace = true, features = ["fs"] }
|
||||
tokio-util = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
|
|
|
@ -101,7 +101,7 @@ impl CachedClient {
|
|||
CallbackReturn: Future<Output = Result<Payload, CallBackError>>,
|
||||
{
|
||||
let cached = if let Ok(cached) = fs_err::tokio::read(cache_entry.path()).await {
|
||||
match serde_json::from_slice::<DataWithCachePolicy<Payload>>(&cached) {
|
||||
match rmp_serde::from_slice::<DataWithCachePolicy<Payload>>(&cached) {
|
||||
Ok(data) => Some(data),
|
||||
Err(err) => {
|
||||
warn!(
|
||||
|
@ -123,7 +123,7 @@ impl CachedClient {
|
|||
CachedResponse::NotModified(data_with_cache_policy) => {
|
||||
write_atomic(
|
||||
cache_entry.path(),
|
||||
serde_json::to_vec(&data_with_cache_policy).map_err(crate::Error::from)?,
|
||||
rmp_serde::to_vec(&data_with_cache_policy).map_err(crate::Error::from)?,
|
||||
)
|
||||
.await
|
||||
.map_err(crate::Error::CacheWrite)?;
|
||||
|
@ -139,7 +139,7 @@ impl CachedClient {
|
|||
.await
|
||||
.map_err(crate::Error::CacheWrite)?;
|
||||
let data =
|
||||
serde_json::to_vec(&data_with_cache_policy).map_err(crate::Error::from)?;
|
||||
rmp_serde::to_vec(&data_with_cache_policy).map_err(crate::Error::from)?;
|
||||
write_atomic(cache_entry.path(), data)
|
||||
.await
|
||||
.map_err(crate::Error::CacheWrite)?;
|
||||
|
|
|
@ -73,12 +73,15 @@ pub enum Error {
|
|||
#[error(transparent)]
|
||||
Io(#[from] io::Error),
|
||||
|
||||
#[error("Cache deserialization failed")]
|
||||
Decode(#[from] rmp_serde::decode::Error),
|
||||
|
||||
#[error("Cache serialization failed")]
|
||||
Encode(#[from] rmp_serde::encode::Error),
|
||||
|
||||
/// An [`io::Error`] with a filename attached
|
||||
#[error(transparent)]
|
||||
Persist(#[from] tempfile::PersistError),
|
||||
|
||||
#[error("Failed to serialize response to cache")]
|
||||
SerdeJson(#[from] serde_json::Error),
|
||||
}
|
||||
|
||||
impl Error {
|
||||
|
|
|
@ -145,7 +145,7 @@ impl RegistryClient {
|
|||
IndexUrl::Pypi => "pypi".to_string(),
|
||||
IndexUrl::Url(url) => digest(&CanonicalUrl::new(url)),
|
||||
}),
|
||||
format!("{}.json", package_name.as_ref()),
|
||||
format!("{}.msgpack", package_name.as_ref()),
|
||||
);
|
||||
|
||||
let simple_request = self
|
||||
|
@ -246,7 +246,7 @@ impl RegistryClient {
|
|||
let cache_entry = self.cache.entry(
|
||||
CacheBucket::Wheels,
|
||||
WheelCache::Index(&index).remote_wheel_dir(filename.name.as_ref()),
|
||||
format!("{}.json", filename.stem()),
|
||||
format!("{}.msgpack", filename.stem()),
|
||||
);
|
||||
|
||||
let response_callback = |response: Response| async {
|
||||
|
@ -281,7 +281,7 @@ impl RegistryClient {
|
|||
let cache_entry = self.cache.entry(
|
||||
CacheBucket::Wheels,
|
||||
cache_shard.remote_wheel_dir(filename.name.as_ref()),
|
||||
format!("{}.json", filename.stem()),
|
||||
format!("{}.msgpack", filename.stem()),
|
||||
);
|
||||
|
||||
// This response callback is special, we actually make a number of subsequent requests to
|
||||
|
|
|
@ -34,9 +34,9 @@ fs-err = { workspace = true }
|
|||
fs2 = { workspace = true }
|
||||
futures = { workspace = true }
|
||||
reqwest = { workspace = true }
|
||||
rmp-serde = { workspace = true }
|
||||
rustc-hash = { workspace = true }
|
||||
serde = { workspace = true , features = ["derive"] }
|
||||
serde_json = { workspace = true }
|
||||
tempfile = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
tokio = { workspace = true }
|
||||
|
|
|
@ -56,8 +56,10 @@ pub enum SourceDistError {
|
|||
// Cache writing error
|
||||
#[error("Failed to write to source dist cache")]
|
||||
Io(#[from] std::io::Error),
|
||||
#[error("Cache (de)serialization failed")]
|
||||
Serde(#[from] serde_json::Error),
|
||||
#[error("Cache deserialization failed")]
|
||||
Decode(#[from] rmp_serde::decode::Error),
|
||||
#[error("Cache serialization failed")]
|
||||
Encode(#[from] rmp_serde::encode::Error),
|
||||
|
||||
// Build error
|
||||
#[error("Failed to build: {0}")]
|
||||
|
@ -179,7 +181,8 @@ pub struct SourceDistCachedBuilder<'a, T: BuildContext> {
|
|||
tags: &'a Tags,
|
||||
}
|
||||
|
||||
const METADATA_JSON: &str = "metadata.json";
|
||||
/// The name of the file that contains the cached metadata, encoded via `MsgPack`.
|
||||
const METADATA: &str = "metadata.msgpack";
|
||||
|
||||
impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
||||
/// Initialize a [`SourceDistCachedBuilder`] from a [`BuildContext`].
|
||||
|
@ -268,7 +271,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
cache_shard: &CacheShard,
|
||||
subdirectory: Option<&'data Path>,
|
||||
) -> Result<BuiltWheelMetadata, SourceDistError> {
|
||||
let cache_entry = cache_shard.entry(METADATA_JSON.to_string());
|
||||
let cache_entry = cache_shard.entry(METADATA.to_string());
|
||||
|
||||
let response_callback = |response| async {
|
||||
// At this point, we're seeing a new or updated source distribution; delete all
|
||||
|
@ -368,12 +371,12 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
if let Ok(cached) = fs::read(cache_entry.path()).await {
|
||||
// If the file exists and it was just read or written by `CachedClient`, we assume it must
|
||||
// be correct.
|
||||
let mut cached = serde_json::from_slice::<DataWithCachePolicy<Manifest>>(&cached)?;
|
||||
let mut cached = rmp_serde::from_slice::<DataWithCachePolicy<Manifest>>(&cached)?;
|
||||
|
||||
cached
|
||||
.data
|
||||
.insert(wheel_filename.clone(), cached_data.clone());
|
||||
write_atomic(cache_entry.path(), serde_json::to_vec(&cached)?).await?;
|
||||
write_atomic(cache_entry.path(), rmp_serde::to_vec(&cached)?).await?;
|
||||
};
|
||||
|
||||
Ok(BuiltWheelMetadata::from_cached(
|
||||
|
@ -393,7 +396,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
CacheBucket::BuiltWheels,
|
||||
WheelCache::Path(&path_source_dist.url)
|
||||
.remote_wheel_dir(path_source_dist.name().as_ref()),
|
||||
METADATA_JSON.to_string(),
|
||||
METADATA.to_string(),
|
||||
);
|
||||
|
||||
// Determine the last-modified time of the source distribution.
|
||||
|
@ -464,7 +467,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
timestamp: modified,
|
||||
data: manifest,
|
||||
};
|
||||
let data = serde_json::to_vec(&cached)?;
|
||||
let data = rmp_serde::to_vec(&cached)?;
|
||||
write_atomic(cache_entry.path(), data).await?;
|
||||
|
||||
if let Some(task) = task {
|
||||
|
@ -498,7 +501,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
CacheBucket::BuiltWheels,
|
||||
WheelCache::Git(&git_source_dist.url, &git_sha.to_short_string())
|
||||
.remote_wheel_dir(git_source_dist.name().as_ref()),
|
||||
METADATA_JSON.to_string(),
|
||||
METADATA.to_string(),
|
||||
);
|
||||
|
||||
// Read the existing metadata from the cache.
|
||||
|
@ -540,7 +543,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
metadata: metadata.clone(),
|
||||
},
|
||||
);
|
||||
let data = serde_json::to_vec(&manifest)?;
|
||||
let data = rmp_serde::to_vec(&manifest)?;
|
||||
write_atomic(cache_entry.path(), data).await?;
|
||||
|
||||
if let Some(task) = task {
|
||||
|
@ -707,7 +710,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
) -> Result<Option<Manifest>, SourceDistError> {
|
||||
match fs::read(&cache_entry.path()).await {
|
||||
Ok(cached) => {
|
||||
let cached = serde_json::from_slice::<CachedByTimestamp<Manifest>>(&cached)?;
|
||||
let cached = rmp_serde::from_slice::<CachedByTimestamp<Manifest>>(&cached)?;
|
||||
if cached.timestamp == modified {
|
||||
Ok(Some(cached.data))
|
||||
} else {
|
||||
|
@ -729,7 +732,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
/// Read an existing cache entry, if it exists.
|
||||
async fn read_metadata(cache_entry: &CacheEntry) -> Result<Option<Manifest>, SourceDistError> {
|
||||
match fs::read(&cache_entry.path()).await {
|
||||
Ok(cached) => Ok(Some(serde_json::from_slice::<Manifest>(&cached)?)),
|
||||
Ok(cached) => Ok(Some(rmp_serde::from_slice::<Manifest>(&cached)?)),
|
||||
Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
|
||||
Err(err) => Err(err.into()),
|
||||
}
|
||||
|
|
|
@ -20,11 +20,12 @@ puffin-cache = { path = "../puffin-cache" }
|
|||
puffin-fs = { path = "../puffin-fs" }
|
||||
|
||||
fs-err = { workspace = true, features = ["tokio"] }
|
||||
rmp-serde = { workspace = true }
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
serde_json = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
tokio = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
|
||||
[dev-dependencies]
|
||||
indoc = { version = "2.0.4" }
|
||||
|
|
|
@ -3,7 +3,7 @@ use std::process::Command;
|
|||
|
||||
use fs_err as fs;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tracing::debug;
|
||||
use tracing::{debug, warn};
|
||||
|
||||
use pep440_rs::Version;
|
||||
use pep508_rs::MarkerEnvironment;
|
||||
|
@ -133,9 +133,8 @@ impl InterpreterQueryResult {
|
|||
let data = serde_json::from_slice::<Self>(&output.stdout).map_err(|err| {
|
||||
Error::PythonSubcommandOutput {
|
||||
message: format!(
|
||||
"Querying python at {} did not return the expected data: {}",
|
||||
"Querying python at {} did not return the expected data: {err}",
|
||||
interpreter.display(),
|
||||
err,
|
||||
),
|
||||
stdout: String::from_utf8_lossy(&output.stdout).trim().to_string(),
|
||||
stderr: String::from_utf8_lossy(&output.stderr).trim().to_string(),
|
||||
|
@ -155,7 +154,7 @@ impl InterpreterQueryResult {
|
|||
let cache_entry = cache.entry(
|
||||
CacheBucket::Interpreter,
|
||||
"",
|
||||
format!("{}.json", digest(&executable_bytes)),
|
||||
format!("{}.msgpack", digest(&executable_bytes)),
|
||||
);
|
||||
|
||||
// `modified()` is infallible on windows and unix (i.e., all platforms we support).
|
||||
|
@ -163,16 +162,25 @@ impl InterpreterQueryResult {
|
|||
|
||||
// Read from the cache.
|
||||
if let Ok(data) = fs::read(cache_entry.path()) {
|
||||
if let Ok(cached) = serde_json::from_slice::<CachedByTimestamp<Self>>(&data) {
|
||||
if cached.timestamp == modified {
|
||||
debug!("Using cached markers for: {}", executable.display());
|
||||
return Ok(cached.data);
|
||||
}
|
||||
match rmp_serde::from_slice::<CachedByTimestamp<Self>>(&data) {
|
||||
Ok(cached) => {
|
||||
if cached.timestamp == modified {
|
||||
debug!("Using cached markers for: {}", executable.display());
|
||||
return Ok(cached.data);
|
||||
}
|
||||
|
||||
debug!(
|
||||
"Ignoring stale cached markers for: {}",
|
||||
executable.display()
|
||||
);
|
||||
debug!(
|
||||
"Ignoring stale cached markers for: {}",
|
||||
executable.display()
|
||||
);
|
||||
}
|
||||
Err(err) => {
|
||||
warn!(
|
||||
"Broken cache entry at {}, removing: {err}",
|
||||
cache_entry.path().display()
|
||||
);
|
||||
let _ = fs_err::remove_file(cache_entry.path());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -187,7 +195,7 @@ impl InterpreterQueryResult {
|
|||
// Write to the cache.
|
||||
write_atomic_sync(
|
||||
cache_entry.path(),
|
||||
serde_json::to_vec(&CachedByTimestamp {
|
||||
rmp_serde::to_vec(&CachedByTimestamp {
|
||||
timestamp: modified,
|
||||
data: info.clone(),
|
||||
})?,
|
||||
|
|
|
@ -40,6 +40,10 @@ pub enum Error {
|
|||
},
|
||||
#[error("Failed to write to cache")]
|
||||
Serde(#[from] serde_json::Error),
|
||||
#[error("Cache deserialization failed")]
|
||||
Decode(#[from] rmp_serde::decode::Error),
|
||||
#[error("Cache serialization failed")]
|
||||
Encode(#[from] rmp_serde::encode::Error),
|
||||
#[error("Failed to parse pyvenv.cfg")]
|
||||
Cfg(#[from] cfg::Error),
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue