mirror of
https://github.com/astral-sh/uv.git
synced 2025-09-28 04:54:47 +00:00
Switch to msgpack in the cached client (#662)
This gives a 1.23 speedup on transformers-extras. We could change to msgpack for the entire cache if we want. I only tried this format and postcard so far, where postcard was much slower (like 1.6s). I don't actually want to merge it like this, i wanted to figure out the ballpark of improvement for switching away from json. ``` hyperfine --warmup 3 --runs 10 "target/profiling/puffin pip-compile --cache-dir cache-msgpack scripts/requirements/transformers-extras.in" "target/profiling/branch pip-compile scripts/requirements/transformers-extras.in" Benchmark 1: target/profiling/puffin pip-compile --cache-dir cache-msgpack scripts/requirements/transformers-extras.in Time (mean ± σ): 179.1 ms ± 4.8 ms [User: 157.5 ms, System: 48.1 ms] Range (min … max): 174.9 ms … 188.1 ms 10 runs Benchmark 2: target/profiling/branch pip-compile scripts/requirements/transformers-extras.in Time (mean ± σ): 221.1 ms ± 6.7 ms [User: 208.1 ms, System: 46.5 ms] Range (min … max): 213.5 ms … 235.5 ms 10 runs Summary target/profiling/puffin pip-compile --cache-dir cache-msgpack scripts/requirements/transformers-extras.in ran 1.23 ± 0.05 times faster than target/profiling/branch pip-compile scripts/requirements/transformers-extras.in ``` Disadvantage: We can't manually look into the cache anymore to debug things - [ ] Check more formats, i currently only tested json, msgpack and postcard, there should be other formats, too - [x] Switch over `CachedByTimestamp` serialization (for the interpreter caching) - [x] Switch over error handling and make sure puffin is still resilient to cache failure
This commit is contained in:
parent
e4673a0c52
commit
71964ec7a8
12 changed files with 117 additions and 66 deletions
32
Cargo.lock
generated
32
Cargo.lock
generated
|
@ -1971,6 +1971,12 @@ dependencies = [
|
||||||
"windows-targets 0.48.5",
|
"windows-targets 0.48.5",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "paste"
|
||||||
|
version = "1.0.14"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pep440_rs"
|
name = "pep440_rs"
|
||||||
version = "0.3.12"
|
version = "0.3.12"
|
||||||
|
@ -2373,6 +2379,7 @@ dependencies = [
|
||||||
"reqwest",
|
"reqwest",
|
||||||
"reqwest-middleware",
|
"reqwest-middleware",
|
||||||
"reqwest-retry",
|
"reqwest-retry",
|
||||||
|
"rmp-serde",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"sha2",
|
"sha2",
|
||||||
|
@ -2474,9 +2481,9 @@ dependencies = [
|
||||||
"puffin-traits",
|
"puffin-traits",
|
||||||
"pypi-types",
|
"pypi-types",
|
||||||
"reqwest",
|
"reqwest",
|
||||||
|
"rmp-serde",
|
||||||
"rustc-hash",
|
"rustc-hash",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
|
||||||
"tempfile",
|
"tempfile",
|
||||||
"thiserror",
|
"thiserror",
|
||||||
"tokio",
|
"tokio",
|
||||||
|
@ -2571,6 +2578,7 @@ dependencies = [
|
||||||
"platform-host",
|
"platform-host",
|
||||||
"puffin-cache",
|
"puffin-cache",
|
||||||
"puffin-fs",
|
"puffin-fs",
|
||||||
|
"rmp-serde",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"tempfile",
|
"tempfile",
|
||||||
|
@ -3091,6 +3099,28 @@ dependencies = [
|
||||||
"windows-sys 0.48.0",
|
"windows-sys 0.48.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rmp"
|
||||||
|
version = "0.8.12"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7f9860a6cc38ed1da53456442089b4dfa35e7cedaa326df63017af88385e6b20"
|
||||||
|
dependencies = [
|
||||||
|
"byteorder",
|
||||||
|
"num-traits",
|
||||||
|
"paste",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rmp-serde"
|
||||||
|
version = "1.1.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "bffea85eea980d8a74453e5d02a8d93028f3c34725de143085a844ebe953258a"
|
||||||
|
dependencies = [
|
||||||
|
"byteorder",
|
||||||
|
"rmp",
|
||||||
|
"serde",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rustc-demangle"
|
name = "rustc-demangle"
|
||||||
version = "0.1.23"
|
version = "0.1.23"
|
||||||
|
|
|
@ -64,6 +64,7 @@ reqwest = { version = "0.11.22", default-features = false, features = ["json", "
|
||||||
reqwest-middleware = { version = "0.2.4" }
|
reqwest-middleware = { version = "0.2.4" }
|
||||||
reqwest-retry = { version = "0.3.0" }
|
reqwest-retry = { version = "0.3.0" }
|
||||||
rfc2047-decoder = { version = "1.0.1" }
|
rfc2047-decoder = { version = "1.0.1" }
|
||||||
|
rmp-serde = { version = "1.1.2" }
|
||||||
rustc-hash = { version = "1.1.0" }
|
rustc-hash = { version = "1.1.0" }
|
||||||
seahash = { version = "4.1.0" }
|
seahash = { version = "4.1.0" }
|
||||||
serde = { version = "1.0.190" }
|
serde = { version = "1.0.190" }
|
||||||
|
|
|
@ -173,15 +173,15 @@ impl Cache {
|
||||||
pub enum CacheBucket {
|
pub enum CacheBucket {
|
||||||
/// Wheels (excluding built wheels), alongside their metadata and cache policy.
|
/// Wheels (excluding built wheels), alongside their metadata and cache policy.
|
||||||
///
|
///
|
||||||
/// There are three kinds from cache entries: Wheel metadata and policy as JSON files, the
|
/// There are three kinds from cache entries: Wheel metadata and policy as MsgPack files, the
|
||||||
/// wheels themselves, and the unzipped wheel archives. If a wheel file is over an in-memory
|
/// wheels themselves, and the unzipped wheel archives. If a wheel file is over an in-memory
|
||||||
/// size threshold, we first download the zip file into the cache, then unzip it into a
|
/// size threshold, we first download the zip file into the cache, then unzip it into a
|
||||||
/// directory with the same name (exclusive of the `.whl` extension).
|
/// directory with the same name (exclusive of the `.whl` extension).
|
||||||
///
|
///
|
||||||
/// Cache structure:
|
/// Cache structure:
|
||||||
/// * `wheel-metadata-v0/pypi/foo/{foo-1.0.0-py3-none-any.json, foo-1.0.0-py3-none-any.whl}`
|
/// * `wheel-metadata-v0/pypi/foo/{foo-1.0.0-py3-none-any.msgpack, foo-1.0.0-py3-none-any.whl}`
|
||||||
/// * `wheel-metadata-v0/<digest(index-url)>/foo/{foo-1.0.0-py3-none-any.json, foo-1.0.0-py3-none-any.whl}`
|
/// * `wheel-metadata-v0/<digest(index-url)>/foo/{foo-1.0.0-py3-none-any.msgpack, foo-1.0.0-py3-none-any.whl}`
|
||||||
/// * `wheel-metadata-v0/url/<digest(url)>/foo/{foo-1.0.0-py3-none-any.json, foo-1.0.0-py3-none-any.whl}`
|
/// * `wheel-metadata-v0/url/<digest(url)>/foo/{foo-1.0.0-py3-none-any.msgpack, foo-1.0.0-py3-none-any.whl}`
|
||||||
///
|
///
|
||||||
/// See `puffin_client::RegistryClient::wheel_metadata` for information on how wheel metadata
|
/// See `puffin_client::RegistryClient::wheel_metadata` for information on how wheel metadata
|
||||||
/// is fetched.
|
/// is fetched.
|
||||||
|
@ -203,12 +203,12 @@ pub enum CacheBucket {
|
||||||
/// ├── pypi
|
/// ├── pypi
|
||||||
/// │ ...
|
/// │ ...
|
||||||
/// │ ├── pandas
|
/// │ ├── pandas
|
||||||
/// │ │ └── pandas-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.json
|
/// │ │ └── pandas-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.msgpack
|
||||||
/// │ ...
|
/// │ ...
|
||||||
/// └── url
|
/// └── url
|
||||||
/// └── 4b8be67c801a7ecb
|
/// └── 4b8be67c801a7ecb
|
||||||
/// └── flask
|
/// └── flask
|
||||||
/// └── flask-3.0.0-py3-none-any.json
|
/// └── flask-3.0.0-py3-none-any.msgpack
|
||||||
/// ```
|
/// ```
|
||||||
///
|
///
|
||||||
/// We get the following `requirement.txt` from `pip-compile`:
|
/// We get the following `requirement.txt` from `pip-compile`:
|
||||||
|
@ -250,7 +250,7 @@ pub enum CacheBucket {
|
||||||
/// ├── pypi
|
/// ├── pypi
|
||||||
/// │ ├── ...
|
/// │ ├── ...
|
||||||
/// │ ├── pandas
|
/// │ ├── pandas
|
||||||
/// │ │ ├── pandas-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.json
|
/// │ │ ├── pandas-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.msgpack
|
||||||
/// │ │ ├── pandas-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
|
/// │ │ ├── pandas-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
|
||||||
/// │ │ └── pandas-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64
|
/// │ │ └── pandas-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64
|
||||||
/// │ │ ├── pandas
|
/// │ │ ├── pandas
|
||||||
|
@ -262,8 +262,8 @@ pub enum CacheBucket {
|
||||||
/// └── url
|
/// └── url
|
||||||
/// └── 4b8be67c801a7ecb
|
/// └── 4b8be67c801a7ecb
|
||||||
/// └── flask
|
/// └── flask
|
||||||
/// ├── flask-3.0.0-py3-none-any.json
|
/// ├── flask-3.0.0-py3-none-any.msgpack
|
||||||
/// ├── flask-3.0.0-py3-none-any.json
|
/// ├── flask-3.0.0-py3-none-any.msgpack
|
||||||
/// └── flask-3.0.0-py3-none-any
|
/// └── flask-3.0.0-py3-none-any
|
||||||
/// ├── flask
|
/// ├── flask
|
||||||
/// │ └── ...
|
/// │ └── ...
|
||||||
|
@ -287,15 +287,15 @@ pub enum CacheBucket {
|
||||||
/// directories in the cache.
|
/// directories in the cache.
|
||||||
///
|
///
|
||||||
/// Cache structure:
|
/// Cache structure:
|
||||||
/// * `built-wheels-v0/pypi/foo/34a17436ed1e9669/{metadata.json, foo-1.0.0.zip, foo-1.0.0-py3-none-any.whl, ...other wheels}`
|
/// * `built-wheels-v0/pypi/foo/34a17436ed1e9669/{metadata.msgpack, foo-1.0.0.zip, foo-1.0.0-py3-none-any.whl, ...other wheels}`
|
||||||
/// * `built-wheels-v0/<digest(index-url)>/foo/foo-1.0.0.zip/{metadata.json, foo-1.0.0-py3-none-any.whl, ...other wheels}`
|
/// * `built-wheels-v0/<digest(index-url)>/foo/foo-1.0.0.zip/{metadata.msgpack, foo-1.0.0-py3-none-any.whl, ...other wheels}`
|
||||||
/// * `built-wheels-v0/url/<digest(url)>/foo/foo-1.0.0.zip/{metadata.json, foo-1.0.0-py3-none-any.whl, ...other wheels}`
|
/// * `built-wheels-v0/url/<digest(url)>/foo/foo-1.0.0.zip/{metadata.msgpack, foo-1.0.0-py3-none-any.whl, ...other wheels}`
|
||||||
/// * `built-wheels-v0/git/<digest(url)>/<git sha>/foo/foo-1.0.0.zip/{metadata.json, foo-1.0.0-py3-none-any.whl, ...other wheels}`
|
/// * `built-wheels-v0/git/<digest(url)>/<git sha>/foo/foo-1.0.0.zip/{metadata.msgpack, foo-1.0.0-py3-none-any.whl, ...other wheels}`
|
||||||
///
|
///
|
||||||
/// But the url filename does not need to be a valid source dist filename
|
/// But the url filename does not need to be a valid source dist filename
|
||||||
/// (<https://github.com/search?q=path%3A**%2Frequirements.txt+master.zip&type=code>),
|
/// (<https://github.com/search?q=path%3A**%2Frequirements.txt+master.zip&type=code>),
|
||||||
/// so it could also be the following and we have to take any string as filename:
|
/// so it could also be the following and we have to take any string as filename:
|
||||||
/// * `built-wheels-v0/url/<sha256(url)>/master.zip/metadata.json`
|
/// * `built-wheels-v0/url/<sha256(url)>/master.zip/metadata.msgpack`
|
||||||
///
|
///
|
||||||
/// # Example
|
/// # Example
|
||||||
///
|
///
|
||||||
|
@ -315,22 +315,22 @@ pub enum CacheBucket {
|
||||||
/// ├── git
|
/// ├── git
|
||||||
/// │ └── a67db8ed076e3814
|
/// │ └── a67db8ed076e3814
|
||||||
/// │ └── 843b753e9e8cb74e83cac55598719b39a4d5ef1f
|
/// │ └── 843b753e9e8cb74e83cac55598719b39a4d5ef1f
|
||||||
/// │ ├── metadata.json
|
/// │ ├── metadata.msgpack
|
||||||
/// │ └── pydantic_extra_types-2.1.0-py3-none-any.whl
|
/// │ └── pydantic_extra_types-2.1.0-py3-none-any.whl
|
||||||
/// ├── pypi
|
/// ├── pypi
|
||||||
/// │ └── django
|
/// │ └── django
|
||||||
/// │ └── django-allauth-0.51.0.tar.gz
|
/// │ └── django-allauth-0.51.0.tar.gz
|
||||||
/// │ ├── django_allauth-0.51.0-py3-none-any.whl
|
/// │ ├── django_allauth-0.51.0-py3-none-any.whl
|
||||||
/// │ └── metadata.json
|
/// │ └── metadata.msgpack
|
||||||
/// └── url
|
/// └── url
|
||||||
/// └── 6781bd6440ae72c2
|
/// └── 6781bd6440ae72c2
|
||||||
/// └── werkzeug
|
/// └── werkzeug
|
||||||
/// └── werkzeug-3.0.1.tar.gz
|
/// └── werkzeug-3.0.1.tar.gz
|
||||||
/// ├── metadata.json
|
/// ├── metadata.msgpack
|
||||||
/// └── werkzeug-3.0.1-py3-none-any.whl
|
/// └── werkzeug-3.0.1-py3-none-any.whl
|
||||||
/// ```
|
/// ```
|
||||||
///
|
///
|
||||||
/// The inside of a `metadata.json`:
|
/// Structurally, the inside of a `metadata.msgpack` looks like:
|
||||||
/// ```json
|
/// ```json
|
||||||
/// {
|
/// {
|
||||||
/// "data": {
|
/// "data": {
|
||||||
|
@ -352,11 +352,11 @@ pub enum CacheBucket {
|
||||||
/// without the shim itself changing, we only cache when the path equals `sys.executable`, i.e.
|
/// without the shim itself changing, we only cache when the path equals `sys.executable`, i.e.
|
||||||
/// the path we're running is the python executable itself and not a shim.
|
/// the path we're running is the python executable itself and not a shim.
|
||||||
///
|
///
|
||||||
/// Cache structure: `interpreter-v0/<digest(path)>.json`
|
/// Cache structure: `interpreter-v0/<digest(path)>.msgpack`
|
||||||
///
|
///
|
||||||
/// # Example
|
/// # Example
|
||||||
///
|
///
|
||||||
/// The contents of each of the json files has a timestamp field in unix time, the [PEP 508]
|
/// The contents of each of the MsgPack files has a timestamp field in unix time, the [PEP 508]
|
||||||
/// markers and some information from the `sys`/`sysconfig` modules.
|
/// markers and some information from the `sys`/`sysconfig` modules.
|
||||||
///
|
///
|
||||||
/// ```json
|
/// ```json
|
||||||
|
@ -388,8 +388,8 @@ pub enum CacheBucket {
|
||||||
/// Index responses through the simple metadata API.
|
/// Index responses through the simple metadata API.
|
||||||
///
|
///
|
||||||
/// Cache structure:
|
/// Cache structure:
|
||||||
/// * `simple-v0/pypi/<package_name>.json`
|
/// * `simple-v0/pypi/<package_name>.msgpack`
|
||||||
/// * `simple-v0/<digest(index_url)>/<package_name>.json`
|
/// * `simple-v0/<digest(index_url)>/<package_name>.msgpack`
|
||||||
///
|
///
|
||||||
/// The response is parsed into [`puffin_client::SimpleMetadata`] before storage.
|
/// The response is parsed into [`puffin_client::SimpleMetadata`] before storage.
|
||||||
Simple,
|
Simple,
|
||||||
|
@ -492,17 +492,17 @@ impl CacheBucket {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
CacheBucket::Simple => {
|
CacheBucket::Simple => {
|
||||||
// For `pypi` wheels, we expect a JSON file per package, indexed by name.
|
// For `pypi` wheels, we expect a MsgPack file per package, indexed by name.
|
||||||
let root = cache.bucket(self).join(WheelCacheKind::Pypi);
|
let root = cache.bucket(self).join(WheelCacheKind::Pypi);
|
||||||
if remove(root.join(format!("{name}.json")))? {
|
if remove(root.join(format!("{name}.msgpack")))? {
|
||||||
count += 1;
|
count += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// For alternate indices, we expect a directory for every index, followed by a
|
// For alternate indices, we expect a directory for every index, followed by a
|
||||||
// JSON file per package, indexed by name.
|
// MsgPack file per package, indexed by name.
|
||||||
let root = cache.bucket(self).join(WheelCacheKind::Url);
|
let root = cache.bucket(self).join(WheelCacheKind::Url);
|
||||||
for directory in directories(root) {
|
for directory in directories(root) {
|
||||||
if remove(directory.join(format!("{name}.json")))? {
|
if remove(directory.join(format!("{name}.msgpack")))? {
|
||||||
count += 1;
|
count += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,18 +15,19 @@ pypi-types = { path = "../pypi-types" }
|
||||||
|
|
||||||
async_http_range_reader = { workspace = true }
|
async_http_range_reader = { workspace = true }
|
||||||
async_zip = { workspace = true, features = ["tokio"] }
|
async_zip = { workspace = true, features = ["tokio"] }
|
||||||
futures = { workspace = true }
|
|
||||||
fs-err = { workspace = true, features = ["tokio"] }
|
fs-err = { workspace = true, features = ["tokio"] }
|
||||||
|
futures = { workspace = true }
|
||||||
http = { workspace = true }
|
http = { workspace = true }
|
||||||
http-cache-semantics = { workspace = true }
|
http-cache-semantics = { workspace = true }
|
||||||
reqwest = { workspace = true }
|
reqwest = { workspace = true }
|
||||||
reqwest-middleware = { workspace = true }
|
reqwest-middleware = { workspace = true }
|
||||||
reqwest-retry = { workspace = true }
|
reqwest-retry = { workspace = true }
|
||||||
|
rmp-serde = { workspace = true }
|
||||||
serde = { workspace = true }
|
serde = { workspace = true }
|
||||||
serde_json = { workspace = true }
|
serde_json = { workspace = true }
|
||||||
sha2 = { workspace = true }
|
sha2 = { workspace = true }
|
||||||
thiserror = { workspace = true }
|
|
||||||
tempfile = { workspace = true }
|
tempfile = { workspace = true }
|
||||||
|
thiserror = { workspace = true }
|
||||||
tokio = { workspace = true, features = ["fs"] }
|
tokio = { workspace = true, features = ["fs"] }
|
||||||
tokio-util = { workspace = true }
|
tokio-util = { workspace = true }
|
||||||
tracing = { workspace = true }
|
tracing = { workspace = true }
|
||||||
|
|
|
@ -101,7 +101,7 @@ impl CachedClient {
|
||||||
CallbackReturn: Future<Output = Result<Payload, CallBackError>>,
|
CallbackReturn: Future<Output = Result<Payload, CallBackError>>,
|
||||||
{
|
{
|
||||||
let cached = if let Ok(cached) = fs_err::tokio::read(cache_entry.path()).await {
|
let cached = if let Ok(cached) = fs_err::tokio::read(cache_entry.path()).await {
|
||||||
match serde_json::from_slice::<DataWithCachePolicy<Payload>>(&cached) {
|
match rmp_serde::from_slice::<DataWithCachePolicy<Payload>>(&cached) {
|
||||||
Ok(data) => Some(data),
|
Ok(data) => Some(data),
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
warn!(
|
warn!(
|
||||||
|
@ -123,7 +123,7 @@ impl CachedClient {
|
||||||
CachedResponse::NotModified(data_with_cache_policy) => {
|
CachedResponse::NotModified(data_with_cache_policy) => {
|
||||||
write_atomic(
|
write_atomic(
|
||||||
cache_entry.path(),
|
cache_entry.path(),
|
||||||
serde_json::to_vec(&data_with_cache_policy).map_err(crate::Error::from)?,
|
rmp_serde::to_vec(&data_with_cache_policy).map_err(crate::Error::from)?,
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
.map_err(crate::Error::CacheWrite)?;
|
.map_err(crate::Error::CacheWrite)?;
|
||||||
|
@ -139,7 +139,7 @@ impl CachedClient {
|
||||||
.await
|
.await
|
||||||
.map_err(crate::Error::CacheWrite)?;
|
.map_err(crate::Error::CacheWrite)?;
|
||||||
let data =
|
let data =
|
||||||
serde_json::to_vec(&data_with_cache_policy).map_err(crate::Error::from)?;
|
rmp_serde::to_vec(&data_with_cache_policy).map_err(crate::Error::from)?;
|
||||||
write_atomic(cache_entry.path(), data)
|
write_atomic(cache_entry.path(), data)
|
||||||
.await
|
.await
|
||||||
.map_err(crate::Error::CacheWrite)?;
|
.map_err(crate::Error::CacheWrite)?;
|
||||||
|
|
|
@ -73,12 +73,15 @@ pub enum Error {
|
||||||
#[error(transparent)]
|
#[error(transparent)]
|
||||||
Io(#[from] io::Error),
|
Io(#[from] io::Error),
|
||||||
|
|
||||||
|
#[error("Cache deserialization failed")]
|
||||||
|
Decode(#[from] rmp_serde::decode::Error),
|
||||||
|
|
||||||
|
#[error("Cache serialization failed")]
|
||||||
|
Encode(#[from] rmp_serde::encode::Error),
|
||||||
|
|
||||||
/// An [`io::Error`] with a filename attached
|
/// An [`io::Error`] with a filename attached
|
||||||
#[error(transparent)]
|
#[error(transparent)]
|
||||||
Persist(#[from] tempfile::PersistError),
|
Persist(#[from] tempfile::PersistError),
|
||||||
|
|
||||||
#[error("Failed to serialize response to cache")]
|
|
||||||
SerdeJson(#[from] serde_json::Error),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Error {
|
impl Error {
|
||||||
|
|
|
@ -145,7 +145,7 @@ impl RegistryClient {
|
||||||
IndexUrl::Pypi => "pypi".to_string(),
|
IndexUrl::Pypi => "pypi".to_string(),
|
||||||
IndexUrl::Url(url) => digest(&CanonicalUrl::new(url)),
|
IndexUrl::Url(url) => digest(&CanonicalUrl::new(url)),
|
||||||
}),
|
}),
|
||||||
format!("{}.json", package_name.as_ref()),
|
format!("{}.msgpack", package_name.as_ref()),
|
||||||
);
|
);
|
||||||
|
|
||||||
let simple_request = self
|
let simple_request = self
|
||||||
|
@ -246,7 +246,7 @@ impl RegistryClient {
|
||||||
let cache_entry = self.cache.entry(
|
let cache_entry = self.cache.entry(
|
||||||
CacheBucket::Wheels,
|
CacheBucket::Wheels,
|
||||||
WheelCache::Index(&index).remote_wheel_dir(filename.name.as_ref()),
|
WheelCache::Index(&index).remote_wheel_dir(filename.name.as_ref()),
|
||||||
format!("{}.json", filename.stem()),
|
format!("{}.msgpack", filename.stem()),
|
||||||
);
|
);
|
||||||
|
|
||||||
let response_callback = |response: Response| async {
|
let response_callback = |response: Response| async {
|
||||||
|
@ -281,7 +281,7 @@ impl RegistryClient {
|
||||||
let cache_entry = self.cache.entry(
|
let cache_entry = self.cache.entry(
|
||||||
CacheBucket::Wheels,
|
CacheBucket::Wheels,
|
||||||
cache_shard.remote_wheel_dir(filename.name.as_ref()),
|
cache_shard.remote_wheel_dir(filename.name.as_ref()),
|
||||||
format!("{}.json", filename.stem()),
|
format!("{}.msgpack", filename.stem()),
|
||||||
);
|
);
|
||||||
|
|
||||||
// This response callback is special, we actually make a number of subsequent requests to
|
// This response callback is special, we actually make a number of subsequent requests to
|
||||||
|
|
|
@ -34,9 +34,9 @@ fs-err = { workspace = true }
|
||||||
fs2 = { workspace = true }
|
fs2 = { workspace = true }
|
||||||
futures = { workspace = true }
|
futures = { workspace = true }
|
||||||
reqwest = { workspace = true }
|
reqwest = { workspace = true }
|
||||||
|
rmp-serde = { workspace = true }
|
||||||
rustc-hash = { workspace = true }
|
rustc-hash = { workspace = true }
|
||||||
serde = { workspace = true , features = ["derive"] }
|
serde = { workspace = true , features = ["derive"] }
|
||||||
serde_json = { workspace = true }
|
|
||||||
tempfile = { workspace = true }
|
tempfile = { workspace = true }
|
||||||
thiserror = { workspace = true }
|
thiserror = { workspace = true }
|
||||||
tokio = { workspace = true }
|
tokio = { workspace = true }
|
||||||
|
|
|
@ -56,8 +56,10 @@ pub enum SourceDistError {
|
||||||
// Cache writing error
|
// Cache writing error
|
||||||
#[error("Failed to write to source dist cache")]
|
#[error("Failed to write to source dist cache")]
|
||||||
Io(#[from] std::io::Error),
|
Io(#[from] std::io::Error),
|
||||||
#[error("Cache (de)serialization failed")]
|
#[error("Cache deserialization failed")]
|
||||||
Serde(#[from] serde_json::Error),
|
Decode(#[from] rmp_serde::decode::Error),
|
||||||
|
#[error("Cache serialization failed")]
|
||||||
|
Encode(#[from] rmp_serde::encode::Error),
|
||||||
|
|
||||||
// Build error
|
// Build error
|
||||||
#[error("Failed to build: {0}")]
|
#[error("Failed to build: {0}")]
|
||||||
|
@ -179,7 +181,8 @@ pub struct SourceDistCachedBuilder<'a, T: BuildContext> {
|
||||||
tags: &'a Tags,
|
tags: &'a Tags,
|
||||||
}
|
}
|
||||||
|
|
||||||
const METADATA_JSON: &str = "metadata.json";
|
/// The name of the file that contains the cached metadata, encoded via `MsgPack`.
|
||||||
|
const METADATA: &str = "metadata.msgpack";
|
||||||
|
|
||||||
impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
||||||
/// Initialize a [`SourceDistCachedBuilder`] from a [`BuildContext`].
|
/// Initialize a [`SourceDistCachedBuilder`] from a [`BuildContext`].
|
||||||
|
@ -268,7 +271,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
||||||
cache_shard: &CacheShard,
|
cache_shard: &CacheShard,
|
||||||
subdirectory: Option<&'data Path>,
|
subdirectory: Option<&'data Path>,
|
||||||
) -> Result<BuiltWheelMetadata, SourceDistError> {
|
) -> Result<BuiltWheelMetadata, SourceDistError> {
|
||||||
let cache_entry = cache_shard.entry(METADATA_JSON.to_string());
|
let cache_entry = cache_shard.entry(METADATA.to_string());
|
||||||
|
|
||||||
let response_callback = |response| async {
|
let response_callback = |response| async {
|
||||||
// At this point, we're seeing a new or updated source distribution; delete all
|
// At this point, we're seeing a new or updated source distribution; delete all
|
||||||
|
@ -368,12 +371,12 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
||||||
if let Ok(cached) = fs::read(cache_entry.path()).await {
|
if let Ok(cached) = fs::read(cache_entry.path()).await {
|
||||||
// If the file exists and it was just read or written by `CachedClient`, we assume it must
|
// If the file exists and it was just read or written by `CachedClient`, we assume it must
|
||||||
// be correct.
|
// be correct.
|
||||||
let mut cached = serde_json::from_slice::<DataWithCachePolicy<Manifest>>(&cached)?;
|
let mut cached = rmp_serde::from_slice::<DataWithCachePolicy<Manifest>>(&cached)?;
|
||||||
|
|
||||||
cached
|
cached
|
||||||
.data
|
.data
|
||||||
.insert(wheel_filename.clone(), cached_data.clone());
|
.insert(wheel_filename.clone(), cached_data.clone());
|
||||||
write_atomic(cache_entry.path(), serde_json::to_vec(&cached)?).await?;
|
write_atomic(cache_entry.path(), rmp_serde::to_vec(&cached)?).await?;
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(BuiltWheelMetadata::from_cached(
|
Ok(BuiltWheelMetadata::from_cached(
|
||||||
|
@ -393,7 +396,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
||||||
CacheBucket::BuiltWheels,
|
CacheBucket::BuiltWheels,
|
||||||
WheelCache::Path(&path_source_dist.url)
|
WheelCache::Path(&path_source_dist.url)
|
||||||
.remote_wheel_dir(path_source_dist.name().as_ref()),
|
.remote_wheel_dir(path_source_dist.name().as_ref()),
|
||||||
METADATA_JSON.to_string(),
|
METADATA.to_string(),
|
||||||
);
|
);
|
||||||
|
|
||||||
// Determine the last-modified time of the source distribution.
|
// Determine the last-modified time of the source distribution.
|
||||||
|
@ -464,7 +467,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
||||||
timestamp: modified,
|
timestamp: modified,
|
||||||
data: manifest,
|
data: manifest,
|
||||||
};
|
};
|
||||||
let data = serde_json::to_vec(&cached)?;
|
let data = rmp_serde::to_vec(&cached)?;
|
||||||
write_atomic(cache_entry.path(), data).await?;
|
write_atomic(cache_entry.path(), data).await?;
|
||||||
|
|
||||||
if let Some(task) = task {
|
if let Some(task) = task {
|
||||||
|
@ -498,7 +501,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
||||||
CacheBucket::BuiltWheels,
|
CacheBucket::BuiltWheels,
|
||||||
WheelCache::Git(&git_source_dist.url, &git_sha.to_short_string())
|
WheelCache::Git(&git_source_dist.url, &git_sha.to_short_string())
|
||||||
.remote_wheel_dir(git_source_dist.name().as_ref()),
|
.remote_wheel_dir(git_source_dist.name().as_ref()),
|
||||||
METADATA_JSON.to_string(),
|
METADATA.to_string(),
|
||||||
);
|
);
|
||||||
|
|
||||||
// Read the existing metadata from the cache.
|
// Read the existing metadata from the cache.
|
||||||
|
@ -540,7 +543,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
||||||
metadata: metadata.clone(),
|
metadata: metadata.clone(),
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
let data = serde_json::to_vec(&manifest)?;
|
let data = rmp_serde::to_vec(&manifest)?;
|
||||||
write_atomic(cache_entry.path(), data).await?;
|
write_atomic(cache_entry.path(), data).await?;
|
||||||
|
|
||||||
if let Some(task) = task {
|
if let Some(task) = task {
|
||||||
|
@ -707,7 +710,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
||||||
) -> Result<Option<Manifest>, SourceDistError> {
|
) -> Result<Option<Manifest>, SourceDistError> {
|
||||||
match fs::read(&cache_entry.path()).await {
|
match fs::read(&cache_entry.path()).await {
|
||||||
Ok(cached) => {
|
Ok(cached) => {
|
||||||
let cached = serde_json::from_slice::<CachedByTimestamp<Manifest>>(&cached)?;
|
let cached = rmp_serde::from_slice::<CachedByTimestamp<Manifest>>(&cached)?;
|
||||||
if cached.timestamp == modified {
|
if cached.timestamp == modified {
|
||||||
Ok(Some(cached.data))
|
Ok(Some(cached.data))
|
||||||
} else {
|
} else {
|
||||||
|
@ -729,7 +732,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
||||||
/// Read an existing cache entry, if it exists.
|
/// Read an existing cache entry, if it exists.
|
||||||
async fn read_metadata(cache_entry: &CacheEntry) -> Result<Option<Manifest>, SourceDistError> {
|
async fn read_metadata(cache_entry: &CacheEntry) -> Result<Option<Manifest>, SourceDistError> {
|
||||||
match fs::read(&cache_entry.path()).await {
|
match fs::read(&cache_entry.path()).await {
|
||||||
Ok(cached) => Ok(Some(serde_json::from_slice::<Manifest>(&cached)?)),
|
Ok(cached) => Ok(Some(rmp_serde::from_slice::<Manifest>(&cached)?)),
|
||||||
Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
|
Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
|
||||||
Err(err) => Err(err.into()),
|
Err(err) => Err(err.into()),
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,11 +20,12 @@ puffin-cache = { path = "../puffin-cache" }
|
||||||
puffin-fs = { path = "../puffin-fs" }
|
puffin-fs = { path = "../puffin-fs" }
|
||||||
|
|
||||||
fs-err = { workspace = true, features = ["tokio"] }
|
fs-err = { workspace = true, features = ["tokio"] }
|
||||||
|
rmp-serde = { workspace = true }
|
||||||
|
serde = { workspace = true, features = ["derive"] }
|
||||||
serde_json = { workspace = true }
|
serde_json = { workspace = true }
|
||||||
thiserror = { workspace = true }
|
thiserror = { workspace = true }
|
||||||
tokio = { workspace = true }
|
tokio = { workspace = true }
|
||||||
tracing = { workspace = true }
|
tracing = { workspace = true }
|
||||||
serde = { workspace = true, features = ["derive"] }
|
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
indoc = { version = "2.0.4" }
|
indoc = { version = "2.0.4" }
|
||||||
|
|
|
@ -3,7 +3,7 @@ use std::process::Command;
|
||||||
|
|
||||||
use fs_err as fs;
|
use fs_err as fs;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use tracing::debug;
|
use tracing::{debug, warn};
|
||||||
|
|
||||||
use pep440_rs::Version;
|
use pep440_rs::Version;
|
||||||
use pep508_rs::MarkerEnvironment;
|
use pep508_rs::MarkerEnvironment;
|
||||||
|
@ -133,9 +133,8 @@ impl InterpreterQueryResult {
|
||||||
let data = serde_json::from_slice::<Self>(&output.stdout).map_err(|err| {
|
let data = serde_json::from_slice::<Self>(&output.stdout).map_err(|err| {
|
||||||
Error::PythonSubcommandOutput {
|
Error::PythonSubcommandOutput {
|
||||||
message: format!(
|
message: format!(
|
||||||
"Querying python at {} did not return the expected data: {}",
|
"Querying python at {} did not return the expected data: {err}",
|
||||||
interpreter.display(),
|
interpreter.display(),
|
||||||
err,
|
|
||||||
),
|
),
|
||||||
stdout: String::from_utf8_lossy(&output.stdout).trim().to_string(),
|
stdout: String::from_utf8_lossy(&output.stdout).trim().to_string(),
|
||||||
stderr: String::from_utf8_lossy(&output.stderr).trim().to_string(),
|
stderr: String::from_utf8_lossy(&output.stderr).trim().to_string(),
|
||||||
|
@ -155,7 +154,7 @@ impl InterpreterQueryResult {
|
||||||
let cache_entry = cache.entry(
|
let cache_entry = cache.entry(
|
||||||
CacheBucket::Interpreter,
|
CacheBucket::Interpreter,
|
||||||
"",
|
"",
|
||||||
format!("{}.json", digest(&executable_bytes)),
|
format!("{}.msgpack", digest(&executable_bytes)),
|
||||||
);
|
);
|
||||||
|
|
||||||
// `modified()` is infallible on windows and unix (i.e., all platforms we support).
|
// `modified()` is infallible on windows and unix (i.e., all platforms we support).
|
||||||
|
@ -163,16 +162,25 @@ impl InterpreterQueryResult {
|
||||||
|
|
||||||
// Read from the cache.
|
// Read from the cache.
|
||||||
if let Ok(data) = fs::read(cache_entry.path()) {
|
if let Ok(data) = fs::read(cache_entry.path()) {
|
||||||
if let Ok(cached) = serde_json::from_slice::<CachedByTimestamp<Self>>(&data) {
|
match rmp_serde::from_slice::<CachedByTimestamp<Self>>(&data) {
|
||||||
if cached.timestamp == modified {
|
Ok(cached) => {
|
||||||
debug!("Using cached markers for: {}", executable.display());
|
if cached.timestamp == modified {
|
||||||
return Ok(cached.data);
|
debug!("Using cached markers for: {}", executable.display());
|
||||||
}
|
return Ok(cached.data);
|
||||||
|
}
|
||||||
|
|
||||||
debug!(
|
debug!(
|
||||||
"Ignoring stale cached markers for: {}",
|
"Ignoring stale cached markers for: {}",
|
||||||
executable.display()
|
executable.display()
|
||||||
);
|
);
|
||||||
|
}
|
||||||
|
Err(err) => {
|
||||||
|
warn!(
|
||||||
|
"Broken cache entry at {}, removing: {err}",
|
||||||
|
cache_entry.path().display()
|
||||||
|
);
|
||||||
|
let _ = fs_err::remove_file(cache_entry.path());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -187,7 +195,7 @@ impl InterpreterQueryResult {
|
||||||
// Write to the cache.
|
// Write to the cache.
|
||||||
write_atomic_sync(
|
write_atomic_sync(
|
||||||
cache_entry.path(),
|
cache_entry.path(),
|
||||||
serde_json::to_vec(&CachedByTimestamp {
|
rmp_serde::to_vec(&CachedByTimestamp {
|
||||||
timestamp: modified,
|
timestamp: modified,
|
||||||
data: info.clone(),
|
data: info.clone(),
|
||||||
})?,
|
})?,
|
||||||
|
|
|
@ -40,6 +40,10 @@ pub enum Error {
|
||||||
},
|
},
|
||||||
#[error("Failed to write to cache")]
|
#[error("Failed to write to cache")]
|
||||||
Serde(#[from] serde_json::Error),
|
Serde(#[from] serde_json::Error),
|
||||||
|
#[error("Cache deserialization failed")]
|
||||||
|
Decode(#[from] rmp_serde::decode::Error),
|
||||||
|
#[error("Cache serialization failed")]
|
||||||
|
Encode(#[from] rmp_serde::encode::Error),
|
||||||
#[error("Failed to parse pyvenv.cfg")]
|
#[error("Failed to parse pyvenv.cfg")]
|
||||||
Cfg(#[from] cfg::Error),
|
Cfg(#[from] cfg::Error),
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue