Add zstandard support for wheels (#15645)

## Summary

This PR allows pyx to send down hashes for zstandard-compressed
tarballs. If the hash is present, then the file is assumed to be present
at `${wheel_url}.tar.zst`, similar in design to PEP 658
`${wheel_url}.metadata` files. The intent here is that the index
must include the wheel (to support all clients and to support
random access), but can optionally include a zstandard-compressed
version alongside it.
This commit is contained in:
Charlie Marsh 2025-09-02 21:38:31 -04:00 committed by GitHub
parent 7606f1ad3c
commit 4e48d759c4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 279 additions and 29 deletions

View file

@ -20,8 +20,8 @@ use uv_client::{
};
use uv_distribution_filename::WheelFilename;
use uv_distribution_types::{
BuildInfo, BuildableSource, BuiltDist, Dist, HashPolicy, Hashed, IndexUrl, InstalledDist, Name,
SourceDist,
BuildInfo, BuildableSource, BuiltDist, Dist, File, HashPolicy, Hashed, IndexUrl, InstalledDist,
Name, SourceDist, ToUrlError,
};
use uv_extract::hash::Hasher;
use uv_fs::write_atomic;
@ -179,7 +179,11 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
match dist {
BuiltDist::Registry(wheels) => {
let wheel = wheels.best_wheel();
let url = wheel.file.url.to_url()?;
let WheelTarget {
url,
extension,
size,
} = WheelTarget::try_from(&*wheel.file)?;
// Create a cache entry for the wheel.
let wheel_entry = self.build_context.cache().entry(
@ -194,7 +198,14 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
.to_file_path()
.map_err(|()| Error::NonFileUrl(url.clone()))?;
return self
.load_wheel(&path, &wheel.filename, wheel_entry, dist, hashes)
.load_wheel(
&path,
&wheel.filename,
WheelExtension::Whl,
wheel_entry,
dist,
hashes,
)
.await;
}
@ -204,7 +215,8 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
url.clone(),
dist.index(),
&wheel.filename,
wheel.file.size,
extension,
size,
&wheel_entry,
dist,
hashes,
@ -241,7 +253,8 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
url,
dist.index(),
&wheel.filename,
wheel.file.size,
extension,
size,
&wheel_entry,
dist,
hashes,
@ -279,6 +292,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
wheel.url.raw().clone(),
None,
&wheel.filename,
WheelExtension::Whl,
None,
&wheel_entry,
dist,
@ -310,6 +324,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
wheel.url.raw().clone(),
None,
&wheel.filename,
WheelExtension::Whl,
None,
&wheel_entry,
dist,
@ -343,6 +358,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
self.load_wheel(
&wheel.install_path,
&wheel.filename,
WheelExtension::Whl,
cache_entry,
dist,
hashes,
@ -547,6 +563,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
url: DisplaySafeUrl,
index: Option<&IndexUrl>,
filename: &WheelFilename,
extension: WheelExtension,
size: Option<u64>,
wheel_entry: &CacheEntry,
dist: &BuiltDist,
@ -588,15 +605,31 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
match progress {
Some((reporter, progress)) => {
let mut reader = ProgressReader::new(&mut hasher, progress, &**reporter);
uv_extract::stream::unzip(&mut reader, temp_dir.path())
.await
.map_err(|err| Error::Extract(filename.to_string(), err))?;
}
None => {
uv_extract::stream::unzip(&mut hasher, temp_dir.path())
.await
.map_err(|err| Error::Extract(filename.to_string(), err))?;
match extension {
WheelExtension::Whl => {
uv_extract::stream::unzip(&mut reader, temp_dir.path())
.await
.map_err(|err| Error::Extract(filename.to_string(), err))?;
}
WheelExtension::WhlZst => {
uv_extract::stream::untar_zst(&mut reader, temp_dir.path())
.await
.map_err(|err| Error::Extract(filename.to_string(), err))?;
}
}
}
None => match extension {
WheelExtension::Whl => {
uv_extract::stream::unzip(&mut hasher, temp_dir.path())
.await
.map_err(|err| Error::Extract(filename.to_string(), err))?;
}
WheelExtension::WhlZst => {
uv_extract::stream::untar_zst(&mut hasher, temp_dir.path())
.await
.map_err(|err| Error::Extract(filename.to_string(), err))?;
}
},
}
// If necessary, exhaust the reader to compute the hash.
@ -701,6 +734,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
url: DisplaySafeUrl,
index: Option<&IndexUrl>,
filename: &WheelFilename,
extension: WheelExtension,
size: Option<u64>,
wheel_entry: &CacheEntry,
dist: &BuiltDist,
@ -772,7 +806,14 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
let target = temp_dir.path().to_owned();
move || -> Result<(), uv_extract::Error> {
// Unzip the wheel into a temporary directory.
uv_extract::unzip(file, &target)?;
match extension {
WheelExtension::Whl => {
uv_extract::unzip(file, &target)?;
}
WheelExtension::WhlZst => {
uv_extract::stream::untar_zst_file(file, &target)?;
}
}
Ok(())
}
})
@ -785,9 +826,19 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
let algorithms = hashes.algorithms();
let mut hashers = algorithms.into_iter().map(Hasher::from).collect::<Vec<_>>();
let mut hasher = uv_extract::hash::HashReader::new(file, &mut hashers);
uv_extract::stream::unzip(&mut hasher, temp_dir.path())
.await
.map_err(|err| Error::Extract(filename.to_string(), err))?;
match extension {
WheelExtension::Whl => {
uv_extract::stream::unzip(&mut hasher, temp_dir.path())
.await
.map_err(|err| Error::Extract(filename.to_string(), err))?;
}
WheelExtension::WhlZst => {
uv_extract::stream::untar_zst(&mut hasher, temp_dir.path())
.await
.map_err(|err| Error::Extract(filename.to_string(), err))?;
}
}
// If necessary, exhaust the reader to compute the hash.
hasher.finish().await.map_err(Error::HashExhaustion)?;
@ -887,6 +938,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
&self,
path: &Path,
filename: &WheelFilename,
extension: WheelExtension,
wheel_entry: CacheEntry,
dist: &BuiltDist,
hashes: HashPolicy<'_>,
@ -965,9 +1017,18 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
let mut hasher = uv_extract::hash::HashReader::new(file, &mut hashers);
// Unzip the wheel to a temporary directory.
uv_extract::stream::unzip(&mut hasher, temp_dir.path())
.await
.map_err(|err| Error::Extract(filename.to_string(), err))?;
match extension {
WheelExtension::Whl => {
uv_extract::stream::unzip(&mut hasher, temp_dir.path())
.await
.map_err(|err| Error::Extract(filename.to_string(), err))?;
}
WheelExtension::WhlZst => {
uv_extract::stream::untar_zst(&mut hasher, temp_dir.path())
.await
.map_err(|err| Error::Extract(filename.to_string(), err))?;
}
}
// Exhaust the reader to compute the hash.
hasher.finish().await.map_err(Error::HashExhaustion)?;
@ -1227,3 +1288,90 @@ impl LocalArchivePointer {
None
}
}
/// The concrete download target for a registry wheel.
///
/// Built from a [`File`] via `TryFrom`: when the file carries zstandard
/// metadata, the target is the `.tar.zst` sibling of the wheel URL with the
/// zstandard size; otherwise it is the wheel URL itself with the wheel's size.
#[derive(Debug, Clone)]
struct WheelTarget {
/// The URL from which the wheel can be downloaded.
url: DisplaySafeUrl,
/// The expected extension of the wheel file.
extension: WheelExtension,
/// The expected size of the wheel file, if known.
size: Option<u64>,
}
impl TryFrom<&File> for WheelTarget {
    type Error = ToUrlError;

    /// Resolve the [`WheelTarget`] that should be fetched for a [`File`].
    ///
    /// If the file advertises a zstandard variant, the target points at the
    /// wheel URL with `.tar.zst` appended and uses the size recorded for that
    /// variant. Otherwise, the target is the plain wheel URL and the file's
    /// own size.
    ///
    /// # Errors
    ///
    /// Returns a [`ToUrlError`] if the file's location cannot be converted
    /// into a URL.
    fn try_from(file: &File) -> Result<Self, Self::Error> {
        let url = file.url.to_url()?;
        let target = match file.zstd.as_ref() {
            // A zstandard-compressed tarball is available alongside the wheel.
            Some(zstd) => Self {
                url: add_tar_zst_extension(url),
                extension: WheelExtension::WhlZst,
                size: zstd.size,
            },
            // Only the regular wheel is available.
            None => Self {
                url,
                extension: WheelExtension::Whl,
                size: file.size,
            },
        };
        Ok(target)
    }
}
/// The on-disk (or on-the-wire) format of a wheel archive.
///
/// Callers match on this to pick the extraction routine: zip extraction for
/// `Whl`, zstandard-compressed tar extraction for `WhlZst`.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum WheelExtension {
/// A `.whl` file.
Whl,
/// A `.whl.tar.zst` file.
WhlZst,
}
/// Return `url` with `.tar.zst` appended to its path.
///
/// The operation is idempotent: a path that already ends in `.tar.zst` is
/// written back unchanged. Only the path component is modified.
#[must_use]
fn add_tar_zst_extension(mut url: DisplaySafeUrl) -> DisplaySafeUrl {
    const SUFFIX: &str = ".tar.zst";

    let current = url.path();
    let updated = if current.ends_with(SUFFIX) {
        current.to_owned()
    } else {
        [current, SUFFIX].concat()
    };
    url.set_path(&updated);
    url
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Appending `.tar.zst` works for a plain wheel URL, is a no-op when the
    /// suffix is already present, and leaves percent-encoded characters intact.
    #[test]
    fn test_add_tar_zst_extension() {
        // (input URL, expected URL after adding the extension)
        let cases = [
            (
                "https://files.pythonhosted.org/flask-3.1.0-py3-none-any.whl",
                "https://files.pythonhosted.org/flask-3.1.0-py3-none-any.whl.tar.zst",
            ),
            (
                "https://files.pythonhosted.org/flask-3.1.0-py3-none-any.whl.tar.zst",
                "https://files.pythonhosted.org/flask-3.1.0-py3-none-any.whl.tar.zst",
            ),
            (
                "https://files.pythonhosted.org/flask-3.1.0%2Bcu124-py3-none-any.whl",
                "https://files.pythonhosted.org/flask-3.1.0%2Bcu124-py3-none-any.whl.tar.zst",
            ),
        ];

        for (input, expected) in cases {
            let url = DisplaySafeUrl::parse(input).unwrap();
            assert_eq!(add_tar_zst_extension(url).as_str(), expected);
        }
    }
}