Add zstandard support for wheels (#15645)

## Summary

This PR allows pyx to send down hashes for zstandard-compressed
tarballs. If the hash is present, then the file is assumed to be present
at `${wheel_url}.tar.zst`, similar in design to PEP 658
`${wheel_url}.metadata` files. The intent here is that the index
must include the wheel (to support all clients and to support
random access), but can optionally include a zstandard-compressed
version alongside it.
This commit is contained in:
Charlie Marsh 2025-09-02 21:38:31 -04:00 committed by GitHub
parent 7606f1ad3c
commit 4e48d759c4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 279 additions and 29 deletions

View file

@ -1369,6 +1369,7 @@ impl PylockTomlWheel {
upload_time_utc_ms: self.upload_time.map(Timestamp::as_millisecond),
url: FileLocation::AbsoluteUrl(file_url),
yanked: None,
zstd: None,
});
Ok(RegistryBuiltWheel {
@ -1525,6 +1526,7 @@ impl PylockTomlSdist {
upload_time_utc_ms: self.upload_time.map(Timestamp::as_millisecond),
url: FileLocation::AbsoluteUrl(file_url),
yanked: None,
zstd: None,
});
Ok(RegistrySourceDist {

View file

@ -2754,6 +2754,7 @@ impl Package {
upload_time_utc_ms: sdist.upload_time().map(Timestamp::as_millisecond),
url: FileLocation::AbsoluteUrl(file_url.clone()),
yanked: None,
zstd: None,
});
let index = IndexUrl::from(VerbatimUrl::from_url(
@ -2828,6 +2829,7 @@ impl Package {
upload_time_utc_ms: sdist.upload_time().map(Timestamp::as_millisecond),
url: file_url,
yanked: None,
zstd: None,
});
let index = IndexUrl::from(
@ -3076,6 +3078,9 @@ impl Package {
}
for wheel in &self.wheels {
hashes.extend(wheel.hash.as_ref().map(|h| h.0.clone()));
if let Some(zstd) = wheel.zstd.as_ref() {
hashes.extend(zstd.hash.as_ref().map(|h| h.0.clone()));
}
}
HashDigests::from(hashes)
}
@ -3648,6 +3653,14 @@ impl Source {
}
table.insert("source", value(source_table));
}
/// Check if a package is local by examining its source.
///
/// Returns `true` when `self` is one of the `Path`, `Directory`, `Editable`,
/// or `Virtual` variants, and `false` for every other variant.
pub(crate) fn is_local(&self) -> bool {
matches!(
self,
Self::Path(_) | Self::Directory(_) | Self::Editable(_) | Self::Virtual(_)
)
}
}
impl Display for Source {
@ -3696,14 +3709,6 @@ impl Source {
}
}
}
/// Check if a package is local by examining its source.
///
/// Returns `true` when `self` is one of the `Path`, `Directory`, `Editable`,
/// or `Virtual` variants, and `false` for every other variant.
pub(crate) fn is_local(&self) -> bool {
matches!(
self,
Self::Path(_) | Self::Directory(_) | Self::Editable(_) | Self::Virtual(_)
)
}
}
#[derive(Clone, Debug, serde::Deserialize)]
@ -4315,6 +4320,12 @@ fn locked_git_url(git_dist: &GitSourceDist) -> DisplaySafeUrl {
url
}
/// Information recorded about the zstandard-compressed variant of a wheel.
///
/// Both fields are optional, so an entry may carry a hash, a size, both, or
/// neither.
#[derive(Clone, Debug, serde::Deserialize, PartialEq, Eq)]
struct ZstdWheel {
/// Hash of the zstandard-compressed artifact, if one was recorded.
hash: Option<Hash>,
/// Size of the zstandard-compressed artifact, if one was recorded.
/// NOTE(review): presumably a byte count; confirm against the producer.
size: Option<u64>,
}
/// Inspired by: <https://discuss.python.org/t/lock-files-again-but-this-time-w-sdists/46593>
#[derive(Clone, Debug, serde::Deserialize, PartialEq, Eq)]
#[serde(try_from = "WheelWire")]
@ -4345,6 +4356,8 @@ struct Wheel {
/// deserialization time. Not being able to extract a wheel filename from a
/// wheel URL is thus a deserialization error.
filename: WheelFilename,
/// The hash and size of the zstandard-compressed variant of this wheel, if any.
zstd: Option<ZstdWheel>,
}
impl Wheel {
@ -4453,12 +4466,17 @@ impl Wheel {
.map(Timestamp::from_millisecond)
.transpose()
.map_err(LockErrorKind::InvalidTimestamp)?;
let zstd = wheel.file.zstd.as_ref().map(|zstd| ZstdWheel {
hash: zstd.hashes.iter().max().cloned().map(Hash::from),
size: zstd.size,
});
Ok(Self {
url,
hash,
size,
upload_time,
filename,
zstd,
})
}
@ -4471,6 +4489,7 @@ impl Wheel {
size: None,
upload_time: None,
filename: direct_dist.filename.clone(),
zstd: None,
}
}
@ -4483,6 +4502,7 @@ impl Wheel {
size: None,
upload_time: None,
filename: path_dist.filename.clone(),
zstd: None,
}
}
@ -4516,6 +4536,14 @@ impl Wheel {
upload_time_utc_ms: self.upload_time.map(Timestamp::as_millisecond),
url: file_location,
yanked: None,
zstd: self
.zstd
.as_ref()
.map(|zstd| uv_distribution_types::Zstd {
hashes: zstd.hash.iter().map(|h| h.0.clone()).collect(),
size: zstd.size,
})
.map(Box::new),
});
let index = IndexUrl::from(VerbatimUrl::from_url(
url.to_url().map_err(LockErrorKind::InvalidUrl)?,
@ -4558,6 +4586,14 @@ impl Wheel {
upload_time_utc_ms: self.upload_time.map(Timestamp::as_millisecond),
url: file_location,
yanked: None,
zstd: self
.zstd
.as_ref()
.map(|zstd| uv_distribution_types::Zstd {
hashes: zstd.hash.iter().map(|h| h.0.clone()).collect(),
size: zstd.size,
})
.map(Box::new),
});
let index = IndexUrl::from(
VerbatimUrl::from_absolute_path(root.join(index_path))
@ -4593,6 +4629,9 @@ struct WheelWire {
/// This is only present for wheels that come from registries.
#[serde(alias = "upload_time")]
upload_time: Option<Timestamp>,
/// The hash and size of the zstandard-compressed variant of this wheel, if any.
#[serde(alias = "zstd")]
zstd: Option<ZstdWheel>,
}
#[derive(Clone, Debug, serde::Deserialize, PartialEq, Eq)]
@ -4648,6 +4687,19 @@ impl Wheel {
if let Some(upload_time) = self.upload_time {
table.insert("upload-time", Value::from(upload_time.to_string()));
}
if let Some(zstd) = &self.zstd {
let mut inner = InlineTable::new();
if let Some(ref hash) = zstd.hash {
inner.insert("hash", Value::from(hash.to_string()));
}
if let Some(size) = zstd.size {
inner.insert(
"size",
toml_edit::ser::ValueSerializer::new().serialize_u64(size)?,
);
}
table.insert("zstd", Value::from(inner));
}
Ok(table)
}
}
@ -4682,6 +4734,7 @@ impl TryFrom<WheelWire> for Wheel {
hash: wire.hash,
size: wire.size,
upload_time: wire.upload_time,
zstd: wire.zstd,
filename,
})
}

View file

@ -83,6 +83,7 @@ Ok(
},
},
},
zstd: None,
},
],
fork_markers: [],

View file

@ -90,6 +90,7 @@ Ok(
},
},
},
zstd: None,
},
],
fork_markers: [],

View file

@ -86,6 +86,7 @@ Ok(
},
},
},
zstd: None,
},
],
fork_markers: [],