diff --git a/Cargo.lock b/Cargo.lock index 6f033fa40..da013397d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5675,6 +5675,7 @@ dependencies = [ "reqwest", "rustc-hash", "sha2", + "tar", "thiserror 2.0.16", "tokio", "tokio-util", @@ -5685,6 +5686,7 @@ dependencies = [ "uv-static", "xz2", "zip", + "zstd", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index bea4616a8..ad43c0f5f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -201,6 +201,7 @@ windows-sys = { version = "0.59.0", features = ["Win32_Foundation", "Win32_Secur wiremock = { version = "0.6.4" } xz2 = { version = "0.1.7" } zip = { version = "2.2.3", default-features = false, features = ["deflate", "zstd", "bzip2", "lzma", "xz"] } +zstd = { version = "0.13.3" } # dev-dependencies assert_cmd = { version = "2.0.16" } diff --git a/crates/uv-client/src/flat_index.rs b/crates/uv-client/src/flat_index.rs index a8ab1ba1d..53ff94632 100644 --- a/crates/uv-client/src/flat_index.rs +++ b/crates/uv-client/src/flat_index.rs @@ -305,6 +305,7 @@ impl<'a> FlatIndexClient<'a> { upload_time_utc_ms: None, url: FileLocation::AbsoluteUrl(UrlString::from(url)), yanked: None, + zstd: None, }; let Some(filename) = DistFilename::try_from_normalized_filename(filename) else { diff --git a/crates/uv-distribution-types/src/file.rs b/crates/uv-distribution-types/src/file.rs index 3cb2d5b30..cf014a14a 100644 --- a/crates/uv-distribution-types/src/file.rs +++ b/crates/uv-distribution-types/src/file.rs @@ -40,6 +40,7 @@ pub struct File { pub upload_time_utc_ms: Option, pub url: FileLocation, pub yanked: Option>, + pub zstd: Option>, } impl File { @@ -63,6 +64,7 @@ impl File { upload_time_utc_ms: file.upload_time.map(Timestamp::as_millisecond), url: FileLocation::new(file.url, base), yanked: file.yanked, + zstd: None, }) } @@ -108,6 +110,13 @@ impl File { upload_time_utc_ms: file.upload_time.map(Timestamp::as_millisecond), url: FileLocation::new(file.url, base), yanked: file.yanked, + zstd: file + .zstd + .map(|zstd| Zstd { + hashes: HashDigests::from(zstd.hashes), + size: zstd.size, + }) + .map(Box::new), }) } } @@ -289,6 +298,12 @@ pub enum ToUrlError { }, } +#[derive(Debug, Clone, PartialEq, Eq, Hash, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize)] +pub struct Zstd { + pub hashes: HashDigests, + pub size: Option, +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/uv-distribution/src/distribution_database.rs b/crates/uv-distribution/src/distribution_database.rs index df04b3845..7a158db25 100644 --- a/crates/uv-distribution/src/distribution_database.rs +++ b/crates/uv-distribution/src/distribution_database.rs @@ -20,8 +20,8 @@ use uv_client::{ }; use uv_distribution_filename::WheelFilename; use uv_distribution_types::{ - BuildInfo, BuildableSource, BuiltDist, Dist, HashPolicy, Hashed, IndexUrl, InstalledDist, Name, - SourceDist, + BuildInfo, BuildableSource, BuiltDist, Dist, File, HashPolicy, Hashed, IndexUrl, InstalledDist, + Name, SourceDist, ToUrlError, }; use uv_extract::hash::Hasher; use uv_fs::write_atomic; @@ -179,7 +179,11 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { match dist { BuiltDist::Registry(wheels) => { let wheel = wheels.best_wheel(); - let url = wheel.file.url.to_url()?; + let WheelTarget { + url, + extension, + size, + } = WheelTarget::try_from(&*wheel.file)?; // Create a cache entry for the wheel. let wheel_entry = self.build_context.cache().entry( @@ -194,7 +198,14 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { .to_file_path() .map_err(|()| Error::NonFileUrl(url.clone()))?; return self - .load_wheel(&path, &wheel.filename, wheel_entry, dist, hashes) + .load_wheel( + &path, + &wheel.filename, + WheelExtension::Whl, + wheel_entry, + dist, + hashes, + ) .await; } @@ -204,7 +215,8 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { url.clone(), dist.index(), &wheel.filename, - wheel.file.size, + extension, + size, &wheel_entry, dist, hashes, @@ -241,7 +253,8 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { url, dist.index(), &wheel.filename, - wheel.file.size, + extension, + size, &wheel_entry, dist, hashes, @@ -279,6 +292,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { wheel.url.raw().clone(), None, &wheel.filename, + WheelExtension::Whl, None, &wheel_entry, dist, @@ -310,6 +324,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { wheel.url.raw().clone(), None, &wheel.filename, + WheelExtension::Whl, None, &wheel_entry, dist, @@ -343,6 +358,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { self.load_wheel( &wheel.install_path, &wheel.filename, + WheelExtension::Whl, cache_entry, dist, hashes, @@ -547,6 +563,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { url: DisplaySafeUrl, index: Option<&IndexUrl>, filename: &WheelFilename, + extension: WheelExtension, size: Option, wheel_entry: &CacheEntry, dist: &BuiltDist, @@ -588,15 +605,31 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { match progress { Some((reporter, progress)) => { let mut reader = ProgressReader::new(&mut hasher, progress, &**reporter); - uv_extract::stream::unzip(&mut reader, temp_dir.path()) - .await - .map_err(|err| Error::Extract(filename.to_string(), err))?; - } - None => { - uv_extract::stream::unzip(&mut hasher, temp_dir.path()) - .await - .map_err(|err| Error::Extract(filename.to_string(), err))?; + match extension { + WheelExtension::Whl => { + uv_extract::stream::unzip(&mut reader, temp_dir.path()) + .await + .map_err(|err| Error::Extract(filename.to_string(), err))?; + } + WheelExtension::WhlZst => { + uv_extract::stream::untar_zst(&mut reader, temp_dir.path()) + .await + .map_err(|err| Error::Extract(filename.to_string(), err))?; + } + } } + None => match extension { + WheelExtension::Whl => { + uv_extract::stream::unzip(&mut hasher, temp_dir.path()) + .await + .map_err(|err| Error::Extract(filename.to_string(), err))?; + } + WheelExtension::WhlZst => { + uv_extract::stream::untar_zst(&mut hasher, temp_dir.path()) + .await + .map_err(|err| Error::Extract(filename.to_string(), err))?; + } + }, } // If necessary, exhaust the reader to compute the hash. @@ -701,6 +734,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { url: DisplaySafeUrl, index: Option<&IndexUrl>, filename: &WheelFilename, + extension: WheelExtension, size: Option, wheel_entry: &CacheEntry, dist: &BuiltDist, @@ -772,7 +806,14 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { let target = temp_dir.path().to_owned(); move || -> Result<(), uv_extract::Error> { // Unzip the wheel into a temporary directory. - uv_extract::unzip(file, &target)?; + match extension { + WheelExtension::Whl => { + uv_extract::unzip(file, &target)?; + } + WheelExtension::WhlZst => { + uv_extract::stream::untar_zst_file(file, &target)?; + } + } Ok(()) } }) @@ -785,9 +826,19 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { let algorithms = hashes.algorithms(); let mut hashers = algorithms.into_iter().map(Hasher::from).collect::>(); let mut hasher = uv_extract::hash::HashReader::new(file, &mut hashers); - uv_extract::stream::unzip(&mut hasher, temp_dir.path()) - .await - .map_err(|err| Error::Extract(filename.to_string(), err))?; + + match extension { + WheelExtension::Whl => { + uv_extract::stream::unzip(&mut hasher, temp_dir.path()) + .await + .map_err(|err| Error::Extract(filename.to_string(), err))?; + } + WheelExtension::WhlZst => { + uv_extract::stream::untar_zst(&mut hasher, temp_dir.path()) + .await + .map_err(|err| Error::Extract(filename.to_string(), err))?; + } + } // If necessary, exhaust the reader to compute the hash. hasher.finish().await.map_err(Error::HashExhaustion)?; @@ -887,6 +938,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { &self, path: &Path, filename: &WheelFilename, + extension: WheelExtension, wheel_entry: CacheEntry, dist: &BuiltDist, hashes: HashPolicy<'_>, @@ -965,9 +1017,18 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { let mut hasher = uv_extract::hash::HashReader::new(file, &mut hashers); // Unzip the wheel to a temporary directory. - uv_extract::stream::unzip(&mut hasher, temp_dir.path()) - .await - .map_err(|err| Error::Extract(filename.to_string(), err))?; + match extension { + WheelExtension::Whl => { + uv_extract::stream::unzip(&mut hasher, temp_dir.path()) + .await + .map_err(|err| Error::Extract(filename.to_string(), err))?; + } + WheelExtension::WhlZst => { + uv_extract::stream::untar_zst(&mut hasher, temp_dir.path()) + .await + .map_err(|err| Error::Extract(filename.to_string(), err))?; + } + } // Exhaust the reader to compute the hash. hasher.finish().await.map_err(Error::HashExhaustion)?; @@ -1227,3 +1288,90 @@ impl LocalArchivePointer { None } } + +#[derive(Debug, Clone)] +struct WheelTarget { + /// The URL from which the wheel can be downloaded. + url: DisplaySafeUrl, + /// The expected extension of the wheel file. + extension: WheelExtension, + /// The expected size of the wheel file, if known. + size: Option, +} + +impl TryFrom<&File> for WheelTarget { + type Error = ToUrlError; + + /// Determine the [`WheelTarget`] from a [`File`]. + fn try_from(file: &File) -> Result { + let url = file.url.to_url()?; + if let Some(zstd) = file.zstd.as_ref() { + Ok(Self { + url: add_tar_zst_extension(url), + extension: WheelExtension::WhlZst, + size: zstd.size, + }) + } else { + Ok(Self { + url, + extension: WheelExtension::Whl, + size: file.size, + }) + } + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +enum WheelExtension { + /// A `.whl` file. + Whl, + /// A `.whl.tar.zst` file. + WhlZst, +} + +/// Add `.tar.zst` to the end of the URL path, if it doesn't already exist. +#[must_use] +fn add_tar_zst_extension(mut url: DisplaySafeUrl) -> DisplaySafeUrl { + let mut path = url.path().to_string(); + + if !path.ends_with(".tar.zst") { + path.push_str(".tar.zst"); + } + + url.set_path(&path); + url +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_add_tar_zst_extension() { + let url = + DisplaySafeUrl::parse("https://files.pythonhosted.org/flask-3.1.0-py3-none-any.whl") + .unwrap(); + assert_eq!( + add_tar_zst_extension(url).as_str(), + "https://files.pythonhosted.org/flask-3.1.0-py3-none-any.whl.tar.zst" + ); + + let url = DisplaySafeUrl::parse( + "https://files.pythonhosted.org/flask-3.1.0-py3-none-any.whl.tar.zst", + ) + .unwrap(); + assert_eq!( + add_tar_zst_extension(url).as_str(), + "https://files.pythonhosted.org/flask-3.1.0-py3-none-any.whl.tar.zst" + ); + + let url = DisplaySafeUrl::parse( + "https://files.pythonhosted.org/flask-3.1.0%2Bcu124-py3-none-any.whl", + ) + .unwrap(); + assert_eq!( + add_tar_zst_extension(url).as_str(), + "https://files.pythonhosted.org/flask-3.1.0%2Bcu124-py3-none-any.whl.tar.zst" + ); + } +} diff --git a/crates/uv-extract/Cargo.toml b/crates/uv-extract/Cargo.toml index f75c540c9..acbd50927 100644 --- a/crates/uv-extract/Cargo.toml +++ b/crates/uv-extract/Cargo.toml @@ -32,12 +32,14 @@ rayon = { workspace = true } reqwest = { workspace = true } rustc-hash = { workspace = true } sha2 = { workspace = true } +tar = { workspace = true } thiserror = { workspace = true } tokio = { workspace = true } tokio-util = { workspace = true, features = ["compat"] } tracing = { workspace = true } xz2 = { workspace = true } zip = { workspace = true } +zstd = { workspace = true } [features] default = [] diff --git a/crates/uv-extract/src/stream.rs b/crates/uv-extract/src/stream.rs index 45a775c39..2c4a9131c 100644 --- a/crates/uv-extract/src/stream.rs +++ b/crates/uv-extract/src/stream.rs @@ -686,6 +686,16 @@ pub async fn untar_zst( .map_err(Error::io_or_compression) } +/// Unpack a `.tar.zst` archive from a file on disk into the target directory. +pub fn untar_zst_file(reader: R, target: impl AsRef) -> Result<(), Error> { + let reader = std::io::BufReader::with_capacity(DEFAULT_BUF_SIZE, reader); + let decompressed = zstd::Decoder::new(reader).map_err(Error::Io)?; + let mut archive = tar::Archive::new(decompressed); + archive.set_preserve_mtime(false); + archive.unpack(target).map_err(Error::io_or_compression)?; + Ok(()) +} + /// Unpack a `.tar.xz` archive into the target directory, without requiring `Seek`. /// /// This is useful for unpacking files as they're being downloaded. diff --git a/crates/uv-pypi-types/src/simple_json.rs b/crates/uv-pypi-types/src/simple_json.rs index 77bd1025d..92f72de1a 100644 --- a/crates/uv-pypi-types/src/simple_json.rs +++ b/crates/uv-pypi-types/src/simple_json.rs @@ -150,6 +150,7 @@ pub struct PyxFile { pub upload_time: Option, pub url: SmallString, pub yanked: Option>, + pub zstd: Option, } impl<'de> Deserialize<'de> for PyxFile { @@ -178,6 +179,7 @@ impl<'de> Deserialize<'de> for PyxFile { let mut upload_time = None; let mut url = None; let mut yanked = None; + let mut zstd = None; while let Some(key) = access.next_key::()? { match key.as_str() { @@ -201,6 +203,9 @@ impl<'de> Deserialize<'de> for PyxFile { "upload-time" => upload_time = Some(access.next_value()?), "url" => url = Some(access.next_value()?), "yanked" => yanked = Some(access.next_value()?), + "zstd" => { + zstd = Some(access.next_value()?); + } _ => { let _: serde::de::IgnoredAny = access.next_value()?; } @@ -216,6 +221,7 @@ impl<'de> Deserialize<'de> for PyxFile { upload_time, url: url.ok_or_else(|| serde::de::Error::missing_field("url"))?, yanked, + zstd, }) } } @@ -320,6 +326,13 @@ impl Default for Yanked { } } +#[derive(Debug, Clone, Eq, PartialEq, Default, Deserialize, Serialize)] +pub struct Zstd { + pub hashes: Hashes, + #[serde(skip_serializing_if = "Option::is_none")] + pub size: Option, +} + /// A dictionary mapping a hash name to a hex encoded digest of the file. /// /// PEP 691 says multiple hashes can be included and the interpretation is left to the client. diff --git a/crates/uv-resolver/src/lock/export/pylock_toml.rs b/crates/uv-resolver/src/lock/export/pylock_toml.rs index 6f0225ae8..828c4c5fd 100644 --- a/crates/uv-resolver/src/lock/export/pylock_toml.rs +++ b/crates/uv-resolver/src/lock/export/pylock_toml.rs @@ -1369,6 +1369,7 @@ impl PylockTomlWheel { upload_time_utc_ms: self.upload_time.map(Timestamp::as_millisecond), url: FileLocation::AbsoluteUrl(file_url), yanked: None, + zstd: None, }); Ok(RegistryBuiltWheel { @@ -1525,6 +1526,7 @@ impl PylockTomlSdist { upload_time_utc_ms: self.upload_time.map(Timestamp::as_millisecond), url: FileLocation::AbsoluteUrl(file_url), yanked: None, + zstd: None, }); Ok(RegistrySourceDist { diff --git a/crates/uv-resolver/src/lock/mod.rs b/crates/uv-resolver/src/lock/mod.rs index 7f5d37818..2516fccd9 100644 --- a/crates/uv-resolver/src/lock/mod.rs +++ b/crates/uv-resolver/src/lock/mod.rs @@ -2754,6 +2754,7 @@ impl Package { upload_time_utc_ms: sdist.upload_time().map(Timestamp::as_millisecond), url: FileLocation::AbsoluteUrl(file_url.clone()), yanked: None, + zstd: None, }); let index = IndexUrl::from(VerbatimUrl::from_url( @@ -2828,6 +2829,7 @@ impl Package { upload_time_utc_ms: sdist.upload_time().map(Timestamp::as_millisecond), url: file_url, yanked: None, + zstd: None, }); let index = IndexUrl::from( @@ -3076,6 +3078,9 @@ impl Package { } for wheel in &self.wheels { hashes.extend(wheel.hash.as_ref().map(|h| h.0.clone())); + if let Some(zstd) = wheel.zstd.as_ref() { + hashes.extend(zstd.hash.as_ref().map(|h| h.0.clone())); + } } HashDigests::from(hashes) } @@ -3648,6 +3653,14 @@ impl Source { } table.insert("source", value(source_table)); } + + /// Check if a package is local by examining its source. + pub(crate) fn is_local(&self) -> bool { + matches!( + self, + Self::Path(_) | Self::Directory(_) | Self::Editable(_) | Self::Virtual(_) + ) + } } impl Display for Source { @@ -3696,14 +3709,6 @@ impl Source { } } } - - /// Check if a package is local by examining its source. - pub(crate) fn is_local(&self) -> bool { - matches!( - self, - Self::Path(_) | Self::Directory(_) | Self::Editable(_) | Self::Virtual(_) - ) - } } #[derive(Clone, Debug, serde::Deserialize)] @@ -4315,6 +4320,12 @@ fn locked_git_url(git_dist: &GitSourceDist) -> DisplaySafeUrl { url } +#[derive(Clone, Debug, serde::Deserialize, PartialEq, Eq)] +struct ZstdWheel { + hash: Option, + size: Option, +} + /// Inspired by: #[derive(Clone, Debug, serde::Deserialize, PartialEq, Eq)] #[serde(try_from = "WheelWire")] @@ -4345,6 +4356,8 @@ struct Wheel { /// deserialization time. Not being able to extract a wheel filename from a /// wheel URL is thus a deserialization error. filename: WheelFilename, + /// The zstandard-compressed wheel metadata, if any. + zstd: Option, } impl Wheel { @@ -4453,12 +4466,17 @@ impl Wheel { .map(Timestamp::from_millisecond) .transpose() .map_err(LockErrorKind::InvalidTimestamp)?; + let zstd = wheel.file.zstd.as_ref().map(|zstd| ZstdWheel { + hash: zstd.hashes.iter().max().cloned().map(Hash::from), + size: zstd.size, + }); Ok(Self { url, hash, size, upload_time, filename, + zstd, }) } @@ -4471,6 +4489,7 @@ impl Wheel { size: None, upload_time: None, filename: direct_dist.filename.clone(), + zstd: None, } } @@ -4483,6 +4502,7 @@ impl Wheel { size: None, upload_time: None, filename: path_dist.filename.clone(), + zstd: None, } } @@ -4516,6 +4536,14 @@ impl Wheel { upload_time_utc_ms: self.upload_time.map(Timestamp::as_millisecond), url: file_location, yanked: None, + zstd: self + .zstd + .as_ref() + .map(|zstd| uv_distribution_types::Zstd { + hashes: zstd.hash.iter().map(|h| h.0.clone()).collect(), + size: zstd.size, + }) + .map(Box::new), }); let index = IndexUrl::from(VerbatimUrl::from_url( url.to_url().map_err(LockErrorKind::InvalidUrl)?, @@ -4558,6 +4586,14 @@ impl Wheel { upload_time_utc_ms: self.upload_time.map(Timestamp::as_millisecond), url: file_location, yanked: None, + zstd: self + .zstd + .as_ref() + .map(|zstd| uv_distribution_types::Zstd { + hashes: zstd.hash.iter().map(|h| h.0.clone()).collect(), + size: zstd.size, + }) + .map(Box::new), }); let index = IndexUrl::from( VerbatimUrl::from_absolute_path(root.join(index_path)) @@ -4593,6 +4629,9 @@ struct WheelWire { /// This is only present for wheels that come from registries. #[serde(alias = "upload_time")] upload_time: Option, + /// The zstandard-compressed wheel metadata, if any. + #[serde(alias = "zstd")] + zstd: Option, } #[derive(Clone, Debug, serde::Deserialize, PartialEq, Eq)] @@ -4648,6 +4687,19 @@ impl Wheel { if let Some(upload_time) = self.upload_time { table.insert("upload-time", Value::from(upload_time.to_string())); } + if let Some(zstd) = &self.zstd { + let mut inner = InlineTable::new(); + if let Some(ref hash) = zstd.hash { + inner.insert("hash", Value::from(hash.to_string())); + } + if let Some(size) = zstd.size { + inner.insert( + "size", + toml_edit::ser::ValueSerializer::new().serialize_u64(size)?, + ); + } + table.insert("zstd", Value::from(inner)); + } Ok(table) } } @@ -4682,6 +4734,7 @@ impl TryFrom for Wheel { hash: wire.hash, size: wire.size, upload_time: wire.upload_time, + zstd: wire.zstd, filename, }) } diff --git a/crates/uv-resolver/src/lock/snapshots/uv_resolver__lock__tests__hash_optional_missing.snap b/crates/uv-resolver/src/lock/snapshots/uv_resolver__lock__tests__hash_optional_missing.snap index 1e0f3ef0f..b5086442b 100644 --- a/crates/uv-resolver/src/lock/snapshots/uv_resolver__lock__tests__hash_optional_missing.snap +++ b/crates/uv-resolver/src/lock/snapshots/uv_resolver__lock__tests__hash_optional_missing.snap @@ -83,6 +83,7 @@ Ok( }, }, }, + zstd: None, }, ], fork_markers: [], diff --git a/crates/uv-resolver/src/lock/snapshots/uv_resolver__lock__tests__hash_optional_present.snap b/crates/uv-resolver/src/lock/snapshots/uv_resolver__lock__tests__hash_optional_present.snap index 123c3521b..26417647e 100644 --- a/crates/uv-resolver/src/lock/snapshots/uv_resolver__lock__tests__hash_optional_present.snap +++ b/crates/uv-resolver/src/lock/snapshots/uv_resolver__lock__tests__hash_optional_present.snap @@ -90,6 +90,7 @@ Ok( }, }, }, + zstd: None, }, ], fork_markers: [], diff --git a/crates/uv-resolver/src/lock/snapshots/uv_resolver__lock__tests__hash_required_present.snap b/crates/uv-resolver/src/lock/snapshots/uv_resolver__lock__tests__hash_required_present.snap index 3c7d13be1..8d7476c02 100644 --- a/crates/uv-resolver/src/lock/snapshots/uv_resolver__lock__tests__hash_required_present.snap +++ b/crates/uv-resolver/src/lock/snapshots/uv_resolver__lock__tests__hash_required_present.snap @@ -86,6 +86,7 @@ Ok( }, }, }, + zstd: None, }, ], fork_markers: [],