Consistently enforce requested-vs.-built metadata when retrieving wheels (#9484)

## Summary

We were missing a bunch of edge cases, e.g., the wheel exists in the
cache already.

Closes https://github.com/astral-sh/uv/issues/9480.
This commit is contained in:
Charlie Marsh 2024-11-27 16:51:01 -05:00 committed by GitHub
parent 4f2b30ca02
commit f1ccbcb065
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 354 additions and 66 deletions

View file

@ -321,6 +321,8 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
.boxed_local()
.await?;
// Validate that the metadata is consistent with the distribution.
// If the wheel was unzipped previously, respect it. Source distributions are
// cached under a unique revision ID, so unzipped directories are never stale.
match built_wheel.target.canonicalize() {
@ -397,7 +399,10 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
.await;
match result {
Ok(metadata) => Ok(ArchiveMetadata::from_metadata23(metadata)),
Ok(metadata) => {
// Validate that the metadata is consistent with the distribution.
Ok(ArchiveMetadata::from_metadata23(metadata))
}
Err(err) if err.is_http_streaming_unsupported() => {
warn!("Streaming unsupported when fetching metadata for {dist}; downloading wheel directly ({err})");
@ -461,6 +466,8 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
.boxed_local()
.await?;
// Validate that the metadata is consistent with the distribution.
Ok(metadata)
}

View file

@ -56,12 +56,26 @@ pub enum Error {
#[error("Built wheel has an invalid filename")]
WheelFilename(#[from] WheelFilenameError),
#[error("Package metadata name `{metadata}` does not match given name `{given}`")]
NameMismatch {
WheelMetadataNameMismatch {
given: PackageName,
metadata: PackageName,
},
#[error("Package metadata version `{metadata}` does not match given version `{given}`")]
VersionMismatch { given: Version, metadata: Version },
WheelMetadataVersionMismatch { given: Version, metadata: Version },
#[error(
"Package metadata name `{metadata}` does not match `{filename}` from the wheel filename"
)]
WheelFilenameNameMismatch {
filename: PackageName,
metadata: PackageName,
},
#[error(
"Package metadata version `{metadata}` does not match `{filename}` from the wheel filename"
)]
WheelFilenameVersionMismatch {
filename: Version,
metadata: Version,
},
#[error("Failed to parse metadata from built wheel")]
Metadata(#[from] uv_pypi_types::MetadataError),
#[error("Failed to read metadata: `{}`", _0.user_display())]

View file

@ -6,6 +6,8 @@ use uv_cache_info::CacheInfo;
use uv_distribution_filename::WheelFilename;
use uv_distribution_types::Hashed;
use uv_fs::files;
use uv_normalize::PackageName;
use uv_pep440::Version;
use uv_platform_tags::Tags;
use uv_pypi_types::HashDigest;
@ -56,6 +58,12 @@ impl BuiltWheelMetadata {
self.hashes = hashes;
self
}
/// Returns `true` if the wheel matches the given package name and version.
pub(crate) fn matches(&self, name: Option<&PackageName>, version: Option<&Version>) -> bool {
name.map_or(true, |name| self.filename.name == *name)
&& version.map_or(true, |version| self.filename.version == *version)
}
}
impl Hashed for BuiltWheelMetadata {

View file

@ -34,7 +34,8 @@ use uv_distribution_types::{
use uv_extract::hash::Hasher;
use uv_fs::{rename_with_retry, write_atomic, LockedFile};
use uv_metadata::read_archive_metadata;
use uv_pep440::release_specifiers_to_ranges;
use uv_normalize::PackageName;
use uv_pep440::{release_specifiers_to_ranges, Version};
use uv_platform_tags::Tags;
use uv_pypi_types::{HashAlgorithm, HashDigest, Metadata12, RequiresTxt, ResolutionMetadata};
use uv_types::{BuildContext, SourceBuildTrait};
@ -416,7 +417,9 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
};
// If the cache contains a compatible wheel, return it.
if let Some(built_wheel) = BuiltWheelMetadata::find_in_cache(tags, &cache_shard) {
if let Some(built_wheel) = BuiltWheelMetadata::find_in_cache(tags, &cache_shard)
.filter(|built_wheel| built_wheel.matches(source.name(), source.version()))
{
return Ok(built_wheel.with_hashes(revision.into_hashes()));
}
@ -520,10 +523,13 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
// If the cache contains compatible metadata, return it.
let metadata_entry = cache_shard.entry(METADATA);
if let Some(metadata) = read_cached_metadata(&metadata_entry).await? {
if let Some(metadata) = CachedMetadata::read(&metadata_entry)
.await?
.filter(|metadata| metadata.matches(source.name(), source.version()))
{
debug!("Using cached metadata for: {source}");
return Ok(ArchiveMetadata {
metadata: Metadata::from_metadata23(metadata),
metadata: Metadata::from_metadata23(metadata.into()),
hashes: revision.into_hashes(),
});
}
@ -732,7 +738,9 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
};
// If the cache contains a compatible wheel, return it.
if let Some(built_wheel) = BuiltWheelMetadata::find_in_cache(tags, &cache_shard) {
if let Some(built_wheel) = BuiltWheelMetadata::find_in_cache(tags, &cache_shard)
.filter(|built_wheel| built_wheel.matches(source.name(), source.version()))
{
return Ok(built_wheel);
}
@ -824,10 +832,13 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
// If the cache contains compatible metadata, return it.
let metadata_entry = cache_shard.entry(METADATA);
if let Some(metadata) = read_cached_metadata(&metadata_entry).await? {
if let Some(metadata) = CachedMetadata::read(&metadata_entry)
.await?
.filter(|metadata| metadata.matches(source.name(), source.version()))
{
debug!("Using cached metadata for: {source}");
return Ok(ArchiveMetadata {
metadata: Metadata::from_metadata23(metadata),
metadata: Metadata::from_metadata23(metadata.into()),
hashes: revision.into_hashes(),
});
}
@ -1000,7 +1011,9 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
};
// If the cache contains a compatible wheel, return it.
if let Some(built_wheel) = BuiltWheelMetadata::find_in_cache(tags, &cache_shard) {
if let Some(built_wheel) = BuiltWheelMetadata::find_in_cache(tags, &cache_shard)
.filter(|built_wheel| built_wheel.matches(source.name(), source.version()))
{
return Ok(built_wheel);
}
@ -1095,11 +1108,14 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
// If the cache contains compatible metadata, return it.
let metadata_entry = cache_shard.entry(METADATA);
if let Some(metadata) = read_cached_metadata(&metadata_entry).await? {
if let Some(metadata) = CachedMetadata::read(&metadata_entry)
.await?
.filter(|metadata| metadata.matches(source.name(), source.version()))
{
debug!("Using cached metadata for: {source}");
return Ok(ArchiveMetadata::from(
Metadata::from_workspace(
metadata,
metadata.into(),
resource.install_path.as_ref(),
None,
self.build_context.locations(),
@ -1278,7 +1294,9 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
};
// If the cache contains a compatible wheel, return it.
if let Some(built_wheel) = BuiltWheelMetadata::find_in_cache(tags, &cache_shard) {
if let Some(built_wheel) = BuiltWheelMetadata::find_in_cache(tags, &cache_shard)
.filter(|built_wheel| built_wheel.matches(source.name(), source.version()))
{
return Ok(built_wheel);
}
@ -1388,7 +1406,10 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
.map_err(Error::CacheRead)?
.is_fresh()
{
if let Some(metadata) = read_cached_metadata(&metadata_entry).await? {
if let Some(metadata) = CachedMetadata::read(&metadata_entry)
.await?
.filter(|metadata| metadata.matches(source.name(), source.version()))
{
let path = if let Some(subdirectory) = resource.subdirectory {
Cow::Owned(fetch.path().join(subdirectory))
} else {
@ -1398,7 +1419,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
debug!("Using cached metadata for: {source}");
return Ok(ArchiveMetadata::from(
Metadata::from_workspace(
metadata,
metadata.into(),
&path,
None,
self.build_context.locations(),
@ -1798,6 +1819,14 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
.await
.map_err(Error::Build)?;
// Read the metadata from the wheel.
let filename = WheelFilename::from_str(&disk_filename)?;
let metadata = read_wheel_metadata(&filename, &temp_dir.path().join(&disk_filename))?;
// Validate the metadata.
validate_metadata(source, &metadata)?;
validate_filename(&filename, &metadata)?;
// Move the wheel to the cache.
rename_with_retry(
temp_dir.path().join(&disk_filename),
@ -1806,13 +1835,6 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
.await
.map_err(Error::CacheWrite)?;
// Read the metadata from the wheel.
let filename = WheelFilename::from_str(&disk_filename)?;
let metadata = read_wheel_metadata(&filename, &cache_shard.join(&disk_filename))?;
// Validate the metadata.
validate(source, &metadata)?;
debug!("Finished building: {source}");
Ok((disk_filename, filename, metadata))
}
@ -1883,7 +1905,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
let metadata = ResolutionMetadata::parse_metadata(&content)?;
// Validate the metadata.
validate(source, &metadata)?;
validate_metadata(source, &metadata)?;
Ok(Some(metadata))
}
@ -1899,7 +1921,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
debug!("Found static `pyproject.toml` for: {source}");
// Validate the metadata.
validate(source, &metadata)?;
validate_metadata(source, &metadata)?;
return Ok(Some(metadata));
}
@ -1929,7 +1951,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
debug!("Found static `PKG-INFO` for: {source}");
// Validate the metadata.
validate(source, &metadata)?;
validate_metadata(source, &metadata)?;
return Ok(Some(metadata));
}
@ -1953,7 +1975,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
debug!("Found static `egg-info` for: {source}");
// Validate the metadata.
validate(source, &metadata)?;
validate_metadata(source, &metadata)?;
return Ok(Some(metadata));
}
@ -2065,10 +2087,13 @@ pub fn prune(cache: &Cache) -> Result<Removal, Error> {
}
/// Validate that the source distribution matches the built metadata.
fn validate(source: &BuildableSource<'_>, metadata: &ResolutionMetadata) -> Result<(), Error> {
fn validate_metadata(
source: &BuildableSource<'_>,
metadata: &ResolutionMetadata,
) -> Result<(), Error> {
if let Some(name) = source.name() {
if metadata.name != *name {
return Err(Error::NameMismatch {
return Err(Error::WheelMetadataNameMismatch {
metadata: metadata.name.clone(),
given: name.clone(),
});
@ -2077,7 +2102,7 @@ fn validate(source: &BuildableSource<'_>, metadata: &ResolutionMetadata) -> Resu
if let Some(version) = source.version() {
if metadata.version != *version {
return Err(Error::VersionMismatch {
return Err(Error::WheelMetadataVersionMismatch {
metadata: metadata.version.clone(),
given: version.clone(),
});
@ -2087,6 +2112,25 @@ fn validate(source: &BuildableSource<'_>, metadata: &ResolutionMetadata) -> Resu
Ok(())
}
/// Validate that the source distribution matches the built filename.
fn validate_filename(filename: &WheelFilename, metadata: &ResolutionMetadata) -> Result<(), Error> {
if metadata.name != filename.name {
return Err(Error::WheelFilenameNameMismatch {
metadata: metadata.name.clone(),
filename: filename.name.clone(),
});
}
if metadata.version != filename.version {
return Err(Error::WheelFilenameVersionMismatch {
metadata: metadata.version.clone(),
filename: filename.version.clone(),
});
}
Ok(())
}
/// A pointer to a source distribution revision in the cache, fetched from an HTTP archive.
///
/// Encoded with `MsgPack`, and represented on disk by a `.http` file.
@ -2335,14 +2379,30 @@ async fn read_requires_dist(project_root: &Path) -> Result<uv_pypi_types::Requir
Ok(requires_dist)
}
/// Read an existing cached [`ResolutionMetadata`], if it exists.
async fn read_cached_metadata(
cache_entry: &CacheEntry,
) -> Result<Option<ResolutionMetadata>, Error> {
match fs::read(&cache_entry.path()).await {
Ok(cached) => Ok(Some(rmp_serde::from_slice(&cached)?)),
Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
Err(err) => Err(Error::CacheRead(err)),
/// Wheel metadata stored in the source distribution cache.
#[derive(Debug, Clone)]
struct CachedMetadata(ResolutionMetadata);
impl CachedMetadata {
/// Read an existing cached [`ResolutionMetadata`], if it exists.
async fn read(cache_entry: &CacheEntry) -> Result<Option<Self>, Error> {
match fs::read(&cache_entry.path()).await {
Ok(cached) => Ok(Some(Self(rmp_serde::from_slice(&cached)?))),
Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
Err(err) => Err(Error::CacheRead(err)),
}
}
/// Returns `true` if the metadata matches the given package name and version.
fn matches(&self, name: Option<&PackageName>, version: Option<&Version>) -> bool {
name.map_or(true, |name| self.0.name == *name)
&& version.map_or(true, |version| self.0.version == *version)
}
}
impl From<CachedMetadata> for ResolutionMetadata {
fn from(value: CachedMetadata) -> Self {
value.0
}
}