Use absolute urls in distribution_type::File (#917)
Previously, the URL on `File` could be either relative or absolute, depending on the index, and we would finalize it lazily. Now we finalize the URL when converting `pypi_types::File` to `distribution_types::File`. This change is required to make the hashes on `File` optional (https://github.com/astral-sh/puffin/pull/910), as they are currently the only unique field usable for caching.
parent: 6e18e56789
commit: a99e5e00f2
4 changed files with 41 additions and 41 deletions
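
The change boils down to resolving each file URL eagerly, at conversion time, against the URL of the index page it came from. A minimal sketch of that resolution, assuming `BaseUrl::join_relative` follows ordinary RFC 3986 reference resolution (the behavior of the `url` crate's `Url::join`); the function name and example URLs below are illustrative, not the crate's API:

use url::Url;

// Sketch: finalize a file URL from a simple-index page eagerly, instead of
// carrying a maybe-relative string around and resolving it lazily later.
fn finalize_file_url(base: &Url, file_url: &str) -> Result<Url, url::ParseError> {
    // `Url::join` leaves an already-absolute reference untouched and resolves
    // a relative one against `base`.
    base.join(file_url)
}

fn main() -> Result<(), url::ParseError> {
    let base = Url::parse("https://example.org/simple/pyflyby/")?;

    // Relative form, as some indexes serve it (hypothetical path):
    let rel = finalize_file_url(&base, "../../packages/pyflyby-1.7.8.tar.gz")?;
    assert_eq!(rel.as_str(), "https://example.org/packages/pyflyby-1.7.8.tar.gz");

    // Absolute form: passes through unchanged.
    let abs = finalize_file_url(&base, "https://files.example.org/pyflyby-1.7.8.tar.gz")?;
    assert_eq!(abs.as_str(), "https://files.example.org/pyflyby-1.7.8.tar.gz");
    Ok(())
}

Either way, both index styles end up as an absolute `Url` on `File`, which is what the diff below stores.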
@@ -1,8 +1,19 @@
 use chrono::{DateTime, Utc};
 use serde::{Deserialize, Serialize};
+use thiserror::Error;
+use url::Url;

 use pep440_rs::{VersionSpecifiers, VersionSpecifiersParseError};
-use pypi_types::{DistInfoMetadata, Hashes, Yanked};
+use pypi_types::{BaseUrl, DistInfoMetadata, Hashes, Yanked};

+/// Error converting [`pypi_types::File`] to [`distribution_type::File`].
+#[derive(Debug, Error)]
+pub enum FileConversionError {
+    #[error("Invalid 'requires-python' value")]
+    VersionSpecifiersParseError(#[from] VersionSpecifiersParseError),
+    #[error("Failed to parse URL: {0}")]
+    Url(String, #[source] url::ParseError),
+}
+
 /// Internal analog to [`pypi_types::File`].
 #[derive(Debug, Clone, Serialize, Deserialize)]

@@ -13,15 +24,13 @@ pub struct File {
     pub requires_python: Option<VersionSpecifiers>,
     pub size: Option<u64>,
     pub upload_time: Option<DateTime<Utc>>,
-    pub url: String,
+    pub url: Url,
     pub yanked: Option<Yanked>,
 }

-impl TryFrom<pypi_types::File> for File {
-    type Error = VersionSpecifiersParseError;
-
+impl File {
     /// `TryFrom` instead of `From` to filter out files with invalid requires python version specifiers
-    fn try_from(file: pypi_types::File) -> Result<Self, Self::Error> {
+    pub fn try_from(file: pypi_types::File, base: &BaseUrl) -> Result<Self, FileConversionError> {
         Ok(Self {
             dist_info_metadata: file.dist_info_metadata,
             filename: file.filename,

@@ -29,7 +38,9 @@ impl TryFrom<pypi_types::File> for File {
             requires_python: file.requires_python.transpose()?,
             size: file.size,
             upload_time: file.upload_time,
-            url: file.url,
+            url: base
+                .join_relative(&file.url)
+                .map_err(|err| FileConversionError::Url(file.url.clone(), err))?,
             yanked: file.yanked,
         })
     }
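
One detail in the conversion above: `file.requires_python.transpose()?` only fails when a `requires-python` value is present but unparseable; a missing value simply stays `None`. A self-contained sketch of that `Option::transpose` pattern, with `u32` parsing standing in for version-specifier parsing:

use std::num::ParseIntError;

fn main() {
    // Absent entirely: stays `None`, no error.
    let missing: Option<Result<u32, ParseIntError>> = None;
    assert_eq!(missing.transpose().unwrap(), None);

    // Present and parseable: becomes `Some(value)`.
    let good: Option<Result<u32, ParseIntError>> = Some("38".parse());
    assert_eq!(good.transpose().unwrap(), Some(38));

    // Present but unparseable: `transpose()?` would bail out here, which is
    // what lets callers skip files with bad `requires-python` specifiers.
    let bad: Option<Result<u32, ParseIntError>> = Some("not a number".parse());
    assert!(bad.transpose().is_err());
}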
@@ -171,15 +171,16 @@ impl RegistryClient {
                 let bytes = response.bytes().await?;
                 let data: SimpleJson = serde_json::from_slice(bytes.as_ref())
                     .map_err(|err| Error::from_json_err(err, url.clone()))?;
-                let metadata = SimpleMetadata::from_files(data.files, package_name);
+                let base = BaseUrl::from(url.clone());
+                let metadata =
+                    SimpleMetadata::from_files(data.files, package_name, &base);
                 Ok((base, metadata))
             }
             MediaType::Html => {
                 let text = response.text().await?;
                 let SimpleHtml { base, files } = SimpleHtml::parse(&text, &url)
                     .map_err(|err| Error::from_html_err(err, url.clone()))?;
-                let metadata = SimpleMetadata::from_files(files, package_name);
+                let metadata = SimpleMetadata::from_files(files, package_name, &base);
                 Ok((base, metadata))
             }
         }

@@ -217,7 +218,7 @@ impl RegistryClient {
     pub async fn wheel_metadata(&self, built_dist: &BuiltDist) -> Result<Metadata21, Error> {
         let metadata = match &built_dist {
             BuiltDist::Registry(wheel) => {
-                self.wheel_metadata_registry(&wheel.index, &wheel.base, &wheel.file)
+                self.wheel_metadata_registry(&wheel.index, &wheel.file)
                     .await?
             }
             BuiltDist::DirectUrl(wheel) => {

@@ -248,7 +249,6 @@ impl RegistryClient {
     async fn wheel_metadata_registry(
         &self,
         index: &IndexUrl,
-        base: &BaseUrl,
         file: &File,
     ) -> Result<Metadata21, Error> {
         if self.index_urls.no_index() {

@@ -256,14 +256,13 @@ impl RegistryClient {
         }

         // If the metadata file is available at its own url (PEP 658), download it from there.
-        let url = base.join_relative(&file.url)?;
         let filename = WheelFilename::from_str(&file.filename)?;
         if file
             .dist_info_metadata
             .as_ref()
             .is_some_and(pypi_types::DistInfoMetadata::is_available)
         {
-            let url = Url::parse(&format!("{url}.metadata"))?;
+            let url = Url::parse(&format!("{}.metadata", file.url))?;

             let cache_entry = self.cache.entry(
                 CacheBucket::Wheels,

@@ -287,7 +286,7 @@ impl RegistryClient {
             // If we lack PEP 658 support, try using HTTP range requests to read only the
             // `.dist-info/METADATA` file from the zip, and if that also fails, download the whole wheel
             // into the cache and read from there
-            self.wheel_metadata_no_pep658(&filename, &url, WheelCache::Index(index))
+            self.wheel_metadata_no_pep658(&filename, &file.url, WheelCache::Index(index))
                 .await
         }
     }
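
With `file.url` now absolute, the registry client can derive the PEP 658 metadata URL directly from the wheel URL rather than joining against a base first. A small, self-contained sketch of that derivation (the wheel URL below is made up for illustration):

use url::Url;

// PEP 658: a wheel's metadata is served alongside the wheel itself, at the
// wheel's URL with ".metadata" appended.
fn pep658_metadata_url(wheel_url: &Url) -> Result<Url, url::ParseError> {
    Url::parse(&format!("{wheel_url}.metadata"))
}

fn main() -> Result<(), url::ParseError> {
    let wheel = Url::parse("https://files.example.org/pyflyby-1.7.8-py3-none-any.whl")?;
    assert_eq!(
        pep658_metadata_url(&wheel)?.as_str(),
        "https://files.example.org/pyflyby-1.7.8-py3-none-any.whl.metadata"
    );
    Ok(())
}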
@@ -452,7 +451,11 @@ impl SimpleMetadata {
         self.0.iter()
     }

-    fn from_files(files: Vec<pypi_types::File>, package_name: &PackageName) -> Self {
+    fn from_files(
+        files: Vec<pypi_types::File>,
+        package_name: &PackageName,
+        base: &BaseUrl,
+    ) -> Self {
         let mut metadata = Self::default();

         // Group the distributions by version and kind

@@ -465,7 +468,7 @@ impl SimpleMetadata {
                 DistFilename::WheelFilename(ref inner) => &inner.version,
             };

-            let file = match file.try_into() {
+            let file = match File::try_from(file, base) {
                 Ok(file) => file,
                 Err(err) => {
                     // Ignore files with unparseable version specifiers.

@@ -526,9 +529,10 @@ impl MediaType {
 #[cfg(test)]
 mod tests {
     use std::str::FromStr;
+    use url::Url;

     use puffin_normalize::PackageName;
-    use pypi_types::SimpleJson;
+    use pypi_types::{BaseUrl, SimpleJson};

     use crate::SimpleMetadata;

@@ -568,8 +572,12 @@ mod tests {
         }
         "#;
         let data: SimpleJson = serde_json::from_str(response).unwrap();
-        let simple_metadata =
-            SimpleMetadata::from_files(data.files, &PackageName::from_str("pyflyby").unwrap());
+        let base = BaseUrl::from(Url::from_str("https://pypi.org/simple/pyflyby/").unwrap());
+        let simple_metadata = SimpleMetadata::from_files(
+            data.files,
+            &PackageName::from_str("pyflyby").unwrap(),
+            &base,
+        );
         let versions: Vec<String> = simple_metadata
             .iter()
             .map(|(version, _)| version.to_string())
@@ -108,11 +108,6 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
     ) -> Result<LocalWheel, DistributionDatabaseError> {
         match &dist {
             Dist::Built(BuiltDist::Registry(wheel)) => {
-                let url = wheel
-                    .base
-                    .join_relative(&wheel.file.url)
-                    .map_err(|err| DistributionDatabaseError::Url(wheel.file.url.clone(), err))?;
-
                 // Download and unzip on the same tokio task.
                 //
                 // In all wheels we've seen so far, unzipping while downloading is

@@ -128,7 +123,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
                 // for downloading and unzipping (with a buffer in between) and switch
                 // to rayon if this buffer grows large by the time the file is fully
                 // downloaded.
-                let reader = self.client.stream_external(&url).await?;
+                let reader = self.client.stream_external(&wheel.file.url).await?;

                 // Download and unzip the wheel to a temporary directory.
                 let temp_dir = tempfile::tempdir_in(self.cache.root())?;
@@ -98,13 +98,6 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
                 .await?
             }
             SourceDist::Registry(registry_source_dist) => {
-                let url = registry_source_dist
-                    .base
-                    .join_relative(&registry_source_dist.file.url)
-                    .map_err(|err| {
-                        SourceDistError::UrlParse(registry_source_dist.file.url.clone(), err)
-                    })?;
-
                 // For registry source distributions, shard by package, then by SHA.
                 // Ex) `pypi/requests/a673187abc19fe6c`
                 let cache_shard = self.build_context.cache().shard(

@@ -117,7 +110,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
                 self.url(
                     source_dist,
                     &registry_source_dist.file.filename,
-                    &url,
+                    &registry_source_dist.file.url,
                     &cache_shard,
                     None,
                 )

@@ -161,13 +154,6 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
                 .await?
             }
             SourceDist::Registry(registry_source_dist) => {
-                let url = registry_source_dist
-                    .base
-                    .join_relative(&registry_source_dist.file.url)
-                    .map_err(|err| {
-                        SourceDistError::UrlParse(registry_source_dist.file.url.clone(), err)
-                    })?;
-
                 // For registry source distributions, shard by package, then by SHA.
                 // Ex) `pypi/requests/a673187abc19fe6c`
                 let cache_shard = self.build_context.cache().shard(

@@ -180,7 +166,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
                 self.url_metadata(
                     source_dist,
                     &registry_source_dist.file.filename,
-                    &url,
+                    &registry_source_dist.file.url,
                     &cache_shard,
                     None,
                 )