mirror of
https://github.com/astral-sh/uv.git
synced 2025-11-01 20:31:12 +00:00
Use absolute urls in distribution_type::File (#917)
Previously, the url on file could either be a relative or an absolute url, depending on the index, and we would finalize it lazily. Now we finalize the url when converting `pypi_types::File` to `distribution_types::File`. This change is required to make the hashes on `File` optional (https://github.com/astral-sh/puffin/pull/910), which are currently the only unique field usable for caching.
This commit is contained in:
parent
6e18e56789
commit
a99e5e00f2
4 changed files with 41 additions and 41 deletions
|
|
@ -1,8 +1,19 @@
|
||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
use thiserror::Error;
|
||||||
|
use url::Url;
|
||||||
|
|
||||||
use pep440_rs::{VersionSpecifiers, VersionSpecifiersParseError};
|
use pep440_rs::{VersionSpecifiers, VersionSpecifiersParseError};
|
||||||
use pypi_types::{DistInfoMetadata, Hashes, Yanked};
|
use pypi_types::{BaseUrl, DistInfoMetadata, Hashes, Yanked};
|
||||||
|
|
||||||
|
/// Error converting [`pypi_types::File`] to [`distribution_type::File`].
|
||||||
|
#[derive(Debug, Error)]
|
||||||
|
pub enum FileConversionError {
|
||||||
|
#[error("Invalid 'requires-python' value")]
|
||||||
|
VersionSpecifiersParseError(#[from] VersionSpecifiersParseError),
|
||||||
|
#[error("Failed to parse URL: {0}")]
|
||||||
|
Url(String, #[source] url::ParseError),
|
||||||
|
}
|
||||||
|
|
||||||
/// Internal analog to [`pypi_types::File`].
|
/// Internal analog to [`pypi_types::File`].
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
|
@ -13,15 +24,13 @@ pub struct File {
|
||||||
pub requires_python: Option<VersionSpecifiers>,
|
pub requires_python: Option<VersionSpecifiers>,
|
||||||
pub size: Option<u64>,
|
pub size: Option<u64>,
|
||||||
pub upload_time: Option<DateTime<Utc>>,
|
pub upload_time: Option<DateTime<Utc>>,
|
||||||
pub url: String,
|
pub url: Url,
|
||||||
pub yanked: Option<Yanked>,
|
pub yanked: Option<Yanked>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TryFrom<pypi_types::File> for File {
|
impl File {
|
||||||
type Error = VersionSpecifiersParseError;
|
|
||||||
|
|
||||||
/// `TryFrom` instead of `From` to filter out files with invalid requires python version specifiers
|
/// `TryFrom` instead of `From` to filter out files with invalid requires python version specifiers
|
||||||
fn try_from(file: pypi_types::File) -> Result<Self, Self::Error> {
|
pub fn try_from(file: pypi_types::File, base: &BaseUrl) -> Result<Self, FileConversionError> {
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
dist_info_metadata: file.dist_info_metadata,
|
dist_info_metadata: file.dist_info_metadata,
|
||||||
filename: file.filename,
|
filename: file.filename,
|
||||||
|
|
@ -29,7 +38,9 @@ impl TryFrom<pypi_types::File> for File {
|
||||||
requires_python: file.requires_python.transpose()?,
|
requires_python: file.requires_python.transpose()?,
|
||||||
size: file.size,
|
size: file.size,
|
||||||
upload_time: file.upload_time,
|
upload_time: file.upload_time,
|
||||||
url: file.url,
|
url: base
|
||||||
|
.join_relative(&file.url)
|
||||||
|
.map_err(|err| FileConversionError::Url(file.url.clone(), err))?,
|
||||||
yanked: file.yanked,
|
yanked: file.yanked,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -171,15 +171,16 @@ impl RegistryClient {
|
||||||
let bytes = response.bytes().await?;
|
let bytes = response.bytes().await?;
|
||||||
let data: SimpleJson = serde_json::from_slice(bytes.as_ref())
|
let data: SimpleJson = serde_json::from_slice(bytes.as_ref())
|
||||||
.map_err(|err| Error::from_json_err(err, url.clone()))?;
|
.map_err(|err| Error::from_json_err(err, url.clone()))?;
|
||||||
let metadata = SimpleMetadata::from_files(data.files, package_name);
|
|
||||||
let base = BaseUrl::from(url.clone());
|
let base = BaseUrl::from(url.clone());
|
||||||
|
let metadata =
|
||||||
|
SimpleMetadata::from_files(data.files, package_name, &base);
|
||||||
Ok((base, metadata))
|
Ok((base, metadata))
|
||||||
}
|
}
|
||||||
MediaType::Html => {
|
MediaType::Html => {
|
||||||
let text = response.text().await?;
|
let text = response.text().await?;
|
||||||
let SimpleHtml { base, files } = SimpleHtml::parse(&text, &url)
|
let SimpleHtml { base, files } = SimpleHtml::parse(&text, &url)
|
||||||
.map_err(|err| Error::from_html_err(err, url.clone()))?;
|
.map_err(|err| Error::from_html_err(err, url.clone()))?;
|
||||||
let metadata = SimpleMetadata::from_files(files, package_name);
|
let metadata = SimpleMetadata::from_files(files, package_name, &base);
|
||||||
Ok((base, metadata))
|
Ok((base, metadata))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -217,7 +218,7 @@ impl RegistryClient {
|
||||||
pub async fn wheel_metadata(&self, built_dist: &BuiltDist) -> Result<Metadata21, Error> {
|
pub async fn wheel_metadata(&self, built_dist: &BuiltDist) -> Result<Metadata21, Error> {
|
||||||
let metadata = match &built_dist {
|
let metadata = match &built_dist {
|
||||||
BuiltDist::Registry(wheel) => {
|
BuiltDist::Registry(wheel) => {
|
||||||
self.wheel_metadata_registry(&wheel.index, &wheel.base, &wheel.file)
|
self.wheel_metadata_registry(&wheel.index, &wheel.file)
|
||||||
.await?
|
.await?
|
||||||
}
|
}
|
||||||
BuiltDist::DirectUrl(wheel) => {
|
BuiltDist::DirectUrl(wheel) => {
|
||||||
|
|
@ -248,7 +249,6 @@ impl RegistryClient {
|
||||||
async fn wheel_metadata_registry(
|
async fn wheel_metadata_registry(
|
||||||
&self,
|
&self,
|
||||||
index: &IndexUrl,
|
index: &IndexUrl,
|
||||||
base: &BaseUrl,
|
|
||||||
file: &File,
|
file: &File,
|
||||||
) -> Result<Metadata21, Error> {
|
) -> Result<Metadata21, Error> {
|
||||||
if self.index_urls.no_index() {
|
if self.index_urls.no_index() {
|
||||||
|
|
@ -256,14 +256,13 @@ impl RegistryClient {
|
||||||
}
|
}
|
||||||
|
|
||||||
// If the metadata file is available at its own url (PEP 658), download it from there.
|
// If the metadata file is available at its own url (PEP 658), download it from there.
|
||||||
let url = base.join_relative(&file.url)?;
|
|
||||||
let filename = WheelFilename::from_str(&file.filename)?;
|
let filename = WheelFilename::from_str(&file.filename)?;
|
||||||
if file
|
if file
|
||||||
.dist_info_metadata
|
.dist_info_metadata
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.is_some_and(pypi_types::DistInfoMetadata::is_available)
|
.is_some_and(pypi_types::DistInfoMetadata::is_available)
|
||||||
{
|
{
|
||||||
let url = Url::parse(&format!("{url}.metadata"))?;
|
let url = Url::parse(&format!("{}.metadata", file.url))?;
|
||||||
|
|
||||||
let cache_entry = self.cache.entry(
|
let cache_entry = self.cache.entry(
|
||||||
CacheBucket::Wheels,
|
CacheBucket::Wheels,
|
||||||
|
|
@ -287,7 +286,7 @@ impl RegistryClient {
|
||||||
// If we lack PEP 658 support, try using HTTP range requests to read only the
|
// If we lack PEP 658 support, try using HTTP range requests to read only the
|
||||||
// `.dist-info/METADATA` file from the zip, and if that also fails, download the whole wheel
|
// `.dist-info/METADATA` file from the zip, and if that also fails, download the whole wheel
|
||||||
// into the cache and read from there
|
// into the cache and read from there
|
||||||
self.wheel_metadata_no_pep658(&filename, &url, WheelCache::Index(index))
|
self.wheel_metadata_no_pep658(&filename, &file.url, WheelCache::Index(index))
|
||||||
.await
|
.await
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -452,7 +451,11 @@ impl SimpleMetadata {
|
||||||
self.0.iter()
|
self.0.iter()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn from_files(files: Vec<pypi_types::File>, package_name: &PackageName) -> Self {
|
fn from_files(
|
||||||
|
files: Vec<pypi_types::File>,
|
||||||
|
package_name: &PackageName,
|
||||||
|
base: &BaseUrl,
|
||||||
|
) -> Self {
|
||||||
let mut metadata = Self::default();
|
let mut metadata = Self::default();
|
||||||
|
|
||||||
// Group the distributions by version and kind
|
// Group the distributions by version and kind
|
||||||
|
|
@ -465,7 +468,7 @@ impl SimpleMetadata {
|
||||||
DistFilename::WheelFilename(ref inner) => &inner.version,
|
DistFilename::WheelFilename(ref inner) => &inner.version,
|
||||||
};
|
};
|
||||||
|
|
||||||
let file = match file.try_into() {
|
let file = match File::try_from(file, base) {
|
||||||
Ok(file) => file,
|
Ok(file) => file,
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
// Ignore files with unparseable version specifiers.
|
// Ignore files with unparseable version specifiers.
|
||||||
|
|
@ -526,9 +529,10 @@ impl MediaType {
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
|
use url::Url;
|
||||||
|
|
||||||
use puffin_normalize::PackageName;
|
use puffin_normalize::PackageName;
|
||||||
use pypi_types::SimpleJson;
|
use pypi_types::{BaseUrl, SimpleJson};
|
||||||
|
|
||||||
use crate::SimpleMetadata;
|
use crate::SimpleMetadata;
|
||||||
|
|
||||||
|
|
@ -568,8 +572,12 @@ mod tests {
|
||||||
}
|
}
|
||||||
"#;
|
"#;
|
||||||
let data: SimpleJson = serde_json::from_str(response).unwrap();
|
let data: SimpleJson = serde_json::from_str(response).unwrap();
|
||||||
let simple_metadata =
|
let base = BaseUrl::from(Url::from_str("https://pypi.org/simple/pyflyby/").unwrap());
|
||||||
SimpleMetadata::from_files(data.files, &PackageName::from_str("pyflyby").unwrap());
|
let simple_metadata = SimpleMetadata::from_files(
|
||||||
|
data.files,
|
||||||
|
&PackageName::from_str("pyflyby").unwrap(),
|
||||||
|
&base,
|
||||||
|
);
|
||||||
let versions: Vec<String> = simple_metadata
|
let versions: Vec<String> = simple_metadata
|
||||||
.iter()
|
.iter()
|
||||||
.map(|(version, _)| version.to_string())
|
.map(|(version, _)| version.to_string())
|
||||||
|
|
|
||||||
|
|
@ -108,11 +108,6 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
|
||||||
) -> Result<LocalWheel, DistributionDatabaseError> {
|
) -> Result<LocalWheel, DistributionDatabaseError> {
|
||||||
match &dist {
|
match &dist {
|
||||||
Dist::Built(BuiltDist::Registry(wheel)) => {
|
Dist::Built(BuiltDist::Registry(wheel)) => {
|
||||||
let url = wheel
|
|
||||||
.base
|
|
||||||
.join_relative(&wheel.file.url)
|
|
||||||
.map_err(|err| DistributionDatabaseError::Url(wheel.file.url.clone(), err))?;
|
|
||||||
|
|
||||||
// Download and unzip on the same tokio task.
|
// Download and unzip on the same tokio task.
|
||||||
//
|
//
|
||||||
// In all wheels we've seen so far, unzipping while downloading is
|
// In all wheels we've seen so far, unzipping while downloading is
|
||||||
|
|
@ -128,7 +123,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
|
||||||
// for downloading and unzipping (with a buffer in between) and switch
|
// for downloading and unzipping (with a buffer in between) and switch
|
||||||
// to rayon if this buffer grows large by the time the file is fully
|
// to rayon if this buffer grows large by the time the file is fully
|
||||||
// downloaded.
|
// downloaded.
|
||||||
let reader = self.client.stream_external(&url).await?;
|
let reader = self.client.stream_external(&wheel.file.url).await?;
|
||||||
|
|
||||||
// Download and unzip the wheel to a temporary directory.
|
// Download and unzip the wheel to a temporary directory.
|
||||||
let temp_dir = tempfile::tempdir_in(self.cache.root())?;
|
let temp_dir = tempfile::tempdir_in(self.cache.root())?;
|
||||||
|
|
|
||||||
|
|
@ -98,13 +98,6 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
||||||
.await?
|
.await?
|
||||||
}
|
}
|
||||||
SourceDist::Registry(registry_source_dist) => {
|
SourceDist::Registry(registry_source_dist) => {
|
||||||
let url = registry_source_dist
|
|
||||||
.base
|
|
||||||
.join_relative(®istry_source_dist.file.url)
|
|
||||||
.map_err(|err| {
|
|
||||||
SourceDistError::UrlParse(registry_source_dist.file.url.clone(), err)
|
|
||||||
})?;
|
|
||||||
|
|
||||||
// For registry source distributions, shard by package, then by SHA.
|
// For registry source distributions, shard by package, then by SHA.
|
||||||
// Ex) `pypi/requests/a673187abc19fe6c`
|
// Ex) `pypi/requests/a673187abc19fe6c`
|
||||||
let cache_shard = self.build_context.cache().shard(
|
let cache_shard = self.build_context.cache().shard(
|
||||||
|
|
@ -117,7 +110,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
||||||
self.url(
|
self.url(
|
||||||
source_dist,
|
source_dist,
|
||||||
®istry_source_dist.file.filename,
|
®istry_source_dist.file.filename,
|
||||||
&url,
|
®istry_source_dist.file.url,
|
||||||
&cache_shard,
|
&cache_shard,
|
||||||
None,
|
None,
|
||||||
)
|
)
|
||||||
|
|
@ -161,13 +154,6 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
||||||
.await?
|
.await?
|
||||||
}
|
}
|
||||||
SourceDist::Registry(registry_source_dist) => {
|
SourceDist::Registry(registry_source_dist) => {
|
||||||
let url = registry_source_dist
|
|
||||||
.base
|
|
||||||
.join_relative(®istry_source_dist.file.url)
|
|
||||||
.map_err(|err| {
|
|
||||||
SourceDistError::UrlParse(registry_source_dist.file.url.clone(), err)
|
|
||||||
})?;
|
|
||||||
|
|
||||||
// For registry source distributions, shard by package, then by SHA.
|
// For registry source distributions, shard by package, then by SHA.
|
||||||
// Ex) `pypi/requests/a673187abc19fe6c`
|
// Ex) `pypi/requests/a673187abc19fe6c`
|
||||||
let cache_shard = self.build_context.cache().shard(
|
let cache_shard = self.build_context.cache().shard(
|
||||||
|
|
@ -180,7 +166,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
||||||
self.url_metadata(
|
self.url_metadata(
|
||||||
source_dist,
|
source_dist,
|
||||||
®istry_source_dist.file.filename,
|
®istry_source_dist.file.filename,
|
||||||
&url,
|
®istry_source_dist.file.url,
|
||||||
&cache_shard,
|
&cache_shard,
|
||||||
None,
|
None,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue