Improve PEP 691 compatibility (#428)

[PEP 691](https://peps.python.org/pep-0691/#project-detail) has slightly
different, more relaxed rules around file metadata. These changes are
now reflected in the `File` struct. This will make it easier to support
alternative indices.

I had expected that I would need to introduce a separate type for that, so I'm
happy it's just two more `Option`s and an alias.

Part of #412
This commit is contained in:
konsti 2023-11-16 19:03:44 +01:00 committed by GitHub
parent 3a4988f999
commit 751f7fa9c6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 53 additions and 27 deletions

1
Cargo.lock generated
View file

@ -2480,6 +2480,7 @@ dependencies = [
"puffin-git", "puffin-git",
"puffin-normalize", "puffin-normalize",
"pypi-types", "pypi-types",
"serde",
"serde_json", "serde_json",
"url", "url",
] ]

View file

@ -156,8 +156,8 @@ pub(crate) async fn sync_requirements(
continue; continue;
}; };
match &file.yanked { match &file.yanked {
Yanked::Bool(false) => {} None | Some(Yanked::Bool(false)) => {}
Yanked::Bool(true) => { Some(Yanked::Bool(true)) => {
writeln!( writeln!(
printer, printer,
"{}{} {dist} is yanked. Refresh your lockfile to pin an un-yanked version.", "{}{} {dist} is yanked. Refresh your lockfile to pin an un-yanked version.",
@ -165,7 +165,7 @@ pub(crate) async fn sync_requirements(
":".bold(), ":".bold(),
)?; )?;
} }
Yanked::Reason(reason) => { Some(Yanked::Reason(reason)) => {
writeln!( writeln!(
printer, printer,
"{}{} {dist} is yanked (reason: \"{reason}\"). Refresh your lockfile to pin an un-yanked version.", "{}{} {dist} is yanked (reason: \"{reason}\"). Refresh your lockfile to pin an un-yanked version.",

View file

@ -206,7 +206,10 @@ impl RegistryClient {
// If the metadata file is available at its own url (PEP 658), download it from there // If the metadata file is available at its own url (PEP 658), download it from there
let url = Url::parse(&file.url)?; let url = Url::parse(&file.url)?;
let filename = WheelFilename::from_str(&file.filename)?; let filename = WheelFilename::from_str(&file.filename)?;
if file.data_dist_info_metadata.is_available() { if file
.dist_info_metadata
.is_some_and(|dist_info_metadata| dist_info_metadata.is_available())
{
let url = Url::parse(&format!("{}.metadata", file.url))?; let url = Url::parse(&format!("{}.metadata", file.url))?;
let cache_dir = self.cache.join(WHEEL_METADATA_FROM_INDEX).join("pypi"); let cache_dir = self.cache.join(WHEEL_METADATA_FROM_INDEX).join("pypi");

View file

@ -18,5 +18,6 @@ pypi-types = { path = "../pypi-types" }
anyhow = { workspace = true } anyhow = { workspace = true }
fs-err = { workspace = true } fs-err = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true } serde_json = { workspace = true }
url = { workspace = true } url = { workspace = true }

View file

@ -263,7 +263,7 @@ impl RemoteSource for RegistryBuiltDist {
} }
fn size(&self) -> Option<usize> { fn size(&self) -> Option<usize> {
Some(self.file.size) self.file.size
} }
} }
@ -273,7 +273,7 @@ impl RemoteSource for RegistrySourceDist {
} }
fn size(&self) -> Option<usize> { fn size(&self) -> Option<usize> {
Some(self.file.size) self.file.size
} }
} }

View file

@ -116,9 +116,34 @@ async fn fetch(
let reader = client.stream_external(&url).await?; let reader = client.stream_external(&url).await?;
// If the file is greater than 5MB, write it to disk; otherwise, keep it in memory. // If the file is greater than 5MB, write it to disk; otherwise, keep it in memory.
let file_size = ByteSize::b(wheel.file.size as u64); let small_size = if let Some(size) = wheel.file.size {
if file_size >= ByteSize::mb(5) { let byte_size = ByteSize::b(size as u64);
debug!("Fetching disk-based wheel from registry: {dist} ({file_size})"); if byte_size < ByteSize::mb(5) {
Some(size)
} else {
None
}
} else {
None
};
if let Some(small_size) = small_size {
debug!(
"Fetching in-memory wheel from registry: {dist} ({})",
ByteSize::b(small_size as u64)
);
// Read into a buffer.
let mut buffer = Vec::with_capacity(small_size);
let mut reader = tokio::io::BufReader::new(reader.compat());
tokio::io::copy(&mut reader, &mut buffer).await?;
Ok(Download::Wheel(WheelDownload::InMemory(InMemoryWheel {
dist,
buffer,
})))
} else {
let size = small_size.map_or("unknown size".to_string(), |size| size.to_string());
debug!("Fetching disk-based wheel from registry: {dist} ({size})");
// Download the wheel to a temporary file. // Download the wheel to a temporary file.
let temp_dir = tempfile::tempdir_in(cache)?.into_path(); let temp_dir = tempfile::tempdir_in(cache)?.into_path();
@ -131,18 +156,6 @@ async fn fetch(
dist, dist,
path: wheel_file, path: wheel_file,
}))) })))
} else {
debug!("Fetching in-memory wheel from registry: {dist} ({file_size})");
// Read into a buffer.
let mut buffer = Vec::with_capacity(wheel.file.size);
let mut reader = tokio::io::BufReader::new(reader.compat());
tokio::io::copy(&mut reader, &mut buffer).await?;
Ok(Download::Wheel(WheelDownload::InMemory(InMemoryWheel {
dist,
buffer,
})))
} }
} }

View file

@ -6,7 +6,7 @@ use distribution_filename::{SourceDistFilename, WheelFilename};
use pep440_rs::Version; use pep440_rs::Version;
use platform_tags::{TagPriority, Tags}; use platform_tags::{TagPriority, Tags};
use puffin_normalize::PackageName; use puffin_normalize::PackageName;
use pypi_types::SimpleJson; use pypi_types::{SimpleJson, Yanked};
use crate::file::{DistFile, SdistFile, WheelFile}; use crate::file::{DistFile, SdistFile, WheelFile};
use crate::pubgrub::PubGrubVersion; use crate::pubgrub::PubGrubVersion;
@ -45,7 +45,7 @@ impl VersionMap {
// When resolving, exclude yanked files. // When resolving, exclude yanked files.
// TODO(konstin): When we fail resolving due to a dependency locked to yanked version, // TODO(konstin): When we fail resolving due to a dependency locked to yanked version,
// we should tell the user. // we should tell the user.
if file.yanked.is_yanked() { if file.yanked.as_ref().is_some_and(Yanked::is_yanked) {
continue; continue;
} }

View file

@ -12,21 +12,25 @@ pub struct SimpleJson {
pub versions: Vec<String>, pub versions: Vec<String>,
} }
/// A single (remote) file belonging to a package, generally either a wheel or a source dist.
///
/// <https://peps.python.org/pep-0691/#project-detail>
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")] #[serde(rename_all = "kebab-case")]
pub struct File { pub struct File {
pub core_metadata: Metadata, // Not PEP 691 compliant alias used by pypi
pub data_dist_info_metadata: Metadata, #[serde(alias = "data_dist_info_metadata")]
pub dist_info_metadata: Option<Metadata>,
pub filename: String, pub filename: String,
pub hashes: Hashes, pub hashes: Hashes,
/// Note: Deserialized with [`LenientVersionSpecifiers`] since there are a number of invalid /// Note: Deserialized with [`LenientVersionSpecifiers`] since there are a number of invalid
/// versions on pypi /// versions on pypi
#[serde(deserialize_with = "deserialize_version_specifiers_lenient")] #[serde(deserialize_with = "deserialize_version_specifiers_lenient")]
pub requires_python: Option<VersionSpecifiers>, pub requires_python: Option<VersionSpecifiers>,
pub size: usize, pub size: Option<usize>,
pub upload_time: String, pub upload_time: String,
pub url: String, pub url: String,
pub yanked: Yanked, pub yanked: Option<Yanked>,
} }
fn deserialize_version_specifiers_lenient<'de, D>( fn deserialize_version_specifiers_lenient<'de, D>(
@ -75,6 +79,10 @@ impl Yanked {
} }
} }
/// A dictionary mapping a hash name to a hex encoded digest of the file.
///
/// PEP 691 says multiple hashes can be included and the interpretation is left to the client; we
/// only support SHA-256 at the moment.
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Hashes { pub struct Hashes {
pub sha256: String, pub sha256: String,