Avoid replicating core-metadata field on File struct (#12159)

## Summary

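Resolves a long-standing TODO: we don't need to store three copies of the
core-metadata value on `File` just to ensure that Serde considers all three
JSON keys (`core-metadata`, `dist-info-metadata`, and
`data-dist-info-metadata`). A hand-written `Deserialize` implementation now
folds the three keys into a single `core_metadata` field.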
This commit is contained in:
Charlie Marsh 2025-03-14 07:03:09 -07:00 committed by GitHub
parent c806a627f3
commit d2b9ffdc9e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 676 additions and 704 deletions

View file

@ -213,8 +213,6 @@ impl SimpleHtml {
Ok(Some(File {
core_metadata,
dist_info_metadata: None,
data_dist_info_metadata: None,
yanked,
requires_python,
hashes,
@ -299,8 +297,6 @@ mod tests {
files: [
File {
core_metadata: None,
dist_info_metadata: None,
data_dist_info_metadata: None,
filename: "Jinja2-3.1.2-py3-none-any.whl",
hashes: Hashes {
md5: None,
@ -357,8 +353,6 @@ mod tests {
files: [
File {
core_metadata: None,
dist_info_metadata: None,
data_dist_info_metadata: None,
filename: "Jinja2-3.1.2-py3-none-any.whl",
hashes: Hashes {
md5: Some(
@ -418,8 +412,6 @@ mod tests {
files: [
File {
core_metadata: None,
dist_info_metadata: None,
data_dist_info_metadata: None,
filename: "Jinja2-3.1.2-py3-none-any.whl",
hashes: Hashes {
md5: None,
@ -476,8 +468,6 @@ mod tests {
files: [
File {
core_metadata: None,
dist_info_metadata: None,
data_dist_info_metadata: None,
filename: "Jinja2-3.1.2+233fca715f49-py3-none-any.whl",
hashes: Hashes {
md5: None,
@ -534,8 +524,6 @@ mod tests {
files: [
File {
core_metadata: None,
dist_info_metadata: None,
data_dist_info_metadata: None,
filename: "Jinja2-3.1.2-py3-none-any.whl",
hashes: Hashes {
md5: None,
@ -592,8 +580,6 @@ mod tests {
files: [
File {
core_metadata: None,
dist_info_metadata: None,
data_dist_info_metadata: None,
filename: "torchtext-0.17.0+cpu-cp39-cp39-win_amd64.whl",
hashes: Hashes {
md5: None,
@ -648,8 +634,6 @@ mod tests {
files: [
File {
core_metadata: None,
dist_info_metadata: None,
data_dist_info_metadata: None,
filename: "Jinja2-3.1.2-py3-none-any.whl",
hashes: Hashes {
md5: None,
@ -780,8 +764,6 @@ mod tests {
files: [
File {
core_metadata: None,
dist_info_metadata: None,
data_dist_info_metadata: None,
filename: "Jinja2-3.1.2-py3-none-any.whl",
hashes: Hashes {
md5: None,
@ -836,8 +818,6 @@ mod tests {
files: [
File {
core_metadata: None,
dist_info_metadata: None,
data_dist_info_metadata: None,
filename: "Jinja2-3.1.2-py3-none-any.whl",
hashes: Hashes {
md5: None,
@ -893,8 +873,6 @@ mod tests {
files: [
File {
core_metadata: None,
dist_info_metadata: None,
data_dist_info_metadata: None,
filename: "Jinja2-3.1.2-py3-none-any.whl",
hashes: Hashes {
md5: None,
@ -951,8 +929,6 @@ mod tests {
files: [
File {
core_metadata: None,
dist_info_metadata: None,
data_dist_info_metadata: None,
filename: "Jinja2-3.1.2-py3-none-any.whl",
hashes: Hashes {
md5: None,
@ -1026,8 +1002,6 @@ mod tests {
files: [
File {
core_metadata: None,
dist_info_metadata: None,
data_dist_info_metadata: None,
filename: "jaxlib-0.1.52+cuda100-cp36-none-manylinux2010_x86_64.whl",
hashes: Hashes {
md5: None,
@ -1043,8 +1017,6 @@ mod tests {
},
File {
core_metadata: None,
dist_info_metadata: None,
data_dist_info_metadata: None,
filename: "jaxlib-0.1.52+cuda100-cp37-none-manylinux2010_x86_64.whl",
hashes: Hashes {
md5: None,
@ -1110,8 +1082,6 @@ mod tests {
files: [
File {
core_metadata: None,
dist_info_metadata: None,
data_dist_info_metadata: None,
filename: "Flask-0.1.tar.gz",
hashes: Hashes {
md5: None,
@ -1129,8 +1099,6 @@ mod tests {
},
File {
core_metadata: None,
dist_info_metadata: None,
data_dist_info_metadata: None,
filename: "Flask-0.10.1.tar.gz",
hashes: Hashes {
md5: None,
@ -1148,8 +1116,6 @@ mod tests {
},
File {
core_metadata: None,
dist_info_metadata: None,
data_dist_info_metadata: None,
filename: "flask-3.0.1.tar.gz",
hashes: Hashes {
md5: None,
@ -1216,8 +1182,6 @@ mod tests {
files: [
File {
core_metadata: None,
dist_info_metadata: None,
data_dist_info_metadata: None,
filename: "Jinja2-3.1.2-py3-none-any.whl",
hashes: Hashes {
md5: None,
@ -1294,8 +1258,6 @@ mod tests {
true,
),
),
dist_info_metadata: None,
data_dist_info_metadata: None,
filename: "Jinja2-3.1.2-py3-none-any.whl",
hashes: Hashes {
md5: None,
@ -1315,8 +1277,6 @@ mod tests {
true,
),
),
dist_info_metadata: None,
data_dist_info_metadata: None,
filename: "Jinja2-3.1.3-py3-none-any.whl",
hashes: Hashes {
md5: None,
@ -1336,8 +1296,6 @@ mod tests {
false,
),
),
dist_info_metadata: None,
data_dist_info_metadata: None,
filename: "Jinja2-3.1.4-py3-none-any.whl",
hashes: Hashes {
md5: None,
@ -1357,8 +1315,6 @@ mod tests {
false,
),
),
dist_info_metadata: None,
data_dist_info_metadata: None,
filename: "Jinja2-3.1.5-py3-none-any.whl",
hashes: Hashes {
md5: None,
@ -1378,8 +1334,6 @@ mod tests {
true,
),
),
dist_info_metadata: None,
data_dist_info_metadata: None,
filename: "Jinja2-3.1.6-py3-none-any.whl",
hashes: Hashes {
md5: None,

View file

@ -47,8 +47,6 @@ impl File {
dist_info_metadata: file
.core_metadata
.as_ref()
.or(file.dist_info_metadata.as_ref())
.or(file.data_dist_info_metadata.as_ref())
.is_some_and(CoreMetadata::is_available),
filename: file.filename,
hashes: HashDigests::from(file.hashes),

View file

@ -35,23 +35,11 @@ fn sorted_simple_json_files<'de, D: Deserializer<'de>>(d: D) -> Result<Vec<File>
/// A single (remote) file belonging to a package, either a wheel or a source distribution.
///
/// <https://peps.python.org/pep-0691/#project-detail>
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "kebab-case")]
#[derive(Debug, Clone)]
pub struct File {
// PEP 714-renamed field, followed by PEP 691-compliant field, followed by non-PEP 691-compliant
// alias used by PyPI.
//
// TODO(charlie): Use a single value here and move this into the deserializer, to save space.
pub core_metadata: Option<CoreMetadata>,
pub dist_info_metadata: Option<CoreMetadata>,
pub data_dist_info_metadata: Option<CoreMetadata>,
pub filename: SmallString,
pub hashes: Hashes,
/// There are a number of invalid specifiers on PyPI, so we first try to parse it into a
/// [`VersionSpecifiers`] according to spec (PEP 440), then a [`LenientVersionSpecifiers`] with
/// fixup for some common problems and if this still fails, we skip the file when creating a
/// version map.
#[serde(default, deserialize_with = "deserialize_version_specifiers_lenient")]
pub requires_python: Option<Result<VersionSpecifiers, VersionSpecifiersParseError>>,
pub size: Option<u64>,
pub upload_time: Option<Timestamp>,
@ -59,43 +47,75 @@ pub struct File {
pub yanked: Option<Box<Yanked>>,
}
fn deserialize_version_specifiers_lenient<'de, D>(
deserializer: D,
) -> Result<Option<Result<VersionSpecifiers, VersionSpecifiersParseError>>, D::Error>
where
D: Deserializer<'de>,
{
struct Visitor;
impl<'de> serde::de::Visitor<'de> for Visitor {
type Value = Option<Result<VersionSpecifiers, VersionSpecifiersParseError>>;
fn expecting(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
f.write_str("a string representing a version specifier")
}
fn visit_str<E: serde::de::Error>(self, v: &str) -> Result<Self::Value, E> {
Ok(Some(
LenientVersionSpecifiers::from_str(v).map(VersionSpecifiers::from),
))
}
fn visit_some<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
impl<'de> Deserialize<'de> for File {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
deserializer.deserialize_str(Visitor)
struct FileVisitor;
impl<'de> serde::de::Visitor<'de> for FileVisitor {
type Value = File;
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
formatter.write_str("a map containing file metadata")
}
fn visit_none<E>(self) -> Result<Self::Value, E>
fn visit_map<M>(self, mut access: M) -> Result<Self::Value, M::Error>
where
E: serde::de::Error,
M: serde::de::MapAccess<'de>,
{
Ok(None)
let mut core_metadata = None;
let mut filename = None;
let mut hashes = None;
let mut requires_python = None;
let mut size = None;
let mut upload_time = None;
let mut url = None;
let mut yanked = None;
while let Some(key) = access.next_key::<String>()? {
match key.as_str() {
"core-metadata" | "dist-info-metadata" | "data-dist-info-metadata" => {
if core_metadata.is_none() {
core_metadata = access.next_value()?;
} else {
let _: serde::de::IgnoredAny = access.next_value()?;
}
}
"filename" => filename = Some(access.next_value()?),
"hashes" => hashes = Some(access.next_value()?),
"requires-python" => {
requires_python = access.next_value::<Option<&str>>()?.map(|s| {
LenientVersionSpecifiers::from_str(s).map(VersionSpecifiers::from)
});
}
"size" => size = Some(access.next_value()?),
"upload-time" => upload_time = Some(access.next_value()?),
"url" => url = Some(access.next_value()?),
"yanked" => yanked = Some(access.next_value()?),
_ => {
let _: serde::de::IgnoredAny = access.next_value()?;
}
}
}
deserializer.deserialize_option(Visitor)
Ok(File {
core_metadata,
filename: filename
.ok_or_else(|| serde::de::Error::missing_field("filename"))?,
hashes: hashes.ok_or_else(|| serde::de::Error::missing_field("hashes"))?,
requires_python,
size,
upload_time,
url: url.ok_or_else(|| serde::de::Error::missing_field("url"))?,
yanked,
})
}
}
deserializer.deserialize_map(FileVisitor)
}
}
#[derive(Debug, Clone)]